In [1]:
import os
from datetime import datetime, timedelta

import numpy as np
import pandas as pd

In [8]:
def generate_traffic_patterns(normal_samples=7000, attack_samples=3000, seed=None):
    """Generate diverse network traffic patterns including sophisticated attacks.

    Builds a synthetic flow table: ``normal_samples`` benign rows (60%
    ``normal_web``, 40% ``normal_internal``) followed by ``attack_samples``
    attack rows drawn uniformly from the attack patterns defined below.
    Rows are appended in that order and are not shuffled.

    Args:
        normal_samples: Number of benign rows to generate (default 7000).
        attack_samples: Number of attack rows to generate (default 3000).
        seed: Optional seed. When given, a private
            ``np.random.RandomState(seed)`` is used so every column except
            ``timestamp`` (which is relative to the current time) is
            repeatable. When ``None``, NumPy's global legacy RNG is used, so
            a prior ``np.random.seed(...)`` call still governs the output.

    Returns:
        pandas.DataFrame with the columns ``timestamp``, ``source_ip``,
        ``dest_ip``, ``protocol``, ``port``, ``bytes``, ``packets``,
        ``duration``, ``is_attack`` and ``pattern_type``. ``pattern_type`` is
        ``'normal'`` for benign rows and the attack pattern name otherwise.

    Raises:
        ValueError: If either sample count is negative.
    """
    if normal_samples < 0 or attack_samples < 0:
        raise ValueError("normal_samples and attack_samples must be non-negative")

    # The np.random module and RandomState expose the same randint/choice/uniform
    # API, so either can serve as the generator object.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Capture the reference time once so every row is offset from the same instant.
    now = datetime.now()

    columns = [
        'timestamp', 'source_ip', 'dest_ip', 'protocol', 'port',
        'bytes', 'packets', 'duration', 'is_attack', 'pattern_type',
    ]

    def is_reserved(first_octet, second_octet):
        """Return True for private, loopback and link-local IPv4 prefixes."""
        return (
            first_octet in (10, 127)                              # 10/8, 127/8
            or (first_octet == 172 and 16 <= second_octet <= 31)  # 172.16/12
            or (first_octet == 169 and second_octet == 254)       # 169.254/16
        )

    def generate_ip(ip_type='internal'):
        """Return a random 192.168.x.y address, or a non-reserved external one."""
        if ip_type == 'internal':
            return f"192.168.{rng.randint(1, 255)}.{rng.randint(1, 255)}"

        # External IPs excluding private ranges. The first octet is never 192
        # (so 192.168/16 cannot occur); the remaining reserved prefixes are
        # rejected and redrawn.
        while True:
            first_octet = rng.choice([
                rng.randint(1, 192),
                rng.randint(193, 223)
            ])
            second_octet = rng.randint(1, 255)
            if not is_reserved(first_octet, second_octet):
                break
        return f"{first_octet}.{second_octet}.{rng.randint(1, 255)}.{rng.randint(1, 255)}"

    patterns = {
        # Normal Traffic Patterns
        'normal_web': {
            'source_ip': 'external',
            'ports': [80, 443],
            'protocols': ['TCP'],
            'bytes_range': (500, 2000),
            'packets_range': (5, 20),
            'duration_range': (0.1, 2.0),
            'is_attack': 0
        },
        'normal_internal': {
            'source_ip': 'internal',
            'ports': [80, 443, 53, 123],
            'protocols': ['TCP', 'UDP'],
            'bytes_range': (100, 1500),
            'packets_range': (2, 15),
            'duration_range': (0.05, 1.0),
            'is_attack': 0
        },

        # Attack Patterns
        'ddos_flood': {
            'source_ip': 'external',
            'ports': [80, 443],
            'protocols': ['TCP', 'UDP'],
            'bytes_range': (15000, 50000),
            'packets_range': (500, 2000),
            'duration_range': (0.01, 0.1),
            'is_attack': 1
        },
        'port_scan_stealth': {
            'source_ip': 'external',
            'ports': list(range(1, 1024)),  # Common ports
            'protocols': ['TCP'],
            'bytes_range': (40, 100),
            'packets_range': (1, 3),
            'duration_range': (0.001, 0.01),
            'is_attack': 1
        },
        'brute_force_ssh': {
            'source_ip': 'external',
            'ports': [22],
            'protocols': ['TCP'],
            'bytes_range': (300, 800),
            'packets_range': (5, 15),
            'duration_range': (0.05, 0.2),
            'is_attack': 1
        },
        'data_exfiltration': {
            'source_ip': 'internal',
            'ports': [443, 8080, 53],
            'protocols': ['TCP', 'UDP'],
            'bytes_range': (50000, 100000),
            'packets_range': (100, 300),
            'duration_range': (1.0, 5.0),
            'is_attack': 1
        },
        'sql_injection': {
            'source_ip': 'external',
            'ports': [80, 443, 8080],
            'protocols': ['TCP'],
            'bytes_range': (2000, 5000),
            'packets_range': (10, 30),
            'duration_range': (0.2, 1.0),
            'is_attack': 1
        }
    }

    def generate_single_traffic(pattern_name, pattern):
        """Draw one flow record from the given pattern definition."""
        return {
            # Random offset within the last 24 hours; int() keeps timedelta
            # happy regardless of the integer type the RNG returns.
            'timestamp': now - timedelta(seconds=int(rng.randint(0, 86400))),
            'source_ip': generate_ip(pattern['source_ip']),
            'dest_ip': generate_ip('internal'),
            'protocol': rng.choice(pattern['protocols']),
            'port': rng.choice(pattern['ports']),
            # randint's upper bound is exclusive, so the range maximum itself
            # is never drawn for bytes/packets.
            'bytes': rng.randint(*pattern['bytes_range']),
            'packets': rng.randint(*pattern['packets_range']),
            'duration': rng.uniform(*pattern['duration_range']),
            'is_attack': pattern['is_attack'],
            'pattern_type': pattern_name if pattern['is_attack'] == 1 else 'normal'
        }

    # Generate dataset with distribution of patterns
    traffic_data = []

    # Normal traffic (70% of total with the default counts)
    for _ in range(normal_samples):
        pattern_name = rng.choice(['normal_web', 'normal_internal'], p=[0.6, 0.4])
        traffic_data.append(generate_single_traffic(pattern_name, patterns[pattern_name]))

    # Attack traffic (30% of total with the default counts). The list is derived
    # from the pattern table so it cannot drift from the definitions above.
    attack_patterns = [name for name, spec in patterns.items() if spec['is_attack'] == 1]
    for _ in range(attack_samples):
        pattern_name = rng.choice(attack_patterns)
        traffic_data.append(generate_single_traffic(pattern_name, patterns[pattern_name]))

    # Passing the column list keeps the schema stable even for a zero-row result.
    return pd.DataFrame(traffic_data, columns=columns)

In [3]:
def generate_attack_traffic():
    """Generate more realistic attack traffic data.

    Picks one attack type uniformly at random ('ddos', 'port_scan' or
    'brute_force') and draws a single flow record for it using NumPy's global
    RNG.

    Returns:
        dict with keys ``timestamp``, ``source_ip``, ``dest_ip``, ``protocol``,
        ``port``, ``bytes``, ``packets``, ``duration``, ``is_attack`` (always 1)
        and ``attack_type``. ``bytes`` is a positive integer and ``duration``
        is strictly positive.
    """
    # Per attack type:
    #   bytes    -> (mean, std) of a normal distribution
    #   packets  -> [low, high) bounds for a uniform integer draw
    #   duration -> (exponential scale, std of additive normal jitter)
    attack_types = {
        'ddos': {
            # Add more overlap with normal traffic
            'bytes': (8000, 3000),  # More variance
            'packets': (100, 1000),
            'duration': (5, 2)
        },
        'port_scan': {
            'bytes': (200, 100),
            'packets': (1, 10),
            'duration': (0.1, 0.05)
        },
        'brute_force': {
            'bytes': (800, 300),
            'packets': (5, 100),
            'duration': (2, 1)
        }
    }

    attack_type = np.random.choice(list(attack_types.keys()))
    attack_params = attack_types[attack_type]

    def draw_positive(sample):
        """Redraw until the sample is > 0.

        The normal terms can push a draw below zero, which is meaningless for
        a byte count or a duration. Redrawing truncates the distribution
        instead of piling clipped values onto a single floor value.
        """
        while True:
            value = sample()
            if value > 0:
                return value

    byte_mean, byte_std = attack_params['bytes']
    duration_scale, duration_jitter = attack_params['duration']

    # Only the selected attack type is sampled.
    byte_count = draw_positive(lambda: int(round(np.random.normal(byte_mean, byte_std))))
    packet_count = np.random.randint(*attack_params['packets'])
    duration = draw_positive(
        lambda: np.random.exponential(duration_scale) + np.random.normal(0, duration_jitter)
    )

    return {
        'timestamp': datetime.now() - timedelta(seconds=int(np.random.randint(0, 86400))),
        # Attackers might use any IP range
        'source_ip': f"{np.random.randint(1, 255)}.{np.random.randint(1, 255)}.{np.random.randint(1, 255)}.{np.random.randint(1, 255)}",
        'dest_ip': f"{np.random.choice(['192.168', '10.0', '172.16'])}.{np.random.randint(1, 255)}.{np.random.randint(1, 255)}",
        'protocol': np.random.choice(['TCP', 'UDP', 'ICMP'], p=[0.8, 0.1, 0.1]),
        # randint's upper bound is exclusive; 65536 makes port 65535 reachable.
        'port': np.random.randint(1, 65536),
        'bytes': byte_count,
        'packets': packet_count,
        'duration': duration,
        'is_attack': 1,
        'attack_type': attack_type
    }

In [10]:
# Generate training and test datasets
# Seed NumPy's global RNG before each call so both datasets can be regenerated
# after a kernel restart; distinct seeds keep the test set from being the same
# draw as the training set. Timestamps remain relative to the current time.
np.random.seed(42)
train_data = generate_traffic_patterns()
np.random.seed(43)
test_data = generate_traffic_patterns()


In [11]:
# Save datasets
# DataFrame.to_csv does not create missing parent directories, so make sure the
# output folder exists before writing.
os.makedirs('dataset', exist_ok=True)
train_data.to_csv('dataset/network_traffic_training_new.csv', index=False)
test_data.to_csv('dataset/network_traffic_test_new.csv', index=False)