In [1]:
# Generate synthetic dataset for network traffic
# !pip install pandas
# !pip install numpy
from datetime import datetime, timedelta
from pathlib import Path

import numpy as np
import pandas as pd

In [2]:
def generate_normal_traffic():
    """Generate one synthetic record of normal (benign) network traffic.

    Every field is drawn from NumPy's global random state, so calling
    ``np.random.seed`` beforehand makes all fields except the absolute
    timestamp repeatable.

    Returns:
        dict: A single flow record with the keys ``timestamp``, ``source_ip``,
        ``dest_ip``, ``protocol``, ``port``, ``bytes``, ``packets``,
        ``duration`` and ``is_attack`` (always 0).
    """
    return {
        # Random instant within the 24 hours preceding the call.
        'timestamp': datetime.now() - timedelta(seconds=np.random.randint(0, 86400)),
        'source_ip': f"192.168.{np.random.randint(1, 255)}.{np.random.randint(1, 255)}",
        'dest_ip': f"10.0.{np.random.randint(1, 255)}.{np.random.randint(1, 255)}",
        'protocol': np.random.choice(['TCP', 'UDP', 'ICMP'], p=[0.7, 0.2, 0.1]),
        # randint's upper bound is exclusive; 65536 is required to include port 65535.
        'port': np.random.randint(1, 65536),
        # N(1500, 500) has an unbounded lower tail (about 0.1% of draws fall
        # below zero); clamp so a byte count can never be negative.
        'bytes': max(0.0, np.random.normal(1500, 500)),
        'packets': np.random.randint(1, 100),
        'duration': np.random.exponential(30),
        'is_attack': 0
    }


In [3]:
def generate_attack_traffic():
    """Generate one synthetic record of attack network traffic.

    An attack type is picked uniformly from ``ddos``, ``port_scan`` and
    ``brute_force``; the ``bytes``, ``packets`` and ``duration`` fields are then
    drawn from that type's distributions. All randomness comes from NumPy's
    global random state.

    Returns:
        dict: A single flow record with the keys ``timestamp``, ``source_ip``,
        ``dest_ip``, ``protocol``, ``port``, ``bytes``, ``packets``,
        ``duration``, ``is_attack`` (always 1) and ``attack_type``.
    """
    # Each profile is a callable so that only the selected attack type is
    # sampled; building all three eagerly would waste six random draws per call.
    attack_profiles = {
        'ddos': lambda: {
            'bytes': np.random.normal(15000, 1000),
            'packets': np.random.randint(500, 1000),
            'duration': np.random.exponential(5)
        },
        'port_scan': lambda: {
            'bytes': np.random.normal(100, 20),
            'packets': np.random.randint(1, 3),
            'duration': np.random.exponential(0.1)
        },
        'brute_force': lambda: {
            'bytes': np.random.normal(500, 100),
            'packets': np.random.randint(10, 50),
            'duration': np.random.exponential(1)
        }
    }

    attack_type = np.random.choice(list(attack_profiles.keys()))
    attack_params = attack_profiles[attack_type]()

    return {
        # Random instant within the 24 hours preceding the call.
        'timestamp': datetime.now() - timedelta(seconds=np.random.randint(0, 86400)),
        'source_ip': f"45.{np.random.randint(1, 255)}.{np.random.randint(1, 255)}.{np.random.randint(1, 255)}",
        'dest_ip': f"10.0.{np.random.randint(1, 255)}.{np.random.randint(1, 255)}",
        'protocol': np.random.choice(['TCP', 'UDP', 'ICMP'], p=[0.8, 0.1, 0.1]),
        # randint's upper bound is exclusive; 65536 is required to include port 65535.
        'port': np.random.randint(1, 65536),
        # Clamp so an extreme draw from the normal tail can never yield a negative size.
        'bytes': max(0.0, attack_params['bytes']),
        'packets': attack_params['packets'],
        'duration': attack_params['duration'],
        'is_attack': 1,
        'attack_type': attack_type
    }


In [4]:
def generate_dataset(normal_samples=10000, attack_samples=1000, seed=None):
    """Build a labelled synthetic traffic dataset.

    Args:
        normal_samples: Number of records produced by ``generate_normal_traffic``.
        attack_samples: Number of records produced by ``generate_attack_traffic``.
        seed: Optional integer. When given, NumPy's global random state is
            seeded before sampling so the random draws are repeatable. Absolute
            timestamps still depend on the wall clock at call time. The default
            ``None`` leaves the random state untouched.

    Returns:
        pandas.DataFrame: One row per record, normal rows first, followed by
        attack rows (the frame is not shuffled). Keys missing from a record,
        such as ``attack_type`` on normal rows, become NaN.
    """
    if seed is not None:
        # Seeds the global state because the generators use np.random.* directly.
        np.random.seed(seed)

    normal_traffic = [generate_normal_traffic() for _ in range(normal_samples)]
    attack_traffic = [generate_attack_traffic() for _ in range(attack_samples)]

    return pd.DataFrame(normal_traffic + attack_traffic)


In [9]:
# Generate and save the training dataset.
# to_csv does not create missing parent folders, so make sure 'dataset/' exists first.
Path('dataset').mkdir(parents=True, exist_ok=True)
training_data = generate_dataset()
training_data.to_csv('dataset/network_traffic_training.csv', index=False)

In [11]:
# Generate and save a smaller test dataset (1,000 normal + 100 attack records).
# to_csv does not create missing parent folders, so make sure 'dataset/' exists first.
Path('dataset').mkdir(parents=True, exist_ok=True)
test_data = generate_dataset(normal_samples=1000, attack_samples=100)
test_data.to_csv('dataset/network_traffic_test.csv', index=False)