In [3]:
# Report how many packets the sample holds. `traffic_data` is built by the
# data-generation cell, so that cell has to run before this one.
packet_count = len(traffic_data)
print(f"The sample generates {packet_count} packets.")

The sample generates 1000 packets.


In [None]:
import numpy as np
import pandas as pd

# Synthetic network-traffic generator.
#
# Builds `traffic_data`, a DataFrame with one row per simulated packet.
# All randomness comes from NumPy's global RNG, seeded below. The order of
# the np.random.* calls matters for reproducibility: moving one of them
# changes every value drawn after it.

# Set a random seed for reproducibility
np.random.seed(42)

# Number of data points
num_data_points = 1000

# Timestamps: exactly one packet per second, starting at 2023-01-01 00:00:00
timestamps = pd.to_datetime('2023-01-01') + pd.to_timedelta(np.arange(num_data_points), unit='s')

# Source and destination IPs are drawn independently, with replacement, from
# one shared pool (hosts .1 to .254 of 192.168.1.x and of 10.0.0.x), so a row
# can end up with identical source and destination addresses.
ip_pool = [f'192.168.1.{i}' for i in range(1, 255)] + [f'10.0.0.{i}' for i in range(1, 255)]
source_ips = np.random.choice(ip_pool, num_data_points)
destination_ips = np.random.choice(ip_pool, num_data_points)

# Packet sizes: integers from 64 to 1500 bytes inclusive
# (np.random.randint excludes its upper bound, hence 1501)
packet_sizes = np.random.randint(64, 1501, num_data_points)

# Protocol mix: 70% TCP, 20% UDP, 10% ICMP
protocols = np.random.choice(['TCP', 'UDP', 'ICMP'], num_data_points, p=[0.7, 0.2, 0.1])

# Ports: the candidate pool is built once and shared by both draws instead of
# materialising the same ~64.5k-element list twice. Every entry is equally
# likely, so the six well-known ports make up only 6 of the 64518 candidates.
# NOTE(review): ICMP rows also receive ports here; mask them if a later step
# treats ports as meaningful for every protocol.
common_ports = [80, 443, 22, 21, 25, 53]
port_pool = np.concatenate((common_ports, np.arange(1024, 65536)))
source_ports = np.random.choice(port_pool, num_data_points)
destination_ports = np.random.choice(port_pool, num_data_points)

# Flags: a flag is drawn for every row in one vectorised call, then blanked
# (None) wherever the protocol is not TCP.
tcp_flags = np.random.choice(['SYN', 'ACK', 'FIN', 'RST', 'PSH', 'URG'], num_data_points)
flags = np.where(protocols == 'TCP', tcp_flags, None)

# Create a pandas DataFrame
traffic_data = pd.DataFrame({
    'timestamp': timestamps,
    'source_ip': source_ips,
    'destination_ip': destination_ips,
    'packet_size': packet_sizes,
    'protocol': protocols,
    'source_port': source_ports,
    'destination_port': destination_ports,
    'flags': flags
})

# Cheap invariants so a bad edit to the generator fails here, not downstream
assert len(traffic_data) == num_data_points
assert traffic_data['packet_size'].between(64, 1500).all()
assert traffic_data.loc[traffic_data['protocol'] != 'TCP', 'flags'].isna().all()

# Display the first few rows of the generated data
display(traffic_data.head())

Unnamed: 0,timestamp,source_ip,destination_ip,packet_size,protocol,source_port,destination_port,flags
0,2023-01-01 00:00:00,192.168.1.103,192.168.1.253,552,TCP,54572,25708,ACK
1,2023-01-01 00:00:01,10.0.0.182,10.0.0.136,110,TCP,21954,44957,FIN
2,2023-01-01 00:00:02,10.0.0.95,192.168.1.82,1375,TCP,52377,30363,FIN
3,2023-01-01 00:00:03,10.0.0.17,10.0.0.117,841,UDP,5444,60098,
4,2023-01-01 00:00:04,192.168.1.107,10.0.0.2,1103,TCP,24321,47331,FIN


# Analyze the distribution of packet sizes and protocols

In [None]:
# Packet sizes: summary statistics from describe()
print("Packet Size Distribution:")
packet_size_summary = traffic_data['packet_size'].describe()
display(packet_size_summary)

# Protocols: number of packets per protocol
print("\nProtocol Distribution:")
protocol_counts = traffic_data['protocol'].value_counts()
display(protocol_counts)

Packet Size Distribution:


Unnamed: 0,packet_size
count,1000.0
mean,802.422
std,409.815225
min,65.0
25%,444.0
50%,818.0
75%,1156.25
max,1499.0



Protocol Distribution:


Unnamed: 0_level_0,count
protocol,Unnamed: 1_level_1
TCP,693
UDP,218
ICMP,89


# Basic Data Analysis

In [4]:
# Summary statistics for the packet-size column
print("Packet Size Distribution:")
size_column = traffic_data['packet_size']
display(size_column.describe())

# Packet count for each protocol value
print("\nProtocol Distribution:")
protocol_column = traffic_data['protocol']
display(protocol_column.value_counts())

Packet Size Distribution:


Unnamed: 0,packet_size
count,1000.0
mean,802.422
std,409.815225
min,65.0
25%,444.0
50%,818.0
75%,1156.25
max,1499.0



Protocol Distribution:


Unnamed: 0_level_0,count
protocol,Unnamed: 1_level_1
TCP,693
UDP,218
ICMP,89
