In [1]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [2]:
import tensorflow as tf
tf.test.gpu_device_name()

''

In [3]:
!pip install scapy

Collecting scapy
  Downloading scapy-2.5.0.tar.gz (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scapy
  Building wheel for scapy (setup.py) ... [?25l[?25hdone
  Created wheel for scapy: filename=scapy-2.5.0-py2.py3-none-any.whl size=1444323 sha256=3aec121e525ecfee7e2df655821a8a58392e4149d122c414a0f939a75de4d3f9
  Stored in directory: /root/.cache/pip/wheels/82/b7/03/8344d8cf6695624746311bc0d389e9d05535ca83c35f90241d
Successfully built scapy
Installing collected packages: scapy
Successfully installed scapy-2.5.0


In [None]:
# Import scapy and pandas
from scapy.all import *
import pandas as pd
from collections import defaultdict

def calculate_iats(packets):
    iats = [(packets[i].time - packets[i - 1].time) * 10**6 for i in range(1, len(packets))]
    return iats

def calculate_stats(data):
    return {
        'Max': data.max(),
        'Mean': data.mean(),
        'Std': data.std(),
        'Min': data.min()  # Add 'Min' to the stats dictionary
    }

def calculate_packets_per_second(count, duration):
    return count / (duration / 10**6) if duration else 0

def extract_protocol(packet):
    return packet.sprintf('%IP.proto%')

def extract_src_port(packet):
    return packet.sprintf('%IP.sport%')

# Read the DAPT dataset as a pcap file
dapt = rdpcap("/content/drive/My Drive/projet/FILE_PCAP/dos-synflooding-2-dec.pcap")

# Create an empty list to store the feature dictionaries
features_list = []

# Loop through each flow in the dataset
for flow in dapt.sessions():

    # Get the list of packets in the flow
    packets = dapt.sessions()[flow]

    # Get the start and end time of the flow (in seconds)
    start_time = packets[0].time
    end_time = packets[-1].time

    # Calculate the flow duration (in microseconds)
    flow_duration = (end_time - start_time) * 10**6

    # Calculate the flow inter-arrival times and idle times
    flow_iats = calculate_iats(packets)
    active_times = [flow_iats[i] for i in range(len(flow_iats)) if packets[i].sprintf('%IP.src%') != packets[0].sprintf('%IP.src%')]
    idle_times = [flow_iats[i] for i in range(len(flow_iats)) if packets[i].sprintf('%IP.src%') == packets[0].sprintf('%IP.src%')]

    # Calculate statistics for various features
    flow_iat_stats = calculate_stats(pd.Series(flow_iats))
    fwd_iat_stats = calculate_stats(pd.Series(active_times))
    bwd_iat_stats = calculate_stats(pd.Series(idle_times))
    pkt_len_min = min(len(pkt) for pkt in packets)

    # Get the protocol and src port of the flow
    protocol = extract_protocol(packets[0])
    src_port = extract_src_port(packets[0])

    # Create a dictionary to store the feature values for the current flow
    feature_dict = {
        "ACK Flag Count": sum(1 for pkt in packets if pkt.haslayer(TCP) and pkt[TCP].flags & 0x10),
        "Active Max": fwd_iat_stats['Max'],
        "Active Mean": fwd_iat_stats['Mean'],
        "Active Std": fwd_iat_stats['Std'],
        "Bwd IAT Max": bwd_iat_stats['Max'],
        "Bwd IAT Mean": bwd_iat_stats['Mean'],
        "Bwd IAT Std": bwd_iat_stats['Std'],
        "Bwd IAT Total": sum(idle_times),
        "Bwd PSH Flags": sum(1 for pkt in packets if pkt.haslayer(TCP) and pkt[TCP].flags & 0x08 and packets[0].sprintf('%IP.src%') != pkt.sprintf('%IP.src%')),
        "Bwd Packet Length Min": pkt_len_min if packets[0].sprintf('%IP.src%') != packets[-1].sprintf('%IP.src%') else 0,
        "Bwd Packets/s": calculate_packets_per_second(len(idle_times), sum(idle_times)),
        "Flow Duration": flow_duration,
        "Flow IAT Max": flow_iat_stats['Max'],
        "Flow IAT Mean": flow_iat_stats['Mean'],
        "Flow IAT Std": flow_iat_stats['Std'],
        "Flow Packets/s": calculate_packets_per_second(len(packets), flow_duration),
        "Fwd IAT Max": fwd_iat_stats['Max'],
        "Fwd IAT Mean": fwd_iat_stats['Mean'],
        "Fwd IAT Std": fwd_iat_stats['Std'],
        "Fwd IAT Total": sum(active_times),
        "Fwd Packet Length Min": pkt_len_min if packets[0].sprintf('%IP.src%') == packets[-1].sprintf('%IP.src%') else 0,
        "Fwd Packets/s": calculate_packets_per_second(len(active_times), sum(active_times)),
        "Idle Max": bwd_iat_stats['Max'],
        "Idle Mean": bwd_iat_stats['Mean'],
        "Idle Min": bwd_iat_stats['Min'],  # Fix the key name here to access the minimum value
        "Idle Std": bwd_iat_stats['Std'],
        "PSH Flag Count": sum(1 for pkt in packets if pkt.haslayer(TCP) and pkt[TCP].flags & 0x08),
        "Packet Length Min": pkt_len_min,
        "Protocol": protocol,
        "SYN Flag Count": sum(1 for pkt in packets if pkt.haslayer(TCP) and pkt[TCP].flags & 0x02),
        "Src Port": src_port
    }

    # Append the feature dictionary to the list
    features_list.append(feature_dict)

# Create the final dataframe
features = pd.DataFrame(features_list)

# Replace NaN values with 0
features.fillna(0, inplace=True)


print(features)
features.to_csv('/content/drive/My Drive/projet/extracted.csv', index=False)

  fwd_iat_stats = calculate_stats(pd.Series(active_times))
  flow_iat_stats = calculate_stats(pd.Series(flow_iats))
  bwd_iat_stats = calculate_stats(pd.Series(idle_times))
