In [294]:
# import requests
# url = "http://localhost:5000/detect/LGBM"
# files = {
#     'file': open('F:/Semesters/Year3_Term2/MHUD/Network-IDS/DetectorModels/data/archive/UNSW_NB15_testing-set.csv', 'rb')
# }

# response = requests.post(url, files=files)
# # response.json()

In [295]:
import pandas as pd
from scapy.all import rdpcap, TCP, IP, IPv6, UDP, ICMP, ARP
import datetime


In [296]:
def get_service(port):
    services = {
        80: 'http',
        21: 'ftp',
        25: 'smtp',
        22: 'ssh',
        53: 'dns',
        20: 'ftp-data',
        194: 'irc',
        443: 'https',
        110: 'pop3',
        995: 'pop3s',
        143: 'imap',
        993: 'imaps'
    }
    return services.get(port, '-')

def get_state(tcp_flags):
    # Dictionary to map TCP flag combinations to state names
    state_mapping = {
        'F': 'FIN',     # Finish
        'S': 'SYN',     # Synchronize
        'R': 'RST',     # Reset
        'P': 'PUSH',    # Push
        'A': 'ACK',     # Acknowledge to FIN, for more synchronize with dataset, we don't use ACK, so we use FIN
        'U': 'URG',     # Urgent
        'E': 'ECE',     # ECN-Echo
        'C': 'CWR',     # Congestion Window Reduced
        
        # For combinations with multiple flags, we just use the first one
        'FS': 'FIN', 
        'FA': 'FIN',
        'SA': 'SYN-ACK',
        'RA': 'RST',
        'PA': 'PUSH',
        'UA': 'URG',
        'EA': 'ECE',
        'CA': 'CWR', 
    }

    # Map the flags to the corresponding state
    return state_mapping.get(str(tcp_flags), '-')

def get_ip_layer(packet):
    if IP in packet:
        return packet[IP]
    elif IPv6 in packet:
        return packet[IPv6]
    return None

def get_proto_layer(packet):
    if TCP in packet:
        return packet[TCP]
    elif UDP in packet:
        return packet[UDP]
    elif ICMP in packet:
        return packet[ICMP]
    elif ARP in packet:
        return packet[ARP]
    return None

def parse_proto(proto_num):
    protocols = {
        1: 'icmp',
        6: 'tcp',
        17: 'udp',
        0x0806: 'arp',
    }
    return protocols.get(proto_num, '-')

def is_http_request(packet):
    try:
        # Extract payload from packet
        payload = packet.payload
        if not payload:
            return False
        
        # Convert payload to string and check for HTTP request patterns
        payload_str = str(payload, 'utf-8', errors='ignore')
        
        # Check if the payload starts with a common HTTP request method
        request_methods = ['GET ', 'POST ', 'PUT ', 'DELETE ', 'HEAD ', 'OPTIONS ', 'PATCH ']
        return any(payload_str.startswith(method) for method in request_methods)
    
    except Exception as e:
        print(f"Error checking HTTP request: {e}")
        return False

def is_http_response(packet):
    try:
        # Extract payload from packet
        payload = packet.payload
        if not payload:
            return False
        
        # Convert payload to string and check for HTTP response patterns
        payload_str = str(payload, 'utf-8', errors='ignore')
        
        # Check if the payload starts with a common HTTP response status line
        response_status_prefixes = ['HTTP/', 'HTTP/1.0 ', 'HTTP/1.1 ', 'HTTP/2.0 ']
        return any(payload_str.startswith(prefix) for prefix in response_status_prefixes)
    
    except Exception as e:
        print(f"Error checking HTTP response: {e}")
        return False


In [297]:

def initialize_connections(packets):
    connections = {}
    
    for packet in packets:
        try:
            ip_layer = get_ip_layer(packet)
            pro_layer = get_proto_layer(packet)
            
            if ip_layer is None or pro_layer is None:
                continue
            
            src_ip = ip_layer.src
            dst_ip = ip_layer.dst
            src_port = pro_layer.sport
            dst_port = pro_layer.dport
            
            proto = parse_proto(ip_layer.proto)
            if proto == 'udp':
                state = 'INT'
            else:
                state = get_state(pro_layer.flags) if hasattr(pro_layer, 'flags') else '-'
                
            service = get_service(dst_port)
            
            ttl = ip_layer.ttl if hasattr(ip_layer, 'ttl') else 0
            
            swin = pro_layer.window if hasattr(pro_layer, 'window') else 0
            stcpb = pro_layer.seq if hasattr(pro_layer, 'seq') else 0
            
            timestamp = packet.time
            
            conn_key = (src_ip, dst_ip, src_port, dst_port, proto)
            rev_conn_key = (dst_ip, src_ip, dst_port, src_port, proto)
            
            if conn_key not in connections:
                connections[conn_key] = {
                    'src_ip': src_ip,
                    'dst_ip': dst_ip,
                    'src_port': src_port,
                    'dst_port': dst_port,
                    
                    
                    'start_time': timestamp,
                    'end_time': timestamp,
                    
                    'proto': proto,
                    'state': state,
                    'service': service,
                    
                    'spkts': 0,             # Source to destination packet count 
                    'dpkts': 0,             # Destination to source packet count
                    
                    'sbytes': 0,            # Source to destination transaction bytes 
                    'dbytes': 0,            # Destination to source transaction bytes
                    
                    'rate': 0.0,
                    
                    'sttl': ttl,            # Source to destination time to live value 
                    'dttl': 0,              # Destination to source time to live value
                    
                    'sload': 0.0,             # Source bits per second
                    'dload': 0.0,             # Destination bits per second
                    
                    'sloss': 0,             # Source packets retransmitted or dropped 
                    'dloss': 0,             # Destination packets retransmitted or dropped
                    
                    'sinpkt': 0.0,           # Source interpacket arrival time (mSec)
                    'dinpkt': 0.0,           # Destination interpacket arrival time (mSec)
                    
                    'sjit': 0.0,              # Source jitter (mSec)
                    'djit': 0.0,              # Destination jitter (mSec)
                    
                    'swin': swin,           # Source TCP window advertisement value
                    'stcpb': stcpb,         # Source TCP base sequence number
                    'dtcpb': 0,         # Destination TCP base sequence number
                    'dwin': 0,              # Destination TCP window advertisement value
                    
                    'tcprtt': 0.0,            # TCP connection setup round-trip time, the sum of ’synack’ and ’ackdat’.
                    'synack': 0.0,            # TCP connection setup time, the time between the SYN and the SYN_ACK packets.
                    'ackdat': 0.0,            # TCP connection setup time, the time between the SYN_ACK and the ACK packets.
                    
                    'smean': 0,             # Mean of the ?ow packet size transmitted by the src 
                    'dmean': 0,             # Mean of the ?ow packet size transmitted by the dst 

                    'trans_depth': 0,       # Represents the pipelined depth into the connection of http request/response transaction
                    'response_body_len': 0, # Actual uncompressed content size of the data transferred from the server’s http service.
                    
                    'ct_srv_src': 0,        # No. of connections that contain the same service (14) and source address (1) in 100 connections according to the last time (26).
                    'ct_state_ttl': 0,      # No. for each state (6) according to specific range of values for source/destination time to live (10) (11).
                    'ct_dst_ltm': 0,        # No. of connections of the same destination address (3) in 100 connections according to the last time (26).
                    'ct_src_dport_ltm': 0,  # No of connections of the same source address (1) and the destination port (4) in 100 connections according to the last time (26).
                    'ct_dst_sport_ltm': 0,  # No of connections of the same destination address (3) and the source port (2) in 100 connections according to the last time (26).
                    'ct_dst_src_ltm': 0,    # No of connections of the same source (1) and the destination (3) address in in 100 connections according to the last time (26).
                    'is_ftp_login': 0,      # If the ftp session is accessed by user and password then 1 else 0. 
                    'ct_ftp_cmd': 0,        # No of flows that has a command in ftp session.
                    'ct_flw_http_mthd': 0,  # No. of flows that has methods such as Get and Post in http service.
                    'ct_src_ltm': 0,        # No. of connections of the same source address (1) in 100 connections according to the last time (26).
                    'ct_srv_dst': 0,        # No. of connections that contain the same service (14) and destination address (3) in 100 connections according to the last time (26).

                    'is_sm_ips_ports': 0,    # If source (1) and destination (3)IP addresses equal and port numbers (2)(4)  equal then, this variable takes value 1 else 0
                    
                    
                    
                    
                    
                    
                    
                    'http_requests': 0,  # Track HTTP requests
                    'http_responses': 0, # Track HTTP responses
                }
            
            conn = connections[conn_key]
            conn['end_time'] = timestamp
            
            # TODO: Basic features
            conn['state'] = state
            conn['service'] = service
            conn['spkts'] += 1
            conn['sbytes'] += len(packet)
            conn['sttl'] = ttl
            conn['sload'] = (conn['sbytes'] * 8.0) / float(conn['end_time'] - conn['start_time'] + 1e-6)
            
            # sloss
            if proto == 'tcp' and hasattr(pro_layer, 'flags') and pro_layer.flags == 'R':
                conn['sloss'] += 1
            
            # TODO: Content features
            conn['swin'] = swin
            conn['stcpb'] = stcpb
            conn['smean'] = int(conn['sbytes'] / (conn['spkts'] + 1e-6))
            
            # trans_depth
            if proto == 'tcp' and service == 'http':
                if is_http_request(packet):
                    conn['http_requests'] += 1
                elif is_http_response(packet):
                    conn['http_responses'] += 1
            conn['trans_depth'] = conn['http_requests'] - conn['http_responses']
            
            # response_body_len
            if proto == 'tcp' and service == 'HTTP':
                if hasattr(pro_layer, 'http_body_length'):
                    conn['response_body_len'] += pro_layer.http_body_length
            
            # TODO: Time features
            # sinpkt
            if 'last_timestamp' in conn:
                conn['sinpkt'] = (timestamp - conn['last_timestamp']) * 1000  # Convert to milliseconds
            else:
                conn['sinpkt'] = 0
            conn['last_timestamp'] = timestamp
            
            # sjit
            if 'arrival_times' not in conn:
                conn['arrival_times'] = []
            if 'last_arrival' in conn:
                interarrival_time = (timestamp - conn['last_arrival']) * 1000  # Convert to milliseconds
                conn['arrival_times'].append(interarrival_time)
                if len(conn['arrival_times']) > 1:
                    mean_interarrival = sum(conn['arrival_times']) / len(conn['arrival_times'])
                    variance = sum((x - mean_interarrival) ** 2 for x in conn['arrival_times']) / len(conn['arrival_times'])
                    conn['sjit'] = variance ** 0.5  # Jitter is the standard deviation of interarrival times
                else:
                    conn['sjit'] = 0
            else:
                conn['sjit'] = 0
            conn['last_arrival'] = timestamp
            
            # synack, ackdat, tcprtt
            if proto == 'tcp' and state == 'SYN':
                conn['syn_time'] = timestamp
            elif proto == 'tcp' and state == 'SYN-ACK':
                conn['synack_time'] = timestamp
            elif proto == 'tcp' and state == 'ACK':
                conn['ack_time'] = timestamp
            
            
            # TODO: General purpose features
            # ct_state_ttl (Count of connections with same state and TTL)
            
            # ct_flw_http_mthd (Count of flows that has methods such as Get and Post in http service)
            if proto == 'tcp' and service == 'http':
                print(dir(pro_layer))
                if hasattr(pro_layer, 'http_method'):
                    conn['ct_flw_http_mthd'] += 1
                    
            # is_ftp_login (If the ftp session is accessed by user and password then 1 else 0)
            if proto == 'tcp' and service == 'FTP':
                if hasattr(pro_layer, 'ftp_command') and 'USER' in pro_layer.ftp_command:
                    conn['is_ftp_login'] = 1
            # ct_ftp_cmd (Count of flows that has a command in ftp session)
            if proto == 'tcp' and service == 'FTP':
                if hasattr(pro_layer, 'ftp_command'):
                    conn['ct_ftp_cmd'] += 1
                    
            # is_sm_ips_ports (If source (1) and destination (3)IP addresses equal and port numbers (2)(4)  equal then, this variable takes value 1 else 0)
            conn['is_sm_ips_ports'] = int(src_ip == dst_ip and src_port == dst_port)

            # TODO: Connection Counts (ct_* metrics)
            # ct_srv_src: Count of connections with same service and source IP in 100 connections according to the last time (26).
            
            # ct_state_ttl: Count of connections with same state and TTL
            
            # ct_dst_ltm: Count of connections of the same destination address (3) in 100 connections according to the last time (26).
            
            # ct_src_dport_ltm: Count of connections of the same source address (1) and the destination port (4) in 100 connections according to the last time (26).
            
            # ct_dst_sport_ltm: Count of connections of the same destination address (3) and the source port (2) in 100 connections according to the last time (26).
            
            # ct_dst_src_ltm: Count of connections of the same source (1) and the destination (3) address in in 100 connections according to the last time (26).
            
            # ct_srv_dst: Count of connections that contain the same service (14) and destination address (3) in 100 connections according to the last time (26).

        except Exception as e:
            print(f"Error processing packet: {packet}")
            print(f"Error initializing connection: {e}")
            continue

    return connections

In [298]:
from collections import defaultdict, deque
import numpy as np

# Initialize data structures
rolling_window_size = 100
rolling_windows = {
    'connections': deque(maxlen=rolling_window_size),
}
feature_counters = {
    'srv_src': defaultdict(int),
    'srv_dst': defaultdict(int),
    'dst_ltm': defaultdict(int),
    'src_ltm': defaultdict(int),
    'src_dport_ltm': defaultdict(int),
    'dst_sport_ltm': defaultdict(int),
    'dst_src_ltm': defaultdict(int),
    'flw_http_mthd': 0,
    'ftp_login': 0,
    'ftp_cmd': 0,
}

def process_connection(conn):
    # Extract relevant details from the connection
    src_ip = conn['src_ip']
    dst_ip = conn['dst_ip']
    src_port = conn['src_port']
    dst_port = conn['dst_port']
    proto = conn['proto']
    service = conn['service']
    
    # Update rolling window
    rolling_windows['connections'].append(conn)

    # General purpose features
    # conn['is_sm_ips_ports'] = 1 if src_ip == dst_ip and src_port == dst_port else 0

    if proto == 'http':
        if conn.get('http_method') in ['GET', 'POST']:
            feature_counters['flw_http_mthd'] += 1

    if proto == 'ftp':
        if conn.get('ftp_login') == 1:
            feature_counters['ftp_login'] += 1
        if conn.get('ftp_cmd'):
            feature_counters['ftp_cmd'] += 1

    # Connection features
    for c in rolling_windows['connections']:
        feature_counters['srv_src'][(c['service'], c['src_ip'])] += 1
        feature_counters['srv_dst'][(c['service'], c['dst_ip'])] += 1
        feature_counters['dst_ltm'][c['dst_ip']] += 1
        feature_counters['src_ltm'][c['src_ip']] += 1
        feature_counters['src_dport_ltm'][(c['src_ip'], c['dst_port'])] += 1
        feature_counters['dst_sport_ltm'][(c['dst_ip'], c['src_port'])] += 1
        feature_counters['dst_src_ltm'][(c['src_ip'], c['dst_ip'])] += 1

    # Update connection counts for current connection
    conn['ct_srv_src'] = feature_counters['srv_src'][(service, src_ip)]
    conn['ct_srv_dst'] = feature_counters['srv_dst'][(service, dst_ip)]
    conn['ct_dst_ltm'] = feature_counters['dst_ltm'][dst_ip]
    conn['ct_src_ltm'] = feature_counters['src_ltm'][src_ip]
    conn['ct_src_dport_ltm'] = feature_counters['src_dport_ltm'][(src_ip, dst_port)]
    conn['ct_dst_sport_ltm'] = feature_counters['dst_sport_ltm'][(dst_ip, src_port)]
    conn['ct_dst_src_ltm'] = feature_counters['dst_src_ltm'][(src_ip, dst_ip)]

    # Add additional features to connection
    conn['is_flw_http_mthd'] = feature_counters['flw_http_mthd']
    conn['is_ftp_login'] = feature_counters['ftp_login']
    conn['ct_ftp_cmd'] = feature_counters['ftp_cmd']

In [299]:

def aggregate_connections(connections):
    seen_connections = set()
    aggregated_connections = {}
    for conn_key, conn in connections.items():
        src_ip, dst_ip, src_port, dst_port, proto = conn_key
        rev_conn_key = (dst_ip, src_ip, dst_port, src_port, proto)
        
        if rev_conn_key not in connections:
            continue
        
        if rev_conn_key not in seen_connections:
            seen_connections.add(conn_key)
            rev_conn = connections[rev_conn_key]
            
            # Update destination connection statistics
            conn['end_time'] = rev_conn['end_time']
            conn['dpkts'] = rev_conn['spkts']
            conn['dbytes'] = rev_conn['sbytes']
            
            total_bytes = conn['sbytes'] + conn['dbytes']
            conn['rate'] = total_bytes / (conn['end_time'] - conn['start_time'])
            
            conn['dttl'] = rev_conn['sttl']
            conn['dload'] = rev_conn['sload']
            conn['dloss'] = rev_conn['sloss']
            conn['dinpkt'] = rev_conn['sinpkt']
            conn['djit'] = rev_conn['sjit']
            conn['dwin'] = rev_conn['swin']
            conn['dtcpb'] = rev_conn['stcpb']
        
            # A --> B : SYN
            # B --> A: SYN-ACK
            # A --> B: ACK
            # So syn_time, and ack_time belong to A --> B
            #              synack_time belongs to B --> A
            conn['synack'] = rev_conn['synack_time'] - conn['syn_time'] if 'synack_time' in rev_conn and 'syn_time' in conn else 0
            conn['ackdat'] = conn['ack_time'] - rev_conn['synack_time'] if 'ack_time' in conn and 'synack_time' in rev_conn else 0
            conn['tcprtt'] = conn['synack'] + conn['ackdat']
            
            conn['dmean'] = int(conn['dbytes'] / (conn['dpkts'] + 1e-6))
            
            # process_connection(conn)
            
            aggregated_connections[conn_key] = conn
    
    return aggregated_connections

In [300]:
def parse_pcap(file):
    packets = rdpcap(file)
    data = []
    connections = initialize_connections(packets)
    aggregated_connections = aggregate_connections(connections)
            
    data = []
    for conn_key, conn in aggregated_connections.items():
        src_ip, dst_ip, src_port, dst_port, proto = conn_key
        dur = conn['end_time'] - conn['start_time']
        data.append([
            dur, 
            conn['proto'], 
            conn['service'], 
            conn['state'], 
            conn['spkts'], 
            conn['dpkts'], 
            conn['sbytes'], 
            conn['dbytes'],
            conn['rate'], 
            conn['sttl'], 
            conn['dttl'], 
            conn['sload'], 
            conn['dload'], 
            conn['sloss'], 
            conn['dloss'], 
            conn['sinpkt'],
            conn['dinpkt'], 
            conn['sjit'], 
            conn['djit'], 
            conn['swin'], 
            conn['stcpb'], 
            conn['dtcpb'], 
            conn['dwin'], 
            conn['tcprtt'],
            conn['synack'], 
            conn['ackdat'], 
            conn['smean'], 
            conn['dmean'], 
            conn['trans_depth'], 
            conn['response_body_len'],
            conn['ct_state_ttl'], 
            conn['is_ftp_login'],
            conn['ct_ftp_cmd'],
            conn['ct_flw_http_mthd'], 
            conn['is_sm_ips_ports'],
            
            # conn['ct_srv_src'],
            # conn['ct_dst_ltm'], 
            # conn['ct_src_dport_ltm'],
            # conn['ct_dst_sport_ltm'],
            # conn['ct_dst_src_ltm'], 
            # conn['ct_src_ltm'], 
            # conn['ct_srv_dst'], 
        ])

    return pd.DataFrame(data, columns=[
        'dur', 
        'proto', 
        'service', 
        'state', 
        'spkts', 
        'dpkts', 
        'sbytes', 
        'dbytes',
        'rate', 
        'sttl', 
        'dttl', 
        'sload', 
        'dload', 
        'sloss', 
        'dloss', 
        'sinpkt',
        'dinpkt', 
        'sjit', 
        'djit', 
        'swin', 
        'stcpb', 
        'dtcpb', 
        'dwin', 
        'tcprtt',
        'synack', 
        'ackdat', 
        'smean', 'dmean', 
        'trans_depth', 
        'response_body_len',
        'ct_state_ttl', 
        'is_ftp_login', 
        'ct_ftp_cmd',
        'ct_flw_http_mthd', 
        'is_sm_ips_ports',
        
        # 'ct_srv_src', 
        # 'ct_dst_ltm', 
        # 'ct_src_dport_ltm',
        # 'ct_dst_sport_ltm', 
        # 'ct_dst_src_ltm', 
        # 'ct_src_ltm',
        # 'ct_srv_dst', 
    ])

# Read and parse the pcap file
df = parse_pcap('wifi_capture.pcap')

# Save the DataFrame to a CSV file
df.to_csv('output3.csv', index=False)
df

Unnamed: 0,dur,proto,service,state,spkts,dpkts,sbytes,dbytes,rate,sttl,...,ackdat,smean,dmean,trans_depth,response_body_len,ct_state_ttl,is_ftp_login,ct_ftp_cmd,ct_flw_http_mthd,is_sm_ips_ports
0,45.035808,udp,-,INT,180,180,18360,18360,0,128,...,0,101,101,0,0,0,0,0,0,0
1,39.928921,tcp,https,ACK,9,9,603,598,0,128,...,0,66,66,0,0,0,0,0,0,0
2,0.119102,tcp,https,ACK,1,1,55,66,0,128,...,0,54,65,0,0,0,0,0,0,0
3,30.226420,tcp,https,ACK,10,8,4946,1080,0,128,...,0,494,134,0,0,0,0,0,0,0
4,40.068587,tcp,-,ACK,12,8,939,745,0,128,...,0,78,93,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60,0.015467,tcp,https,ACK,1,1,55,54,0,128,...,0,54,53,0,0,0,0,0,0,0
61,0.017970,tcp,https,ACK,1,1,55,54,0,128,...,0,54,53,0,0,0,0,0,0,0
62,0.018697,tcp,dns,ACK,6,6,371,387,0,128,...,0.012844,61,64,0,0,0,0,0,0,0
63,0.018887,tcp,dns,ACK,6,5,371,374,0,128,...,0.012882,61,74,0,0,0,0,0,0,0
