# Importing Libraries

In [6]:
import scapy.all as scapy
import numpy as np
import joblib
import pandas as pd

# Code

In [3]:
df = pd.read_csv('Dataset/ML-EdgeIIOT-dataset-cleaned-2.csv')

In [None]:
# Create a summary DataFrame
summary_df = pd.DataFrame({
    "Column Name": df.columns,
    "Min Value": df.min(),
    "Max Value": df.max(),
    "Unique Values": df.nunique()
}).reset_index(drop=True)   

# Print the table
summary_df

Unnamed: 0,Column Name,Min Value,Max Value,Unique Values
0,arp.hw.size,0.0,6.0,2
1,icmp.checksum,0.0,65532.0,13187
2,icmp.seq_le,0.0,65524.0,13824
3,http.content_length,0.0,83655.0,33
4,http.request.method,0.0,4.0,5
5,http.referer,0.0,2.0,3
6,http.response,0.0,1.0,2
7,tcp.ack,0.0,2147333000.0,27929
8,tcp.ack_raw,0.0,4294947000.0,94716
9,tcp.dstport,0.0,65535.0,23188


In [7]:
model = joblib.load("Models/RFE_GradientBoosting.pkl")

# Attack Mapping
attack_mapping = {
    'MITM': 1, 'Fingerprinting': 2, 'Ransomware': 3, 'Uploading': 4,
    'SQL_injection': 5, 'DDoS_HTTP': 6, 'DDoS_TCP': 7, 'Password': 8,
    'Port_Scanning': 9, 'Vulnerability_scanner': 10, 'Backdoor': 11,
    'XSS': 12, 'Normal': 13, 'DDoS_UDP': 14, 'DDoS_ICMP': 15
}

In [8]:
# Function to create a simulated attack packet
def create_attack_packet(attack_type):
    if attack_type == "MITM":
        return scapy.ARP(op=2, psrc="192.168.1.1", pdst="192.168.1.100", hwsrc="aa:bb:cc:dd:ee:ff")

    elif attack_type == "DDoS_HTTP":
        return scapy.IP(dst="192.168.1.100") / scapy.TCP(dport=80, flags="S")

    elif attack_type == "DDoS_TCP":
        return scapy.IP(dst="192.168.1.100") / scapy.TCP(dport=443, flags="S")

    elif attack_type == "SQL_injection":
        return scapy.IP(dst="192.168.1.100") / scapy.TCP(dport=3306, flags="P")

    elif attack_type == "Port_Scanning":
        return scapy.IP(dst="192.168.1.100") / scapy.TCP(dport=(20, 1024), flags="S")

    elif attack_type == "Normal":
        return scapy.IP(dst="192.168.1.100") / scapy.TCP(dport=80, flags="A")

    else:
        return scapy.IP(dst="192.168.1.100") / scapy.UDP(dport=53)

In [9]:
# Extract features from the packet
def extract_features(packet):
    features = {
        "arp.hw.size": getattr(packet, "hwlen", 0),
        "icmp.checksum": getattr(packet, "chksum", 0),
        "icmp.seq_le": getattr(packet, "seq", 0),
        "http.content_length": getattr(packet, "len", 0),
        "http.request.method": 0,  # Placeholder
        "http.referer": 0,  # Placeholder
        "http.response": 0,  # Placeholder
        "tcp.ack": getattr(packet, "ack", 0),
        "tcp.ack_raw": getattr(packet, "ack", 0),
        "tcp.dstport": getattr(packet, "dport", 0),
        "tcp.flags.ack": getattr(packet, "flags", 0) & 16,
        "tcp.len": getattr(packet, "len", 0),
        "tcp.seq": getattr(packet, "seq", 0),
        "udp.port": getattr(packet, "dport", 0) if scapy.UDP in packet else 0,
        "udp.stream": 0,  # Placeholder
        "udp.time_delta": 0,  # Placeholder
        "dns.qry.name": 0,  # Placeholder
        "dns.qry.qu": 0,  # Placeholder
        "dns.retransmission": 0,  # Placeholder
        "dns.retransmit_request": 0,  # Placeholder
        "mqtt.msgtype": 0,  # Placeholder
        "mqtt.topic_len": 0,  # Placeholder
        "mqtt.ver": 0,  # Placeholder
        "ip.src_1": int(packet[scapy.IP].src.split(".")[0]) if scapy.IP in packet else 0,
        "ip.src_2": int(packet[scapy.IP].src.split(".")[1]) if scapy.IP in packet else 0,
        "ip.src_3": int(packet[scapy.IP].src.split(".")[2]) if scapy.IP in packet else 0,
        "ip.src_4": int(packet[scapy.IP].src.split(".")[3]) if scapy.IP in packet else 0,
        "ip.dst_1": int(packet[scapy.IP].dst.split(".")[0]) if scapy.IP in packet else 0,
        "ip.dst_2": int(packet[scapy.IP].dst.split(".")[1]) if scapy.IP in packet else 0,
        "ip.dst_3": int(packet[scapy.IP].dst.split(".")[2]) if scapy.IP in packet else 0,
        "ip.dst_4": int(packet[scapy.IP].dst.split(".")[3]) if scapy.IP in packet else 0,
        "tcp_flag_category": getattr(packet, "flags", 0)
    }
    
    return np.array(list(features.values())).reshape(1, -1)

In [10]:
# Simulate and predict attacks
for attack in attack_mapping.keys():
    print(f"\nSimulating {attack} attack...")
    packet = create_attack_packet(attack)
    features = extract_features(packet)
    
    # Predict Attack Type
    prediction = model.predict(features)[0]
    
    # Map prediction back to attack name
    predicted_attack = [k for k, v in attack_mapping.items() if v == prediction][0]
    
    print(f"Actual Attack: {attack}, Predicted Attack: {predicted_attack}")


Simulating MITM attack...


ValueError: Input X contains NaN.
GradientBoostingClassifier does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values