In [5]:
import pandas as pd
import xgboost as xgb
import scapy.all as scapy
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import pickle
from concurrent.futures import ThreadPoolExecutor


# Global variables
initial_bandwidth = 0
initial_packet_rate = 0
initial_total_packets = 0
initial_total_time = 0
label = ""
source_ip = ""
destination_ip = ""

def calculate_total_packets_and_time(packets):
    total_packets = len(packets)
    total_time = packets[-1].time - packets[0].time if len(packets) > 1 else 0.000012
    return total_packets, total_time

def calculate_bandwidth_and_packet_rate(packets, total_length, total_time):
    bandwidth = total_length / total_time
    packet_rate = len(packets) / total_time
    return round(bandwidth, 2), round(packet_rate, 2)

def calculate_initial_features(packets):
    total_length = sum(len(packet) for packet in packets)

    with ThreadPoolExecutor() as executor:
        total_packets_and_time_future = executor.submit(calculate_total_packets_and_time, packets)
        total_packets, total_time = total_packets_and_time_future.result()

        bandwidth_and_packet_rate_future = executor.submit(calculate_bandwidth_and_packet_rate, packets, total_length, total_time)
        bandwidth, packet_rate = bandwidth_and_packet_rate_future.result()

    return packets, round(bandwidth, 2), round(packet_rate, 2), round(total_packets, 2), round(total_time, 2)

def load_pretrained_model():
    with open('Original-XGB-Dataset-A-80-20.pkl', 'rb') as model_file:
        model = pickle.load(model_file)
    with open('Label-Encoder-Original-XGB-Dataset-A-80-20.pkl', 'rb') as encoder_file:
        label_encoder = pickle.load(encoder_file)
    return model, label_encoder

def predict_with_model(model, label_encoder, features):
    features_df = pd.DataFrame([features], columns=[
        'Protocol', 'Source Port', 'Destination Port', 'Total Fwd Pkt', 'Total Bwd Pkt', 'Total Fwd Len', 'Total Bwd Len', 
        'Min Fwd Len', 'Max Fwd Len', 'Mean Fwd Len', 'Std Fwd Len', 'Min Bwd Len', 'Max Bwd Len', 
        'Mean Bwd Len', 'Std Bwd Len', 'Flow Bytes/s', 'Flow Pkt/s', 'Flow Duration', 
        'Bandwidth', 'Packet Rate'
    ])
    features_df = features_df.astype('float64')
    prediction = model.predict(features_df)
    predicted_label = label_encoder.inverse_transform(prediction)[0]
    return predicted_label

def evaluate_model(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)

    labels = sorted(set(y_true) | set(y_pred))
    cm = confusion_matrix(y_true, y_pred, labels=labels)

    tn, fp, fn, tp = cm.ravel() if cm.size == 4 else (0, 0, 0, 0)
    return accuracy, precision, recall, f1, tp, tn, fp, fn

def group_packets_by_flow(packets):
    flow_dict = {}
    
    for packet in packets:
        if scapy.IP in packet and scapy.TCP in packet:
            src_ip = packet[scapy.IP].src
            dst_ip = packet[scapy.IP].dst
            src_port = packet[scapy.TCP].sport
            dst_port = packet[scapy.TCP].dport
            proto = packet[scapy.IP].proto

            flow_key = (src_ip, dst_ip, src_port, dst_port, proto)
            
            if flow_key not in flow_dict:
                flow_dict[flow_key] = []
            flow_dict[flow_key].append(packet)
    
    return flow_dict

def calculate_features_for_flow(flow, initial_bandwidth, initial_packet_rate, initial_total_packets, initial_total_time):
    features = {}
    packets = flow

    if packets and scapy.IP in packets[0] and scapy.TCP in packets[0]:
        src_ip = packets[0][scapy.IP].src
        dst_ip = packets[0][scapy.IP].dst
        packet_len = len(packets[0])

    # Calculate other features if not a special packet
    total_len = sum(len(pkt) for pkt in packets)
    min_len = min(len(pkt) for pkt in packets)
    max_len = max(len(pkt) for pkt in packets)
    mean_len = total_len / len(packets)
    std_len = pd.Series([len(pkt) for pkt in packets]).std()
    
    first_time = packets[0].time
    last_time = packets[-1].time
    flow_duration = last_time - first_time

    # Ensure flow_duration is not zero to avoid DivisionByZero error
    if flow_duration == 0:
        flow_duration = 0.000012

    flow_bytes_s = total_len / flow_duration
    flow_pkt_s = len(packets) / flow_duration

    features = {
        'Protocol': packets[0][scapy.IP].proto,
        'Source Port': packets[0][scapy.TCP].sport,
        'Destination Port': packets[0][scapy.TCP].dport,
        'Total Fwd Pkt': len(packets) if src_ip == source_ip else 0,
        'Total Fwd Len': total_len if src_ip == source_ip else 0,
        'Min Fwd Len': min_len if src_ip == source_ip else 0,
        'Max Fwd Len': max_len if src_ip == source_ip else 0,
        'Mean Fwd Len': mean_len if src_ip == source_ip else 0,
        'Std Fwd Len': std_len if src_ip == source_ip else 0,
        'Total Bwd Pkt': len(packets) if src_ip != source_ip else 0,
        'Total Bwd Len': total_len if src_ip != source_ip else 0,
        'Min Bwd Len': min_len if src_ip != source_ip else 0,
        'Max Bwd Len': max_len if src_ip != source_ip else 0,
        'Mean Bwd Len': mean_len if src_ip != source_ip else 0,
        'Std Bwd Len': std_len if src_ip != source_ip else 0, 
        'Flow Bytes/s': flow_bytes_s,
        'Flow Pkt/s': flow_pkt_s,
        'Flow Duration': flow_duration,
        'Bandwidth': initial_bandwidth,
        'Packet Rate': initial_packet_rate,
       
    }

    return features, None

def is_request(packet):
    return packet.haslayer(scapy.IP) and packet.haslayer(scapy.TCP)

def is_reply(packet):
    return packet.haslayer(scapy.IP) and packet.haslayer(scapy.TCP)

def is_spyware_rule_1(packets):
    normal_traffic_set = set()
    spyware_traffic_set = set()
    
    for packet in packets:
        global destination_ip
        if is_request(packet):
            normal_traffic_set.add((packet[scapy.IP].src, packet[scapy.IP].dst, 'RQ'))
        elif is_reply(packet):
            normal_traffic_set.add((packet[scapy.IP].dst, packet[scapy.IP].src, 'RP'))
        destination_ip = packet[scapy.IP].dst
    
    for packet in packets:
        if is_request(packet):
            spyware_traffic_set.add((packet[scapy.IP].src, packet[scapy.IP].dst, 'RQ'))
        elif is_reply(packet):
            spyware_traffic_set.add((packet[scapy.IP].dst, packet[scapy.IP].src, 'RP'))
    for flow in spyware_traffic_set:
        if flow not in normal_traffic_set:
            return True
    
    return False

def is_spyware_rule_2():
    global destination_ip
    unique_dest_ips = len(set(destination_ip))
    return unique_dest_ips > 3

def is_spyware_rule_3(total_fwd_len, total_bwd_len):
    return total_fwd_len > total_bwd_len

def main():
    csv_files = ['1-All-Together-Update-Single-26-8-24.csv', '2-All-Together-Update-Single-26-8-24.csv',
                 '3-All-Together-Update-Single-26-8-24.csv', '4-All-Together-Update-Single-26-8-24.csv',
                 '5-All-Together-Update-Single-26-8-24.csv']
    
    model, label_encoder = load_pretrained_model()
    
    previous_samples = pd.concat([pd.read_csv(f) for f in csv_files], ignore_index=True)
    y_true = previous_samples['Label']
    y_true_encoded = label_encoder.transform(y_true)

    # Capture initial packets to calculate initial features
    initial_packets = scapy.sniff(count=50)  # Capturing an initial set of 50 packets
    packets, initial_bandwidth, initial_packet_rate, initial_total_packets, initial_total_time = calculate_initial_features(initial_packets)
    
    label_row = previous_samples[previous_samples['Total-Pkt'] == initial_total_packets]
    if not label_row.empty:
        label = label_row['Label'].values[0]
    else:
        label = 'Normal-Traffic'

    packet_count = 0
    predictions = []
    true_labels = []
    captured_packets = []

    def process_packet(packet):
        nonlocal packet_count, captured_packets, initial_bandwidth, initial_packet_rate, initial_total_packets, initial_total_time, label

        captured_packets.append(packet)
        packet_count += 1

        if packet_count % 50 == 0:
            flow_dict = group_packets_by_flow(captured_packets)
            
            for flow_key, flow_packets in flow_dict.items():
                features, special_label = calculate_features_for_flow(flow_packets, initial_bandwidth, initial_packet_rate, initial_total_packets, initial_total_time)

                if special_label:
                    true_labels.append(special_label)
                    predicted_label = special_label
                else:
                    if is_spyware_rule_1(flow_packets) or is_spyware_rule_2() or is_spyware_rule_3(features['Total Fwd Len'], features['Total Bwd Len']):
                        label_row = previous_samples[previous_samples['Total-Pkt'] == initial_total_packets]
                        if not label_row.empty:
                            label = label_row['Label'].values[0]
                        else:
                            label = 'Unknown'
                    else:
                        label = 'Normal-Traffic'

                    true_labels.append(label)
                    predicted_label = predict_with_model(model, label_encoder, features)  
                predicted_label_encoded = label_encoder.transform([predicted_label])[0]
                predictions.append(predicted_label_encoded)
                true_labels_encoded = label_encoder.transform(true_labels)
                y_pred = predictions[-len(true_labels_encoded):]
                accuracy, precision, recall, f1, tp, tn, fp, fn = evaluate_model(true_labels_encoded[:len(y_pred)], y_pred)
                
                print(f"Predicted Label: {predicted_label}")
                print(f"Accuracy: {accuracy}")
                print(f"Precision: {precision}")
                print(f"Recall: {recall}")
                print(f"F1-Score: {f1}")

            captured_packets = []
            packet_count = 0

    scapy.sniff(prn=process_packet, store=False, timeout=180)

if __name__ == '__main__':
    main()




Predicted Label: Normal-Traffic
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1-Score: 1.0
Predicted Label: FamiGuardPro
Accuracy: 0.5
Precision: 0.5067889
Recall: 0.5
F1-Score: 0.5033715608108115
Predicted Label: FamiGuardPro
Accuracy: 0.3333333333333333
Precision: 0.3401222333333333
Recall: 0.3333333333333333
F1-Score: 0.33669356491901525
Predicted Label: FamiGuardPro
Accuracy: 0.25
Precision: 0.2567889
Recall: 0.25
F1-Score: 0.25334897824320934
Predicted Label: FamiGuardPro
Accuracy: 0.2
Precision: 0.2067889
Recall: 0.2
F1-Score: 0.2033378000235503
Predicted Label: FamiGuardPro
Accuracy: 0.16666666666666666
Precision: 0.17345556666666664
Recall: 0.16666666666666666
F1-Score: 0.1699933628436391
Predicted Label: FamiGuardPro
Accuracy: 0.14285714285714285
Precision: 0.14964604285714284
Recall: 0.14285714285714285
F1-Score: 0.14617280882082978
Predicted Label: Normal-Traffic
Accuracy: 0.25
Precision: 0.2567889
Recall: 0.25
F1-Score: 0.25334897824320934
Predicted Label: Normal-Traffic
Accura

Precision: 0.5745855101694916
Recall: 0.5677966101694916
F1-Score: 0.5711708877760386
Predicted Label: FamiGuardPro
Accuracy: 0.5630252100840336
Precision: 0.5698141100840336
Recall: 0.5630252100840336
F1-Score: 0.5663993177625809
Predicted Label: Normal-Traffic
Accuracy: 0.5666666666666667
Precision: 0.5734555666666666
Recall: 0.5666666666666667
F1-Score: 0.5700409042886151
Predicted Label: Normal-Traffic
Accuracy: 0.5702479338842975
Precision: 0.5770368338842975
Recall: 0.5702479338842975
F1-Score: 0.5736222976927501
Predicted Label: FamiGuardPro
Accuracy: 0.5655737704918032
Precision: 0.5723626704918032
Recall: 0.5655737704918032
F1-Score: 0.5689479692890339
Predicted Label: FamiGuardPro
Accuracy: 0.5609756097560976
Precision: 0.5677645097560976
Recall: 0.5609756097560976
F1-Score: 0.5643496435582441
Predicted Label: Normal-Traffic
Accuracy: 0.5645161290322581
Precision: 0.5713050290322581
Recall: 0.5645161290322581
F1-Score: 0.5678902901149041
Predicted Label: FamiGuardPro
Accuracy

F1-Score: 0.5791321306518177
Predicted Label: FamiGuardPro
Accuracy: 0.5732758620689655
Precision: 0.5800647620689655
Recall: 0.5732758620689655
F1-Score: 0.5766503313441712
Predicted Label: Normal-Traffic
Accuracy: 0.575107296137339
Precision: 0.581896196137339
Recall: 0.575107296137339
F1-Score: 0.5784818286679807
Predicted Label: Normal-Traffic
Accuracy: 0.5769230769230769
Precision: 0.5837119769230769
Recall: 0.5769230769230769
F1-Score: 0.5802976717743557
Predicted Label: Normal-Traffic
Accuracy: 0.5787234042553191
Precision: 0.5855123042553191
Recall: 0.5787234042553191
F1-Score: 0.5820980605130068
Predicted Label: Normal-Traffic
Accuracy: 0.5805084745762712
Precision: 0.5872973745762712
Recall: 0.5805084745762712
F1-Score: 0.5838831913461099
Predicted Label: FamiGuardPro
Accuracy: 0.5780590717299579
Precision: 0.5848479717299578
Recall: 0.5780590717299579
F1-Score: 0.5814337053725502
Predicted Label: Normal-Traffic
Accuracy: 0.5798319327731093
Precision: 0.5866208327731093
Recal

Precision: 0.5720062913043478
Recall: 0.5652173913043478
F1-Score: 0.5685915774090761
Predicted Label: Normal-Traffic
Accuracy: 0.5664739884393064
Precision: 0.5732628884393064
Recall: 0.5664739884393064
F1-Score: 0.5698482192272475
Predicted Label: Normal-Traffic
Accuracy: 0.5677233429394812
Precision: 0.5745122429394812
Recall: 0.5677233429394812
F1-Score: 0.5710976179581639
Predicted Label: Normal-Traffic
Accuracy: 0.5689655172413793
Precision: 0.5757544172413793
Recall: 0.5689655172413793
F1-Score: 0.5723398360451708
Predicted Label: FamiGuardPro
Accuracy: 0.5673352435530086
Precision: 0.5741241435530086
Recall: 0.5673352435530086
F1-Score: 0.570709504852591
Predicted Label: Normal-Traffic
Accuracy: 0.5685714285714286
Precision: 0.5753603285714286
Recall: 0.5685714285714286
F1-Score: 0.5719457335046787
Predicted Label: FamiGuardPro
Accuracy: 0.5669515669515669
Precision: 0.5737404669515669
Recall: 0.5669515669515669
F1-Score: 0.5703258146700395
Predicted Label: FamiGuardPro
Accurac