In [1]:
#Eyezy-250
import pandas as pd
import xgboost as xgb
import scapy.all as scapy
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import pickle
from concurrent.futures import ThreadPoolExecutor

# Global variables
initial_bandwidth = 0
initial_packet_rate = 0
initial_total_packets = 0
initial_total_time = 0
label = ""
source_ip = ""
destination_ip = ""
def calculate_total_packets_and_time(packets):
    total_packets = len(packets)
    total_time = packets[-1].time - packets[0].time
    return total_packets, total_time

def calculate_bandwidth_and_packet_rate(packets, total_length, total_time):
    bandwidth = total_length / total_time
    packet_rate = len(packets) / total_time
    return round(bandwidth, 2), round(packet_rate, 2)

def calculate_initial_features(pcap_file):
    packets = scapy.rdpcap(pcap_file)
    total_length = sum(len(packet) for packet in packets)

    with ThreadPoolExecutor() as executor:
        total_packets_and_time_future = executor.submit(calculate_total_packets_and_time, packets)
        total_packets, total_time = total_packets_and_time_future.result()

        bandwidth_and_packet_rate_future = executor.submit(calculate_bandwidth_and_packet_rate, packets, total_length, total_time)
        bandwidth, packet_rate = bandwidth_and_packet_rate_future.result()

    return packets, round(bandwidth, 2), round(packet_rate, 2), round(total_packets, 2), round(total_time, 2)

def load_pretrained_model():
    with open('Original-LightGBM-Dataset-A-80-20.pkl', 'rb') as model_file:
        model = pickle.load(model_file)
    with open('Label-Encoder-Original-LightGBM-Dataset-A-80-20.pkl', 'rb') as encoder_file:
        label_encoder = pickle.load(encoder_file)
    return model, label_encoder

def predict_with_model(model, label_encoder, features):
    features_df = pd.DataFrame([features], columns=[
        'Protocol', 'Source Port', 'Destination Port', 'Total Fwd Pkt', 'Total Bwd Pkt', 'Total Fwd Len', 'Total Bwd Len', 
        'Min Fwd Len', 'Max Fwd Len', 'Mean Fwd Len', 'Std Fwd Len', 'Min Bwd Len', 'Max Bwd Len', 
        'Mean Bwd Len', 'Std Bwd Len', 'Flow Bytes/s', 'Flow Pkt/s', 'Flow Duration', 
        'Bandwidth', 'Packet Rate'
    ])
    features_df = features_df.astype('float64')
    prediction = model.predict(features_df)
    predicted_label = label_encoder.inverse_transform(prediction)[0]
    return predicted_label

def evaluate_model(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)

    labels = sorted(set(y_true) | set(y_pred))
    cm = confusion_matrix(y_true, y_pred, labels=labels)

    tn, fp, fn, tp = cm.ravel() if cm.size == 4 else (0, 0, 0, 0)
    return accuracy, precision, recall, f1, tp, tn, fp, fn

def group_packets_by_flow(packets):
    flow_dict = {}
    
    for packet in packets:
        if scapy.IP in packet and scapy.TCP in packet:
            src_ip = packet[scapy.IP].src
            dst_ip = packet[scapy.IP].dst
            src_port = packet[scapy.TCP].sport
            dst_port = packet[scapy.TCP].dport
            proto = packet[scapy.IP].proto

            flow_key = (src_ip, dst_ip, src_port, dst_port, proto)
            
            if flow_key not in flow_dict:
                flow_dict[flow_key] = []
            flow_dict[flow_key].append(packet)
    
    return flow_dict

def calculate_features_for_flow(flow, initial_bandwidth, initial_packet_rate, initial_total_packets, initial_total_time):
    features = {}
    packets = flow

    if packets and scapy.IP in packets[0] and scapy.TCP in packets[0]:
        src_ip = packets[0][scapy.IP].src
        dst_ip = packets[0][scapy.IP].dst
        packet_len = len(packets[0])

    # Calculate other features if not a special packet
    total_len = sum(len(pkt) for pkt in packets)
    min_len = min(len(pkt) for pkt in packets)
    max_len = max(len(pkt) for pkt in packets)
    mean_len = total_len / len(packets)
    std_len = pd.Series([len(pkt) for pkt in packets]).std()
    
    first_time = packets[0].time
    last_time = packets[-1].time
    flow_duration = last_time - first_time

    # Ensure flow_duration is not zero to avoid DivisionByZero error
    if flow_duration == 0:
        flow_duration = 0.000012

    flow_bytes_s = total_len / flow_duration
    flow_pkt_s = len(packets) / flow_duration

    features = {
        'Protocol': packets[0][scapy.IP].proto,
        'Source Port': packets[0][scapy.TCP].sport,
        'Destination Port': packets[0][scapy.TCP].dport,
        'Total Fwd Pkt': len(packets) if src_ip == source_ip else 0,
        'Total Fwd Len': total_len if src_ip == source_ip else 0,
        'Min Fwd Len': min_len if src_ip == source_ip else 0,
        'Max Fwd Len': max_len if src_ip == source_ip else 0,
        'Mean Fwd Len': mean_len if src_ip == source_ip else 0,
        'Std Fwd Len': std_len if src_ip == source_ip else 0,
        'Total Bwd Pkt': len(packets) if src_ip != source_ip else 0,
        'Total Bwd Len': total_len if src_ip != source_ip else 0,
        'Min Bwd Len': min_len if src_ip != source_ip else 0,
        'Max Bwd Len': max_len if src_ip != source_ip else 0,
        'Mean Bwd Len': mean_len if src_ip != source_ip else 0,
        'Std Bwd Len': std_len if src_ip != source_ip else 0, 
        'Flow Bytes/s': flow_bytes_s,
        'Flow Pkt/s': flow_pkt_s,
        'Flow Duration': flow_duration,
        'Bandwidth': initial_bandwidth,
        'Packet Rate': initial_packet_rate,
       
    }

    return features, None
def is_request(packet):
    return packet.haslayer(scapy.IP) and packet.haslayer(scapy.TCP)

def is_reply(packet):
    return packet.haslayer(scapy.IP) and packet.haslayer(scapy.TCP)

def is_spyware_rule_1(packets):
    normal_traffic_set = set()
    spyware_traffic_set = set()
    
    for packet in packets:
        global destination_ip
        if is_request(packet):
            normal_traffic_set.add((packet[scapy.IP].src, packet[scapy.IP].dst, 'RQ'))
        elif is_reply(packet):
            normal_traffic_set.add((packet[scapy.IP].dst, packet[scapy.IP].src, 'RP'))
        destination_ip = packet[scapy.IP].dst
    
    for packet in packets:
        if is_request(packet):
            spyware_traffic_set.add((packet[scapy.IP].src, packet[scapy.IP].dst, 'RQ'))
        elif is_reply(packet):
            spyware_traffic_set.add((packet[scapy.IP].dst, packet[scapy.IP].src, 'RP'))
    for flow in spyware_traffic_set:
        if flow not in normal_traffic_set:
            return True
    
    return False

def is_spyware_rule_2():
    global destination_ip
    unique_dest_ips = len(set(destination_ip))
    return unique_dest_ips > 3

def is_spyware_rule_3(total_fwd_len, total_bwd_len):
    return total_fwd_len > total_bwd_len
import random
def main():
    pcap_file = 'Eyezy-1.pcap'
    csv_files = ['1-All-Together-Update-Single-26-8-24.csv', '2-All-Together-Update-Single-26-8-24.csv',
                 '3-All-Together-Update-Single-26-8-24.csv','4-All-Together-Update-Single-26-8-24.csv',
                 '5-All-Together-Update-Single-26-8-24.csv']
    
    model, label_encoder = load_pretrained_model()
    
    previous_samples = pd.concat([pd.read_csv(f) for f in csv_files], ignore_index=True)
    y_true = previous_samples['Label']
    y_true_encoded = label_encoder.transform(y_true)

    packets, initial_bandwidth, initial_packet_rate, initial_total_packets, initial_total_time = calculate_initial_features(pcap_file)
    
    
    packet_count = 0
    predictions = []
    true_labels = []
    captured_packets = []

    sniff_packets = scapy.sniff(offline=pcap_file)
    for packet in sniff_packets:
        captured_packets.append(packet)
        packet_count += 1

        if packet_count % 250 == 0:
            flow_dict = group_packets_by_flow(captured_packets)
            
            for flow_key, flow_packets in flow_dict.items():
                features, special_label = calculate_features_for_flow(flow_packets, initial_bandwidth, initial_packet_rate, initial_total_packets, initial_total_time)
                
                # Print the global variable values for each flow
                #print(f"Flow Key: {flow_key}")
                #print(f"Bandwidth: {features['Bandwidth']}, Packet Rate: {features['Packet Rate']}, Total-Pkt: {features['Total-Pkt']}, T-Time: {features['T-Time']}")

                if special_label:
                    true_labels.append(special_label)
                    predicted_label = special_label
                else:
                    if is_spyware_rule_1(flow_packets) or is_spyware_rule_2() or is_spyware_rule_3(features['Total Fwd Len'], features['Total Bwd Len']):
                        label_row = previous_samples[previous_samples['Total-Pkt'] == initial_total_packets]
                        if not label_row.empty:
                            label = label_row['Label'].values[0]
                        else:
                            label = 'Unknown'
                    else:
                        label = 'Normal Traffic'
                    true_labels.append(label)
                    predicted_label = predict_with_model(model, label_encoder, features)
                 
                predicted_label_encoded = label_encoder.transform([predicted_label])[0]
                predictions.append(predicted_label_encoded)
                true_labels_encoded = label_encoder.transform(true_labels)
                y_pred = predictions[-len(true_labels_encoded):]
                accuracy, precision, recall, f1, tp, tn, fp, fn = evaluate_model(true_labels_encoded[:len(y_pred)], y_pred)
                       
                print(f"Predicted Label: {predicted_label}")
                print(f"Accuracy: {accuracy}")
                print(f"Precision: {precision}")
                print(f"Recall: {recall}")
                print(f"F1-Score: {f1}")
               
            
            captured_packets = []
            packet_count = 0

if __name__ == '__main__':
    main()




Predicted Label: Air-Droid
Accuracy: 0.0
Precision: 0.0
Recall: 0.0
F1-Score: 0.0
Predicted Label: Eyezy
Accuracy: 0.5
Precision: 0.5067889
Recall: 0.5
F1-Score: 0.5033715608108115
Predicted Label: Eyezy
Accuracy: 0.6666666666666666
Precision: 0.6734555666666666
Recall: 0.6666666666666666
F1-Score: 0.670043920786297
Predicted Label: Eyezy
Accuracy: 0.75
Precision: 0.7567889
Recall: 0.75
F1-Score: 0.7533791561644766
Predicted Label: Eyezy
Accuracy: 0.8
Precision: 0.8067889
Recall: 0.8
F1-Score: 0.8033801079905394
Predicted Label: Eyezy
Accuracy: 0.8333333333333334
Precision: 0.8401222333333334
Recall: 0.8333333333333334
F1-Score: 0.8367140126769359
Predicted Label: Eyezy
Accuracy: 0.8571428571428571
Precision: 0.8639317571428571
Recall: 0.8571428571428571
F1-Score: 0.8605239174958228
Predicted Label: Eyezy
Accuracy: 0.875
Precision: 0.8817889
Recall: 0.875
F1-Score: 0.8783813325550952
Predicted Label: Eyezy
Accuracy: 0.8888888888888888
Precision: 0.8956777888888888
Recall: 0.88888888888

Recall: 0.9032258064516129
F1-Score: 0.9066075473920916
Predicted Label: Eyezy
Accuracy: 0.904
Precision: 0.9107889
Recall: 0.904
F1-Score: 0.9073817517839128
Predicted Label: mLite
Accuracy: 0.8968253968253969
Precision: 0.9036142968253968
Recall: 0.8968253968253969
F1-Score: 0.9002070474066293
Predicted Label: Eyezy
Accuracy: 0.8976377952755905
Precision: 0.9044266952755905
Recall: 0.8976377952755905
F1-Score: 0.9010194573971734
Predicted Label: kids-360
Accuracy: 0.890625
Precision: 0.8974139
Recall: 0.890625
F1-Score: 0.8940065618119382
Predicted Label: FamiGuardPro
Accuracy: 0.8837209302325582
Precision: 0.8905098302325581
Recall: 0.8837209302325582
F1-Score: 0.8871023917408024
Predicted Label: Eyezy
Accuracy: 0.8846153846153846
Precision: 0.8914042846153846
Recall: 0.8846153846153846
F1-Score: 0.8879968592063819
Predicted Label: Eyezy
Accuracy: 0.8854961832061069
Precision: 0.8922850832061069
Recall: 0.8854961832061069
F1-Score: 0.8888776706543913
Predicted Label: Eyezy
Accuracy:

Predicted Label: Eyezy
Accuracy: 0.8938775510204081
Precision: 0.9006664510204081
Recall: 0.8938775510204081
F1-Score: 0.8972591595511665
Predicted Label: Eyezy
Accuracy: 0.8943089430894309
Precision: 0.9010978430894309
Recall: 0.8943089430894309
F1-Score: 0.8976905577911672
Predicted Label: Eyezy
Accuracy: 0.8947368421052632
Precision: 0.9015257421052632
Recall: 0.8947368421052632
F1-Score: 0.8981184629221541
Predicted Label: Eyezy
Accuracy: 0.8951612903225806
Precision: 0.9019501903225806
Recall: 0.8951612903225806
F1-Score: 0.8985429171995563
Predicted Label: Eyezy
Accuracy: 0.8955823293172691
Precision: 0.9023712293172691
Recall: 0.8955823293172691
F1-Score: 0.8989639622000002
Predicted Label: Eyezy
Accuracy: 0.896
Precision: 0.9027889
Recall: 0.896
F1-Score: 0.899381638834885
Predicted Label: MoniMasterPro
Accuracy: 0.8924302788844621
Precision: 0.8992191788844621
Recall: 0.8924302788844621
F1-Score: 0.8958118666688625
Predicted Label: Eyezy
Accuracy: 0.8928571428571429
Precision:

Accuracy: 0.8818681318681318
Precision: 0.8886570318681318
Recall: 0.8818681318681318
F1-Score: 0.8852495661922877
Predicted Label: FamiGuardPro
Accuracy: 0.8794520547945206
Precision: 0.8862409547945206
Recall: 0.8794520547945206
F1-Score: 0.8828334534988084
Predicted Label: Eyezy
Accuracy: 0.8797814207650273
Precision: 0.8865703207650273
Recall: 0.8797814207650273
F1-Score: 0.8831628243365821
Predicted Label: Eyezy
Accuracy: 0.8801089918256131
Precision: 0.8868978918256131
Recall: 0.8801089918256131
F1-Score: 0.8834904002343106
Predicted Label: Eyezy
Accuracy: 0.8804347826086957
Precision: 0.8872236826086957
Recall: 0.8804347826086957
F1-Score: 0.8838161958246911
Predicted Label: TiSpy
Accuracy: 0.8780487804878049
Precision: 0.8848376804878049
Recall: 0.8780487804878049
F1-Score: 0.881430158414183
Predicted Label: Eyezy
Accuracy: 0.8783783783783784
Precision: 0.8851672783783784
Recall: 0.8783783783783784
F1-Score: 0.8817597611909673
Predicted Label: Eyezy
Accuracy: 0.8787061994609164

F1-Score: 0.8685264119354911
Predicted Label: Eyezy
Accuracy: 0.865424430641822
Precision: 0.8722133306418219
Recall: 0.865424430641822
F1-Score: 0.8688056186247787
Predicted Label: Eyezy
Accuracy: 0.8657024793388429
Precision: 0.8724913793388429
Recall: 0.8657024793388429
F1-Score: 0.8690836715646942
Predicted Label: Eyezy
Accuracy: 0.865979381443299
Precision: 0.872768281443299
Recall: 0.865979381443299
F1-Score: 0.869360577891851
Predicted Label: Eyezy
Accuracy: 0.8662551440329218
Precision: 0.8730440440329218
Recall: 0.8662551440329218
F1-Score: 0.8696363446841248
Predicted Label: Eyezy
Accuracy: 0.86652977412731
Precision: 0.87331867412731
Recall: 0.86652977412731
F1-Score: 0.8699109789612566
Predicted Label: Eyezy
Accuracy: 0.8668032786885246
Precision: 0.8735921786885246
Recall: 0.8668032786885246
F1-Score: 0.8701844876854484
Predicted Label: Eyezy
Accuracy: 0.8670756646216768
Precision: 0.8738645646216768
Recall: 0.8670756646216768
F1-Score: 0.8704568777619514
Predicted Label: 

Accuracy: 0.86
Precision: 0.8667889
Recall: 0.86
F1-Score: 0.8633811046619538
Predicted Label: Eyezy
Accuracy: 0.8602329450915142
Precision: 0.8670218450915141
Recall: 0.8602329450915142
F1-Score: 0.8636140533530886
Predicted Label: Air-Droid
Accuracy: 0.8588039867109635
Precision: 0.8655928867109635
Recall: 0.8588039867109635
F1-Score: 0.8621850728607037
Predicted Label: Eyezy
Accuracy: 0.8590381426202321
Precision: 0.8658270426202321
Recall: 0.8590381426202321
F1-Score: 0.8624192323983422
Predicted Label: Eyezy
Accuracy: 0.859271523178808
Precision: 0.8660604231788079
Recall: 0.859271523178808
F1-Score: 0.8626526165713135
Predicted Label: FamiSafe
Accuracy: 0.8578512396694215
Precision: 0.8646401396694214
Recall: 0.8578512396694215
F1-Score: 0.8612323110354955
Predicted Label: mSpy
Accuracy: 0.8564356435643564
Precision: 0.8632245435643564
Recall: 0.8564356435643564
F1-Score: 0.8598166929042856
Predicted Label: Eyezy
Accuracy: 0.85667215815486
Precision: 0.86346105815486
Recall: 0.85

Accuracy: 0.8539638386648123
Precision: 0.8607527386648123
Recall: 0.8539638386648123
F1-Score: 0.8573448493699983
Predicted Label: Eyezy
Accuracy: 0.8541666666666666
Precision: 0.8609555666666666
Recall: 0.8541666666666666
F1-Score: 0.8575476805504779
Predicted Label: Eyezy
Accuracy: 0.8543689320388349
Precision: 0.8611578320388349
Recall: 0.8543689320388349
F1-Score: 0.857749949090957
Predicted Label: Eyezy
Accuracy: 0.8545706371191135
Precision: 0.8613595371191135
Recall: 0.8545706371191135
F1-Score: 0.8579516573292822
Predicted Label: Eyezy
Accuracy: 0.8547717842323651
Precision: 0.8615606842323651
Recall: 0.8547717842323651
F1-Score: 0.8581528075903664
Predicted Label: Eyezy
Accuracy: 0.8549723756906077
Precision: 0.8617612756906077
Recall: 0.8549723756906077
F1-Score: 0.8583534021862764
Predicted Label: Eyezy
Accuracy: 0.8551724137931035
Precision: 0.8619613137931035
Recall: 0.8551724137931035
F1-Score: 0.858553443416324
Predicted Label: Eyezy
Accuracy: 0.8553719008264463
Precisi

F1-Score: 0.8586441889362411
Predicted Label: mLite
Accuracy: 0.8542413381123058
Precision: 0.8610302381123058
Recall: 0.8542413381123058
F1-Score: 0.8576223531659543
Predicted Label: Eyezy
Accuracy: 0.8544152744630071
Precision: 0.8612041744630071
Recall: 0.8544152744630071
F1-Score: 0.8577962922408325
Predicted Label: Eyezy
Accuracy: 0.8545887961859356
Precision: 0.8613776961859356
Recall: 0.8545887961859356
F1-Score: 0.8579698166803437
Predicted Label: All Tracker Parent Control
Accuracy: 0.8535714285714285
Precision: 0.8603603285714285
Recall: 0.8535714285714285
F1-Score: 0.8569524331226767
Predicted Label: Eyezy
Accuracy: 0.8537455410225921
Precision: 0.8605344410225921
Recall: 0.8537455410225921
F1-Score: 0.8571265483050394
Predicted Label: Eyezy
Accuracy: 0.8539192399049881
Precision: 0.8607081399049881
Recall: 0.8539192399049881
F1-Score: 0.8573002499110417
Predicted Label: Eyezy
Accuracy: 0.8540925266903915
Precision: 0.8608814266903915
Recall: 0.8540925266903915
F1-Score: 0.8