In [None]:
# Mount Google Drive under /content/drive/ so the dataset paths used later in
# this notebook resolve. google.colab is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
cd /content/drive/MyDrive/HorusEye

/content/drive/MyDrive/HorusEye


In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,accuracy_score
from sklearn.metrics import confusion_matrix, roc_auc_score, average_precision_score
from sklearn.metrics import f1_score
from fcntl import F_SETFL
import pickle
# from re import T
import time
import torch
import torch.utils.data as Data
from sklearn import preprocessing
from sklearn.metrics import roc_curve, auc, precision_recall_curve, classification_report
# from thop import clever_format
# from thop import profile
from torch import nn, optim
import random
from queue import Queue
import warnings
# Silence every Python warning for the rest of the session (deprecation notices included).
warnings.filterwarnings("ignore")
# Module-level MSE loss. isolationTree.label_tree reads this global to score leaf
# reconstructions; train_autoencoder defines its own local `criterion` (BCELoss).
criterion = nn.MSELoss()
# Shared min-max scaler: fitted inside train_autoencoder and reused, already fitted,
# by isolationTree.label_tree and test_ae.
scaler = preprocessing.MinMaxScaler()

# Autoencoder Model

In [None]:
class DilatedSeparableConv(nn.Module):
    """Depthwise-separable 1-D convolution with a configurable dilation.

    A grouped (one filter per input channel) 3-tap dilated convolution is
    followed by a 1x1 convolution that mixes channels. Because ``padding``
    equals ``dilation``, the sequence length is left unchanged.
    """

    def __init__(self, in_channels, out_channels, dilation):
        super().__init__()
        # Layers are created depthwise-first so parameter initialisation draws
        # from the RNG in the same order as before.
        self.depthwise = nn.Conv1d(
            in_channels,
            in_channels,
            kernel_size=3,
            padding=dilation,
            dilation=dilation,
            groups=in_channels,
        )
        self.pointwise = nn.Conv1d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        """Run the per-channel filter, then the 1x1 channel mix."""
        return self.pointwise(self.depthwise(x))

class Autoencoder(nn.Module):
    """1-D convolutional autoencoder used to score reconstruction error.

    The encoder stacks three dilated separable convolutions (dilations 1, 2, 4),
    each followed by ReLU and a MaxPool1d(2), then projects to ``latent_dim``
    channels. The decoder mirrors it with transposed convolutions and ends in a
    Sigmoid, so reconstructions lie in [0, 1].

    Args:
        input_dim: accepted for interface compatibility; it is not read here.
        latent_dim: number of channels in the bottleneck.

    Note:
        The layer sizes are fixed. With a length-12 input the lengths go
        12 -> 6 -> 3 -> 1 through the encoder and 1 -> 2 -> 5 -> 12 through
        the decoder, so input and output shapes match.
    """

    def __init__(self, input_dim, latent_dim):
        super().__init__()
        # Re-seed before any layer is created so the initial weights are
        # reproducible. This resets the global torch RNG as a side effect.
        torch.manual_seed(42)
        torch.cuda.manual_seed(42)

        encoder_layers = [
            DilatedSeparableConv(1, 16, dilation=1),
            nn.ReLU(True),
            nn.MaxPool1d(2),
            DilatedSeparableConv(16, 32, dilation=2),
            nn.ReLU(True),
            nn.MaxPool1d(2),
            DilatedSeparableConv(32, 64, dilation=4),
            nn.ReLU(True),
            nn.MaxPool1d(2),
            nn.Conv1d(64, latent_dim, kernel_size=1),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = [
            nn.ConvTranspose1d(latent_dim, 64, kernel_size=1),
            nn.ReLU(True),
            nn.ConvTranspose1d(64, 32, kernel_size=4, stride=2, padding=1),
            nn.ReLU(True),
            nn.ConvTranspose1d(32, 16, kernel_size=5, stride=2, padding=1),
            nn.ReLU(True),
            nn.ConvTranspose1d(16, 1, kernel_size=6, stride=2, padding=1),
            nn.Sigmoid(),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` to the bottleneck and decode it back."""
        return self.decoder(self.encoder(x))

In [None]:
# #new autoencoder to try that 4 attacks for better f1
# class DilatedSeparableConv(nn.Module):
#     def __init__(self, in_channels, out_channels, dilation):
#         super(DilatedSeparableConv, self).__init__()
#         self.depthwise = nn.Conv1d(in_channels, in_channels, kernel_size=3, padding=dilation, dilation=dilation, groups=in_channels)
#         self.pointwise = nn.Conv1d(in_channels, out_channels, kernel_size=1)

#     def forward(self, x):
#         x = self.depthwise(x)
#         x = self.pointwise(x)
#         return x

# class Autoencoder(nn.Module):
#     def __init__(self, input_dim, latent_dim):
#         super(Autoencoder, self).__init__()
#         torch.manual_seed(42)
#         torch.cuda.manual_seed(42)
#         # Encoder
#         self.encoder = nn.Sequential(
#             DilatedSeparableConv(1, 16, dilation=1),
#             nn.ReLU(True),
#             DilatedSeparableConv(16, 32, dilation=2),
#             nn.ReLU(True),
#             nn.MaxPool1d(2),
#             DilatedSeparableConv(32, 64, dilation=4),
#             nn.ReLU(True),
#             nn.MaxPool1d(2),
#             DilatedSeparableConv(64, 128, dilation=8),  # Additional layer
#             nn.ReLU(True),
#             nn.MaxPool1d(2),
#             nn.Conv1d(128, latent_dim, kernel_size=1)  # Reducing to latent dimension
#         )

#         # Decoder
#         self.decoder = nn.Sequential(
#             nn.ConvTranspose1d(latent_dim, 128, kernel_size=1),
#             nn.ReLU(True),
#             nn.ConvTranspose1d(128, 64, kernel_size=5, stride=2, padding=1),
#             nn.ReLU(True),
#             nn.ConvTranspose1d(64, 32, kernel_size=5, stride=2, padding=1),
#             nn.ReLU(True),
#             nn.ConvTranspose1d(32, 16, kernel_size=5, stride=1, padding=1),
#             nn.ReLU(True),
#             nn.ConvTranspose1d(16, 1, kernel_size=6, stride=1, padding=1),
#             nn.Sigmoid()  # Assuming input is normalized to [0, 1]
#         )

#     def forward(self, x):
#         x = self.encoder(x)
#         x = self.decoder(x)
#         return x


# Training Autoencoder

In [None]:
def train_autoencoder(df_normal_train, df_normal_eval, df_attack_eval,
                      num_epochs=1000, batch_size=512, thresholds=(0.001,)):
    """Train the autoencoder on normal traffic and report detection metrics.

    The module-level ``scaler`` is (re)fitted on the training rows as a side
    effect, so later callers of ``scaler.transform`` see this fit.

    Args:
        df_normal_train: normal rows used for training.
        df_normal_eval: normal rows used for evaluation (labelled 0).
        df_attack_eval: attack rows used for evaluation (labelled 1).
        num_epochs: number of passes over the training rows.
        batch_size: rows per optimisation step.
        thresholds: RMSE cut-offs to evaluate; a row whose reconstruction RMSE
            exceeds the cut-off is predicted as an attack.

    Returns:
        The trained ``Autoencoder``, in eval mode, left on the device it was
        trained on (CUDA when available, otherwise CPU).
    """
    X_train = scaler.fit_transform(df_normal_train.values)
    X_test = scaler.transform(pd.concat([df_normal_eval, df_attack_eval]).values)
    actual = torch.cat([torch.zeros(df_normal_eval.shape[0]), torch.ones(df_attack_eval.shape[0])])

    input_dim = X_train.shape[1]
    encoding_dim = 8

    # Fall back to CPU instead of crashing when no GPU is attached.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    autoencoder = Autoencoder(input_dim, encoding_dim).to(device)
    criterion = nn.BCELoss()  # local on purpose: the global `criterion` is MSE
    optimizer = optim.Adam(autoencoder.parameters(), lr=1e-3)

    for epoch in range(num_epochs):
        epoch_loss = 0.0
        for start in range(0, len(X_train), batch_size):
            # (rows, features) -> (rows, 1, features): one channel per row.
            batch = torch.tensor(X_train[start:start + batch_size],
                                 dtype=torch.float, device=device).unsqueeze(1)
            outputs = autoencoder(batch)
            loss = criterion(outputs, batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # .item() detaches the value; summing the tensors themselves kept
            # every batch's autograd graph referenced until the epoch ended.
            epoch_loss += loss.item()

        if (epoch+1) % 200 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

    autoencoder.eval()
    with torch.no_grad():
        test_batch = torch.tensor(X_test, dtype=torch.float, device=device).unsqueeze(1)
        decoded_data = autoencoder(test_batch).cpu().numpy()
    # Drop only the channel axis so a single-row evaluation keeps its row axis.
    reconstruction = decoded_data.squeeze(axis=1)
    rmse = np.sqrt(np.mean(np.power(X_test - reconstruction, 2), axis=1))

    for thres in thresholds:
        print("-------------i",thres)
        predicted = pd.Series(np.where(rmse > thres, 1, 0),dtype="float64")
        f1 = f1_score(actual, predicted)
        print(confusion_matrix(actual, predicted))
        print("\n Classification report")
        print(classification_report(actual, predicted))
        print('F1 Score: ', f1)

    return autoencoder

# Isolation Forest from scratch

In [None]:
class Node:
    """One vertex of an isolation tree.

    ``data`` holds the rows that reached this vertex and ``depth`` its distance
    from the root. ``col``/``val`` (the split feature and threshold) and
    ``label`` start as ``None`` and are filled in by the tree that owns the node.
    """

    def __init__(self, data, left=None, right=None, depth=0):
        self.data, self.depth = data, depth
        self.left, self.right = left, right
        # Populated later by the owning tree.
        self.label = self.val = self.col = None

class isolationTree:
    """A randomly split isolation tree whose leaves are labelled by an autoencoder.

    The tree is built once in ``__init__``. ``label_tree`` later assigns each
    leaf a 0/1 label from the reconstruction error of ``self.model`` (set by the
    caller), and ``pred_tree`` routes a row to a leaf and reports its label and
    path length.
    """

    # Class-level (shared by every tree): RMSE of each leaf scored so far.
    # label_tree appends to it; nothing in this class clears it.
    rmse_scores = []

    def __init__(self, data, depth=0, max_depth=0, thres = None):
        # NOTE(review): max_depth is stored but build_tree never enforces it.
        self.max_depth = max_depth
        self.thres = thres
        self.model = None      # autoencoder, assigned by the caller before label_tree
        self.num_rows = 50     # rows resampled from a leaf when scoring it
        self.root = self.build_tree(data, depth)

    def build_tree(self, data, depth):
        """Grow the tree breadth-first and return its root (``None`` for no rows).

        Each node with more than one row picks a random column and a uniform
        random split value between that column's min and max. If the column is
        constant, up to 9 consecutive columns are tried; if all are constant
        the node is left as a multi-row leaf.
        """
        if len(data) == 0:
            return None
        root = Node(data, depth=depth)
        if len(data) == 1:
            return root

        n_cols = data.shape[1]
        q = Queue()
        q.put(root)

        while not q.empty():
            current_node = q.get()
            node_data = current_node.data
            if len(node_data) <= 1:
                continue

            split_column = random.randint(0, n_cols - 1)
            column = low = high = None
            for _ in range(9):
                # min/max are computed once per candidate column with the
                # vectorised Series methods and reused for the split below.
                column = node_data.iloc[:, split_column]
                low, high = column.min(), column.max()
                if low != high:
                    break
                split_column = (split_column + 1) % n_cols
            else:
                # Nine constant columns in a row: give up on splitting this node.
                continue

            split_value = random.uniform(low, high)
            left_data = node_data[column <= split_value]
            right_data = node_data[column > split_value]
            current_node.val = split_value
            current_node.col = split_column

            # Single-row children are final leaves and are not queued again.
            if len(left_data) > 0:
                left_node = Node(left_data, depth=current_node.depth + 1)
                current_node.left = left_node
                if len(left_data) != 1:
                    q.put(left_node)

            if len(right_data) > 0:
                right_node = Node(right_data, depth=current_node.depth + 1)
                current_node.right = right_node
                if len(right_data) != 1:
                    q.put(right_node)
        return root

    def label_tree(self, root):
        """Recursively label every leaf below ``root`` as 0 or 1.

        A leaf's rows are resampled with replacement to ``self.num_rows`` rows,
        scaled with the global ``scaler``, perturbed with Gaussian noise
        (std 0.01) and passed through ``self.model``. The leaf gets label 1 when
        the reconstruction RMSE (global ``criterion``) exceeds ``self.thres``.
        """
        if root.left is None and root.right is None:
            sampled_df = root.data.sample(n=self.num_rows, replace=True, random_state=42)

            noise = np.random.normal(0, 0.01, sampled_df.shape)
            scaled = scaler.transform(sampled_df) + noise
            batch = torch.tensor(scaled, dtype=torch.float32).unsqueeze(1)
            # Pure inference: skip autograd bookkeeping for every leaf.
            with torch.no_grad():
                pred = self.model(batch)
                rmse = torch.sqrt(criterion(pred, batch))
            print(rmse.item())
            isolationTree.rmse_scores.append(rmse.item())
            root.label = 1 if rmse > self.thres else 0
        else:
            if root.left:
                self.label_tree(root.left)
            if root.right:
                self.label_tree(root.right)

    def pred_tree(self, data, node):
        """Route ``data`` down the tree and return ``(label, path_length)``.

        A missing child (``node is None``) counts as anomalous: ``(1, 0)``.
        Values that are not ``<=`` the split value go right; this includes NaN,
        which previously matched neither branch and made the method return
        ``None``.
        """
        if node is None:
            return 1, 0
        if node.left is None and node.right is None:
            return node.label, 0

        child = node.left if data[node.col] <= node.val else node.right
        label, path_len = self.pred_tree(data, child)
        return label, 1 + path_len


class isolationForest(nn.Module):
    """Ensemble of ``isolationTree`` objects with autoencoder-labelled leaves.

    Args:
        data: DataFrame the trees sample their rows from.
        n_trees: number of trees built by ``fit``.
        max_depth: forwarded to each ``isolationTree``.
        subspace: rows per tree. Values > 1 are an absolute row count; values
            <= 1 are a fraction of ``data``.
        model: optional model handle stored on the forest.
    """

    def __init__(self, data, n_trees=100, max_depth=5, subspace=256, model=None):
        super().__init__()
        self.data = data
        self.n_trees = n_trees
        self.max_depth = max_depth
        self.subspace = subspace
        self.model = model
        self.thres = 0.022
        self.avg_path_len = self._expected_path_length(self._sample_size())
        self.trees = []

    def _sample_size(self):
        """Rows each tree actually receives, resolving a fractional ``subspace``."""
        if self.subspace > 1 or self.data is None:
            return self.subspace
        # Mirrors DataFrame.sample(frac=...), which rounds frac * len(data).
        return int(round(self.subspace * len(self.data)))

    @staticmethod
    def _expected_path_length(n):
        """Normalising path length for a sample of ``n`` rows (formula unchanged)."""
        return 2 * (np.log(n - 1) + 0.5772) - 2 * (n - 1) / n

    def fit(self):
        """Build ``n_trees`` trees, each on a fresh random sample of ``data``."""
        # Start from an empty forest so calling fit() again (e.g. re-running a
        # cell) cannot leave more trees than the n_trees the averages divide by.
        self.trees = []
        for _ in range(self.n_trees):
            if self.subspace > 1:
                subdata = self.data.sample(self.subspace)
            else:
                subdata = self.data.sample(frac=self.subspace)
            self.trees.append(
                isolationTree(subdata, depth=0, max_depth=self.max_depth, thres=self.thres)
            )

    def label(self, model):
        """Attach ``model`` to every tree and label all leaves with it.

        After each tree the 0.8-quantile of the leaf RMSE scores collected so
        far is printed as a suggested threshold.
        """
        # rmse_scores is shared at class level; drop scores left over from
        # previously labelled forests so the printed quantile describes only
        # this forest.
        isolationTree.rmse_scores.clear()
        for tree in self.trees:
            tree.model = model
        for tree in self.trees:
            tree.label_tree(tree.root)
            print("--------------predicted threshold : ", np.quantile(isolationTree.rmse_scores,(1-0.2)))

    def pred(self, data, alpha):
        """Predict -1 (anomaly) or 1 (normal) for every row of ``data``.

        The decision blends the mean leaf label (weight ``alpha``) with the
        path-length score ``2 ** (-avg_len / avg_path_len)`` (weight
        ``1 - alpha``) and flags the row when the blend exceeds 0.5.
        """
        results = []
        for _, row in data.iterrows():
            votes = [tree.pred_tree(row, tree.root) for tree in self.trees]
            agg_label = sum(v[0] for v in votes) / self.n_trees
            avg_len = sum(v[1] for v in votes) / self.n_trees

            iso_score = 2 ** (- avg_len / self.avg_path_len)
            print(f"len: {avg_len} score: {iso_score}")
            results.append(-1 if alpha * agg_label + (1 - alpha) * iso_score > 0.5 else 1)
        return results


In [None]:
def pred(model, data, alpha):
    """Predict -1 (anomaly) or 1 (normal) for every row of ``data``.

    Args:
        model: fitted and labelled forest exposing ``trees``, ``n_trees`` and
            ``avg_path_len``.
        data: DataFrame whose column labels match the split columns stored in
            the trees.
        alpha: weight of the mean leaf label; ``1 - alpha`` weights the
            path-length term.

    Returns:
        A list with one -1/1 entry per row, in row order.
    """
    results = []
    for _, row in data.iterrows():
        votes = [tree.pred_tree(row, tree.root) for tree in model.trees]
        agg_label = sum(v[0] for v in votes) / model.n_trees
        avg_len = sum(v[1] for v in votes) / model.n_trees

        # Path-length score divided by (1 - 0.1475) and subtracted from 1.
        # NOTE(review): the meaning of the 0.1475 offset is not documented here.
        iso_score = 1 - 2 ** (- avg_len / model.avg_path_len - np.log(1-0.1475) / np.log(2))
        results.append(-1 if alpha * agg_label + (1 - alpha) * iso_score > 0.5 else 1)
    return results

# Training our isolation forest model

In [None]:
def isolation_train(x_train,attack_x_train):
    """Fit the custom isolation forest on normal plus attack training rows.

    The two frames are stacked, NaNs become 0 and the columns are renamed to
    0..k-1 so the trees can address features by position. The inputs are not
    modified.

    Returns:
        The fitted ``isolationForest`` (50 trees, 200 rows per tree).
    """
    combined = pd.concat([x_train, attack_x_train]).fillna(0)
    combined.columns = range(combined.shape[1])

    forest = isolationForest(combined, n_trees=50, max_depth=8, subspace=200)
    forest.fit()
    print("Isolation Forest training completed\n")
    return forest

# Training sklearn's isolation forest model

In [None]:
def sk_iforest(a):
    """Fit scikit-learn's IsolationForest as a baseline, report metrics, pickle it.

    Reads the notebook globals ``x_train``, ``attack_x_train``, ``x_eval`` and
    ``y_eval``. The fitted model is written to ``'iForest' + a + '.pkl'`` in the
    current directory.

    Args:
        a: attack name, used only to build the pickle file name.
    """
    baseline = IsolationForest(n_estimators=50, max_samples=200, random_state=114514,
                               contamination=0.2, n_jobs=8)
    train_frame = pd.concat([x_train, attack_x_train]).fillna(0)
    baseline.fit(train_frame)

    # predict() returns 1 / -1; the report below uses 0 / 1 with 1 = flagged.
    raw_pred = baseline.predict(x_eval)
    y_pred_eval = np.where(raw_pred == -1, 1, 0)

    report = classification_report(y_true=y_eval, y_pred=y_pred_eval)
    print("sklearn Isolation Forest")
    print(report)
    print("Confusion Matrix")
    print(confusion_matrix(y_eval, y_pred_eval))
    # NOTE(review): roc-auc / pr-auc are computed from hard 0/1 predictions.
    for heading, metric in (
        ("\n F1 Score", f1_score),
        ("\n roc-auc", roc_auc_score),
        ("\n pr-auc", average_precision_score),
    ):
        print(heading)
        print(metric(y_eval, y_pred_eval))

    with open('iForest'+a+'.pkl', 'wb') as f:
        pickle.dump(baseline, f)

# Executing our training autoencoder function

In [None]:
def autoencode():
    """Seed torch and train the autoencoder on the first 12 columns of the globals.

    Reads the notebook globals ``df_norm_train``, ``df_normal_eval`` and
    ``df_attack_eval``.

    Returns:
        The trained autoencoder returned by ``train_autoencoder``.
    """
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(42)

    first_twelve = slice(None, 12)
    trained = train_autoencoder(
        df_norm_train.iloc[:, first_twelve],
        df_normal_eval.iloc[:, first_twelve],
        df_attack_eval.iloc[:, first_twelve],
    )
    print("Autoencoder training completed\n")
    # with open('autoencoder.pkl', 'wb') as f:
    #     pickle.dump(auto, f)
    return trained

# Executing our training model function

In [None]:
def our_model(auto,a):
    """Train, label and evaluate the autoencoder-guided isolation forest.

    Reads the notebook globals ``x_train``, ``attack_x_train``, ``x_eval`` and
    ``y_eval``. The labelled forest is pickled to ``'iGuard' + a + '.pkl'`` in
    the current directory.

    Args:
        auto: trained autoencoder; it is moved to the CPU before labelling.
        a: attack name, used only to build the pickle file name.
    """
    clf_model = isolation_train(x_train,attack_x_train)
    auto = auto.cpu()
    clf_model.label(auto)
    print("Isolation Forest labeling completed\n")

    # Work on a copy of the shared evaluation frame: the NaN fill and the
    # positional column names are needed only by the custom trees, and the
    # global x_eval keeps its original column names for other cells.
    eval_features = x_eval.fillna(0)
    eval_features.columns = range(eval_features.shape[1])

    # alpha=1: the decision uses only the autoencoder-derived leaf labels.
    raw_pred = np.array(pred(clf_model, eval_features, 1))
    # pred() yields 1 / -1; the report below uses 0 / 1 with 1 = flagged.
    y_pred_eval = np.where(raw_pred == -1, 1, 0)

    print("Our Isolation Forest + Autoencoder")
    print("Confusion Matrix")
    print(confusion_matrix(y_eval, y_pred_eval))
    print("\n Classification report")
    print(classification_report(y_eval, y_pred_eval))
    print("\n F1 Score")
    print(f1_score(y_eval, y_pred_eval))
    print("\n roc-auc")
    print(roc_auc_score(y_eval, y_pred_eval))
    print("\n pr-auc")
    print(average_precision_score(y_eval, y_pred_eval))

    name = 'iGuard'+a+'.pkl'
    with open(name, 'wb') as f:
        pickle.dump(clf_model, f)

# Main Block

In [None]:
# Driver: for every attack type, build train/eval splits, run the sklearn
# baseline, train the autoencoder and evaluate the combined model.
# Only device_list[0] is read below.
device_list=['360_camera']
attack_list=['http_ddos','data_theft','keylogging','service_scan','tcp_ddos','mirai','os_scan','aidra','bashlite','mirai_router_filter','os_scan_router','port_scan_router','tcp_ddos_router','udp_ddos','udp_ddos_router']
# Seed every RNG the pipeline draws from (python, numpy, torch CPU and CUDA).
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)
# Used below to size the normal-traffic sample relative to n1.
contamination = 0.2
for a in attack_list:

    normal_path = "/content/drive/MyDrive/HorusEye/DataSets/Dataplane/Normal/" + device_list[0] + ".csv"
    attack_path = "/content/drive/MyDrive/HorusEye/DataSets/Dataplane/Anomaly/" + a + ".csv"
    df_normal_train_data = pd.read_csv(normal_path)
    df_attack_train = pd.read_csv(attack_path)
    # Drop three columns so that 12 feature columns plus 'class' remain.
    df_normal_train_data = df_normal_train_data.drop(columns=['tcp_udp','dst_port','n_packets'])
    df_attack_train = df_attack_train.drop(columns=['tcp_udp','dst_port','n_packets'])
    # Convert every cell to a Python int.
    df_normal_train_data = df_normal_train_data.applymap(int)
    df_attack_train = df_attack_train.applymap(int)
    datafetch = True
    n1 = 200
    n2 = 1000
    # Retry sampling with halved sizes until both frames are large enough.
    # NOTE(review): if contamination == -1 the loop body does nothing and
    # datafetch never becomes False, so this loop would not terminate.
    while(datafetch):
        if contamination != -1:
            try:
                # With n1=200 and contamination=0.2 this asks for 4000 normal rows.
                num = int((n1 / contamination - n1) / 0.2)
                temp = df_normal_train_data.sample(n=num, replace=False, random_state=20)
                # Normal rows NOT in the sample; they end up in the autoencoder training set.
                df_norm_train = df_normal_train_data.drop(temp.index)

                df_attack_train = df_attack_train.sample(n=n2, replace=False, random_state=20)
                df_normal_train_data = temp
                datafetch = False
            except ValueError as e:
                # Not enough rows for the requested sample: halve both sizes and retry.
                if "Cannot take a larger sample than population when 'replace=False'" in str(e):
                    n1 = int(n1/2)
                    n2 = int(n2/2)
                else:
                    raise e

    # 80/20 train/eval split of the sampled normal rows and of the attack rows.
    df_normal_train, df_normal_eval = train_test_split(df_normal_train_data, test_size=0.2, random_state=20)
    # autoencode() reads df_norm_train: the unsampled remainder plus the normal train split.
    df_norm_train = pd.concat([df_norm_train, df_normal_train])
    df_attack_train, df_attack_eval = train_test_split(df_attack_train, test_size=0.2, random_state=20)
    x_train, y_train = df_normal_train.drop(columns=['class']), df_normal_train['class']
    attack_x_train, attack_y_train = df_attack_train.drop(columns=['class']), df_attack_train['class']
    df_eval = pd.concat([df_normal_eval, df_attack_eval])
    x_eval, y_eval = df_eval.drop(columns=['class']), df_eval['class']
    # NOTE(review): this rebinds the builtin name `eval` for the whole notebook;
    # our_model reads this global.
    eval = x_eval
    print("------------------------------Attack : ",a, "--------------------------------\n")
    sk_iforest(a)
    #use this to train autoencoder also
    # no = int(0.01 * len(df_normal_train))
    # selected_samples = df_attack_train.sample(no, replace=False)
    # df_normal_train = df_normal_train.loc[selected_samples.index] = selected_samples
    auto = autoencode()
    # torch.save(auto.state_dict(), 'autoencoder_model.pth')
    # use this to load pretrained autoencoder
    # with open('autoencoder_final.pkl', 'rb') as f:
    #     auto = pickle.load(f)
    # auto = Autoencoder(12, 8)
    # auto.load_state_dict(torch.load('auto_85.pth'))
    # scaler.fit(df_normal_train.iloc[:,:12])
    our_model(auto,a)

# Optional testing trained autoencoder model

In [None]:
#function just to test the autoencoder
def test_ae(autoencoder, thresholds=(0.35,)):
    """Evaluate a trained autoencoder on the global evaluation frames.

    Reads the notebook globals ``df_normal_eval`` and ``df_attack_eval`` (first
    12 columns) and the already fitted global ``scaler``. Prints a confusion
    matrix, classification report and F1 score for every threshold.

    Args:
        autoencoder: trained model; it is switched to eval mode.
        thresholds: RMSE cut-offs; rows above the cut-off are predicted as attacks.
    """
    X_test = pd.concat([df_normal_eval.iloc[:,:12], df_attack_eval.iloc[:,:12]]).values
    actual = torch.cat([torch.zeros(df_normal_eval.shape[0]), torch.ones(df_attack_eval.shape[0])])

    X_test = scaler.transform(X_test)
    autoencoder.eval()
    # Build the batch on whatever device the model lives on; a CPU tensor fed
    # to a CUDA model raises a device-mismatch error.
    device = next(autoencoder.parameters()).device
    with torch.no_grad():
        batch = torch.tensor(X_test, dtype=torch.float, device=device).unsqueeze(1)
        decoded_data = autoencoder(batch).cpu().numpy()
    # Drop only the channel axis so a single-row evaluation keeps its row axis.
    reconstruction = decoded_data.squeeze(axis=1)
    rmse = np.sqrt(np.mean(np.power(X_test - reconstruction, 2), axis=1))

    for thres in thresholds:
        print("-------------i",thres)
        predicted = pd.Series(np.where(rmse > thres, 1, 0),dtype="float64")
        f1 = f1_score(actual, predicted)
        print(confusion_matrix(actual, predicted))
        print("\n Classification report")
        print(classification_report(actual, predicted))
        print('F1 Score: ', f1)

# Generating the dataset from .pcap files

In [None]:
!pip install scapy

In [None]:
from scapy.all import *
import os
import numpy as np
import pickle
import csv
from math import sqrt
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

def file_name_list(file_dir):
    """Recursively collect every file under ``file_dir`` whose extension is ``.pcap``.

    Each entry is ``"<directory>/<file name>"`` with the directory exactly as
    ``os.walk`` reports it, so the entries are already rooted at ``file_dir``.
    The extension match is case-sensitive.
    """
    return [
        "{}/{}".format(folder, entry)
        for folder, _subdirs, entries in os.walk(file_dir)
        for entry in entries
        if os.path.splitext(entry)[1] == ".pcap"
    ]

#features = ["n_packets", "size_total", "size_avg", "size_var", "size_std", "ipd_av", "ipd_min", "ipd_var", "ipd_std", "class"]
# Header row of the generated CSV files. extract_features fills the first 15
# positions in this order; extract_flows appends the class label as the last column.
features = ["n_packets", "size_total", "size_avg", "size_var", "size_std", "size_min", "size_max", "ipd_av", "ipd_min", "ipd_var", "ipd_std", "ipd_max", "flow_dur", "tcp_udp","dst_port","class"]

# Create the output directories on the mounted drive if they do not exist yet.
if not os.path.exists("/content/drive/MyDrive/HorusEye/DataSets/Dataplane/Normal"):
    os.makedirs("/content/drive/MyDrive/HorusEye/DataSets/Dataplane/Normal")
if not os.path.exists("/content/drive/MyDrive/HorusEye/DataSets/Dataplane/Anomaly"):
    os.makedirs("/content/drive/MyDrive/HorusEye/DataSets/Dataplane/Anomaly")



# Flow-splitting knobs read by extract_flows: the gap between packets (same unit
# as packet.time) and the packet count at which a flow is flushed.
idle_timeout = 0.01
pkt_thres = 3
# FIN = 0x01
# RST = 0x04

# Declared `global` in extract_flows; not read or written anywhere else in the visible code.
fls_train = 0
fls_test = 0

def extract_features(pkt_list):
    """Compute the 15 per-flow statistics for a non-empty list of packets.

    Positions follow the ``features`` header: n_packets, size_total, size_avg,
    size_var, size_std, size_min, size_max, ipd_av, ipd_min, ipd_var, ipd_std,
    ipd_max, flow_dur, tcp_udp, dst_port.

    Notes:
        * Inter-packet averages and variances divide by the packet count, not
          by the number of gaps.
        * For a single-packet flow there are no gaps, so ipd_min keeps its
          initial sentinel 9999999999 and ipd_var/ipd_std/ipd_max stay 0.
        * tcp_udp is 6 for TCP, 17 for UDP and 0 when the first packet has
          neither layer; dst_port is taken from the first packet.

    Args:
        pkt_list: packets of one flow in arrival order; each supports ``len()``,
            ``.time`` and scapy-style layer lookup.

    Returns:
        A list of 15 values in the order above.
    """
    feats = [0, 0, 0, 0, 0, 9999999999999, 0, 0, 9999999999,0,0,0,0,0,0]
    n_pkts = len(pkt_list)
    sizes = [len(pkt) for pkt in pkt_list]

    feats[0] = n_pkts                                                   # n_packets
    feats[1] = sum(sizes)                                               # size_total
    feats[2] = feats[1] / n_pkts                                        # size_avg
    feats[3] = sum([(size - feats[2]) ** 2 for size in sizes]) / n_pkts # size_var
    feats[4] = sqrt(feats[3])                                           # size_std
    feats[5] = min(sizes)                                               # size_min
    feats[6] = max(sizes)                                               # size_max

    flow_dur = pkt_list[-1].time - pkt_list[0].time
    feats[7] = flow_dur / n_pkts                                        # ipd_av
    for i in range(n_pkts - 1):
        gap = pkt_list[i+1].time - pkt_list[i].time
        feats[8] = min(gap, feats[8])                                   # ipd_min
        # Running sum of squared deviations; normalised after the loop.
        feats[9] = (abs(gap - feats[7]))**2 + feats[9]
        # Compare against the running maximum itself (slot 11), not the
        # variance accumulator in slot 9.
        feats[11] = max(gap, feats[11])                                 # ipd_max
    # Normalise the accumulated squared deviations (slot 9), not ipd_av (slot 7).
    feats[9] = feats[9] / n_pkts                                        # ipd_var
    feats[10] = sqrt(feats[9])                                          # ipd_std
    feats[12] = flow_dur                                                # flow_dur

    if TCP in pkt_list[0]:
        feats[-1] = pkt_list[0][TCP].dport
        feats[-2] = 6
    elif UDP in pkt_list[0]:
        feats[-1] = pkt_list[0][UDP].dport
        feats[-2] = 17

    return feats

def extract_flows(path, label,name):
    """Split every pcap under ``path`` into flows and write their features to CSV.

    Packets are grouped by (src IP, dst IP, src port, dst port, protocol). When
    a packet arrives for a known flow and the split condition holds, the
    packets collected so far are turned into one feature row and a new flow
    starts with the current packet. Flows still open at the end of a file are
    flushed as well.

    Side effects: rebinds the module-level ``X`` and ``y`` and writes
    ``/content/drive/MyDrive/HorusEye/DataSets/Dataplane/<name>.csv``.

    Args:
        path: directory searched recursively for ``.pcap`` files.
        label: class value appended to every row.
        name: output file name (without ``.csv``), relative to the Dataplane folder.
    """
    global fls_test, fls_train, idle_timeout,X,y, pkt_thres
    X = []
    y = []
    c = 0
    lst = file_name_list(path)
    print(lst)
    for f in lst:
        flows = {}
        c += 1
        print(c)
        # file_name_list already returns paths rooted at `path`; joining them
        # with `path` again pointed at a non-existent file for relative inputs.
        # The context manager closes the capture file when the loop ends.
        with PcapReader(f) as packets:
            for packet in packets:
                if IP in packet and (TCP in packet or UDP in packet):
                    # 5-tuple that identifies the flow this packet belongs to.
                    if TCP in packet:
                        flow_tuple = (packet[IP].src, packet[IP].dst, packet[TCP].sport, packet[TCP].dport, 'TCP')
                    else:
                        flow_tuple = (packet[IP].src, packet[IP].dst, packet[UDP].sport, packet[UDP].dport,'UDP')

                    if(flow_tuple in flows):
                        # if((len(flows[flow_tuple]) >= pkt_thres) and (packet.time - flows[flow_tuple][-1].time > idle_timeout or  (TCP in packet and (packet['TCP'].flags & FIN or packet['TCP'].flags & RST)))):
                        # NOTE(review): unlike the commented-out condition above, this
                        # one uses `or` and tests for ANY TCP flag, so a TCP packet with
                        # any flag set ends the current flow. Kept as-is; confirm intent.
                        if((len(flows[flow_tuple]) >= pkt_thres) or (packet.time - flows[flow_tuple][-1].time > idle_timeout or  (TCP in packet and (packet['TCP'].flags or packet['TCP'].flags)))):
                            X.append(extract_features(flows[flow_tuple]))
                            y.append(label)
                            flows[flow_tuple] = [packet]
                        elif(len(flows[flow_tuple]) > 0):
                            flows[flow_tuple].append(packet)
                    else:
                        flows[flow_tuple] = [packet]
        # Flush whatever is still buffered for this file.
        for flow_pkts in flows.values():
            X.append(extract_features(flow_pkts))
            y.append(label)
    # Append the class label as the last column of every row.
    for row, row_label in zip(X, y):
        row.append(row_label)
    file_name = "/content/drive/MyDrive/HorusEye/DataSets/Dataplane/"+name+".csv"
    with open(file_name, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(features)
        writer.writerows(X)
# for i in os.listdir("/content/drive/MyDrive/HorusEye/DataSets/HorusEye_Pcap/Pcap/Normal"):
#       flow = "/content/drive/MyDrive/HorusEye/DataSets/HorusEye_Pcap/Pcap/Normal/" + i
#       name = "Normal/"+i
#       extract_flows(flow,0,name)

# for i in os.listdir("/content/drive/MyDrive/HorusEye/DataSets/HorusEye_Pcap/Pcap/Anomaly"):
#       flow = "/content/drive/MyDrive/HorusEye/DataSets/HorusEye_Pcap/Pcap/Anomaly/" + i
#       name = "Anomaly/"+i
#       extract_flows(flow,1,name)

# for i in os.listdir("/content/drive/MyDrive/HorusEye/DataSets/HorusEye_Pcap/Pcap/robust/low_rate/tcp_ddos_0.01"):
#       flow = "/content/drive/MyDrive/HorusEye/DataSets/HorusEye_Pcap/Pcap/robust/low_rate/tcp_ddos_0.01/" + i
#       name = "robust/"+i
#       print(flow)
#       print(name)
#       extract_flows(flow,1,name)
