In [1]:
# Importing Necessary Libraries
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn import preprocessing
from numpy.random import randn, randint
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, LeakyReLU, BatchNormalization, Input
from tensorflow.keras.models import Sequential, Model
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam

In [2]:
# Mounting Google Drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Reading the Trojan Dataset
df = pd.read_csv('/content/drive/MyDrive/DataSets/Trojan_Detection.csv')

In [7]:
df.head()

Unnamed: 0.1,Unnamed: 0,Flow ID,Source IP,Source Port,Destination IP,Destination Port,Protocol,Timestamp,Flow Duration,Total Fwd Packets,...,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,73217,10.42.0.42-121.14.255.84-49975-80-6,10.42.0.42,49975,121.14.255.84,80,6,17/07/2017 01:18:33,10743584,4,...,20,0.0,0.0,0,0,0.0,0.0,0,0,Trojan
1,72089,172.217.6.226-10.42.0.42-443-49169-17,10.42.0.42,49169,172.217.6.226,443,17,17/07/2017 10:25:25,254217,6,...,20,0.0,0.0,0,0,0.0,0.0,0,0,Trojan
2,96676,10.42.0.1-10.42.0.42-53-37749-17,10.42.0.42,37749,10.42.0.1,53,17,30/06/2017 07:16:12,1023244,1,...,32,0.0,0.0,0,0,0.0,0.0,0,0,Benign
3,42891,10.42.0.1-10.42.0.42-53-41352-17,10.42.0.42,41352,10.42.0.1,53,17,13/07/2017 03:48:44,286483,1,...,20,0.0,0.0,0,0,0.0,0.0,0,0,Trojan
4,169326,10.42.0.151-107.22.241.77-44353-443-6,10.42.0.151,44353,107.22.241.77,443,6,5/7/2017 10:47,65633087,12,...,32,322594.0,0.0,322594,322594,60306983.0,0.0,60306983,60306983,Benign


In [5]:
# To print all the classes in Label Column
label = df[' Label']

for label in label.unique():
    print(label)

Trojan
Benign


In [None]:
df = df.dropna()
df.drop(["Unnamed: 0"], axis = 1).values

In [8]:
# Taking only Trojan Class Dataframes
# Removing Benign Class Dataframes
df = df[df[" Label"] != 'Benign']

In [9]:
# To print all the classes in Label Column
label = df[' Label']

for label in label.unique():
    print(label)

Trojan


In [10]:
df.columns

Index(['Unnamed: 0', 'Flow ID', ' Source IP', ' Source Port',
       ' Destination IP', ' Destination Port', ' Protocol', ' Timestamp',
       ' Flow Duration', ' Total Fwd Packets', ' Total Backward Packets',
       'Total Length of Fwd Packets', ' Total Length of Bwd Packets',
       ' Fwd Packet Length Max', ' Fwd Packet Length Min',
       ' Fwd Packet Length Mean', ' Fwd Packet Length Std',
       'Bwd Packet Length Max', ' Bwd Packet Length Min',
       ' Bwd Packet Length Mean', ' Bwd Packet Length Std', 'Flow Bytes/s',
       ' Flow Packets/s', ' Flow IAT Mean', ' Flow IAT Std', ' Flow IAT Max',
       ' Flow IAT Min', 'Fwd IAT Total', ' Fwd IAT Mean', ' Fwd IAT Std',
       ' Fwd IAT Max', ' Fwd IAT Min', 'Bwd IAT Total', ' Bwd IAT Mean',
       ' Bwd IAT Std', ' Bwd IAT Max', ' Bwd IAT Min', 'Fwd PSH Flags',
       ' Bwd PSH Flags', ' Fwd URG Flags', ' Bwd URG Flags',
       ' Fwd Header Length', ' Bwd Header Length', 'Fwd Packets/s',
       ' Bwd Packets/s', ' Min Packet Len

In [11]:
# Modifying the values so that it can fit with the normal data
number = preprocessing.LabelEncoder()

df["Flow ID"] = number.fit_transform(df["Flow ID"])
df[" Source IP"] = number.fit_transform(df[" Source IP"])
df[" Destination IP"] = number.fit_transform(df[" Destination IP"])
df[" Timestamp"] = number.fit_transform(df[" Timestamp"])

df.head()

Unnamed: 0.1,Unnamed: 0,Flow ID,Source IP,Source Port,Destination IP,Destination Port,Protocol,Timestamp,Flow Duration,Total Fwd Packets,...,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,73217,27129,7,49975,252,80,6,7173,10743584,4,...,20,0.0,0.0,0,0,0.0,0.0,0,0,Trojan
1,72089,38590,7,49169,635,443,17,10145,254217,6,...,20,0.0,0.0,0,0,0.0,0.0,0,0,Trojan
3,42891,4342,7,41352,4,53,17,789,286483,1,...,20,0.0,0.0,0,0,0.0,0.0,0,0,Trojan
5,34510,16212,6,6021,4,53,17,410,251336,1,...,20,0.0,0.0,0,0,0.0,0.0,0,0,Trojan
6,59506,30810,7,38871,2103,443,6,2025,3096,3,...,20,0.0,0.0,0,0,0.0,0.0,0,0,Trojan


In [12]:
# Keeping Label column in a different variable
X = df.drop([" Label"], axis = 1).values
y = df[" Label"].values

In [13]:
# Standardize Features by removing the Mean and Scaling to Unit Variance
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [14]:
# Data Spliting
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, random_state=0, test_size=0.2)

In [15]:
# Defining Dimensions and Hyperparameters
latent_dim = 128
n_epochs = 15
n_batch = 128

data_size = X_train.shape[0]
data_dim = X_train.shape[1]

In [16]:
# Defining the Generator Model
def build_generator(latent_dim, output_dim):
    model = Sequential()

    model.add(Dense(128, input_dim=latent_dim))
    model.add(BatchNormalization())  # Adding BatchNormalization layer
    model.add(LeakyReLU(alpha=0.2))  # Adding LeakyReLU activation

    model.add(Dense(256))
    model.add(BatchNormalization())  # Adding BatchNormalization layer
    model.add(LeakyReLU(alpha=0.2))  # Adding LeakyReLU activation

    model.add(Dense(output_dim, activation='linear'))  # Output layer
    return model

# Defining the Discriminator Model
def build_discriminator(input_dim):
    model = Sequential()

    model.add(Dense(128, input_dim=input_dim))
    model.add(BatchNormalization())  # Adding BatchNormalization layer
    model.add(LeakyReLU(alpha=0.2))  # Adding LeakyReLU activation

    model.add(Dense(256))
    model.add(BatchNormalization())  # Adding BatchNormalization layer
    model.add(LeakyReLU(alpha=0.2))  # Adding LeakyReLU activation

    model.add(Dense(128))  # Adding another hidden layer with 128 units
    model.add(BatchNormalization())
    model.add(LeakyReLU(alpha=0.2))

    model.add(Dense(1, activation='sigmoid'))  # Output layer with 1 unit (binary classification)

    optimizer = Adam(learning_rate=0.0002, beta_1=0.5)

    model.compile(loss='binary_crossentropy', optimizer=optimizer)
    return model

In [17]:
# Creating the Generator, Discriminator and GAN Models
generator = build_generator(latent_dim, data_dim)
discriminator = build_discriminator(data_dim)

gan = Sequential([generator, discriminator])

# Freezing Discriminator Weights during GAN Training
discriminator.trainable = False

gan.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0002, beta_1=0.5))

In [18]:
# Training loop for GAN
batches_per_epoch = int(data_size / n_batch)
n_steps = batches_per_epoch * n_epochs

In [20]:
for i in range(n_steps):

    # Training Discriminator
    ix = np.random.randint(0, data_size, n_batch)
    ix = np.clip(ix, 0, len(X_train) - 1)

    real_batch = X_train[ix]
    real_labels = np.ones((n_batch, 1))

    noise = randn(latent_dim * n_batch).reshape(n_batch, latent_dim)
    generated_data = generator.predict(noise)
    generated_labels = np.zeros((n_batch, 1))

    d_loss_real = discriminator.train_on_batch(real_batch, real_labels)
    d_loss_fake = discriminator.train_on_batch(generated_data, generated_labels)

    # Training Generator
    noise = randn(latent_dim * n_batch).reshape(n_batch, latent_dim)
    generated_labels = np.ones((n_batch, 1))

    g_loss = gan.train_on_batch(noise, generated_labels)

    # Printing Progress
    if (i + 1) % batches_per_epoch == 0:
        print(f"Step {i + 1}/{n_steps} | D Loss Real: {d_loss_real:.4f} | D Loss Fake: {d_loss_fake:.4f} | G Loss: {g_loss:.4f}")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Step 3962/8490 | D Loss Real: 0.0064 | D Loss Fake: 0.0043 | G Loss: 0.0000
Step 4528/8490 | D Loss Real: 0.0018 | D Loss Fake: 0.0021 | G Loss: 0.0000
Step 5094/8490 | D Loss Real: 0.0056 | D Loss Fake: 0.0023 | G Loss: 0.0014
Step 5660/8490 | D Loss Real: 0.0009 | D Loss Fake: 0.0013 | G Loss: 0.0000
Step 6226/8490 | D Loss Real: 0.0008 | D Loss Fake: 0.0005 | G Loss: 0.0000
Step 6792/8490 | D Loss Real: 0.0003 | D Loss Fake: 0.0003 | G Loss: 0.0000
Step 7358/8490 | D Loss Real: 0.0004 | D Loss Fake: 0.0002 | G Loss: 0.0000
Step 7924/8490 | D Loss Real: 0.0003 | D Loss Fake: 0.0012 | G Loss: 0.0000
Step 8490/8490 | D Loss Real: 0.0003 | D Loss Fake: 0.0006 | G Loss: 0.0000
