In [None]:
# !pip install tensorflow

In [None]:
import os
import time

import numpy as np
import pandas as pd
from joblib import dump
from matplotlib import pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.models import Sequential

In [None]:
# Load the KDD Cup dataframe from its pickle file (the file name suggests it is
# already preprocessed — TODO confirm which step produces it).
# NOTE(review): unpickling can execute arbitrary code; only load files from a
# trusted source.
df_kdd = pd.read_pickle("./data/KDDCUP/KDDCUP_DF_Preprocessed.pkl")

In [None]:
# Features are every column except the label; the label is kept as a
# one-column DataFrame (not a Series), which later cells flatten when needed.
X = df_kdd.drop(columns=['Attack Type'])
y = df_kdd.loc[:, ['Attack Type']]

In [None]:
# Split BEFORE scaling: the scaler's min/max must be learned from the training
# rows only, otherwise test-set statistics leak into training.
# A fixed random_state makes the 80/20 split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

sc = MinMaxScaler()
X_train = sc.fit_transform(X_train)
# The test rows reuse the training-set parameters, so individual values may
# fall slightly outside [0, 1].
X_test = sc.transform(X_test)
# Keep `X` bound to the scaled full feature matrix (an ndarray), as before,
# for any later cell that reads it.
X = sc.transform(X)

In [None]:
# Report the container type and shape of the full data and of each split.
for template, data in (
    ("X Type: {} Shape: {}", X),
    ("y Type: {} Shape: {}", y),
    ("X_train Type: {} Shape: {}", X_train),
    ("y_train Type: {} Shape: {}", y_train),
    ("X_test Type: {} Shape: {}", X_test),
    ("y_test Type: {} Shape: {}", y_test),
):
    print(template.format(type(data), data.shape))

In [None]:
# Fit a Gaussian Naive Bayes baseline and report how long training takes.
model_gnb = GaussianNB()
# perf_counter() is the clock intended for measuring elapsed intervals;
# time.time() is wall-clock time and can jump if the system clock is adjusted.
start_time = time.perf_counter()
# The label is a one-column DataFrame; flatten it to the 1-D array fit() expects.
model_gnb.fit(X_train, y_train.values.ravel())
end_time = time.perf_counter()
print("Training time: ", end_time - start_time)

In [None]:
# Mean accuracy of the Naive Bayes model on the training and the held-out split.
for caption, X_split, y_split in (
    ("Train score: ", X_train, y_train),
    ("Test score: ", X_test, y_test),
):
    print(caption, model_gnb.score(X_split, y_split))

In [None]:
# Fit a 20-tree random forest and report how long training takes.
# random_state pins the bootstrap/feature sampling so scores and the saved
# model are reproducible from run to run.
model_rfc = RandomForestClassifier(n_estimators=20, random_state=42)
# perf_counter() is the clock intended for measuring elapsed intervals;
# time.time() is wall-clock time and can jump if the system clock is adjusted.
start_time = time.perf_counter()
# The label is a one-column DataFrame; flatten it to the 1-D array fit() expects.
model_rfc.fit(X_train, y_train.values.ravel())
end_time = time.perf_counter()
print("Training time: ", end_time - start_time)

In [None]:
# Mean accuracy of the random forest on the training and the held-out split.
for caption, X_split, y_split in (
    ("Train score: ", X_train, y_train),
    ("Test score: ", X_test, y_test),
):
    print(caption, model_rfc.score(X_split, y_split))

In [None]:
def create_sequences(data, seq_length):
    """Build overlapping sliding windows along the first axis of ``data``.

    Window ``i`` is ``data[i:i + seq_length]``; consecutive windows are shifted
    by one row, so ``len(data) - seq_length + 1`` windows are produced.

    Args:
        data: Array-like that is sliced along its first axis.
        seq_length: Number of consecutive rows in each window.

    Returns:
        np.ndarray of shape ``(n_windows, seq_length, *data.shape[1:])``.
        When ``data`` has fewer than ``seq_length`` rows the result has zero
        windows but still carries the window and feature dimensions, so
        downstream shape-dependent code keeps working.
    """
    data = np.asarray(data)
    n_windows = len(data) - seq_length + 1
    if n_windows <= 0:
        # np.array([]) would collapse to shape (0,); keep the full rank instead.
        return np.empty((0, seq_length) + data.shape[1:], dtype=data.dtype)
    return np.array([data[start:start + seq_length] for start in range(n_windows)])

In [None]:
seq_length = 40  # Specify your sequence length here

# Window the feature rows, then pin the layout to
# (n_windows, seq_length, n_features) for the LSTM input.
# NOTE(review): train_test_split shuffles rows by default, so the rows inside a
# window are not necessarily consecutive records — confirm this is intended.
X_train_seq = create_sequences(X_train, seq_length).reshape(
    (-1, seq_length, X_train.shape[1])
)
X_test_seq = create_sequences(X_test, seq_length).reshape(
    (-1, seq_length, X_test.shape[1])
)

# Dropping the first seq_length - 1 labels pairs each window with the label of
# its last row and keeps the label count equal to the window count.
y_train_seq = y_train[seq_length - 1:]
y_test_seq = y_test[seq_length - 1:]

In [None]:
# One 80-unit LSTM layer over (seq_length, n_features) windows, followed by a
# single sigmoid output unit.
# NOTE(review): a single sigmoid output presumes 'Attack Type' is a binary 0/1
# label — TODO confirm against the preprocessing step.
model_lstm = Sequential()
model_lstm.add(LSTM(80, input_shape=(seq_length, X_train.shape[1])))
model_lstm.add(Dense(1, activation='sigmoid'))

In [None]:
# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# so it appears in the training history.
model_lstm.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs in batches of 64, holding out 20% of the training
# sequences for validation; the returned history feeds the plots below.
history = model_lstm.fit(
    x=X_train_seq,
    y=y_train_seq,
    batch_size=64,
    epochs=10,
    validation_split=0.2,
)

In [None]:
# Evaluate the trained LSTM on the held-out sequences.
test_loss, test_acc = model_lstm.evaluate(X_test_seq, y_test_seq)
# The accuracy metric is a fraction in [0, 1]; the '%' format type multiplies
# by 100 before appending the percent sign, so 0.9876 prints as "98.76%"
# instead of the misleading "0.9876%".
print(f"Test Accuracy: {test_acc:.2%}")

In [None]:
# Show which metric names were recorded during training; the plotting helper
# below looks series up by these keys.
print(history.history.keys())

In [None]:
def plot_lstm(history):
    """Plot per-epoch training/validation accuracy and loss side by side.

    Args:
        history: Object whose ``history`` attribute is a mapping containing the
            keys 'accuracy', 'val_accuracy', 'loss' and 'val_loss', each a
            per-epoch sequence of values.
    """
    recorded = history.history
    # Look every series up before creating the figure, so a missing key raises
    # before anything is drawn.
    panels = (
        ('Training and Validation Accuracy', 'Accuracy',
         recorded['accuracy'], 'Training accuracy',
         recorded['val_accuracy'], 'Validation accuracy'),
        ('Training and Validation Loss', 'Loss',
         recorded['loss'], 'Training loss',
         recorded['val_loss'], 'Validation loss'),
    )
    # Epochs are numbered from 1 on the x axis.
    epoch_numbers = range(1, len(recorded['accuracy']) + 1)

    fig, axes = plt.subplots(1, 2, figsize=(12, 6))
    for ax, (title, y_label, train_vals, train_label, val_vals, val_label) in zip(axes, panels):
        ax.plot(epoch_numbers, train_vals, 'bo-', label=train_label)
        ax.plot(epoch_numbers, val_vals, 'gs-', label=val_label)
        ax.set_title(title)
        ax.set_xlabel('Epochs')
        ax.set_ylabel(y_label)
        ax.legend()

    fig.tight_layout()
    plt.show()

In [None]:
# Draw the accuracy and loss curves recorded during the LSTM training run.
plot_lstm(history)

In [None]:
# Create the output directory if it is missing, so a fresh checkout does not
# fail here after the model has already been trained.
os.makedirs("./models", exist_ok=True)
# Persist the trained LSTM to a .keras file.
model_lstm.save("./models/kdd_model_lstm.keras")

In [None]:
# Create the output directory if it is missing, so this cell also works when
# it is run before (or without) the other save cells.
os.makedirs("./models", exist_ok=True)
# Persist the fitted random forest with joblib.
dump(model_rfc, "./models/kdd_model_rfc.joblib")

In [None]:
# Create the output directory if it is missing, so this cell also works when
# it is run before (or without) the other save cells.
os.makedirs("./models", exist_ok=True)
# NOTE(review): the fitted MinMaxScaler `sc` is not saved anywhere in the
# visible notebook; inference code will need the same scaling — confirm
# whether it should be persisted alongside the models.
# Persist the fitted Naive Bayes model with joblib.
dump(model_gnb, "./models/kdd_model_gnb.joblib")