## Importing libraries

In [1]:
import os

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

## Loading the captured CSV files, labelling the data, and combining it into a single CSV file for training

In [25]:

# Read each capture file and tag every row with a numeric class label:
# rows from the normal capture get 0, hulk rows get 1, slowloris rows get 2.
normal_data = pd.read_csv('Outputs/normal_output.csv').assign(Label=0)
hulk_data = pd.read_csv('Outputs/hulk_output_2.csv').assign(Label=1)
slowloris_data = pd.read_csv('Outputs/slowloris_output_2.csv').assign(Label=2)

# Stack the three labelled frames, shuffle the rows with a fixed seed so the
# order is reproducible, and renumber the index from zero.
combined_data = (
    pd.concat([normal_data, hulk_data, slowloris_data], ignore_index=True)
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# Write the shuffled, labelled dataset to disk without the index column.
combined_data.to_csv('Outputs/labeled_combined_output.csv', index=False)

## CSV Data viewing

In [6]:
# Preview the raw hulk capture. Only the first few rows are requested so the
# notebook output stays small and readable.
data = pd.read_csv('Outputs/hulk_output_2.csv')

data.head(10)

FileNotFoundError: [Errno 2] No such file or directory: 'Outputs/hulk_output_2.csv'

## Model training and evaluation

In [27]:
# Train a random-forest classifier on the labelled dataset and report its
# performance on a held-out split. All sklearn names used here come from the
# import cell at the top of the notebook.
data = pd.read_csv('Outputs/labeled_combined_output.csv')
# Remove the address and timestamp columns so they are not used as features.
data = data.drop(columns=['src_ip', 'dst_ip', 'timestamp'])

# Separate features and target
X = data.drop(columns='Label')
y = data['Label']

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
# NOTE(review): the recorded run shows uneven class supports in the test split
# (62 / 1879 / 1186 rows); consider passing stratify=y here — TODO confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit the scaler on the training split only, then apply the same transform to
# the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train the random forest
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Evaluate on the held-out split
y_pred = clf.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[  61    0    1]
 [   0 1877    2]
 [   0    1 1185]]
              precision    recall  f1-score   support

           0       1.00      0.98      0.99        62
           1       1.00      1.00      1.00      1879
           2       1.00      1.00      1.00      1186

    accuracy                           1.00      3127
   macro avg       1.00      0.99      1.00      3127
weighted avg       1.00      1.00      1.00      3127



## Saving the model and scaler using joblib

In [28]:
# Persist the trained classifier and the fitted scaler with joblib.
# joblib.dump does not create missing parent directories, so make sure the
# target directory exists first (no-op when it is already there).
os.makedirs('models', exist_ok=True)
joblib.dump(clf, 'models/DoS_model_mo.pkl')
joblib.dump(scaler, 'models/DoS_scaler_mo.pkl')
print("Model and scaler saved successfully!")

Model and scaler saved successfully!
