In [None]:
import pickle
from datetime import datetime

import pandas as pd
from matplotlib import pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
# Load the flow dataset. The last column is used as the label and every other
# column as a feature.
data = pd.read_csv('/content/drive/MyDrive/DDOS /dataset.csv')

# Columns 2, 3 and 5 are string columns containing dots; the dots are removed
# so the whole feature matrix can be cast to float64. regex=False pins '.' to
# a literal dot instead of relying on pandas' version-dependent default.
# NOTE(review): presumably these are dotted addresses/identifiers -- confirm.
# Dropping the separators can map distinct values onto the same number.
for col_idx in (2, 3, 5):
    data.iloc[:, col_idx] = data.iloc[:, col_idx].str.replace('.', '', regex=False)

X_flow = data.iloc[:, :-1].values.astype('float64')
y_flow = data.iloc[:, -1].values

# Hold out 25 % of the rows for evaluation; the seed keeps the split repeatable.
X_flow_train, X_flow_test, y_flow_train, y_flow_test = train_test_split(
    X_flow, y_flow, test_size=0.25, random_state=0
)

# On the raw, unscaled features the lbfgs solver aborted without converging
# and the model predicted a single class for every test row. Standardising the
# features inside a pipeline (so the scaler is fitted on the training split
# only) and allowing more iterations addresses both points raised by the
# solver warning.
classifier = make_pipeline(
    StandardScaler(),
    LogisticRegression(random_state=0, max_iter=1000),
)
flow_model = classifier.fit(X_flow_train, y_flow_train)
y_flow_pred = flow_model.predict(X_flow_test)

# Report the confusion matrix, the accuracy and its complement on the test split.
print("------------------------------------------------------------------------------")
print("confusion matrix")
cm = confusion_matrix(y_flow_test, y_flow_pred)
print(cm)
acc = accuracy_score(y_flow_test, y_flow_pred)
print("succes accuracy = {0:.2f} %".format(acc*100))
fail = 1.0 - acc  # share of misclassified test rows
print("fail accuracy = {0:.2f} %".format(fail*100))
print("------------------------------------------------------------------------------")

# Persisting the fitted model is currently disabled:
# with open('LR_model', 'wb') as file:
#             pickle.dump(classifier, file)

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


------------------------------------------------------------------------------
confusion matrix
[[226596      0]
 [440285      0]]
succes accuracy = 33.98 %
fail accuracy = 66.02 %
------------------------------------------------------------------------------


In [None]:
# Random Forest Without Hyperparameter tuning

from datetime import datetime
from matplotlib import pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

class DDoSClassifier:
    """Train, evaluate and persist a random-forest classifier on a flow CSV.

    The forest uses fixed hyperparameters (10 trees, entropy criterion,
    random_state=0). Typical use is ``preprocess_data()`` ->
    ``train_model()`` -> ``evaluate_model()`` -> ``save_model(path)``.
    """

    def __init__(self, data_path):
        """Store the CSV location and create the (unfitted) classifier.

        Args:
            data_path: Path of the CSV file read by ``preprocess_data``.
        """
        self.data_path = data_path
        self.classifier = RandomForestClassifier(n_estimators=10, criterion="entropy", random_state=0)
        # Populated by preprocess_data().
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None
        # Populated by train_model().
        self.model = None

    def preprocess_data(self):
        """Read the CSV, build a float feature matrix and split train/test.

        The last column is used as the label, all other columns as features.
        25 % of the rows are held out as the test split (random_state=0).
        """
        data = pd.read_csv(self.data_path)
        # Columns 2, 3 and 5 are string columns containing dots; remove the
        # dots so the features can be cast to float64. regex=False pins '.'
        # to a literal dot instead of relying on pandas' version-dependent
        # default.
        # NOTE(review): presumably dotted addresses/identifiers -- confirm.
        for col_idx in (2, 3, 5):
            data.iloc[:, col_idx] = data.iloc[:, col_idx].str.replace('.', '', regex=False)
        features = data.iloc[:, :-1].values.astype('float64')
        labels = data.iloc[:, -1].values
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            features, labels, test_size=0.25, random_state=0
        )

    def train_model(self):
        """Fit the classifier on the training split and keep it in ``self.model``."""
        self.model = self.classifier.fit(self.X_train, self.y_train)

    def evaluate_model(self):
        """Print the confusion matrix, accuracy and error rate on the test split."""
        y_flow_pred = self.model.predict(self.X_test)
        cm = confusion_matrix(self.y_test, y_flow_pred)
        acc = accuracy_score(self.y_test, y_flow_pred)
        print("------------------------------------------------------------------------------")
        print("confusion matrix")
        print(cm)
        print("succes accuracy = {0:.2f} %".format(acc*100))
        fail = 1.0 - acc  # share of misclassified test rows
        print("fail accuracy = {0:.2f} %".format(fail*100))
        print("------------------------------------------------------------------------------")

    def save_model(self, model_path):
        """Pickle the trained model to ``model_path``.

        Args:
            model_path: Destination file; it is overwritten if it exists.

        Raises:
            RuntimeError: If no model has been trained yet. The check runs
                before the file is opened, so nothing is written in that case.
        """
        if self.model is None:
            raise RuntimeError("No trained model to save; call train_model() first.")
        with open(model_path, 'wb') as file:
            pickle.dump(self.model, file)

if __name__ == "__main__":
    # Where the pickled forest is written and where the dataset CSV is read from.
    model_path = 'RandomForest_model.pkl'
    data_path = '/content/drive/MyDrive/DDOS /dataset.csv'

    # Run the stages in order: load/split, fit, report, then persist.
    ddos_classifier = DDoSClassifier(data_path)
    for stage in (
        ddos_classifier.preprocess_data,
        ddos_classifier.train_model,
        ddos_classifier.evaluate_model,
    ):
        stage()
    ddos_classifier.save_model(model_path)


------------------------------------------------------------------------------
confusion matrix
[[226596      0]
 [     3 440282]]
succes accuracy = 100.00 %
fail accuracy = 0.00 %
------------------------------------------------------------------------------


In [None]:
from datetime import datetime
from matplotlib import pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
import pickle

class DDoSClassifier:
    """Random-forest classifier for a flow CSV, tuned with a small grid search.

    Typical use is ``preprocess_data()`` -> ``train_model()`` ->
    ``evaluate_model()`` -> ``save_model(path)``.
    """

    def __init__(self, data_path):
        """Store the CSV location; data and model are filled in by later steps.

        Args:
            data_path: Path of the CSV file read by ``preprocess_data``.
        """
        self.data_path = data_path
        # Populated by preprocess_data().
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None
        # Populated by train_model().
        self.model = None

    def preprocess_data(self):
        """Read the CSV, build a float feature matrix and split train/test.

        The last column is used as the label, all other columns as features.
        25 % of the rows are held out as the test split (random_state=0).
        """
        data = pd.read_csv(self.data_path)
        # Columns 2, 3 and 5 are string columns containing dots; remove the
        # dots so the features can be cast to float64. regex=False pins '.'
        # to a literal dot instead of relying on pandas' version-dependent
        # default.
        # NOTE(review): presumably dotted addresses/identifiers -- confirm.
        for col_idx in (2, 3, 5):
            data.iloc[:, col_idx] = data.iloc[:, col_idx].str.replace('.', '', regex=False)
        features = data.iloc[:, :-1].values.astype('float64')
        labels = data.iloc[:, -1].values
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            features, labels, test_size=0.25, random_state=0
        )

    def train_model(self):
        """Grid-search the forest hyperparameters and keep the best fitted model.

        Runs a 5-fold, accuracy-scored grid search on the training split,
        prints the winning parameter combination and stores the corresponding
        fitted estimator in ``self.model``.
        """
        # Candidate values explored by the grid search.
        param_grid = {
            'n_estimators': [10, 15],
            'max_depth': [None, 5],
            'min_samples_split': [2, 5],
            'min_samples_leaf': [1, 2]
        }

        rf = RandomForestClassifier(random_state=0)

        # n_jobs=-1 lets the search use all available cores.
        grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)
        grid_search.fit(self.X_train, self.y_train)

        best_params = grid_search.best_params_
        print("Best Parameters:", best_params)

        # GridSearchCV refits by default: best_estimator_ is a clone of `rf`
        # with the best parameters, already fitted on the full training split.
        # Reusing it avoids training an identical forest a second time.
        self.model = grid_search.best_estimator_

    def evaluate_model(self):
        """Print the confusion matrix, accuracy and error rate on the test split."""
        y_flow_pred = self.model.predict(self.X_test)
        cm = confusion_matrix(self.y_test, y_flow_pred)
        acc = accuracy_score(self.y_test, y_flow_pred)
        print("------------------------------------------------------------------------------")
        print("confusion matrix")
        print(cm)
        print("succes accuracy = {0:.2f} %".format(acc*100))
        fail = 1.0 - acc  # share of misclassified test rows
        print("fail accuracy = {0:.2f} %".format(fail*100))
        print("------------------------------------------------------------------------------")

    def save_model(self, model_path):
        """Pickle the trained model to ``model_path``.

        Args:
            model_path: Destination file; it is overwritten if it exists.

        Raises:
            RuntimeError: If no model has been trained yet. The check runs
                before the file is opened, so nothing is written in that case.
        """
        if self.model is None:
            raise RuntimeError("No trained model to save; call train_model() first.")
        with open(model_path, 'wb') as file:
            pickle.dump(self.model, file)

if __name__ == "__main__":
    # Where the tuned forest is pickled and where the dataset CSV is read from.
    model_path = 'RandomForest_model_hyperparameter_tuned.pkl'
    data_path = '/content/drive/MyDrive/DDOS /dataset.csv'

    # Run the stages in order: load/split, grid-search fit, report, then persist.
    ddos_classifier = DDoSClassifier(data_path)
    for stage in (
        ddos_classifier.preprocess_data,
        ddos_classifier.train_model,
        ddos_classifier.evaluate_model,
    ):
        stage()
    ddos_classifier.save_model(model_path)


Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 15}
------------------------------------------------------------------------------
confusion matrix
[[226596      0]
 [     2 440283]]
succes accuracy = 100.00 %
fail accuracy = 0.00 %
------------------------------------------------------------------------------
