In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error

In [2]:
df = pd.read_csv("ddos_dataset.csv")
df.columns

Index(['Flow ID', 'Timestamp', 'Fwd Pkt Len Mean', 'Fwd Seg Size Avg',
       'Init Fwd Win Byts', 'Init Bwd Win Byts', 'Fwd Seg Size Min', 'Label'],
      dtype='object')

In [6]:
df = df.sort_values("Timestamp")

In [3]:
columns = [
    'Fwd Pkt Len Mean',
    'Fwd Seg Size Avg',
    'Init Fwd Win Byts',
    'Init Bwd Win Byts',
    'Fwd Seg Size Min',
    'Label'
]

In [5]:
dtypes = {
    'Fwd Pkt Len Mean': 'float',
    'Fwd Seg Size Avg': 'float',
    'Init Fwd Win Byts': 'int',
    'Init Bwd Win Byts': 'int',
    'Fwd Seg Size Min': 'int',
    'Label': 'string'
}

In [44]:
df = pd.read_csv("ddos_dataset.csv", usecols=columns, dtype=dtypes)

In [52]:
def ddos_replace(row):
    if row["Label"] == "ddos":
        return 1
    else:
        return 0
    
df["Label"] = df.apply(ddos_replace, axis=1)

In [87]:
X = df.drop(['Label'], axis=1)
y = df['Label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4242)

In [88]:
rf_model = RandomForestClassifier().fit(X_train, y_train)

# Tahmin

In [89]:
y_pred = rf_model.predict(X_test)

In [90]:
np.sqrt(mean_squared_error(y_test, y_pred))

0.05431390245600108

# Model Tuning

In [None]:
rf_params = {'max_depth': list(range(1, 5)), 'max_features': [1, 2, 3], 'n_estimators': [100, 200, 300]}
rf_cv_model = GridSearchCV(rf_model, rf_params, cv=10, n_jobs=-1).fit(X_train, y_train)
rf_cv_model.best_params_

In [None]:
rf_tuned = RandomForestClassifier(max_depth=9, max_features=20, n_estimators=100).fit(X_train, y_train)
y_pred = rf_tuned.predict(X_test)
np.sqrt(mean_squared_error(y_test, y_pred))