<a href="https://colab.research.google.com/github/sankeawthong/Project-1-Lita-Chatbot/blob/main/Hybrid%20RF-MLP%20for%2010%20class%20classifications.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **Hybrid RF-MLP for 10 class classifications**

In [1]:
import pandas as pd
import numpy as np
import collections as c
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from imblearn.over_sampling import SMOTE

In [2]:
# Load dataset
dataset = pd.read_csv("dataset_P2.2.csv")

# Check for missing values
dataset.isnull().sum()

id                   0
dur                  0
spkts                0
dpkts                0
sbytes               0
dbytes               0
rate                 0
sttl                 0
dttl                 0
sload                0
dload                0
sloss                0
dloss                0
sinpkt               0
dinpkt               0
sjit                 0
djit                 0
swin                 0
stcpb                0
dtcpb                0
dwin                 0
tcprtt               0
synack               0
ackdat               0
smean                0
dmean                0
trans_depth          0
response_body_len    0
ct_srv_src           0
ct_state_ttl         0
ct_dst_ltm           0
ct_src_dport_ltm     0
ct_dst_sport_ltm     0
ct_dst_src_ltm       0
is_ftp_login         0
ct_ftp_cmd           0
ct_flw_http_mthd     0
ct_src_ltm           0
ct_srv_dst           0
is_sm_ips_ports      0
Class                0
dtype: int64

In [3]:
# Data preprocessing
y = dataset['Class'].values
X = dataset.drop(['Class'], axis=1).values

In [4]:
# Class distribution before balancing
counter = c.Counter(y)
print("Class Distribution (Before Balancing):", counter)

Class Distribution (Before Balancing): Counter({0: 56000, 6: 40000, 4: 33393, 5: 18184, 3: 12264, 7: 10491, 1: 2000, 2: 1746, 8: 1133, 9: 130})


In [5]:
# Apply SMOTE for data balancing
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)

# Class distribution after balancing
counter = c.Counter(y)
print("Class Distribution (After Balancing):", counter)

Class Distribution (After Balancing): Counter({0: 56000, 2: 56000, 1: 56000, 5: 56000, 8: 56000, 7: 56000, 4: 56000, 3: 56000, 9: 56000, 6: 56000})


In [6]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=101)

In [8]:
# Random Forest model
rf_model = RandomForestClassifier()
rf_model.fit(X_train, y_train)

# MLP model
mlp_model = MLPClassifier(hidden_layer_sizes=(25,), max_iter=1000, random_state=101)
mlp_model.fit(X_train, y_train)

# Combine predictions from Random Forest and MLP models
rf_predictions = rf_model.predict(X_test)
mlp_predictions = mlp_model.predict(X_test)
combined_predictions = np.array([rf_predictions[i] if mlp_predictions[i] == rf_predictions[i] else mlp_predictions[i] for i in range(len(mlp_predictions))])

# Evaluate combined model
combined_accuracy = np.mean(combined_predictions == y_test)
print("Combined Model Accuracy:", combined_accuracy)

Combined Model Accuracy: 0.1336875
