<a href="https://colab.research.google.com/github/sankeawthong/Project-1-Lita-Chatbot/blob/main/Hybrid%20LR-Bi-LSTM%20for%2010%20class%20classifications%20based%20on%20UNSW-NB15%20dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **Hybrid LR-Bi-LSTM for 10 class classifications based on UNSW-NB15 dataset**

In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM, Bidirectional, Dropout, Flatten

In [2]:
# Load dataset
dataset = pd.read_csv("dataset_P2.2.csv")
dataset = dataset.dropna() # Remove missing values
X = dataset.drop(['Class'], axis=1)
y = dataset['Class']

In [3]:
# Encode labels
encoder = LabelEncoder()
y = encoder.fit_transform(dataset['Class'])

In [4]:
# Standardize features
scaler = StandardScaler()
X = scaler.fit_transform(dataset.drop(['Class'], axis=1))

In [5]:
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### **Hybrid combination of LR-Bi-LSTM for 10 class classifications**

In [6]:
# Train logistic regression model
lr_model = LogisticRegression(multi_class='multinomial', solver='lbfgs')
lr_model.fit(X_train, y_train)
lr_pred = lr_model.predict(X_test)
lr_acc = accuracy_score(y_test, lr_pred)
print("Logistic Regression Accuracy:", lr_acc)
print(classification_report(y_test, lr_pred))

Logistic Regression Accuracy: 0.7736177250563175
              precision    recall  f1-score   support

           0       0.92      0.90      0.91     11169
           1       0.60      0.08      0.15       393
           2       0.00      0.00      0.00       360
           3       0.36      0.12      0.17      2370
           4       0.59      0.82      0.69      6772
           5       0.59      0.56      0.57      3570
           6       0.97      0.97      0.97      8079
           7       0.55      0.63      0.59      2098
           8       1.00      0.01      0.02       233
           9       0.00      0.00      0.00        25

    accuracy                           0.77     35069
   macro avg       0.56      0.41      0.41     35069
weighted avg       0.76      0.77      0.75     35069



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [7]:
# Train Bi-LSTM model
num_classes = len(np.unique(y))
input_dim = X_train.shape[1]
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))
lstm_model = Sequential()
lstm_model.add(Bidirectional(LSTM(64, input_shape=(1, input_dim), activation='relu', return_sequences=True)))
lstm_model.add(Dropout(0.2))
lstm_model.add(LSTM(32, activation='relu'))
lstm_model.add(Dense(10, activation='softmax'))
lstm_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
y_train_onehot = pd.get_dummies(y_train).values
lstm_model.fit(X_train, y_train_onehot, epochs=15, batch_size=32, verbose=0)
lstm_pred = lstm_model.predict(X_test)
lstm_pred = np.argmax(lstm_pred, axis=1)
lstm_acc = accuracy_score(y_test, lstm_pred)
print("Bi-LSTM Accuracy:", lstm_acc)
print(classification_report(y_test, lstm_pred))

Bi-LSTM Accuracy: 0.8362086172973281
              precision    recall  f1-score   support

           0       0.98      0.95      0.96     11169
           1       0.61      0.21      0.31       393
           2       0.43      0.04      0.08       360
           3       0.37      0.09      0.14      2370
           4       0.62      0.91      0.73      6772
           5       0.78      0.78      0.78      3570
           6       1.00      0.98      0.99      8079
           7       0.80      0.74      0.77      2098
           8       0.62      0.30      0.40       233
           9       0.86      0.24      0.38        25

    accuracy                           0.84     35069
   macro avg       0.71      0.52      0.55     35069
weighted avg       0.83      0.84      0.82     35069



In [8]:
# Combine predictions
lr_probs = lr_model.predict_proba(np.reshape(X_test, (X_test.shape[0], -1)))
lstm_probs = lstm_model.predict(X_test)
combined_probs = (lr_probs + lstm_probs) / 2
combined_pred = np.argmax(combined_probs, axis=1)
combined_acc = accuracy_score(y_test, combined_pred)
print("Combined Accuracy:", combined_acc)
print(classification_report(y_test, combined_pred))

Combined Accuracy: 0.8293934814223388
              precision    recall  f1-score   support

           0       0.98      0.93      0.96     11169
           1       0.67      0.16      0.26       393
           2       0.50      0.03      0.05       360
           3       0.35      0.10      0.16      2370
           4       0.62      0.89      0.73      6772
           5       0.74      0.80      0.77      3570
           6       0.99      0.98      0.98      8079
           7       0.76      0.73      0.75      2098
           8       0.96      0.10      0.18       233
           9       1.00      0.08      0.15        25

    accuracy                           0.83     35069
   macro avg       0.76      0.48      0.50     35069
weighted avg       0.82      0.83      0.81     35069

