<a href="https://colab.research.google.com/github/sankeawthong/Project-1-Lita-Chatbot/blob/main/Hybrid%20MNB-LSTM%20for%205%20class%20classifications%20based%20on%20WSN-DS%20dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **Hybrid MNB-LSTM for 5 class classifications based on WSN-DS dataset**

In [16]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.utils import to_categorical
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import LabelEncoder

In [17]:
# Load the dataset
dataset = pd.read_csv("dataset_WSN-DS.csv")

# Check for missing values
dataset.isnull().sum()

 id                 0
 Time               0
 Is_CH              0
 who CH             0
 Dist_To_CH         0
 ADV_S              0
 ADV_R              0
 JOIN_S             0
 JOIN_R             0
 SCH_S              0
 SCH_R              0
Rank                0
 DATA_S             0
 DATA_R             0
 Data_Sent_To_BS    0
 dist_CH_To_BS      0
 send_code          0
Expaned Energy      0
Class               0
dtype: int64

In [20]:
from keras.utils import np_utils
encoder = LabelEncoder()
y = encoder.fit_transform(dataset['Class'])
y = np_utils.to_categorical(y, num_classes=5)
X = dataset.drop(['Class'],axis=1)
X=X.values
print(X.shape)
print(y.shape)

(374661, 18)
(374661, 5)


In [21]:
# Oversample the minority class using SMOTE
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)

In [22]:
# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=101)

In [25]:
# Multinomial Naive Bayes model
mnb = MultinomialNB()
mnb.fit(X_train, np.argmax(y_train, axis=1))
y_pred_mnb = mnb.predict(X_test)
y_pred_mnb = to_categorical(y_pred_mnb, num_classes=5)

from sklearn.metrics import accuracy_score

# Calculate accuracy for Multinomial Naive Bayes model
accuracy_mnb = accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_pred_mnb, axis=1))
print("Multinomial Naive Bayes Accuracy:", accuracy_mnb)

Multinomial Naive Bayes Accuracy: 0.6494709850440796


In [26]:
# LSTM model
model = Sequential()
model.add(LSTM(64, activation='relu', input_shape=(X_train.shape[1], 1)))
model.add(Dense(5, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train.reshape(X_train.shape[0], X_train.shape[1], 1), y_train, epochs=10)

y_pred_lstm = model.predict(X_test.reshape(X_test.shape[0], X_test.shape[1], 1))
y_pred_lstm = np.argmax(y_pred_lstm, axis=1)
y_pred_lstm = to_categorical(y_pred_lstm, num_classes=5)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [27]:
# Combine models
y_pred = y_pred_mnb * 0.6 + y_pred_lstm * 0.4
y_pred = np.argmax(y_pred, axis=1)

# Evaluation metrics
print(classification_report(np.argmax(y_test, axis=1), y_pred))
print(confusion_matrix(np.argmax(y_test, axis=1), y_pred))

              precision    recall  f1-score   support

           0       0.93      0.81      0.87     67745
           1       0.39      0.45      0.42     68144
           2       0.99      0.35      0.51     68282
           3       0.59      0.82      0.69     68009
           4       0.66      0.82      0.73     67886

    accuracy                           0.65    340066
   macro avg       0.71      0.65      0.64    340066
weighted avg       0.71      0.65      0.64    340066

[[54898  2529    27  5557  4734]
 [    0 30510    16 21481 16137]
 [    0 30778 23713 11427  2364]
 [ 3917  2270   130 56058  5634]
 [    0 11190    26   986 55684]]
