<a href="https://colab.research.google.com/github/sankeawthong/Project-1-Lita-Chatbot/blob/main/Hybrid%20RF-Bi-LSTM%20for%205%20class%20classifications%20based%20on%20WSN-DS%20dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **Hybrid RF-Bi-LSTM for 5 class classifications based on WSN-DS dataset**

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Bidirectional, LSTM
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE

In [2]:
# Load the dataset
dataset = pd.read_csv("dataset_WSN-DS.csv")
dataset.shape
dataset.isnull().sum()
dataset.info()
dataset["Class"].unique()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 374661 entries, 0 to 374660
Data columns (total 19 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0    id               374661 non-null  int64  
 1    Time             374661 non-null  int64  
 2    Is_CH            374661 non-null  int64  
 3    who CH           374661 non-null  int64  
 4    Dist_To_CH       374661 non-null  float64
 5    ADV_S            374661 non-null  int64  
 6    ADV_R            374661 non-null  int64  
 7    JOIN_S           374661 non-null  int64  
 8    JOIN_R           374661 non-null  int64  
 9    SCH_S            374661 non-null  int64  
 10   SCH_R            374661 non-null  int64  
 11  Rank              374661 non-null  int64  
 12   DATA_S           374661 non-null  int64  
 13   DATA_R           374661 non-null  int64  
 14   Data_Sent_To_BS  374661 non-null  int64  
 15   dist_CH_To_BS    374661 non-null  float64
 16   send_code        37

array([0, 4, 3, 1, 2])

In [3]:
# Data preprocessing
y = dataset['Class'].values
X = dataset.drop(['Class'], axis=1)
X = X.values

In [4]:
# Data balancing using SMOTE
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)

In [5]:
# Reshape the data for LSTM
X = X.reshape(X.shape[0], X.shape[1], 1)

In [6]:
# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train.reshape(X_train.shape[0], -1), y_train)  # Reshape X_train to 2D

y_pred_rf = rf_model.predict(X_test.reshape(X_test.shape[0], -1))  # Reshape X_test to 2D

from sklearn.metrics import accuracy_score
# Calculate accuracy for Random Forest model
accuracy_rf = accuracy_score(y_test, y_pred_rf)
print("Random Forest Accuracy:", accuracy_rf)

# Evaluate Random Forest model
rf_report = classification_report(y_test, y_pred_rf)
print("Random Forest Model Classification Report:")
print(rf_report)

Random Forest Accuracy: 0.999091352855034
Random Forest Model Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     67557
           1       1.00      1.00      1.00     68077
           2       1.00      1.00      1.00     68330
           3       1.00      1.00      1.00     68180
           4       1.00      1.00      1.00     67922

    accuracy                           1.00    340066
   macro avg       1.00      1.00      1.00    340066
weighted avg       1.00      1.00      1.00    340066



In [8]:
from tensorflow.keras.utils import to_categorical
# Bi-LSTM model
lstm_model = Sequential()
lstm_model.add(Bidirectional(LSTM(64, return_sequences=True), input_shape=(X_train.shape[1], 1)))
lstm_model.add(Dropout(0.2))
lstm_model.add(Bidirectional(LSTM(64)))
lstm_model.add(Dropout(0.2))
lstm_model.add(Dense(5, activation='softmax'))
lstm_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
lstm_model.summary()

# Convert labels to one-hot encoded vectors
num_classes = len(np.unique(y))
y_train_encoded = to_categorical(y_train, num_classes)
y_test_encoded = to_categorical(y_test, num_classes)

lstm_model.fit(X_train, y_train_encoded, epochs=5, batch_size=64, validation_data=(X_test, y_test_encoded), verbose=1)

##y_pred_lstm = lstm_model.predict_classes(X_test)
y_pred_lstm_prob = lstm_model.predict(X_test)
y_pred_lstm = np.argmax(y_pred_lstm_prob, axis=1)

# Calculate accuracy for Bi-LSTM model
accuracy_lstm = accuracy_score(y_test, y_pred_lstm)
print("Bi-LSTM Accuracy:", accuracy_lstm)

# Evaluate Bi-LSTM model
lstm_report = classification_report(y_test, y_pred_lstm)
print("Bi-LSTM Model Classification Report:")
print(lstm_report)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional (Bidirectiona  (None, 18, 128)          33792     
 l)                                                              
                                                                 
 dropout (Dropout)           (None, 18, 128)           0         
                                                                 
 bidirectional_1 (Bidirectio  (None, 128)              98816     
 nal)                                                            
                                                                 
 dropout_1 (Dropout)         (None, 128)               0         
                                                                 
 dense (Dense)               (None, 5)                 645       
                                                                 
Total params: 133,253
Trainable params: 133,253
Non-trai

In [9]:
# Combine Random Forest and Bi-LSTM predictions
combined_pred = np.concatenate((y_pred_rf.reshape(-1, 1), y_pred_lstm.reshape(-1, 1)), axis=1)
y_pred_combined = [np.bincount(row).argmax() for row in combined_pred]

# Calculate accuracy for the combined model
accuracy_combined = accuracy_score(y_test, y_pred_combined)
print("Combined Model Accuracy:", accuracy_combined)

# Evaluate the combined model
combined_report = classification_report(y_test, y_pred_combined)
print("Combined Model Classification Report:")
print(combined_report)

Combined Model Accuracy: 0.9894108790646522
Combined Model Classification Report:
              precision    recall  f1-score   support

           0       0.96      1.00      0.98     67557
           1       0.99      1.00      0.99     68077
           2       1.00      0.99      0.99     68330
           3       1.00      0.96      0.98     68180
           4       1.00      1.00      1.00     67922

    accuracy                           0.99    340066
   macro avg       0.99      0.99      0.99    340066
weighted avg       0.99      0.99      0.99    340066

