In [27]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/happydb-cleaned/cleaned_hm.csv


In [28]:
import os
import nltk
import zipfile

wordnet_path = "/usr/share/nltk_data/corpora/wordnet.zip"
wordnet_dir = "/usr/share/nltk_data/corpora/wordnet"

# Unzipping the WordNet data to the target directory
if not os.path.exists(wordnet_dir):
    with zipfile.ZipFile(wordnet_path, 'r') as z:
        z.extractall("/usr/share/nltk_data/corpora/")

# Setting the WordNet path
nltk.data.path.append("/usr/share/nltk_data/")

In [29]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import pandas as pd
import re

# Download required NLTK resources
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('omw-1.4')
nltk.download('wordnet')

# Initialize tools
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

# Function for cleaning text
def preprocess_text(text):
    # Remove HTML tags
    text = re.sub(r'<.*?>', '', text)
    # Lowercase text
    text = text.lower()
    # Remove non-alphabetic characters
    text = re.sub(r'[^a-z\s]', '', text)
    # Tokenize
    tokens = word_tokenize(text)
    # Remove stop words and lemmatize
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
    # Join tokens back to a single string
    return ' '.join(tokens)

[nltk_data] Downloading package stopwords to /usr/share/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /usr/share/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package wordnet to /usr/share/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [30]:
data = pd.read_csv("/kaggle/input/happydb-cleaned/cleaned_hm.csv")

In [31]:
data['cleaned_hm'] = data['cleaned_hm'].apply(preprocess_text)

In [32]:
mode_value = data['ground_truth_category'].mode()[0]
data['ground_truth_category'].fillna(mode_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['ground_truth_category'].fillna(mode_value, inplace=True)


In [33]:
from sklearn.preprocessing import LabelEncoder

reflection_encoder = LabelEncoder()
data['reflection_period_encoded'] = reflection_encoder.fit_transform(data['reflection_period'])

category_encoder = LabelEncoder()
data['ground_truth_category_encoded'] = category_encoder.fit_transform(data['ground_truth_category'])
data['predicted_category_encoded'] = category_encoder.fit_transform(data['predicted_category'])

encoded_columns = {
    "reflection_period_encoded": data['reflection_period_encoded'].unique(),
    "ground_truth_category_encoded": data['ground_truth_category_encoded'].unique(),
    "predicted_category_encoded": data['predicted_category_encoded'].unique(),
}
encoded_columns

{'reflection_period_encoded': array([0, 1]),
 'ground_truth_category_encoded': array([1, 2, 5, 3, 0, 6, 4]),
 'predicted_category_encoded': array([1, 4, 2, 5, 0, 3, 6])}

In [34]:
from sklearn.model_selection import train_test_split
X = data['cleaned_hm']
y = data['predicted_category']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42, stratify=y)

In [35]:
from sklearn.feature_extraction.text import TfidfVectorizer 
tfidf = TfidfVectorizer(max_features=5000, ngram_range=(1, 2), min_df=2, max_df=0.90, sublinear_tf=True)
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)
X_train_tfidf.shape, X_test_tfidf.shape

((75401, 5000), (25134, 5000))

In [36]:
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

In [37]:
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences

tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=5000)
tokenizer.fit_on_texts(X_train)

X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

In [38]:
max_sequence_length = 100  # Maximum length of sequences
X_train_padded = pad_sequences(X_train_seq, maxlen=max_sequence_length, padding='post')
X_test_padded = pad_sequences(X_test_seq, maxlen=max_sequence_length, padding='post')

In [13]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, Embedding

model_lstm = Sequential([
    Embedding(input_dim=5000, output_dim=128, input_length=max_sequence_length),
    LSTM(128, return_sequences=True),
    LSTM(64),
    Dense(64, activation='relu'),
    Dense(7, activation='softmax')
])
model_lstm.build(input_shape=(None, max_sequence_length))
model_lstm.summary()



In [14]:
model_lstm.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])

In [40]:
from tensorflow.keras.callbacks import EarlyStopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [16]:
history = model_lstm.fit(X_train_padded, y_train_encoded,epochs=25,batch_size=64,validation_data=(X_test_padded, y_test_encoded),verbose=1,callbacks=early_stopping)

Epoch 1/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 12ms/step - accuracy: 0.3383 - loss: 1.5480 - val_accuracy: 0.3381 - val_loss: 1.5415
Epoch 2/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 12ms/step - accuracy: 0.3363 - loss: 1.5387 - val_accuracy: 0.3385 - val_loss: 1.5371
Epoch 3/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 12ms/step - accuracy: 0.3762 - loss: 1.4691 - val_accuracy: 0.7724 - val_loss: 0.6593
Epoch 4/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 12ms/step - accuracy: 0.8084 - loss: 0.5571 - val_accuracy: 0.8630 - val_loss: 0.4295
Epoch 5/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 12ms/step - accuracy: 0.8909 - loss: 0.3391 - val_accuracy: 0.8827 - val_loss: 0.3469
Epoch 6/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 12ms/step - accuracy: 0.9201 - loss: 0.2427 - val_accuracy: 0.8879 - val_loss: 0.3282
Epoc

In [17]:
import numpy as np
y_pred_lstm = model_lstm.predict(X_test_padded)
y_pred_classes_lstm = np.argmax(y_pred_lstm, axis=1)

[1m786/786[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step


In [18]:
from sklearn.metrics import classification_report, accuracy_score, f1_score, roc_auc_score, precision_score, recall_score

# Print Classification Report
print("\nClassification Report for Vanilla LSTM:")
print(classification_report(y_test_encoded, y_pred_classes_lstm, zero_division=1))

# Print Accuracy
print("\nAccuracy for Vanilla LSTM:")
print(accuracy_score(y_test_encoded, y_pred_classes_lstm))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded, y_pred_classes_lstm, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded, y_pred_classes_lstm, average='weighted',zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded, y_pred_classes_lstm, average='weighted'))


Classification Report for Vanilla LSTM:
              precision    recall  f1-score   support

           0       0.88      0.91      0.89      8498
           1       0.94      0.94      0.94      8542
           2       0.91      0.93      0.92      2682
           3       0.78      0.75      0.77      2786
           4       0.81      0.77      0.79       300
           5       0.87      0.75      0.81      1865
           6       0.71      0.72      0.71       461

    accuracy                           0.89     25134
   macro avg       0.84      0.82      0.83     25134
weighted avg       0.89      0.89      0.89     25134


Accuracy for Vanilla LSTM:
0.8878809580647729

F1 Score:
0.8871100756493778

Precision Score:
0.8874037448455901

Recall Score:
0.8878809580647729


In [101]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout

# Define the CNN model
model_cnn = Sequential([
    Embedding(input_dim=5000, output_dim=128, input_length=max_sequence_length),
    Conv1D(filters=128, kernel_size=5, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(128, activation='relu'),
    Dense(7, activation='softmax')  # 7 classes
])
model_cnn.build(input_shape=(None, max_sequence_length))
model_cnn.summary()



In [102]:
model_cnn.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

In [103]:
history = model_cnn.fit(
    X_train_padded,
    y_train_encoded,
    epochs=25,
    batch_size=64,
    validation_data=(X_test_padded, y_test_encoded),
    verbose=1,
    callbacks=[early_stopping]
)

Epoch 1/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.7355 - loss: 0.7462 - val_accuracy: 0.8834 - val_loss: 0.3189
Epoch 2/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9189 - loss: 0.2319 - val_accuracy: 0.8848 - val_loss: 0.3179
Epoch 3/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9503 - loss: 0.1453 - val_accuracy: 0.8866 - val_loss: 0.3464
Epoch 4/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9708 - loss: 0.0896 - val_accuracy: 0.8850 - val_loss: 0.4044
Epoch 5/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9816 - loss: 0.0558 - val_accuracy: 0.8815 - val_loss: 0.4735


In [104]:
y_pred_cnn = model_cnn.predict(X_test_padded)
y_pred_classes_cnn = np.argmax(y_pred_cnn, axis=1)

[1m786/786[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step


In [105]:
from sklearn.metrics import classification_report, accuracy_score, f1_score, roc_auc_score, precision_score, recall_score

# Print Classification Report
print("\nClassification Report for CNN:")
print(classification_report(y_test_encoded,y_pred_classes_cnn, zero_division=1))

# Print Accuracy
print("\nAccuracy for CNN:")
print(accuracy_score(y_test_encoded,y_pred_classes_cnn))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded,y_pred_classes_cnn, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded,y_pred_classes_cnn, average='weighted',zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded,y_pred_classes_cnn, average='weighted'))


Classification Report for CNN:
              precision    recall  f1-score   support

           0       0.91      0.86      0.89      8498
           1       0.93      0.94      0.94      8542
           2       0.92      0.93      0.93      2682
           3       0.73      0.79      0.76      2786
           4       0.80      0.84      0.82       300
           5       0.77      0.81      0.79      1865
           6       0.76      0.75      0.75       461

    accuracy                           0.88     25134
   macro avg       0.83      0.85      0.84     25134
weighted avg       0.89      0.88      0.88     25134


Accuracy for CNN:
0.8834248428423649

F1 Score:
0.884188813893119

Precision Score:
0.8858934682399872

Recall Score:
0.8834248428423649


In [19]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, Dropout, Embedding

model_rnn = Sequential([
    Embedding(input_dim=5000, output_dim=128, input_length=max_sequence_length),
    SimpleRNN(128, return_sequences=True),
    SimpleRNN(64),
    Dense(64, activation='relu'),
    Dense(7, activation='softmax')
])
model_rnn.build(input_shape=(None, max_sequence_length))
model_rnn.summary()



In [20]:
from tensorflow.keras.optimizers import Adam  
optimizer = Adam(learning_rate=1e-5)
model_rnn.compile(optimizer=optimizer,loss='sparse_categorical_crossentropy',metrics=['accuracy'])

In [21]:
history = model_rnn.fit(X_train_padded, y_train_encoded,epochs=25,batch_size=64,validation_data=(X_test_padded, y_test_encoded),verbose=1,callbacks=early_stopping)

Epoch 1/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 23ms/step - accuracy: 0.3293 - loss: 1.6436 - val_accuracy: 0.3398 - val_loss: 1.5401
Epoch 2/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 20ms/step - accuracy: 0.3413 - loss: 1.5370 - val_accuracy: 0.3398 - val_loss: 1.5356
Epoch 3/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 20ms/step - accuracy: 0.3408 - loss: 1.5344 - val_accuracy: 0.3398 - val_loss: 1.5353
Epoch 4/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 20ms/step - accuracy: 0.3414 - loss: 1.5385 - val_accuracy: 0.3398 - val_loss: 1.5353
Epoch 5/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 20ms/step - accuracy: 0.3418 - loss: 1.5353 - val_accuracy: 0.3391 - val_loss: 1.5352


In [22]:
import numpy as np
y_pred_rnn = model_rnn.predict(X_test_padded)
y_pred_classes_rnn = np.argmax(y_pred_rnn, axis=1)

[1m786/786[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 9ms/step


In [23]:
from sklearn.metrics import classification_report, accuracy_score, f1_score, roc_auc_score, precision_score, recall_score

# Print Classification Report
print("\nClassification Report for RNN:")
print(classification_report(y_test_encoded, y_pred_classes_rnn, zero_division=1))

# Print Accuracy
print("\nAccuracy for RNN:")
print(accuracy_score(y_test_encoded, y_pred_classes_rnn))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded, y_pred_classes_rnn, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded, y_pred_classes_rnn, average='weighted',zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded, y_pred_classes_rnn, average='weighted'))


Classification Report for RNN:
              precision    recall  f1-score   support

           0       1.00      0.00      0.00      8498
           1       0.34      1.00      0.51      8542
           2       1.00      0.00      0.00      2682
           3       1.00      0.00      0.00      2786
           4       1.00      0.00      0.00       300
           5       0.00      0.00      0.00      1865
           6       1.00      0.00      0.00       461

    accuracy                           0.34     25134
   macro avg       0.76      0.14      0.07     25134
weighted avg       0.70      0.34      0.17     25134


Accuracy for RNN:
0.3398185724516591

F1 Score:
0.17240698754979195

Precision Score:
0.7014433340411069

Recall Score:
0.3398185724516591


In [24]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GRU, Dropout, Embedding

model_gru = Sequential([
    Embedding(input_dim=5000, output_dim=128, input_length=max_sequence_length),
    GRU(128, return_sequences=True),
    GRU(64),
    Dense(64, activation='relu'),
    Dense(7, activation='softmax')
])
model_gru.build(input_shape=(None, max_sequence_length))
model_gru.summary()



In [25]:
model_gru.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])

In [26]:
history = model_gru.fit(X_train_padded, y_train_encoded,epochs=25,batch_size=64,validation_data=(X_test_padded, y_test_encoded),verbose=1,callbacks=early_stopping)

Epoch 1/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 11ms/step - accuracy: 0.3426 - loss: 1.5484 - val_accuracy: 0.6683 - val_loss: 0.9356
Epoch 2/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 11ms/step - accuracy: 0.7705 - loss: 0.6748 - val_accuracy: 0.8738 - val_loss: 0.3788
Epoch 3/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 11ms/step - accuracy: 0.8933 - loss: 0.3173 - val_accuracy: 0.8851 - val_loss: 0.3291
Epoch 4/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 11ms/step - accuracy: 0.9217 - loss: 0.2301 - val_accuracy: 0.8911 - val_loss: 0.3160
Epoch 5/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 11ms/step - accuracy: 0.9376 - loss: 0.1822 - val_accuracy: 0.8874 - val_loss: 0.3375
Epoch 6/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 11ms/step - accuracy: 0.9485 - loss: 0.1515 - val_accuracy: 0.8827 - val_loss: 0.3573
Epoc

In [27]:
import numpy as np
y_pred_gru = model_gru.predict(X_test_padded)
y_pred_classes_gru = np.argmax(y_pred_gru,axis=1)

[1m786/786[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step


In [28]:
from sklearn.metrics import classification_report, accuracy_score, f1_score, roc_auc_score, precision_score, recall_score

# Print Classification Report
print("\nClassification Report for GRU:")
print(classification_report(y_test_encoded, y_pred_classes_gru, zero_division=1))

# Print Accuracy
print("\nAccuracy for GRU:")
print(accuracy_score(y_test_encoded, y_pred_classes_gru))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded, y_pred_classes_gru, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded, y_pred_classes_gru, average='weighted',zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded, y_pred_classes_gru, average='weighted'))


Classification Report for GRU:
              precision    recall  f1-score   support

           0       0.89      0.90      0.90      8498
           1       0.94      0.94      0.94      8542
           2       0.93      0.92      0.93      2682
           3       0.81      0.72      0.76      2786
           4       0.76      0.86      0.81       300
           5       0.79      0.83      0.81      1865
           6       0.72      0.79      0.75       461

    accuracy                           0.89     25134
   macro avg       0.83      0.85      0.84     25134
weighted avg       0.89      0.89      0.89     25134


Accuracy for GRU:
0.8911434709954643

F1 Score:
0.8907689977908473

Precision Score:
0.8911388426754073

Recall Score:
0.8911434709954643


In [29]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout,LSTM,Bidirectional

model_lstm_bi=Sequential([
    Embedding(input_dim=5000, output_dim=128, input_length=max_sequence_length),
    Bidirectional(LSTM(128, return_sequences=True)),
    Bidirectional(LSTM(64)),
    Dense(64, activation='relu'),
    Dense(7, activation='softmax')
])
model_lstm_bi.build(input_shape=(None, max_sequence_length))
model_lstm_bi.summary()



In [30]:
model_lstm_bi.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])

In [31]:
history=model_lstm_bi.fit(X_train_padded, y_train_encoded,epochs=25,batch_size=64,validation_data=(X_test_padded, y_test_encoded),verbose=1,callbacks=early_stopping)

Epoch 1/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 25ms/step - accuracy: 0.7202 - loss: 0.7787 - val_accuracy: 0.8743 - val_loss: 0.3585
Epoch 2/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 24ms/step - accuracy: 0.9034 - loss: 0.2782 - val_accuracy: 0.8910 - val_loss: 0.2973
Epoch 3/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 24ms/step - accuracy: 0.9226 - loss: 0.2189 - val_accuracy: 0.8940 - val_loss: 0.3064
Epoch 4/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 24ms/step - accuracy: 0.9356 - loss: 0.1792 - val_accuracy: 0.8917 - val_loss: 0.3319
Epoch 5/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 24ms/step - accuracy: 0.9482 - loss: 0.1463 - val_accuracy: 0.8927 - val_loss: 0.3261
Epoch 6/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 24ms/step - accuracy: 0.9541 - loss: 0.1347 - val_accuracy: 0.8883 - val_loss: 0.3707
Epoc

In [32]:
import numpy as np
y_pred_lstm_bi = model_lstm_bi.predict(X_test_padded)
y_pred_classes_lstm_bi = np.argmax(y_pred_lstm_bi,axis=1)

[1m786/786[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step


In [33]:
from sklearn.metrics import classification_report, accuracy_score, f1_score, roc_auc_score, precision_score, recall_score

# Print Classification Report
print("\nClassification Report for Bi-LSTM:")
print(classification_report(y_test_encoded,y_pred_classes_lstm_bi, zero_division=1))

# Print Accuracy
print("\nAccuracy for Bi-LSTM:")
print(accuracy_score(y_test_encoded,y_pred_classes_lstm_bi))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded, y_pred_classes_lstm_bi, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded,y_pred_classes_lstm_bi, average='weighted',zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded,y_pred_classes_lstm_bi, average='weighted'))


Classification Report for Bi-LSTM:
              precision    recall  f1-score   support

           0       0.88      0.92      0.90      8498
           1       0.94      0.93      0.94      8542
           2       0.92      0.94      0.93      2682
           3       0.78      0.76      0.77      2786
           4       0.86      0.79      0.82       300
           5       0.87      0.74      0.80      1865
           6       0.71      0.81      0.76       461

    accuracy                           0.89     25134
   macro avg       0.85      0.84      0.85     25134
weighted avg       0.89      0.89      0.89     25134


Accuracy for Bi-LSTM:
0.8910241107662926

F1 Score:
0.8904489430187092

Precision Score:
0.8910424463484742

Recall Score:
0.8910241107662926


In [34]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout,SimpleRNN,Bidirectional

model_rnn_bi=Sequential([
    Embedding(input_dim=5000, output_dim=128, input_length=max_sequence_length),
    Bidirectional(SimpleRNN(128, return_sequences=True)),
    Bidirectional(SimpleRNN(64)),
    Dense(64, activation='relu'),
    Dense(7, activation='softmax')
])
model_rnn_bi.build(input_shape=(None, max_sequence_length))
model_rnn_bi.summary()



In [35]:
model_rnn_bi.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])

In [36]:
history=model_rnn_bi.fit(X_train_padded, y_train_encoded,epochs=25,batch_size=64,validation_data=(X_test_padded, y_test_encoded),verbose=1,callbacks=early_stopping)

Epoch 1/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 44ms/step - accuracy: 0.6891 - loss: 0.8723 - val_accuracy: 0.8520 - val_loss: 0.4395
Epoch 2/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 41ms/step - accuracy: 0.8739 - loss: 0.3760 - val_accuracy: 0.8175 - val_loss: 0.5434
Epoch 3/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 41ms/step - accuracy: 0.8843 - loss: 0.3417 - val_accuracy: 0.8732 - val_loss: 0.3767
Epoch 4/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 41ms/step - accuracy: 0.9096 - loss: 0.2671 - val_accuracy: 0.8678 - val_loss: 0.4059
Epoch 5/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 41ms/step - accuracy: 0.9125 - loss: 0.2570 - val_accuracy: 0.8795 - val_loss: 0.3605


In [37]:
import numpy as np
y_pred_rnn_bi = model_rnn_bi.predict(X_test_padded)
y_pred_classes_rnn_bi = np.argmax(y_pred_rnn_bi,axis=1)

[1m786/786[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 16ms/step


In [38]:
from sklearn.metrics import classification_report, accuracy_score, f1_score, roc_auc_score, precision_score, recall_score

# Print Classification Report
print("\nClassification Report for Bi-RNN:")
print(classification_report(y_test_encoded,y_pred_classes_rnn_bi, zero_division=1))

# Print Accuracy
print("\nAccuracy for Bi-RNN:")
print(accuracy_score(y_test_encoded,y_pred_classes_rnn_bi))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded, y_pred_classes_rnn_bi, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded,y_pred_classes_rnn_bi, average='weighted',zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded,y_pred_classes_rnn_bi, average='weighted'))


Classification Report for Bi-RNN:
              precision    recall  f1-score   support

           0       0.94      0.79      0.86      8498
           1       0.92      0.94      0.93      8542
           2       0.94      0.88      0.90      2682
           3       0.62      0.81      0.70      2786
           4       0.72      0.75      0.74       300
           5       0.68      0.83      0.75      1865
           6       0.64      0.71      0.67       461

    accuracy                           0.85     25134
   macro avg       0.78      0.81      0.79     25134
weighted avg       0.87      0.85      0.86     25134


Accuracy for Bi-RNN:
0.8520331025702236

F1 Score:
0.855648744157884

Precision Score:
0.8681623228016022

Recall Score:
0.8520331025702236


In [39]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,GRU,Bidirectional

model_gru_bi=Sequential([
    Embedding(input_dim=5000, output_dim=128, input_length=max_sequence_length),
    Bidirectional(GRU(128, return_sequences=True)),
    Bidirectional(GRU(64)),
    Dense(64, activation='relu'),
    Dense(7, activation='softmax')
])
model_gru_bi.build(input_shape=(None, max_sequence_length))
model_gru_bi.summary()



In [40]:
model_gru_bi.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])

In [41]:
history=model_gru_bi.fit(X_train_padded, y_train_encoded,epochs=25,batch_size=64,validation_data=(X_test_padded, y_test_encoded),verbose=1,callbacks=early_stopping)

Epoch 1/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 23ms/step - accuracy: 0.7571 - loss: 0.6876 - val_accuracy: 0.8839 - val_loss: 0.3251
Epoch 2/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 22ms/step - accuracy: 0.9053 - loss: 0.2645 - val_accuracy: 0.8902 - val_loss: 0.3033
Epoch 3/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 22ms/step - accuracy: 0.9258 - loss: 0.2073 - val_accuracy: 0.8892 - val_loss: 0.3126
Epoch 4/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 23ms/step - accuracy: 0.9384 - loss: 0.1686 - val_accuracy: 0.8886 - val_loss: 0.3207
Epoch 5/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 22ms/step - accuracy: 0.9504 - loss: 0.1414 - val_accuracy: 0.8841 - val_loss: 0.3477


In [42]:
import numpy as np
y_pred_gru_bi = model_gru_bi.predict(X_test_padded)
y_pred_classes_gru_bi = np.argmax(y_pred_gru_bi,axis=1)

[1m786/786[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step


In [43]:
from sklearn.metrics import classification_report, accuracy_score, f1_score, roc_auc_score, precision_score, recall_score

# Print Classification Report
print("\nClassification Report for Bi-GRU:")
print(classification_report(y_test_encoded,y_pred_classes_gru_bi, zero_division=1))

# Print Accuracy
print("\nAccuracy for Bi-GRU:")
print(accuracy_score(y_test_encoded,y_pred_classes_gru_bi))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded, y_pred_classes_gru_bi, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded,y_pred_classes_gru_bi, average='weighted',zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded,y_pred_classes_gru_bi, average='weighted'))


Classification Report for Bi-GRU:
              precision    recall  f1-score   support

           0       0.85      0.93      0.89      8498
           1       0.94      0.94      0.94      8542
           2       0.92      0.93      0.92      2682
           3       0.83      0.64      0.72      2786
           4       0.83      0.77      0.80       300
           5       0.84      0.76      0.80      1865
           6       0.78      0.72      0.75       461

    accuracy                           0.88     25134
   macro avg       0.86      0.81      0.83     25134
weighted avg       0.88      0.88      0.88     25134


Accuracy for Bi-GRU:
0.8838624970159943

F1 Score:
0.8811871976846064

Precision Score:
0.8828450773232037

Recall Score:
0.8838624970159943


In [44]:
from cuml.svm import SVC 
import cudf 
import numpy as np
X_train_cudf = cudf.DataFrame.from_records(X_train_tfidf.toarray()) 
X_test_cudf = cudf.DataFrame.from_records(X_test_tfidf.toarray()) 
y_train_cudf = cudf.Series(y_train_encoded) 
y_test_cudf = cudf.Series(y_test_encoded)

In [45]:
svm_model = SVC(kernel='rbf',C=1.0,gamma='scale') 
svm_model.fit(X_train_cudf,y_train_cudf)

In [46]:
y_pred = svm_model.predict(X_test_cudf) 
y_pred_cusvm = y_pred.to_numpy()

In [47]:
from sklearn.metrics import classification_report, accuracy_score, f1_score, precision_score, recall_score

# Print Classification Report for Cu-SVM
print("\nClassification Report for Cu-SVM:")
print(classification_report(y_test_encoded,y_pred_cusvm, zero_division=1))

# Print Accuracy for Cu-SVM
print("\nAccuracy for Cu-SVM:")
print(accuracy_score(y_test_encoded,y_pred_cusvm))

# Print F1 Score for Cu-SVM
print("\nF1 Score for Cu-SVM:")
print(f1_score(y_test_encoded,y_pred_cusvm, average='weighted'))

# Print Precision Score for Cu-SVM
print("\nPrecision Score for Cu-SVM:")
print(precision_score(y_test_encoded,y_pred_cusvm, average='weighted', zero_division=1))

# Print Recall Score for Cu-SVM
print("\nRecall Score for Cu-SVM:")
print(recall_score(y_test_encoded,y_pred_cusvm, average='weighted'))


Classification Report for Cu-SVM:
              precision    recall  f1-score   support

           0       0.87      0.92      0.89      8498
           1       0.94      0.93      0.94      8542
           2       0.93      0.94      0.93      2682
           3       0.80      0.73      0.76      2786
           4       0.84      0.84      0.84       300
           5       0.83      0.78      0.80      1865
           6       0.76      0.73      0.75       461

    accuracy                           0.89     25134
   macro avg       0.85      0.84      0.84     25134
weighted avg       0.89      0.89      0.89     25134


Accuracy for Cu-SVM:
0.8885175459536883

F1 Score for Cu-SVM:
0.8876876878472125

Precision Score for Cu-SVM:
0.8878780046536137

Recall Score for Cu-SVM:
0.8885175459536883


In [48]:
from sklearn.linear_model import LogisticRegression
logistic_model = LogisticRegression(max_iter=1000,penalty="l2",C=1.0,solver="lbfgs")
logistic_model.fit(X_train_tfidf, y_train_encoded)
y_pred_log = logistic_model.predict(X_test_tfidf)

In [49]:
# Print Classification Report
print("\nClassification Report for Logistic Regression:")
print(classification_report(y_test_encoded, y_pred_log, zero_division=1))

# Print Accuracy
print("\nAccuracy for Logistic Regression:")
print(accuracy_score(y_test_encoded, y_pred_log))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded, y_pred_log, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded, y_pred_log, average='weighted', zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded, y_pred_log, average='weighted'))


Classification Report for Logistic Regression:
              precision    recall  f1-score   support

           0       0.88      0.93      0.90      8498
           1       0.94      0.94      0.94      8542
           2       0.94      0.93      0.94      2682
           3       0.80      0.74      0.77      2786
           4       0.93      0.79      0.85       300
           5       0.84      0.79      0.81      1865
           6       0.82      0.72      0.77       461

    accuracy                           0.89     25134
   macro avg       0.88      0.83      0.85     25134
weighted avg       0.89      0.89      0.89     25134


Accuracy for Logistic Regression:
0.8946844911275563

F1 Score:
0.8938268799533892

Precision Score:
0.8940952012437308

Recall Score:
0.8946844911275563


In [50]:
from sklearn.svm import SVC
svm = SVC(kernel='rbf',C=1.0,gamma='scale') 
svm.fit(X_train_tfidf,y_train_encoded)
y_pred_svm = svm.predict(X_test_tfidf)

In [51]:
# Print Classification Report
print("\nClassification Report for SVM:")
print(classification_report(y_test_encoded, y_pred_svm, zero_division=1))

# Print Accuracy
print("\nAccuracy for SVM:")
print(accuracy_score(y_test_encoded, y_pred_svm))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded, y_pred_svm, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded, y_pred_svm, average='weighted', zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded, y_pred_svm, average='weighted'))


Classification Report for SVM:
              precision    recall  f1-score   support

           0       0.87      0.92      0.89      8498
           1       0.94      0.93      0.94      8542
           2       0.93      0.93      0.93      2682
           3       0.80      0.72      0.76      2786
           4       0.84      0.84      0.84       300
           5       0.83      0.78      0.80      1865
           6       0.76      0.72      0.74       461

    accuracy                           0.89     25134
   macro avg       0.85      0.84      0.84     25134
weighted avg       0.89      0.89      0.89     25134


Accuracy for SVM:
0.8878013845786584

F1 Score:
0.8868819458916631

Precision Score:
0.8871358602654739

Recall Score:
0.8878013845786584


In [52]:
from sklearn.svm import SVC
svm1 = SVC(kernel='rbf',C=100,gamma='scale') 
svm1.fit(X_train_tfidf,y_train_encoded)
y_pred_svm1 = svm1.predict(X_test_tfidf)

In [53]:
# Print Classification Report
print("\nClassification Report for SVM with C=100:")
print(classification_report(y_test_encoded, y_pred_svm1, zero_division=1))

# Print Accuracy
print("\nAccuracy for SVM with C=100:")
print(accuracy_score(y_test_encoded, y_pred_svm1))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded, y_pred_svm1, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded, y_pred_svm1, average='weighted', zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded, y_pred_svm1, average='weighted'))


Classification Report for SVM with C=100:
              precision    recall  f1-score   support

           0       0.88      0.91      0.90      8498
           1       0.94      0.93      0.94      8542
           2       0.93      0.94      0.94      2682
           3       0.79      0.74      0.76      2786
           4       0.85      0.86      0.86       300
           5       0.82      0.79      0.81      1865
           6       0.77      0.75      0.76       461

    accuracy                           0.89     25134
   macro avg       0.86      0.85      0.85     25134
weighted avg       0.89      0.89      0.89     25134


Accuracy for SVM with C=100:
0.8903875228773772

F1 Score:
0.8898174589461949

Precision Score:
0.8897280738692869

Recall Score:
0.8903875228773772


In [54]:
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=100,random_state=42)
rf.fit(X_train_tfidf, y_train_encoded)
y_pred_rf = rf.predict(X_test_tfidf)

In [55]:
# Print Classification Report
print("\nClassification Report for RandomForest Classifier:")
print(classification_report(y_test_encoded,y_pred_rf, zero_division=1))

# Print Accuracy
print("\nAccuracy for RandomForest Classifier:")
print(accuracy_score(y_test_encoded,y_pred_rf))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded,y_pred_rf, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded,y_pred_rf,average='weighted', zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded,y_pred_rf,average='weighted'))


Classification Report for RandomForest Classifier:
              precision    recall  f1-score   support

           0       0.82      0.89      0.85      8498
           1       0.90      0.93      0.91      8542
           2       0.93      0.89      0.91      2682
           3       0.75      0.55      0.64      2786
           4       0.86      0.73      0.79       300
           5       0.78      0.70      0.74      1865
           6       0.76      0.57      0.65       461

    accuracy                           0.85     25134
   macro avg       0.83      0.75      0.79     25134
weighted avg       0.85      0.85      0.84     25134


Accuracy for RandomForest Classifier:
0.848014641521445

F1 Score:
0.8435995755933006

Precision Score:
0.8450229503433347

Recall Score:
0.848014641521445


In [56]:
from sklearn.linear_model import SGDClassifier
sgd = SGDClassifier(loss='hinge',penalty='elasticnet',max_iter=1000,learning_rate='adaptive',eta0=0.01)
sgd.fit(X_train_tfidf,y_train_encoded)
y_pred_sgd = sgd.predict(X_test_tfidf)

In [57]:
# Print Classification Report
print("\nClassification Report for SGD Classifier:")
print(classification_report(y_test_encoded,y_pred_sgd, zero_division=1))

# Print Accuracy
print("\nAccuracy for SGD Classifier:")
print(accuracy_score(y_test_encoded,y_pred_sgd))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded,y_pred_sgd, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded,y_pred_sgd,average='weighted', zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded,y_pred_sgd,average='weighted'))


Classification Report for SGD Classifier:
              precision    recall  f1-score   support

           0       0.82      0.93      0.87      8498
           1       0.92      0.94      0.93      8542
           2       0.91      0.93      0.92      2682
           3       0.83      0.54      0.65      2786
           4       0.88      0.71      0.78       300
           5       0.83      0.70      0.76      1865
           6       0.80      0.61      0.69       461

    accuracy                           0.87     25134
   macro avg       0.86      0.77      0.80     25134
weighted avg       0.87      0.87      0.86     25134


Accuracy for SGD Classifier:
0.866475690299992

F1 Score:
0.860757169815174

Precision Score:
0.8664060397186621

Recall Score:
0.866475690299992


In [58]:
from sklearn.linear_model import SGDClassifier
sgd1 = SGDClassifier(loss='log_loss',penalty='elasticnet',max_iter=1000,learning_rate='adaptive',eta0=0.01)
sgd1.fit(X_train_tfidf,y_train_encoded)
y_pred_sgd1 = sgd1.predict(X_test_tfidf)

In [59]:
# Print Classification Report
print("\nClassification Report for SGD Classifier:")
print(classification_report(y_test_encoded,y_pred_sgd1, zero_division=1))

# Print Accuracy
print("\nAccuracy for SGD Classifier:")
print(accuracy_score(y_test_encoded,y_pred_sgd1))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded,y_pred_sgd1, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded,y_pred_sgd1,average='weighted', zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded,y_pred_sgd1,average='weighted'))


Classification Report for SGD Classifier:
              precision    recall  f1-score   support

           0       0.77      0.95      0.85      8498
           1       0.89      0.93      0.91      8542
           2       0.94      0.88      0.91      2682
           3       0.80      0.49      0.61      2786
           4       0.97      0.38      0.55       300
           5       0.85      0.60      0.70      1865
           6       0.92      0.37      0.53       461

    accuracy                           0.84     25134
   macro avg       0.88      0.66      0.72     25134
weighted avg       0.84      0.84      0.83     25134


Accuracy for SGD Classifier:
0.8391819845627437

F1 Score:
0.8293532369748482

Precision Score:
0.8449100960511657

Recall Score:
0.8391819845627437


In [60]:
from sklearn.svm import LinearSVC
lin_svc = LinearSVC(penalty='l2',loss='squared_hinge',C=0.1,max_iter=1000,random_state=42)
lin_svc.fit(X_train_tfidf,y_train_encoded)
y_pred_lin_svc = lin_svc.predict(X_test_tfidf)

In [61]:
# Print Classification Report
print("\nClassification Report for LinearSVC:")
print(classification_report(y_test_encoded,y_pred_lin_svc, zero_division=1))

# Print Accuracy
print("\nAccuracy for LinearSVC:")
print(accuracy_score(y_test_encoded,y_pred_lin_svc))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded,y_pred_lin_svc, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded,y_pred_lin_svc,average='weighted', zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded,y_pred_lin_svc,average='weighted'))


Classification Report for LinearSVC:
              precision    recall  f1-score   support

           0       0.87      0.93      0.90      8498
           1       0.94      0.94      0.94      8542
           2       0.93      0.94      0.93      2682
           3       0.82      0.69      0.75      2786
           4       0.90      0.80      0.85       300
           5       0.84      0.77      0.80      1865
           6       0.81      0.72      0.76       461

    accuracy                           0.89     25134
   macro avg       0.87      0.83      0.85     25134
weighted avg       0.89      0.89      0.89     25134


Accuracy for LinearSVC:
0.8909445372801782

F1 Score:
0.8892023595081561

Precision Score:
0.8897453356179897

Recall Score:
0.8909445372801782


In [62]:
from sklearn.neural_network import MLPClassifier
mlp = MLPClassifier(hidden_layer_sizes=(130,),activation='relu',solver='adam',learning_rate='adaptive',nesterovs_momentum=True,verbose=1)
mlp.fit(X_train_tfidf,y_train_encoded)
y_pred_mlp = mlp.predict(X_test_tfidf)

Iteration 1, loss = 0.81731230
Iteration 2, loss = 0.30340673
Iteration 3, loss = 0.23970303
Iteration 4, loss = 0.21045789
Iteration 5, loss = 0.19292795
Iteration 6, loss = 0.18034203
Iteration 7, loss = 0.17060313
Iteration 8, loss = 0.16396359
Iteration 9, loss = 0.15630068
Iteration 10, loss = 0.15319477
Iteration 11, loss = 0.14563940
Iteration 12, loss = 0.14110070
Iteration 13, loss = 0.13692798
Iteration 14, loss = 0.13239935
Iteration 15, loss = 0.12836967
Iteration 16, loss = 0.12415909
Iteration 17, loss = 0.11988659
Iteration 18, loss = 0.11599589
Iteration 19, loss = 0.11225874
Iteration 20, loss = 0.10797969
Iteration 21, loss = 0.10415225
Iteration 22, loss = 0.10036490
Iteration 23, loss = 0.09620974
Iteration 24, loss = 0.09174565
Iteration 25, loss = 0.08783113
Iteration 26, loss = 0.08371203
Iteration 27, loss = 0.07962750
Iteration 28, loss = 0.07580430
Iteration 29, loss = 0.07165669
Iteration 30, loss = 0.06796637
Iteration 31, loss = 0.06454133
Iteration 32, los

In [63]:
# Print Classification Report
print("\nClassification Report for MLP Classifier :")
print(classification_report(y_test_encoded,y_pred_mlp, zero_division=1))

# Print Accuracy
print("\nAccuracy for MLP Classifier:")
print(accuracy_score(y_test_encoded,y_pred_mlp))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded,y_pred_mlp, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded,y_pred_mlp,average='weighted', zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded,y_pred_mlp,average='weighted'))


Classification Report for MLP Classifier :
              precision    recall  f1-score   support

           0       0.89      0.88      0.89      8498
           1       0.93      0.92      0.93      8542
           2       0.91      0.93      0.92      2682
           3       0.74      0.76      0.75      2786
           4       0.83      0.88      0.85       300
           5       0.79      0.79      0.79      1865
           6       0.80      0.77      0.78       461

    accuracy                           0.88     25134
   macro avg       0.84      0.85      0.84     25134
weighted avg       0.88      0.88      0.88     25134


Accuracy for MLP Classifier:
0.8794063817935863

F1 Score:
0.8796604816587533

Precision Score:
0.8800383072317475

Recall Score:
0.8794063817935863


In [64]:
from sklearn.tree import DecisionTreeClassifier
dt = DecisionTreeClassifier(random_state=42,max_depth=10)
dt.fit(X_train_tfidf,y_train_encoded)
y_pred_dt = dt.predict(X_test_tfidf)

In [65]:
# Print Classification Report
print("\nClassification Report for Decision tree Classifier :")
print(classification_report(y_test_encoded,y_pred_dt, zero_division=1))

# Print Accuracy
print("\nAccuracy for Decision Classifier:")
print(accuracy_score(y_test_encoded,y_pred_dt))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded,y_pred_dt, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded,y_pred_dt,average='weighted', zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded,y_pred_dt,average='weighted'))


Classification Report for Decision tree Classifier :
              precision    recall  f1-score   support

           0       0.48      0.98      0.64      8498
           1       0.95      0.60      0.74      8542
           2       0.93      0.81      0.86      2682
           3       1.00      0.00      0.01      2786
           4       1.00      0.00      0.00       300
           5       1.00      0.00      0.00      1865
           6       1.00      0.00      0.00       461

    accuracy                           0.62     25134
   macro avg       0.91      0.34      0.32     25134
weighted avg       0.80      0.62      0.56     25134


Accuracy for Decision Classifier:
0.6217474337550728

F1 Score:
0.5606542524254952

Precision Score:
0.7993707330870954

Recall Score:
0.6217474337550728


In [66]:
from sklearn.naive_bayes import MultinomialNB
nb = MultinomialNB()
nb.fit(X_train_tfidf, y_train_encoded)
y_pred_nb = nb.predict(X_test_tfidf)

In [67]:
# Print Classification Report
print("\nClassification Report for Naive Bayes Classifier :")
print(classification_report(y_test_encoded,y_pred_nb, zero_division=1))

# Print Accuracy
print("\nAccuracy for Naive Bayes Classifier:")
print(accuracy_score(y_test_encoded,y_pred_nb))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded,y_pred_nb, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded,y_pred_nb,average='weighted', zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded,y_pred_nb,average='weighted'))


Classification Report for Naive Bayes Classifier :
              precision    recall  f1-score   support

           0       0.79      0.87      0.83      8498
           1       0.76      0.89      0.82      8542
           2       0.91      0.71      0.80      2682
           3       0.73      0.49      0.58      2786
           4       0.97      0.42      0.58       300
           5       0.77      0.59      0.67      1865
           6       0.84      0.41      0.55       461

    accuracy                           0.78     25134
   macro avg       0.82      0.63      0.69     25134
weighted avg       0.79      0.78      0.78     25134


Accuracy for Naive Bayes Classifier:
0.783281610567359

F1 Score:
0.7751860382138609

Precision Score:
0.7870385207941842

Recall Score:
0.783281610567359


In [68]:
from sklearn.ensemble import GradientBoostingClassifier
gbm = GradientBoostingClassifier(n_estimators=100, random_state=42,max_depth=10)
gbm.fit(X_train_tfidf, y_train_encoded)
y_pred_gbm = gbm.predict(X_test_tfidf)

In [69]:
# Print Classification Report
print("\nClassification Report for Grdient Boosting Classifier :")
print(classification_report(y_test_encoded,y_pred_gbm, zero_division=1))

# Print Accuracy
print("\nAccuracy for Gradient Boosting Classifier:")
print(accuracy_score(y_test_encoded,y_pred_gbm))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded,y_pred_gbm, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded,y_pred_gbm,average='weighted', zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded,y_pred_gbm,average='weighted'))


Classification Report for Grdient Boosting Classifier :
              precision    recall  f1-score   support

           0       0.78      0.92      0.84      8498
           1       0.94      0.91      0.92      8542
           2       0.91      0.93      0.92      2682
           3       0.79      0.54      0.64      2786
           4       0.76      0.71      0.73       300
           5       0.82      0.68      0.74      1865
           6       0.67      0.64      0.66       461

    accuracy                           0.85     25134
   macro avg       0.81      0.76      0.78     25134
weighted avg       0.85      0.85      0.84     25134


Accuracy for Gradient Boosting Classifier:
0.8480942150075594

F1 Score:
0.8442675434794854

Precision Score:
0.8504940152364057

Recall Score:
0.8480942150075594


In [70]:
import lightgbm as lgb
lgbm = lgb.LGBMClassifier(n_estimators=150,max_depth=50)
lgbm.fit(X_train_tfidf,y_train_encoded)
y_pred_lgbm = lgbm.predict(X_test_tfidf)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.159475 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 175921
[LightGBM] [Info] Number of data points in the train set: 75401, number of used features: 4986
[LightGBM] [Info] Start training from score -1.084338
[LightGBM] [Info] Start training from score -1.079213
[LightGBM] [Info] Start training from score -2.237770
[LightGBM] [Info] Start training from score -2.199601
[LightGBM] [Info] Start training from score -4.425961
[LightGBM] [Info] Start training from score -2.601305
[LightGBM] [Info] Start training from score -3.999289


In [71]:
# Print Classification Report
print("\nClassification Report for LightGBM Classifier :")
print(classification_report(y_test_encoded,y_pred_lgbm, zero_division=1))

# Print Accuracy
print("\nAccuracy for LightGBM Classifier:")
print(accuracy_score(y_test_encoded,y_pred_lgbm))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded,y_pred_lgbm, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded,y_pred_lgbm,average='weighted', zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded,y_pred_lgbm,average='weighted'))


Classification Report for LightGBM Classifier :
              precision    recall  f1-score   support

           0       0.87      0.91      0.89      8498
           1       0.94      0.94      0.94      8542
           2       0.93      0.94      0.93      2682
           3       0.79      0.69      0.74      2786
           4       0.84      0.79      0.81       300
           5       0.82      0.77      0.79      1865
           6       0.76      0.75      0.75       461

    accuracy                           0.88     25134
   macro avg       0.85      0.83      0.84     25134
weighted avg       0.88      0.88      0.88     25134


Accuracy for LightGBM Classifier:
0.8840216439882231

F1 Score:
0.8827593088597941

Precision Score:
0.8827201004932869

Recall Score:
0.8840216439882231


In [72]:
from sklearn.tree import ExtraTreeClassifier
ext_class = ExtraTreeClassifier(max_depth=20,random_state=42,splitter='best')
ext_class.fit(X_train_tfidf,y_train_encoded)
y_pred_ext_class = ext_class.predict(X_test_tfidf)

In [73]:
# Print Classification Report
print("\nClassification Report for Extratree Classifier :")
print(classification_report(y_test_encoded,y_pred_ext_class, zero_division=1))

# Print Accuracy
print("\nAccuracy for ExtraTree Classifier:")
print(accuracy_score(y_test_encoded,y_pred_ext_class))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded,y_pred_ext_class, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded,y_pred_ext_class,average='weighted', zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded,y_pred_ext_class,average='weighted'))


Classification Report for Extratree Classifier :
              precision    recall  f1-score   support

           0       0.38      0.94      0.54      8498
           1       0.74      0.28      0.41      8542
           2       0.64      0.05      0.09      2682
           3       0.30      0.00      0.00      2786
           4       0.82      0.20      0.33       300
           5       0.54      0.18      0.27      1865
           6       0.00      0.00      0.00       461

    accuracy                           0.44     25134
   macro avg       0.49      0.24      0.23     25134
weighted avg       0.53      0.44      0.36     25134


Accuracy for ExtraTree Classifier:
0.4353465425320283

F1 Score:
0.35627744712571213

Precision Score:
0.532701868025113

Recall Score:
0.4353465425320283


In [78]:
from sklearn.ensemble import AdaBoostClassifier
adb_class = AdaBoostClassifier(learning_rate=0.1)
adb_class.fit(X_train_tfidf,y_train_encoded)
y_pred_adb = adb_class.predict(X_test_tfidf)

In [79]:
# Print Classification Report
print("\nClassification Report for AdaBoost Classifier :")
print(classification_report(y_test_encoded,y_pred_adb, zero_division=1))

# Print Accuracy
print("\nAccuracy for AdaBoost Classifier:")
print(accuracy_score(y_test_encoded,y_pred_adb))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded,y_pred_adb, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded,y_pred_adb,average='weighted', zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded,y_pred_adb,average='weighted'))


Classification Report for AdaBoost Classifier :
              precision    recall  f1-score   support

           0       0.43      0.98      0.60      8498
           1       0.80      0.30      0.44      8542
           2       0.89      0.80      0.84      2682
           3       1.00      0.00      0.00      2786
           4       1.00      0.00      0.01       300
           5       1.00      0.00      0.00      1865
           6       1.00      0.00      0.00       461

    accuracy                           0.52     25134
   macro avg       0.87      0.30      0.27     25134
weighted avg       0.73      0.52      0.44     25134


Accuracy for AdaBoost Classifier:
0.5200525185008356

F1 Score:
0.44081072539327104

Precision Score:
0.7263600361615534

Recall Score:
0.5200525185008356


In [82]:
X_train_vect_dense = X_train_tfidf.toarray()
X_test_vect_dense = X_test_tfidf.toarray()

from sklearn.ensemble import HistGradientBoostingClassifier
# Train the HistGradientBoostingClassifier
hist_gdb = HistGradientBoostingClassifier(loss='log_loss', max_depth=25, random_state=42)
hist_gdb.fit(X_train_vect_dense, y_train_encoded)

# Predict and evaluate
y_pred_hgdb = hist_gdb.predict(X_test_vect_dense)

In [83]:
# Print Classification Report
print("\nClassification Report for HistGradientBoostingClassifier:")
print(classification_report(y_test_encoded,y_pred_hgdb, zero_division=1))

# Print Accuracy
print("\nAccuracy for HistGradientBoostingClassifier:")
print(accuracy_score(y_test_encoded,y_pred_hgdb))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded,y_pred_hgdb, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded,y_pred_hgdb,average='weighted', zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded,y_pred_hgdb,average='weighted'))


Classification Report for HistGradientBoostingClassifier:
              precision    recall  f1-score   support

           0       0.85      0.91      0.88      8498
           1       0.94      0.94      0.94      8542
           2       0.92      0.94      0.93      2682
           3       0.78      0.66      0.71      2786
           4       0.83      0.79      0.81       300
           5       0.82      0.76      0.79      1865
           6       0.74      0.72      0.73       461

    accuracy                           0.88     25134
   macro avg       0.84      0.82      0.83     25134
weighted avg       0.88      0.88      0.88     25134


Accuracy for HistGradientBoostingClassifier:
0.8779740590435267

F1 Score:
0.8762200131177791

Precision Score:
0.8765439890597662

Recall Score:
0.8779740590435267


In [84]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=15)
knn.fit(X_train_tfidf,y_train_encoded)
y_pred_knn = knn.predict(X_test_tfidf)

In [85]:
# Print Classification Report
print("\nClassification Report for K-NN Classifier :")
print(classification_report(y_test_encoded,y_pred_knn, zero_division=1))

# Print Accuracy
print("\nAccuracy for K-NN Classifier:")
print(accuracy_score(y_test_encoded,y_pred_knn))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded,y_pred_knn, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded,y_pred_knn,average='weighted', zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded,y_pred_knn,average='weighted'))


Classification Report for K-NN Classifier :
              precision    recall  f1-score   support

           0       0.44      0.96      0.61      8498
           1       0.84      0.29      0.44      8542
           2       0.89      0.34      0.50      2682
           3       0.68      0.28      0.40      2786
           4       0.78      0.46      0.58       300
           5       0.66      0.45      0.54      1865
           6       0.76      0.16      0.27       461

    accuracy                           0.53     25134
   macro avg       0.72      0.42      0.47     25134
weighted avg       0.68      0.53      0.50     25134


Accuracy for K-NN Classifier:
0.5346144664597756

F1 Score:
0.502042253244275

Precision Score:
0.6767912639947985

Recall Score:
0.5346144664597756


In [86]:
from xgboost import XGBClassifier
xgb = XGBClassifier(n_estimators=100,learning_rate=0.1,max_depth=10,random_state=42)
xgb.fit(X_train_tfidf, y_train_encoded)
y_pred_xgb = xgb.predict(X_test_tfidf)

In [87]:
# Print Classification Report
print("\nClassification Report for XGBoost Classifier :")
print(classification_report(y_test_encoded,y_pred_xgb, zero_division=1))

# Print Accuracy
print("\nAccuracy for XGBoost Classifier:")
print(accuracy_score(y_test_encoded,y_pred_xgb))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded,y_pred_xgb, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded,y_pred_xgb,average='weighted', zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded,y_pred_xgb,average='weighted'))


Classification Report for XGBoost Classifier :
              precision    recall  f1-score   support

           0       0.75      0.92      0.83      8498
           1       0.94      0.89      0.91      8542
           2       0.92      0.93      0.93      2682
           3       0.77      0.48      0.59      2786
           4       0.81      0.80      0.81       300
           5       0.81      0.65      0.72      1865
           6       0.73      0.60      0.66       461

    accuracy                           0.84     25134
   macro avg       0.82      0.75      0.78     25134
weighted avg       0.84      0.84      0.83     25134


Accuracy for XGBoost Classifier:
0.8350839500278507

F1 Score:
0.8296990621108591

Precision Score:
0.8398164467498778

Recall Score:
0.8350839500278507


In [88]:
import catboost
catb = catboost.CatBoostClassifier(iterations=100, learning_rate=0.1, depth=8, verbose=0)
catb.fit(X_train_tfidf, y_train_encoded)
y_pred_catb = catb.predict(X_test_tfidf)

In [89]:
# Print Classification Report
print("\nClassification Report for CatBoost Classifier :")
print(classification_report(y_test_encoded,y_pred_catb, zero_division=1))

# Print Accuracy
print("\nAccuracy for CatBoost Classifier:")
print(accuracy_score(y_test_encoded,y_pred_catb))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded,y_pred_catb, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded,y_pred_catb,average='weighted', zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded,y_pred_catb,average='weighted'))


Classification Report for CatBoost Classifier :
              precision    recall  f1-score   support

           0       0.64      0.94      0.76      8498
           1       0.93      0.87      0.90      8542
           2       0.92      0.89      0.90      2682
           3       0.81      0.20      0.33      2786
           4       0.91      0.45      0.60       300
           5       0.83      0.44      0.58      1865
           6       0.79      0.29      0.42       461

    accuracy                           0.78     25134
   macro avg       0.83      0.58      0.64     25134
weighted avg       0.81      0.78      0.75     25134


Accuracy for CatBoost Classifier:
0.7753640486989735

F1 Score:
0.7535795577177634

Precision Score:
0.8075686829511604

Recall Score:
0.7753640486989735


In [98]:
from sklearn.svm import NuSVC
nu_svc = NuSVC(nu=0.001,kernel='rbf',gamma='scale',coef0=0.0)
nu_svc.fit(X_train_tfidf,y_train_encoded)
y_pred_nu_svc = nu_svc.predict(X_test_tfidf)

In [99]:
# Print Classification Report
print("\nClassification Report for NuSVC:")
print(classification_report(y_test_encoded,y_pred_nu_svc, zero_division=1))

# Print Accuracy
print("\nAccuracy for NuSVC:")
print(accuracy_score(y_test_encoded,y_pred_nu_svc))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded,y_pred_nu_svc, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded,y_pred_nu_svc,average='weighted', zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded,y_pred_nu_svc, average='weighted'))


Classification Report for NuSVC:
              precision    recall  f1-score   support

           0       0.82      0.86      0.84      8498
           1       0.93      0.93      0.93      8542
           2       0.93      0.94      0.94      2682
           3       0.66      0.62      0.64      2786
           4       0.85      0.86      0.86       300
           5       0.76      0.68      0.72      1865
           6       0.77      0.75      0.76       461

    accuracy                           0.85     25134
   macro avg       0.82      0.81      0.81     25134
weighted avg       0.85      0.85      0.85     25134


Accuracy for NuSVC:
0.8504416328479351

F1 Score:
0.8493594499097491

Precision Score:
0.8489559483526807

Recall Score:
0.8504416328479351


In [58]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, MultiHeadAttention, Dense, GlobalAveragePooling1D


vocab_size = 5000
embed_dim = 128
max_sequence_length = 100  
num_classes = 7
# Input layer
inputs = Input(shape=(max_sequence_length,))
# Embedding layer
x = Embedding(input_dim=vocab_size, output_dim=embed_dim, input_length=max_sequence_length)(inputs)
# MultiHeadAttention layer
x = MultiHeadAttention(num_heads=8, key_dim=embed_dim)(x, x)  
# Feedforward layers
x = Dense(64, activation='relu')(x)
x = Dense(32, activation='relu')(x)
x = GlobalAveragePooling1D()(x)
# Output layer
outputs = Dense(num_classes, activation='softmax')(x)
# Build the model
model_self_attention = Model(inputs=inputs, outputs=outputs)

model_self_attention.summary()

In [59]:
# Compile the model
model_self_attention.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [60]:
history=model_self_attention.fit(X_train_padded, y_train_encoded,epochs=25,batch_size=64,validation_data=(X_test_padded, y_test_encoded),verbose=1,callbacks=early_stopping)

Epoch 1/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 19ms/step - accuracy: 0.5976 - loss: 1.0042 - val_accuracy: 0.8705 - val_loss: 0.3776
Epoch 2/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 16ms/step - accuracy: 0.8940 - loss: 0.3004 - val_accuracy: 0.8908 - val_loss: 0.3168
Epoch 3/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 16ms/step - accuracy: 0.9162 - loss: 0.2335 - val_accuracy: 0.8872 - val_loss: 0.3266
Epoch 4/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 16ms/step - accuracy: 0.9277 - loss: 0.1968 - val_accuracy: 0.8870 - val_loss: 0.3282
Epoch 5/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 17ms/step - accuracy: 0.9377 - loss: 0.1704 - val_accuracy: 0.8863 - val_loss: 0.3558
Epoch 6/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 17ms/step - accuracy: 0.9428 - loss: 0.1570 - val_accuracy: 0.8868 - val_loss: 0.3993
Epoc

In [61]:
import numpy as np
y_pred_self_attention = model_self_attention.predict(X_test_padded)
y_pred_classes_self_attention = np.argmax(y_pred_self_attention,axis=1)

[1m786/786[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step


In [62]:
from sklearn.metrics import classification_report, accuracy_score, f1_score, roc_auc_score, precision_score, recall_score

# Print Classification Report
print("\nClassification Report for Self attention network")
print(classification_report(y_test_encoded,y_pred_classes_self_attention, zero_division=1))

# Print Accuracy
print("\nAccuracy for Self-attention network:")
print(accuracy_score(y_test_encoded,y_pred_classes_self_attention))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded, y_pred_classes_self_attention, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded,y_pred_classes_self_attention, average='weighted',zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded,y_pred_classes_self_attention, average='weighted'))


Classification Report for Self attention network
              precision    recall  f1-score   support

           0       0.88      0.92      0.90      8498
           1       0.94      0.94      0.94      8542
           2       0.91      0.94      0.93      2682
           3       0.81      0.71      0.75      2786
           4       0.88      0.78      0.83       300
           5       0.81      0.83      0.82      1865
           6       0.82      0.72      0.77       461

    accuracy                           0.89     25134
   macro avg       0.86      0.83      0.85     25134
weighted avg       0.89      0.89      0.89     25134


Accuracy for Self-attention network:
0.8907853903079493

F1 Score:
0.8896977791518983

Precision Score:
0.8898601563663195

Recall Score:
0.8907853903079493


In [63]:
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Input, Conv1D, Bidirectional, LSTM, Dense, GlobalMaxPooling1D, Dropout
from tensorflow.keras.models import Model

vocab_size = 5000
embed_dim = 128
max_len = 100
num_classes = 7

# RCNN Architecture
inputs = Input(shape=(max_len,))
x = Embedding(input_dim=vocab_size, output_dim=embed_dim, input_length=max_len)(inputs)
x = Conv1D(filters=64, kernel_size=5, activation='relu', padding='same')(x)
x = Bidirectional(LSTM(64, return_sequences=True))(x)
x = GlobalMaxPooling1D()(x)
x = Dropout(0.5)(x)
outputs = Dense(num_classes, activation='softmax')(x)

rcnn_model = Model(inputs, outputs)
rcnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])



In [64]:
rcnn_model.summary()

In [65]:
rcnn_model.fit(X_train_padded, y_train_encoded, validation_data=(X_test_padded, y_test_encoded),batch_size=64, epochs=25,callbacks=early_stopping)

Epoch 1/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 12ms/step - accuracy: 0.6913 - loss: 0.8509 - val_accuracy: 0.8817 - val_loss: 0.3395
Epoch 2/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 12ms/step - accuracy: 0.8994 - loss: 0.2942 - val_accuracy: 0.8914 - val_loss: 0.3040
Epoch 3/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 12ms/step - accuracy: 0.9205 - loss: 0.2282 - val_accuracy: 0.8903 - val_loss: 0.3196
Epoch 4/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 12ms/step - accuracy: 0.9350 - loss: 0.1889 - val_accuracy: 0.8902 - val_loss: 0.3181
Epoch 5/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 12ms/step - accuracy: 0.9459 - loss: 0.1584 - val_accuracy: 0.8895 - val_loss: 0.3340
Epoch 6/25
[1m1179/1179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 12ms/step - accuracy: 0.9534 - loss: 0.1371 - val_accuracy: 0.8834 - val_loss: 0.3753
Epoc

<keras.src.callbacks.history.History at 0x7c9222ed45b0>

In [66]:
import numpy as np
y_pred_rcnn_model = rcnn_model.predict(X_test_padded)
y_pred_classes_rcnn_model = np.argmax(y_pred_rcnn_model,axis=1)

[1m786/786[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step


In [67]:
from sklearn.metrics import classification_report, accuracy_score, f1_score, roc_auc_score, precision_score, recall_score

# Print Classification Report
print("\nClassification Report for RCNN:")
print(classification_report(y_test_encoded,y_pred_classes_rcnn_model, zero_division=1))

# Print Accuracy
print("\nAccuracy for RCNN:")
print(accuracy_score(y_test_encoded,y_pred_classes_rcnn_model))

# Print F1 Score
print("\nF1 Score:")
print(f1_score(y_test_encoded,y_pred_classes_rcnn_model, average='weighted'))

# Print Precision Score
print("\nPrecision Score:")
print(precision_score(y_test_encoded,y_pred_classes_rcnn_model, average='weighted',zero_division=1))

# Print Recall Score
print("\nRecall Score:")
print(recall_score(y_test_encoded,y_pred_classes_rcnn_model, average='weighted'))


Classification Report for RCNN:
              precision    recall  f1-score   support

           0       0.89      0.91      0.90      8498
           1       0.94      0.94      0.94      8542
           2       0.94      0.91      0.93      2682
           3       0.77      0.77      0.77      2786
           4       0.85      0.80      0.83       300
           5       0.84      0.78      0.81      1865
           6       0.75      0.78      0.76       461

    accuracy                           0.89     25134
   macro avg       0.86      0.84      0.85     25134
weighted avg       0.89      0.89      0.89     25134


Accuracy for RCNN:
0.8913821914538076

F1 Score:
0.891200873834662

Precision Score:
0.8913406794899478

Recall Score:
0.8913821914538076
