In [1]:
import pandas as pd
import numpy as np
data = pd.read_csv('/kaggle/input/imdb-sentiment-analysis/IMDB Dataset.csv')

In [2]:
#Print first few entries in the data
print("First few rows of the dataset: ")
print(data.head())

First few rows of the dataset: 
                                              review sentiment
0  One of the other reviewers has mentioned that ...  positive
1  A wonderful little production. <br /><br />The...  positive
2  I thought this was a wonderful way to spend ti...  positive
3  Basically there's a family where a little boy ...  negative
4  Petter Mattei's "Love in the Time of Money" is...  positive


In [3]:
#Printing the last few entries in the data
print("The last few entries of the dataset are: ")
data.tail()

The last few entries of the dataset are: 


Unnamed: 0,review,sentiment
49995,I thought this movie did a down right good job...,positive
49996,"Bad plot, bad dialogue, bad acting, idiotic di...",negative
49997,I am a Catholic taught in parochial elementary...,negative
49998,I'm going to have to disagree with the previou...,negative
49999,No one expects the Star Trek movies to be high...,negative


In [4]:
#Printing the summary statistics
print("\nSummary statistics: ")
print(data.describe())


Summary statistics: 
                                                   review sentiment
count                                               50000     50000
unique                                              49582         2
top     Loved today's show!!! It was a variety and not...  positive
freq                                                    5     25000


In [5]:
#Print the information about the dataset
print("\nInformation about the dataset: ")
print(data.info())


Information about the dataset: 
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   review     50000 non-null  object
 1   sentiment  50000 non-null  object
dtypes: object(2)
memory usage: 781.4+ KB
None


In [6]:
print(data.shape)

(50000, 2)


In [7]:
print(data.columns)

Index(['review', 'sentiment'], dtype='object')


In [8]:
#Print the null values from all the columns
print(data.isnull().sum())

review       0
sentiment    0
dtype: int64


In [9]:
#Sentiment Count
data['sentiment'].value_counts()

sentiment
positive    25000
negative    25000
Name: count, dtype: int64

In [10]:
#Data pre-processing
reviews = data['review'].values 
labels = data['sentiment'].values

In [11]:
# Tokenizing the text data
from tensorflow.keras.preprocessing.text import Tokenizer 
from tensorflow.keras.preprocessing.sequence import pad_sequences
num_words=10000
tokenizer = Tokenizer(num_words=num_words)
tokenizer.fit_on_texts(reviews)
sequences = tokenizer.texts_to_sequences(reviews)

In [12]:
# Padding sequences to ensure uniform length 
max_len = 250 
X = pad_sequences(sequences, maxlen=max_len)

In [13]:
# Convert labels to binary values
labels = data['sentiment'].apply(lambda x: 1 if x == 'positive' else 0).values

In [14]:
# Splitting data into training and test sets
from sklearn.model_selection import train_test_split 
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.25, random_state=42)

In [15]:
#RNN Implementation
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Embedding
rnn_model=Sequential([
    Embedding(input_dim=num_words,output_dim=128,input_length=max_len),
    SimpleRNN(128,activation='tanh', return_sequences=False),
    Dense(1,activation='sigmoid')
])
rnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])



In [16]:
#Early Stopping to prevent overfitting and save training time
from tensorflow.keras.callbacks import EarlyStopping
early_stopping = EarlyStopping(monitor='val_loss',patience=5,restore_best_weights=True)

In [17]:
rnn_model.fit(X_train,y_train,epochs=20,batch_size=64,validation_data=(X_test,y_test),callbacks=[early_stopping])

Epoch 1/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 32ms/step - accuracy: 0.5938 - loss: 0.6503 - val_accuracy: 0.8002 - val_loss: 0.4664
Epoch 2/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 29ms/step - accuracy: 0.7824 - loss: 0.4760 - val_accuracy: 0.7770 - val_loss: 0.5055
Epoch 3/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 29ms/step - accuracy: 0.8127 - loss: 0.4366 - val_accuracy: 0.6355 - val_loss: 0.6293
Epoch 4/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 29ms/step - accuracy: 0.7460 - loss: 0.5182 - val_accuracy: 0.7803 - val_loss: 0.4993
Epoch 5/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 29ms/step - accuracy: 0.8478 - loss: 0.3611 - val_accuracy: 0.8054 - val_loss: 0.4355
Epoch 6/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 29ms/step - accuracy: 0.8837 - loss: 0.2903 - val_accuracy: 0.7885 - val_loss: 0.4623
Epoch 7/20
[1m5

<keras.src.callbacks.history.History at 0x7d1eaa041f30>

In [18]:
#Model Evaluation
y_pred_probs_rnn = rnn_model.predict(X_test)  
y_pred_rnn = (y_pred_probs_rnn > 0.5).astype(int).flatten()

[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step


In [19]:
#Performance metrics for RNN
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report
accuracy = accuracy_score(y_test, y_pred_rnn)
precision = precision_score(y_test, y_pred_rnn)
recall = recall_score(y_test, y_pred_rnn)
f1 = f1_score(y_test, y_pred_rnn)
roc_auc = roc_auc_score(y_test, y_pred_probs_rnn)

# Print metrics
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")
print(f"AUC-ROC: {roc_auc:.2f}")

# Classification Report
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred_rnn))

Accuracy: 0.83
Precision: 0.84
Recall: 0.83
F1 Score: 0.83
AUC-ROC: 0.91

Classification Report:

              precision    recall  f1-score   support

           0       0.83      0.84      0.83      6157
           1       0.84      0.83      0.83      6343

    accuracy                           0.83     12500
   macro avg       0.83      0.83      0.83     12500
weighted avg       0.83      0.83      0.83     12500



In [20]:
#LSTM Model
from tensorflow.keras.layers import LSTM
lstm_model = Sequential([
    Embedding(input_dim=num_words, output_dim=128, input_length=max_len),
    LSTM(128, return_sequences=False),
    Dense(1, activation='sigmoid')
])
lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])



In [21]:
lstm_model.fit(X_train,y_train,epochs=20,batch_size=64,validation_data=(X_test,y_test),callbacks=[early_stopping])

Epoch 1/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 18ms/step - accuracy: 0.7454 - loss: 0.5000 - val_accuracy: 0.8409 - val_loss: 0.3668
Epoch 2/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 18ms/step - accuracy: 0.8959 - loss: 0.2666 - val_accuracy: 0.8926 - val_loss: 0.2631
Epoch 3/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 18ms/step - accuracy: 0.9237 - loss: 0.1987 - val_accuracy: 0.8942 - val_loss: 0.2740
Epoch 4/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 18ms/step - accuracy: 0.9505 - loss: 0.1402 - val_accuracy: 0.8922 - val_loss: 0.3053
Epoch 5/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 18ms/step - accuracy: 0.9589 - loss: 0.1120 - val_accuracy: 0.8839 - val_loss: 0.3810
Epoch 6/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 18ms/step - accuracy: 0.9735 - loss: 0.0784 - val_accuracy: 0.8810 - val_loss: 0.3984
Epoch 7/20
[1m5

<keras.src.callbacks.history.History at 0x7d1e48146350>

In [22]:
#Model Evaluation
y_pred_probs_lstm = lstm_model.predict(X_test)  
y_pred_lstm = (y_pred_probs_lstm > 0.5).astype(int).flatten()

[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step


In [23]:
#Performance metrics for LSTM

accuracy = accuracy_score(y_test, y_pred_lstm)
precision = precision_score(y_test, y_pred_lstm)
recall = recall_score(y_test, y_pred_lstm)
f1 = f1_score(y_test, y_pred_lstm)
roc_auc = roc_auc_score(y_test, y_pred_probs_lstm)

# Print metrics
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")
print(f"AUC-ROC: {roc_auc:.2f}")

# Classification Report
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred_lstm))

Accuracy: 0.89
Precision: 0.89
Recall: 0.90
F1 Score: 0.90
AUC-ROC: 0.96

Classification Report:

              precision    recall  f1-score   support

           0       0.90      0.88      0.89      6157
           1       0.89      0.90      0.90      6343

    accuracy                           0.89     12500
   macro avg       0.89      0.89      0.89     12500
weighted avg       0.89      0.89      0.89     12500



In [24]:
#GRU Model
from tensorflow.keras.layers import GRU

gru_model = Sequential([
    Embedding(input_dim=num_words, output_dim=128, input_length=max_len),
    GRU(128, return_sequences=False),
    Dense(1, activation='sigmoid')
])

gru_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])



In [25]:
gru_model.fit(X_train,y_train,epochs=20,batch_size=64,validation_data=(X_test,y_test),callbacks=[early_stopping])

Epoch 1/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 18ms/step - accuracy: 0.6944 - loss: 0.5477 - val_accuracy: 0.8824 - val_loss: 0.2935
Epoch 2/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 18ms/step - accuracy: 0.9047 - loss: 0.2403 - val_accuracy: 0.8961 - val_loss: 0.2557
Epoch 3/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 18ms/step - accuracy: 0.9473 - loss: 0.1477 - val_accuracy: 0.9002 - val_loss: 0.2748
Epoch 4/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 18ms/step - accuracy: 0.9689 - loss: 0.0935 - val_accuracy: 0.8893 - val_loss: 0.3137
Epoch 5/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 18ms/step - accuracy: 0.9815 - loss: 0.0606 - val_accuracy: 0.8862 - val_loss: 0.3792
Epoch 6/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 18ms/step - accuracy: 0.9892 - loss: 0.0389 - val_accuracy: 0.8867 - val_loss: 0.4555
Epoch 7/20
[1m5

<keras.src.callbacks.history.History at 0x7d1e403f7850>

In [26]:
#Model Evaluation
y_pred_probs_gru = gru_model.predict(X_test)  
y_pred_gru = (y_pred_probs_gru > 0.5).astype(int).flatten()

[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step


In [27]:
#Performance metrics for GRU

accuracy = accuracy_score(y_test, y_pred_gru)
precision = precision_score(y_test, y_pred_gru)
f1 = f1_score(y_test, y_pred_gru)
roc_auc = roc_auc_score(y_test, y_pred_probs_gru)

# Print metrics
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")
print(f"AUC-ROC: {roc_auc:.2f}")

# Classification Report
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred_gru))

Accuracy: 0.90
Precision: 0.87
Recall: 0.90
F1 Score: 0.90
AUC-ROC: 0.96

Classification Report:

              precision    recall  f1-score   support

           0       0.93      0.85      0.89      6157
           1       0.87      0.94      0.90      6343

    accuracy                           0.90     12500
   macro avg       0.90      0.90      0.90     12500
weighted avg       0.90      0.90      0.90     12500



In [28]:
#Encoder-Decoder Model with RNN
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, RepeatVector, TimeDistributed, Dense
enc_dec_model = Sequential([
    Embedding(input_dim=num_words, output_dim=128, input_length=max_len),
    SimpleRNN(128, return_sequences=False),
    RepeatVector(max_len),
    SimpleRNN(128, return_sequences=True),
    TimeDistributed(Dense(1, activation='sigmoid'))
])
enc_dec_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])



In [29]:
y_train_seq = np.repeat(y_train[:, np.newaxis], max_len, axis=1)
y_test_seq = np.repeat(y_test[:, np.newaxis], max_len, axis=1)

In [30]:
enc_dec_model.fit(X_train, y_train_seq, batch_size=64,epochs=20, validation_data=(X_test, y_test_seq), callbacks=[early_stopping])

Epoch 1/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m134s[0m 162ms/step - accuracy: 0.5056 - loss: 0.7029 - val_accuracy: 0.5560 - val_loss: 0.6686
Epoch 2/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 94ms/step - accuracy: 0.6124 - loss: 0.6468 - val_accuracy: 0.6980 - val_loss: 0.5792
Epoch 3/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 94ms/step - accuracy: 0.7707 - loss: 0.4851 - val_accuracy: 0.6536 - val_loss: 0.6211
Epoch 4/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 94ms/step - accuracy: 0.7792 - loss: 0.4714 - val_accuracy: 0.7857 - val_loss: 0.4931
Epoch 5/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 95ms/step - accuracy: 0.8418 - loss: 0.3817 - val_accuracy: 0.7672 - val_loss: 0.4824


<keras.src.callbacks.history.History at 0x7d1e401b79d0>

In [31]:
# Model Evaluation
y_pred_probs_enc_dec = enc_dec_model.predict(X_test)
y_pred_enc_dec_seq = (y_pred_probs_enc_dec > 0.5).astype(int)

[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 42ms/step


In [32]:
# Flatten predictions and labels for evaluation
y_pred_enc_dec_rnn = y_pred_enc_dec_seq[:, -1].flatten()
y_test_flat = y_test

In [33]:
# Performance metrics
accuracy = accuracy_score(y_test_flat, y_pred_enc_dec_rnn)
precision = precision_score(y_test_flat, y_pred_enc_dec_rnn)
recall = recall_score(y_test_flat, y_pred_enc_dec_rnn)
f1 = f1_score(y_test_flat, y_pred_enc_dec_rnn)
roc_auc = roc_auc_score(y_test_flat, y_pred_enc_dec_rnn)

# Print metrics
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")
print(f"AUC-ROC: {roc_auc:.2f}")

# Classification Report
print("\nClassification Report:\n")
print(classification_report(y_test_flat, y_pred_enc_dec_rnn))

Accuracy: 0.56
Precision: 0.64
Recall: 0.28
F1 Score: 0.39
AUC-ROC: 0.56

Classification Report:

              precision    recall  f1-score   support

           0       0.53      0.84      0.65      6157
           1       0.64      0.28      0.39      6343

    accuracy                           0.56     12500
   macro avg       0.59      0.56      0.52     12500
weighted avg       0.59      0.56      0.52     12500



In [34]:
#Encoder-Decoder Model with LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, RepeatVector, TimeDistributed, Dense
enc_dec_model_lstm = Sequential([
    Embedding(input_dim=num_words, output_dim=128, input_length=max_len),
    LSTM(128, return_sequences=False),
    RepeatVector(max_len),
    LSTM(128, return_sequences=True),
    TimeDistributed(Dense(1, activation='sigmoid'))
])
enc_dec_model_lstm.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])



In [35]:
enc_dec_model_lstm.fit(X_train, y_train_seq, batch_size=64,epochs=20, validation_data=(X_test, y_test_seq), callbacks=[early_stopping])

Epoch 1/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 62ms/step - accuracy: 0.7565 - loss: 0.4791 - val_accuracy: 0.8722 - val_loss: 0.3178
Epoch 2/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 59ms/step - accuracy: 0.9000 - loss: 0.2641 - val_accuracy: 0.8846 - val_loss: 0.3114
Epoch 3/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 58ms/step - accuracy: 0.9345 - loss: 0.1766 - val_accuracy: 0.8843 - val_loss: 0.2820
Epoch 4/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 59ms/step - accuracy: 0.9537 - loss: 0.1310 - val_accuracy: 0.8908 - val_loss: 0.3317
Epoch 5/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 59ms/step - accuracy: 0.9604 - loss: 0.1119 - val_accuracy: 0.8530 - val_loss: 0.3769


<keras.src.callbacks.history.History at 0x7d1df72473d0>

In [36]:
# Model Evaluation
y_pred_probs_enc_dec = enc_dec_model_lstm.predict(X_test)
y_pred_enc_dec_seq = (y_pred_probs_enc_dec > 0.5).astype(int)

[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 18ms/step


In [37]:
# Flatten predictions and labels for evaluation
y_pred_enc_dec_lstm = y_pred_enc_dec_seq[:, -1].flatten()
y_test_flat = y_test

In [38]:
# Performance metrics
accuracy = accuracy_score(y_test_flat, y_pred_enc_dec_lstm)
precision = precision_score(y_test_flat, y_pred_enc_dec_lstm)
recall = recall_score(y_test_flat, y_pred_enc_dec_lstm)
f1 = f1_score(y_test_flat, y_pred_enc_dec_lstm)
roc_auc = roc_auc_score(y_test_flat, y_pred_enc_dec_lstm)

# Print metrics
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")
print(f"AUC-ROC: {roc_auc:.2f}")

# Classification Report
print("\nClassification Report:\n")
print(classification_report(y_test_flat, y_pred_enc_dec_lstm))

Accuracy: 0.87
Precision: 0.87
Recall: 0.88
F1 Score: 0.87
AUC-ROC: 0.87

Classification Report:

              precision    recall  f1-score   support

           0       0.87      0.87      0.87      6157
           1       0.87      0.88      0.87      6343

    accuracy                           0.87     12500
   macro avg       0.87      0.87      0.87     12500
weighted avg       0.87      0.87      0.87     12500



In [39]:
#Encoder-Decoder with GRU
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding,GRU, RepeatVector, TimeDistributed, Dense
enc_dec_model_gru = Sequential([
    Embedding(input_dim=num_words, output_dim=128, input_length=max_len),
    GRU(128, return_sequences=False),
    RepeatVector(max_len),
    GRU(128, return_sequences=True),
    TimeDistributed(Dense(1, activation='sigmoid'))
])
enc_dec_model_gru.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])



In [40]:
enc_dec_model_gru.fit(X_train, y_train_seq, batch_size=64,epochs=20, validation_data=(X_test, y_test_seq), callbacks=[early_stopping])

Epoch 1/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 62ms/step - accuracy: 0.7090 - loss: 0.5337 - val_accuracy: 0.8848 - val_loss: 0.2847
Epoch 2/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 58ms/step - accuracy: 0.9051 - loss: 0.2468 - val_accuracy: 0.8882 - val_loss: 0.2805
Epoch 3/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 58ms/step - accuracy: 0.9434 - loss: 0.1557 - val_accuracy: 0.8980 - val_loss: 0.2582
Epoch 4/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 58ms/step - accuracy: 0.9664 - loss: 0.1004 - val_accuracy: 0.8898 - val_loss: 0.3331
Epoch 5/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 58ms/step - accuracy: 0.9810 - loss: 0.0578 - val_accuracy: 0.8858 - val_loss: 0.3492


<keras.src.callbacks.history.History at 0x7d1df7247ca0>

In [41]:
# Model Evaluation
y_pred_probs_enc_dec = enc_dec_model_gru.predict(X_test)
y_pred_enc_dec_seq = (y_pred_probs_enc_dec > 0.5).astype(int)

[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 18ms/step


In [42]:
# Flatten predictions and labels for evaluation
y_pred_enc_dec_gru = y_pred_enc_dec_seq[:, -1].flatten()
y_test_flat = y_test

In [43]:
# Performance metrics
accuracy = accuracy_score(y_test_flat, y_pred_enc_dec_gru)
precision = precision_score(y_test_flat, y_pred_enc_dec_gru)
recall = recall_score(y_test_flat, y_pred_enc_dec_gru)
f1 = f1_score(y_test_flat, y_pred_enc_dec_gru)
roc_auc = roc_auc_score(y_test_flat, y_pred_enc_dec_gru)

# Print metrics
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")
print(f"AUC-ROC: {roc_auc:.2f}")

# Classification Report
print("\nClassification Report:\n")
print(classification_report(y_test_flat, y_pred_enc_dec_gru))

Accuracy: 0.88
Precision: 0.87
Recall: 0.91
F1 Score: 0.89
AUC-ROC: 0.88

Classification Report:

              precision    recall  f1-score   support

           0       0.90      0.86      0.88      6157
           1       0.87      0.91      0.89      6343

    accuracy                           0.88     12500
   macro avg       0.89      0.88      0.88     12500
weighted avg       0.89      0.88      0.88     12500



In [44]:
#Simple transformer architecture
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, LayerNormalization, Embedding, Input, GlobalAveragePooling1D
from tensorflow.keras.models import Model
import numpy as np

class PositionalEncoding(tf.keras.layers.Layer):
    def __init__(self, max_len, embed_dim):
        super(PositionalEncoding, self).__init__()
        self.pos_encoding = self.positional_encoding(max_len, embed_dim)

    def positional_encoding(self, max_len, embed_dim):
        angle_rads = self.get_angles(np.arange(max_len)[:, np.newaxis],np.arange(embed_dim)[np.newaxis, :], embed_dim)
        angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])  # apply sin to even indices
        angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])  # apply cos to odd indices
        pos_encoding = angle_rads[np.newaxis, ...]
        return tf.cast(pos_encoding, dtype=tf.float32)

    def get_angles(self, pos, i, embed_dim):
        angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(embed_dim))
        return pos * angle_rates

    def call(self, x):
        return x + self.pos_encoding[:, :tf.shape(x)[1], :]


class MultiHeadSelfAttention(tf.keras.layers.Layer):
    def __init__(self, embed_dim, num_heads=8):
        super(MultiHeadSelfAttention, self).__init__()
        assert embed_dim % num_heads == 0, "Embedding dimension must be divisible by number of heads"
        self.num_heads = num_heads
        self.projection_dim = embed_dim // num_heads
        self.query_dense = Dense(embed_dim)
        self.key_dense = Dense(embed_dim)
        self.value_dense = Dense(embed_dim)
        self.combine_heads = Dense(embed_dim)

    def split_heads(self, x, batch_size):
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def attention(self, query, key, value):
        score = tf.matmul(query, key, transpose_b=True)
        dim_key = tf.cast(tf.shape(key)[-1], tf.float32)
        scaled_score = score / tf.math.sqrt(dim_key)
        weights = tf.nn.softmax(scaled_score, axis=-1)
        output = tf.matmul(weights, value)
        return output

    def call(self, inputs):
        batch_size = tf.shape(inputs)[0]
        query = self.query_dense(inputs)
        key = self.key_dense(inputs)
        value = self.value_dense(inputs)
        query = self.split_heads(query, batch_size)
        key = self.split_heads(key, batch_size)
        value = self.split_heads(value, batch_size)
        attention = self.attention(query, key, value)
        attention = tf.transpose(attention, perm=[0, 2, 1, 3])
        concat_attention = tf.reshape(attention, (batch_size, -1, self.num_heads * self.projection_dim))
        return self.combine_heads(concat_attention)

class TransformerBlock(tf.keras.layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        self.att = MultiHeadSelfAttention(embed_dim, num_heads)
        self.ffn = tf.keras.Sequential([
            Dense(ff_dim, activation='relu'),
            Dense(embed_dim)
        ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training):
        attn_output = self.att(inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

def build_transformer_model(vocab_size, max_len, embed_dim=128, num_heads=8, ff_dim=512, num_layers=4, dropout_rate=0.1):
    inputs = Input(shape=(max_len,))
    embedding_layer = Embedding(input_dim=vocab_size, output_dim=embed_dim)(inputs)
    positional_encoding = PositionalEncoding(max_len, embed_dim)(embedding_layer)

    x = positional_encoding
    for i in range(num_layers):
        x = TransformerBlock(embed_dim, num_heads, ff_dim, dropout_rate)(x, training=True)
    x = GlobalAveragePooling1D()(x)
    x = Dropout(0.1)(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.1)(x)
    outputs = Dense(1, activation='sigmoid')(x)
    return Model(inputs, outputs)

transformer_model = build_transformer_model(vocab_size=20000, max_len=250, embed_dim=128, num_heads=8, ff_dim=512, num_layers=4, dropout_rate=0.1)
transformer_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [45]:
history = transformer_model.fit(X_train, y_train,batch_size=64,epochs=20,validation_data=(X_test, y_test),callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss',patience=5)])

Epoch 1/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 78ms/step - accuracy: 0.5007 - loss: 0.7128 - val_accuracy: 0.5074 - val_loss: 0.6931
Epoch 2/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 55ms/step - accuracy: 0.5001 - loss: 0.6939 - val_accuracy: 0.4926 - val_loss: 0.6933
Epoch 3/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 55ms/step - accuracy: 0.5025 - loss: 0.6932 - val_accuracy: 0.5074 - val_loss: 0.6963
Epoch 4/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 55ms/step - accuracy: 0.5003 - loss: 0.6938 - val_accuracy: 0.4926 - val_loss: 0.6933
Epoch 5/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 56ms/step - accuracy: 0.5023 - loss: 0.6935 - val_accuracy: 0.4926 - val_loss: 0.6933
Epoch 6/20
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 55ms/step - accuracy: 0.5040 - loss: 0.6932 - val_accuracy: 0.4926 - val_loss: 0.6933


In [46]:
# Predict probabilities for the Transformer model
y_pred_probs_transformer = transformer_model.predict(X_test)
y_pred_transformer = (y_pred_probs_transformer > 0.5).astype(int).flatten()

[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 13ms/step


In [47]:
#Performance metrics
accuracy_transformer = accuracy_score(y_test, y_pred_transformer)
precision_transformer = precision_score(y_test, y_pred_transformer)
recall_transformer = recall_score(y_test, y_pred_transformer)
f1_transformer = f1_score(y_test, y_pred_transformer)
roc_auc_transformer = roc_auc_score(y_test, y_pred_probs_transformer)

# Print metrics
print(f"Transformer Model Performance:")
print(f"Accuracy: {accuracy_transformer:.2f}")
print(f"Precision: {precision_transformer:.2f}")
print(f"Recall: {recall_transformer:.2f}")
print(f"F1 Score: {f1_transformer:.2f}")
print(f"AUC-ROC: {roc_auc_transformer:.2f}")

# Classification Report
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred_transformer))

Transformer Model Performance:
Accuracy: 0.49
Precision: 0.00
Recall: 0.00
F1 Score: 0.00
AUC-ROC: 0.50

Classification Report:

              precision    recall  f1-score   support

           0       0.49      1.00      0.66      6157
           1       0.00      0.00      0.00      6343

    accuracy                           0.49     12500
   macro avg       0.25      0.50      0.33     12500
weighted avg       0.24      0.49      0.33     12500



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
