<a href="https://colab.research.google.com/github/mazenmagdii/IMDB-Sentiment-Classification/blob/main/IMDB_without_Pretrained_Embedding_layer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Embedding,LSTM,Dropout,Bidirectional,GRU
from tensorflow.keras.callbacks import EarlyStopping,ModelCheckpoint
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import SpatialDropout1D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten,BatchNormalization

In [2]:
from gensim.models import Word2Vec
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import imdb
# Load the integer-encoded IMDB reviews, keeping only the 20,000 most frequent words.
train_split, test_split = imdb.load_data(num_words=20000)
x_train, y_train = train_split
x_test, y_test = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [3]:
# Pool the stock train/test halves, then carve out a 70 / 15 / 15
# train / validation / test split with a fixed seed.
# NOTE: this cell rebinds x_train / x_test / y_train / y_test, so re-running it
# without first re-running the load cell would re-split already-split data.
X = np.concatenate([x_train, x_test])
Y = np.concatenate([y_train, y_test])

x_train, x_temp, y_train, y_temp = train_test_split(
    X, Y, test_size=0.3, shuffle=True, random_state=42
)
x_val, x_test, y_val, y_test = train_test_split(
    x_temp, y_temp, test_size=0.5, shuffle=True, random_state=42
)

In [4]:
# word -> index mapping shipped with the dataset, and its inverse (index -> word).
word_i = imdb.get_word_index()
reverse_word_i = {index: word for word, index in word_i.items()}


def decode_review(encoded_review):
    """Turn a sequence of integer word IDs back into a space-separated string.

    Each ID is shifted down by 3 before the lookup; any ID that has no entry
    in ``reverse_word_i`` after the shift is rendered as '?'.
    """
    words = []
    for token_id in encoded_review:
        words.append(reverse_word_i.get(token_id - 3, '?'))
    return ' '.join(words)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [5]:
# Decode every split from integer IDs back to plain text.
train_texts = list(map(decode_review, x_train))
val_texts = list(map(decode_review, x_val))
test_texts = list(map(decode_review, x_test))

In [6]:
# Build the vocabulary from the training texts only, so nothing from the
# validation or test reviews influences the word indices.
tok = Tokenizer(
    num_words=20000,
)
tok.fit_on_texts(train_texts)

In [7]:
# Convert each split's texts to integer sequences using the fitted tokenizer.
X_train_seq, X_val_seq, X_test_seq = (
    tok.texts_to_sequences(texts)
    for texts in (train_texts, val_texts, test_texts)
)

In [8]:
# Pad / truncate every sequence to the same fixed length.
max_length = 500
X_train_pad, X_val_pad, X_test_pad = [
    pad_sequences(sequences, maxlen=max_length)
    for sequences in (X_train_seq, X_val_seq, X_test_seq)
]

In [9]:
# Number of LSTM units. The saved cell had an empty first argument
# (`LSTM( , recurrent_dropout=0.3)`), which is a SyntaxError. The value used
# for the recorded run cannot be recovered from this notebook, so 64 is a
# placeholder to tune. A checkpoint saved with a different size will not load
# into this model.
LSTM_UNITS = 64

model = Sequential([
    # Embeddings are learned from scratch; input_dim matches the tokenizer's
    # num_words=20000. mask_zero=True makes later layers ignore index 0, the
    # value pad_sequences uses for padding.
    Embedding(input_dim=20000, output_dim=100, input_length=max_length, mask_zero=True),
    # Drops whole embedding channels rather than individual elements.
    SpatialDropout1D(0.4),
    LSTM(LSTM_UNITS, recurrent_dropout=0.3),
    # Single sigmoid unit: probability that the review is positive.
    Dense(1, activation='sigmoid'),
])

# Compile the model
model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])



In [10]:
# Stop training once val_loss has not improved for 10 consecutive epochs.
es = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=10,
    verbose=True,
)

# Save the model to disk whenever val_accuracy reaches a new maximum.
mc = ModelCheckpoint(
    'best_model.keras',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=True,
)

# Multiply the learning rate by 0.25 after 3 epochs without val_loss improvement,
# never going below 1e-7.
rlr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.25,
    patience=3,
    min_lr=1e-7,
    verbose=True,
)

In [16]:
import os

# Restore the weights written by the ModelCheckpoint callback.
# This cell sits before the training cell, so on a fresh kernel (Restart &
# Run All) the checkpoint file does not exist yet and an unguarded
# load_weights call would raise. Skip it in that case; re-run this cell after
# training to evaluate with the best checkpoint instead of the last epoch.
if os.path.exists('best_model.keras'):
    model.load_weights('best_model.keras')
else:
    print("best_model.keras not found - run the training cell first, then re-run this cell.")

In [11]:
# Train for up to 50 epochs; the callbacks handle early stopping,
# best-model checkpointing and learning-rate reduction.
history = model.fit(
    X_train_pad,
    y_train,
    validation_data=(X_val_pad, y_val),
    epochs=50,
    batch_size=64,
    callbacks=[es, mc, rlr],
    verbose=True,
)

Epoch 1/50
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 982ms/step - accuracy: 0.6977 - loss: 0.5787
Epoch 1: val_accuracy improved from -inf to 0.86693, saving model to best_model.keras
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m576s[0m 1s/step - accuracy: 0.6978 - loss: 0.5786 - val_accuracy: 0.8669 - val_loss: 0.3310 - learning_rate: 0.0010
Epoch 2/50
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 974ms/step - accuracy: 0.8451 - loss: 0.3670
Epoch 2: val_accuracy did not improve from 0.86693
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m618s[0m 1s/step - accuracy: 0.8451 - loss: 0.3670 - val_accuracy: 0.8572 - val_loss: 0.3569 - learning_rate: 0.0010
Epoch 3/50
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 981ms/step - accuracy: 0.8685 - loss: 0.3212
Epoch 3: val_accuracy improved from 0.86693 to 0.87533, saving model to best_model.keras
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [16]:
# Evaluate on the held-out test split.
# The test reviews were already decoded, tokenized and padded earlier in the
# notebook (test_texts -> X_test_seq -> X_test_pad), so reuse that result
# instead of repeating the whole pipeline with a copy of decode_review inline.
# The old variable names are kept as aliases in case other cells refer to them.
x_test_seq = X_test_seq
x_test_padded = X_test_pad

test_loss, test_accuracy = model.evaluate(x_test_padded, y_test)
print(f'Test Accuracy: {test_accuracy:.4f}')

[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 109ms/step - accuracy: 0.8670 - loss: 0.3855
Test Accuracy: 0.8656


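To classify your own review, change the `test_review` variable in the cell below to the text you want, or read it interactively with `test_review = [input()]` (the tokenizer expects a list of strings, so keep the square brackets).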


In [15]:
# One or more raw review strings to classify (must be a list of strings).
test_review = ["The movie was good, i enjoyed watching it.It was a fantastic one."]

# Apply the same preprocessing as the training data: tokenize, then pad.
review_sequence = tok.texts_to_sequences(test_review)
padded_review = pad_sequences(review_sequence, maxlen=max_length)

# One row per review, each holding a single sigmoid probability.
prediction = model.predict(padded_review)

# Compare each review's scalar probability with the threshold. Testing the
# whole array (`if prediction >= 0.5`) only works for a single review and
# raises ValueError once test_review contains more than one entry.
for review_probability in prediction:
    print(f"Prediction: {review_probability}")

    if review_probability[0] >= 0.5:
        print("Positive review!")
    else:
        print("Negative review!")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
Prediction: [0.9734551]
Positive review!
