
# AAI612: Deep Learning & its Applications

*Notebook 6.4: How do you feel about that movie?*

<a href="https://colab.research.google.com/github/harmanani/AAI612/blob/main/Week6/Notebook6.4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import ssl

ssl._create_default_https_context = ssl._create_unverified_context

df = pd.DataFrame()
df = pd.read_csv('https://raw.githubusercontent.com/harmanani/AAI612/main/Week6/data/movie_data.csv', encoding='utf-8')
df.head(3)

Unnamed: 0,review,sentiment
0,I went and saw this movie last night after bei...,1
1,Actor turned director Bill Paxton follows up h...,1
2,As a recreational golfer with some knowledge o...,1


In [2]:
X_train = df.loc[:24999, 'review'].values
y_train = df.loc[:24999, 'sentiment'].values
X_test = df.loc[25000:, 'review'].values
y_test = df.loc[25000:, 'sentiment'].values

In [3]:
X = np.concatenate((X_train, X_test), axis=0)
y = np.concatenate((y_train, y_test), axis=0)

In [4]:
# summarize size
print("Training data: ")
print(X.shape)
print(y.shape)

Training data: 
(50000,)
(50000,)


In [6]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

tokenizer_obj = Tokenizer()
total_reviews = X_train + X_test
tokenizer_obj.fit_on_texts(total_reviews) 

# pad sequences
max_length = 100 # try other options like mean

# define vocabulary size
vocab_size = len(tokenizer_obj.word_index) + 1

X_train_tokens =  tokenizer_obj.texts_to_sequences(X_train)
X_test_tokens = tokenizer_obj.texts_to_sequences(X_test)


X_train_pad = pad_sequences(X_train_tokens, maxlen=max_length, padding='post')
X_test_pad = pad_sequences(X_test_tokens, maxlen=max_length, padding='post')

In [7]:
print(vocab_size)

125602


In [8]:
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, GRU
from tensorflow.keras.layers import Embedding

EMBEDDING_DIM = 100

print('Build model...')

model = Sequential()
model.add(Embedding(vocab_size, EMBEDDING_DIM, input_length=max_length))
model.add(GRU(units=32,  dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))

# try using different optimizers and different optimizer configs
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

print('Summary of the built model...')
print(model.summary())

Build model...
Summary of the built model...


2025-07-12 06:28:32.151450: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


None


In [9]:
print('Train...')

model.fit(X_train_pad, y_train, batch_size=128, epochs=25, validation_data=(X_test_pad, y_test), verbose=2)

Train...
Epoch 1/25
196/196 - 40s - 202ms/step - accuracy: 0.6851 - loss: 0.5762 - val_accuracy: 0.7844 - val_loss: 0.4719
Epoch 2/25
196/196 - 34s - 176ms/step - accuracy: 0.8521 - loss: 0.3608 - val_accuracy: 0.8335 - val_loss: 0.3814
Epoch 3/25
196/196 - 35s - 177ms/step - accuracy: 0.9129 - loss: 0.2318 - val_accuracy: 0.8356 - val_loss: 0.3981
Epoch 4/25
196/196 - 35s - 177ms/step - accuracy: 0.9489 - loss: 0.1457 - val_accuracy: 0.8297 - val_loss: 0.4381
Epoch 5/25
196/196 - 34s - 176ms/step - accuracy: 0.9713 - loss: 0.0886 - val_accuracy: 0.8245 - val_loss: 0.5268
Epoch 6/25
196/196 - 36s - 183ms/step - accuracy: 0.9847 - loss: 0.0481 - val_accuracy: 0.8208 - val_loss: 0.6036
Epoch 7/25
196/196 - 37s - 187ms/step - accuracy: 0.9928 - loss: 0.0262 - val_accuracy: 0.8190 - val_loss: 0.6951
Epoch 8/25
196/196 - 36s - 184ms/step - accuracy: 0.9950 - loss: 0.0167 - val_accuracy: 0.8149 - val_loss: 0.7616
Epoch 9/25
196/196 - 34s - 176ms/step - accuracy: 0.9973 - loss: 0.0093 - val_a

<keras.src.callbacks.history.History at 0x7f41fc1b5c90>

In [10]:
print('Testing...')
score, acc = model.evaluate(X_test_pad, y_test, batch_size=128)

print('Test score:', score)
print('Test accuracy:', acc)

print("Accuracy: {0:.2%}".format(acc))

Testing...
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 35ms/step - accuracy: 0.7493 - loss: 1.6692
Test score: 1.3684719800949097
Test accuracy: 0.7903599739074707
Accuracy: 79.04%


In [11]:
#Let us test some  samples
test_sample_1 = "This movie is fantastic! I really like it because it is so good!"
test_sample_2 = "Good movie!"
test_sample_3 = "Maybe I like this movie."
test_sample_4 = "Not to my taste, will skip and watch another movie"
test_sample_5 = "if you like action, then this movie might be good for you."
test_sample_6 = "Bad movie!"
test_sample_7 = "Not a good movie!"
test_sample_8 = "This movie really sucks! Can I get my money back please?"
test_samples = [test_sample_1, test_sample_2, test_sample_3, test_sample_4, test_sample_5, test_sample_6, test_sample_7, test_sample_8]

test_samples_tokens = tokenizer_obj.texts_to_sequences(test_samples)
test_samples_tokens_pad = pad_sequences(test_samples_tokens, maxlen=max_length)

#predict
model.predict(x=test_samples_tokens_pad)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 543ms/step


array([[9.9962837e-01],
       [8.8705337e-01],
       [8.5703921e-01],
       [2.8630244e-04],
       [4.5266222e-02],
       [1.1997067e-01],
       [7.7446914e-01],
       [2.0309440e-04]], dtype=float32)

In [12]:
#let us check how the model predicts
classes = model.predict(X_test_pad[:10], batch_size=128)
for i in range (0,10):
    if(classes[i] > 0.5 and y_test[i] == 1 or (classes[i] <= 0.5 and y_test[i] == 0)):
        print( classes[i], y_test[i], " Right prdiction")
    else :
        print( classes[i], y_test[i], " Wrong prdiction")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 525ms/step
[0.0004154] 1  Wrong prdiction
[0.0001244] 1  Wrong prdiction
[0.03361538] 1  Wrong prdiction
[0.06252406] 1  Wrong prdiction
[0.9998524] 1  Right prdiction
[0.9994927] 1  Right prdiction
[0.99995434] 1  Right prdiction
[5.123401e-07] 1  Wrong prdiction
[0.00582552] 1  Wrong prdiction
[0.99998116] 1  Right prdiction


In [13]:
from keras.datasets import imdb
from keras.models import Sequential
from keras.preprocessing import sequence
from keras.layers import Dense, Embedding, LSTM, GRU
from tensorflow.keras.layers import Embedding

# load the dataset but only keep the top n words, zero the rest
top_words = 5000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)

max_words = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_words)
X_test = sequence.pad_sequences(X_test, maxlen=max_words)

print('Build model...')

model = Sequential()
model.add(Embedding(top_words, 100, input_length=max_words))
model.add(LSTM(32, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))

# try using different optimizers and different optimizer configs
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

print(model.summary())

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step 
Build model...


None


In [14]:
print('Train...')

model.fit(X_train, y_train, batch_size=128, epochs=25, validation_data=(X_test, y_test), verbose=2)

Train...
Epoch 1/25
196/196 - 123s - 626ms/step - accuracy: 0.7418 - loss: 0.5201 - val_accuracy: 0.8230 - val_loss: 0.4039
Epoch 2/25
196/196 - 120s - 610ms/step - accuracy: 0.8183 - loss: 0.4106 - val_accuracy: 0.7210 - val_loss: 0.5443
Epoch 3/25
196/196 - 113s - 578ms/step - accuracy: 0.8100 - loss: 0.4181 - val_accuracy: 0.8390 - val_loss: 0.3774
Epoch 4/25
196/196 - 111s - 564ms/step - accuracy: 0.8566 - loss: 0.3438 - val_accuracy: 0.8326 - val_loss: 0.3865
Epoch 5/25
196/196 - 115s - 586ms/step - accuracy: 0.8676 - loss: 0.3244 - val_accuracy: 0.8392 - val_loss: 0.3773
Epoch 6/25
196/196 - 144s - 733ms/step - accuracy: 0.8727 - loss: 0.3095 - val_accuracy: 0.8506 - val_loss: 0.3639
Epoch 7/25
196/196 - 117s - 595ms/step - accuracy: 0.8858 - loss: 0.2846 - val_accuracy: 0.8483 - val_loss: 0.3713
Epoch 8/25
196/196 - 90s - 461ms/step - accuracy: 0.8872 - loss: 0.2817 - val_accuracy: 0.8504 - val_loss: 0.3698
Epoch 9/25
196/196 - 88s - 449ms/step - accuracy: 0.8853 - loss: 0.2858 

<keras.src.callbacks.history.History at 0x7f41547cb950>

In [15]:
score, acc = model.evaluate(X_test, y_test, batch_size=128)

print('Test score:', score)
print('Test accuracy:', acc)
print("Accuracy: %.2f%%" % (acc*100))

[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 95ms/step - accuracy: 0.8465 - loss: 0.5089 
Test score: 0.495782732963562
Test accuracy: 0.849399983882904
Accuracy: 84.94%


The time to train a GRU is less than LSTM network.