In [2]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import imdb # To load the dataset
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.layers import Embedding,Dense,SimpleRNN
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential,load_model

In [3]:
# Load the imdb dataset

max_features = 10000 # vocabulary size
(X_train,y_train),(X_test,y_test) = imdb.load_data(num_words = max_features
                                                   )

X_train.shape,X_test.shape,y_train.shape,y_test.shape

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step


((25000,), (25000,), (25000,), (25000,))

In [5]:
word_index = imdb.get_word_index()
reverse_word_index = {value:key for key,value in word_index.items()}

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
model = load_model("imdb_model.h5")
model.summary()



In [12]:
max_length = 500

X_train = sequence.pad_sequences(X_train,max_length)
X_test = sequence.pad_sequences(X_test,max_length)

loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 142ms/step - accuracy: 0.7994 - loss: 0.4352
Test Accuracy: 0.8030


In [13]:
model.get_weights()

[array([[ 0.0440761 , -0.00258604,  0.02626052, ...,  0.1691861 ,
          0.06694745,  0.06318133],
        [ 0.00550455, -0.00324108, -0.00308204, ..., -0.0418093 ,
         -0.03623572, -0.01037657],
        [ 0.01041485, -0.0597724 ,  0.05800388, ...,  0.04311601,
          0.0142493 ,  0.03575646],
        ...,
        [-0.02204946, -0.0502841 ,  0.02216803, ...,  0.03107506,
          0.01700203,  0.00412816],
        [-0.06522708, -0.01863162, -0.01704538, ..., -0.02289821,
         -0.03450813, -0.01316915],
        [ 0.05545262, -0.00759305,  0.02657143, ..., -0.03108518,
          0.03435009,  0.00414048]], dtype=float32),
 array([[ 0.18932207, -0.01151362, -0.03193248, ..., -0.15279052,
          0.01748233, -0.00463294],
        [ 0.15151155,  0.13113658,  0.0759565 , ..., -0.0552667 ,
          0.08003363, -0.12430901],
        [-0.10784429, -0.13061441,  0.01528569, ...,  0.1018244 ,
         -0.17884305,  0.18982476],
        ...,
        [-0.0602604 ,  0.12346987,  0.1

In [36]:
def decode_review(encoded_version):
    return ' '.join([reverse_word_index.get(i-3,'?') for i in X_train[0]])

def preprocess_text(text):
    words = text.lower().split()
    encoded_review = [word_index.get(word, 2) + 3 for word in words]
    padded_review = sequence.pad_sequences([encoded_review], maxlen=500)
    return padded_review

In [37]:
# Prediction function

def predict_sentiment(review):
    preprocessed_input = preprocess_text(text=review)
    prediction = model.predict(preprocessed_input)
    sentiment = 'Positive' if prediction[0][0] > 0.5 else 'Negative'
    return sentiment, prediction[0][0]

In [38]:
sample_review = "Loved the movie! Great acting, strong emotions, powerful story, and beautiful music. Would definitely recommend it to friends!"

predict_sentiment(sample_review)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 217ms/step


('Positive', np.float32(0.99965644))