In [1]:
import pandas as pd
import numpy as np
import warnings
# NOTE(review): this silences every warning category for the rest of the session,
# so deprecation notices raised by later cells are never shown. Consider narrowing
# the filter to a specific category or module.
warnings.filterwarnings("ignore")

In [2]:
# Read the review CSV (path is relative to the notebook's working directory)
# and preview the first rows.
csv_path = "IMDB Dataset.csv"
data = pd.read_csv(csv_path)
data.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive


In [3]:
# Dimensions of the loaded frame as (rows, columns).
data.shape

(50000, 2)

In [4]:
# Count the reviews per label to check how balanced the two classes are.
data["sentiment"].value_counts()

sentiment
positive    25000
negative    25000
Name: count, dtype: int64

In [5]:
# positive -> 1
# negative -> 0
# Encode the labels with an explicit mapping and an explicit integer dtype instead of
# relying on DataFrame.replace to downcast the object column to integers (that implicit
# downcast is deprecated in recent pandas). The identity entries (1 -> 1, 0 -> 0) keep
# the cell safe to re-run, and astype raises if any label falls outside the mapping,
# because map() turns unmapped values into NaN.
label_codes = {"positive": 1, "negative": 0, 1: 1, 0: 0}
data["sentiment"] = data["sentiment"].map(label_codes).astype("int64")
data.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,1
1,A wonderful little production. <br /><br />The...,1
2,I thought this was a wonderful way to spend ti...,1
3,Basically there's a family where a little boy ...,0
4,"Petter Mattei's ""Love in the Time of Money"" is...",1


In [6]:
from sklearn.model_selection import train_test_split

# Hold out a quarter of the rows for evaluation; the fixed random_state keeps the
# split repeatable across runs.
train_data, test_data = train_test_split(
    data,
    test_size=0.25,
    random_state=42,
)
test_data.shape

(12500, 2)

In [7]:
from tensorflow.keras.preprocessing.text import Tokenizer

# Fit the vocabulary on the training reviews only, so the test split stays unseen.
train_reviews = train_data["review"]
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(train_reviews)

In [8]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Step 1: turn each review into a list of integer word indices.
train_sequences = tokenizer.texts_to_sequences(train_data["review"])
test_sequences = tokenizer.texts_to_sequences(test_data["review"])

# Step 2: force every sequence to exactly 200 entries so the splits become 2-D arrays.
X_train = pad_sequences(train_sequences, maxlen=200)
X_test = pad_sequences(test_sequences, maxlen=200)

In [9]:
# Inspect the encoded training matrix; shorter reviews show up as rows with leading zeros.
X_train

array([[   0,    0,    0, ...,    1,   13,  190],
       [ 148,   48,    1, ...,  103,    3,  577],
       [   0,    0,    0, ...,  292,   29, 2104],
       ...,
       [   0,    0,    0, ..., 1614,    2,  593],
       [   0,    0,    0, ...,  246,  103,  125],
       [   0,    0,    0, ...,   70,   72, 2069]])

In [10]:
# Shape of the encoded test split as (reviews, sequence length).
X_test.shape

(12500, 200)

In [11]:
# Target vectors: the 0/1 sentiment column of each split.
Y_train, Y_test = train_data["sentiment"], test_data["sentiment"]

In [12]:
# Preview the training labels.
Y_train

27434    0
13400    0
883      0
7303     0
45124    1
        ..
11284    1
44732    1
38158    0
860      1
15795    1
Name: sentiment, Length: 37500, dtype: int64

In [13]:
# LSTM MODEL BUILDING
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, Input, LSTM

# The input shape is declared with an explicit Input layer rather than by passing
# input_shape to Embedding, which Keras 3 warns against for Sequential models.
model = Sequential()
model.add(Input(shape=(200,)))  # one review = 200 token ids (same value as pad_sequences maxlen)
model.add(Embedding(input_dim=5000, output_dim=128))  # 5000 = Tokenizer(num_words=5000)
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation="sigmoid"))  # single score in (0, 1); labels use 1 for positive
model.summary()

In [14]:
# Binary cross-entropy pairs with the single sigmoid output; accuracy is tracked per epoch.
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [15]:
# Keep the returned History object so the per-epoch metrics (history.history) stay
# available for inspection or plotting; assigning it also stops the cell from echoing
# the bare History repr. validation_split=0.2 reserves 20% of the training arrays
# for validation.
history = model.fit(X_train, Y_train, epochs=5, batch_size=64, validation_split=0.2)

Epoch 1/5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m173s[0m 361ms/step - accuracy: 0.7234 - loss: 0.5400 - val_accuracy: 0.7861 - val_loss: 0.4510
Epoch 2/5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m168s[0m 358ms/step - accuracy: 0.8382 - loss: 0.3802 - val_accuracy: 0.8171 - val_loss: 0.3996
Epoch 3/5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m193s[0m 339ms/step - accuracy: 0.8747 - loss: 0.3154 - val_accuracy: 0.8453 - val_loss: 0.3573
Epoch 4/5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m158s[0m 337ms/step - accuracy: 0.8812 - loss: 0.2940 - val_accuracy: 0.8541 - val_loss: 0.3596
Epoch 5/5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m171s[0m 365ms/step - accuracy: 0.8739 - loss: 0.3041 - val_accuracy: 0.8693 - val_loss: 0.3334


<keras.src.callbacks.history.History at 0x1ba35db82f0>

In [16]:
# Score the held-out test split, then report accuracy as a percentage.
test_metrics = model.evaluate(X_test, Y_test)
loss, accuracy = test_metrics
accuracy_pct = accuracy * 100
print("Model Accuracy: {:.2f}%".format(accuracy_pct))

[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 62ms/step - accuracy: 0.8776 - loss: 0.3231
Model Accuracy: 87.49%


In [17]:
# Building Predictive System

def predictive_system(review):
    """Classify one review string as "positive" or "negative".

    The text is encoded with the notebook-level ``tokenizer``, brought to a fixed
    length of 200 with ``pad_sequences``, and scored by the notebook-level ``model``.

    Args:
        review: Raw review text.

    Returns:
        "positive" when the model score is strictly greater than 0.5, otherwise
        "negative".
    """
    encoded = pad_sequences(tokenizer.texts_to_sequences([review]), maxlen=200)
    # predict returns one row per input; a single review gives a 1x1 result.
    score = model.predict(encoded)[0][0]
    if score > 0.5:
        return "positive"
    return "negative"

In [18]:
# Smoke-test the helper with a clearly favourable review.
predictive_system("This movie was fantastic and amazing")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 370ms/step


'positive'

In [19]:
# Smoke-test the helper with a clearly unfavourable review.
predictive_system("Overall long and slow, boring")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step


'negative'

In [20]:
# Persist the trained network to disk so it can be reloaded without retraining.
# NOTE(review): the ".h5" suffix selects the HDF5 format, which current Keras treats as
# legacy in favour of the native ".keras" format; confirm against the installed version.
# Changing the filename also requires updating the matching load_model call.
model.save("model.h5")



In [21]:
import joblib
# Serialize the fitted tokenizer so inference can reuse the exact same vocabulary.
joblib.dump(tokenizer, "tokenizer.pkl")

['tokenizer.pkl']

In [22]:
# load_model is imported from tensorflow.keras, the same namespace every other Keras
# import in this notebook uses, rather than from the standalone keras package.
from tensorflow.keras.models import load_model
import joblib
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Reload both artifacts from disk so the inference path below does not depend on the
# in-memory objects produced by training.
model = load_model("model.h5")
# joblib.load unpickles the file, which can execute arbitrary code; only load a
# tokenizer.pkl that was produced by a trusted run of this notebook.
tokenizer = joblib.load("tokenizer.pkl")

def predictive_system(review):
    """Classify one review string as "positive" or "negative".

    The text is encoded with the notebook-level ``tokenizer``, brought to a fixed
    length of 200 with ``pad_sequences``, and scored by the notebook-level ``model``.

    Args:
        review: Raw review text.

    Returns:
        "positive" when the model score is strictly greater than 0.5, otherwise
        "negative".
    """
    encoded = pad_sequences(tokenizer.texts_to_sequences([review]), maxlen=200)
    # predict returns one row per input; a single review gives a 1x1 result.
    score = model.predict(encoded)[0][0]
    if score > 0.5:
        return "positive"
    return "negative"



In [23]:
# Run the reloaded model on a short review and display the predicted label.
# NOTE(review): "cinematorgraphy" is misspelled; presumably it is not in the tokenizer's
# vocabulary, so the prediction likely rests on "Beautiful" alone. Confirm before
# using this as a demo input.
review_sentiment = predictive_system("Beautiful cinematorgraphy")
review_sentiment

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 493ms/step


'positive'

In [25]:
pip install gradio

Collecting gradio
  Using cached gradio-4.38.1-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Using cached aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting altair<6.0,>=5.0 (from gradio)
  Using cached altair-5.3.0-py3-none-any.whl.metadata (9.2 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.111.1-py3-none-any.whl.metadata (26 kB)
Collecting ffmpy (from gradio)
  Using cached ffmpy-0.3.2.tar.gz (5.5 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting gradio-client==1.1.0 (from gradio)
  Using cached gradio_client-1.1.0-py3-none-any.whl.metadata (7.1 kB)
Collecting huggingface-hub>=0.19.3 (from gradio)
  Downloading huggingface_hub-0.24.0-py3-none-any.whl.metadata (13 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.6-cp312-none-win_amd64.whl.metadata (51 kB)
     ---------------------------------------- 0.0/51.6 kB ? eta -:--:--
     ------- -

In [26]:
import gradio as gr
title = "MOVIE SENTIMENT ANALYSIS APPLICATION"

# Wrap predictive_system in a minimal web form: one text box in, one text box out.
app = gr.Interface(
    fn=predictive_system,
    inputs="textbox",
    outputs="textbox",
    title=title,
)

# NOTE(review): share=True asks Gradio for a publicly reachable link in addition to
# the local URL; confirm that exposing the model outside this machine is intended.
app.launch(share=True)

Running on local URL:  http://127.0.0.1:7860

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.


