In [112]:
import pandas as pd
import numpy as np
import sklearn
import tensorflow
import warnings
warnings.filterwarnings("ignore")

In [114]:
# Load the IMDB review dataset from a CSV file in the working directory.
data = pd.read_csv("IMDB_Dataset.csv")

In [115]:
# Preview the first rows to sanity-check the loaded columns.
data.head()


Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive


In [118]:
# (rows, columns) of the loaded dataset.
data.shape

(50000, 2)

In [120]:
# Look at the last rows as well.
data.tail()


Unnamed: 0,review,sentiment
49995,I thought this movie did a down right good job...,positive
49996,"Bad plot, bad dialogue, bad acting, idiotic di...",negative
49997,I am a Catholic taught in parochial elementary...,negative
49998,I'm going to have to disagree with the previou...,negative
49999,No one expects the Star Trek movies to be high...,negative


In [122]:
# Check class balance: number of reviews per sentiment label.
data["sentiment"].value_counts()

sentiment
positive    25000
negative    25000
Name: count, dtype: int64

In [124]:
# Encode the labels as integers: positive -> 1, negative -> 0.
# The previous `DataFrame.replace(..., inplace=True)` produced an integer column
# only through pandas' silent downcasting of object results, which pandas 2.2
# deprecates (the FutureWarning is hidden by the global warnings filter).
# Casting explicitly keeps the column int64 regardless of that behaviour, and
# fails loudly if an unexpected label is present. Re-running the cell leaves
# already-encoded labels unchanged.
data["sentiment"] = (
    data["sentiment"]
    .replace({"positive": 1, "negative": 0})
    .astype("int64")
)

In [126]:
# Preview again after the label-encoding step.
data.head()


Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,1
1,A wonderful little production. <br /><br />The...,1
2,I thought this was a wonderful way to spend ti...,1
3,Basically there's a family where a little boy ...,0
4,"Petter Mattei's ""Love in the Time of Money"" is...",1


In [128]:
!pip install tensorflow




In [129]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [130]:
 # Hold out 20% of the rows as a test split; the fixed seed keeps the split repeatable.
 train_data, test_data = train_test_split(
     data,
     test_size=0.2,
     random_state=42,
 )

In [131]:
# Size of the training split.
train_data.shape


(40000, 2)

In [132]:
  # Size of the held-out test split.
  test_data.shape

(10000, 2)

In [133]:
# Word-level tokenizer with num_words=5000 (the same value is used as the
# Embedding layer's input_dim). It is fitted on the training reviews only, so
# the test reviews do not influence the vocabulary.
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(train_data["review"])


In [134]:
# Convert each split's reviews to integer sequences and pad/truncate them to a
# common length of 200 tokens (training split first, then test split).
X_train, X_test = (
    pad_sequences(tokenizer.texts_to_sequences(split["review"]), maxlen=200)
    for split in (train_data, test_data)
)

In [135]:
# Inspect the padded training sequences.
X_train

array([[1935,    1, 1200, ...,  205,  351, 3856],
       [   3, 1651,  595, ...,   89,  103,    9],
       [   0,    0,    0, ...,    2,  710,   62],
       ...,
       [   0,    0,    0, ..., 1641,    2,  603],
       [   0,    0,    0, ...,  245,  103,  125],
       [   0,    0,    0, ...,   70,   73, 2062]])

In [141]:
# Inspect the padded test sequences.
X_test

array([[   0,    0,    0, ...,  995,  719,  155],
       [  12,  162,   59, ...,  380,    7,    7],
       [   0,    0,    0, ...,   50, 1088,   96],
       ...,
       [   0,    0,    0, ...,  125,  200, 3241],
       [   0,    0,    0, ..., 1066,    1, 2305],
       [   0,    0,    0, ...,    1,  332,   27]])

In [142]:
# Target vectors: the sentiment column of each split.
Y_train, Y_test = train_data["sentiment"], test_data["sentiment"]

In [143]:
# Inspect the training labels.
Y_train

39087    0
30893    0
45278    1
16398    0
13653    0
        ..
11284    1
44732    1
38158    0
860      1
15795    1
Name: sentiment, Length: 40000, dtype: int64

In [144]:
# Inspect the test labels.
Y_test

33553    1
9427     1
199      0
12447    1
39489    0
        ..
28567    0
25079    1
18707    1
15200    0
5857     1
Name: sentiment, Length: 10000, dtype: int64

In [162]:
# Sentiment classifier: token ids -> 128-d embeddings -> LSTM(128) -> one sigmoid unit.
model = Sequential()
# input_dim=5000 matches Tokenizer(num_words=5000).
# The `input_length` argument was dropped: it is deprecated in Keras 3 (which
# the recorded run appears to use) and only produced a warning hidden by the
# global warnings filter. The sequence length is supplied through
# `model.build` below instead.
model.add(Embedding(input_dim=5000, output_dim=128))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation="sigmoid"))
# Build for batches of 200-token sequences (the padding length used for
# X_train / X_test) so the layer weights exist before training.
model.build(input_shape=(None, 200))

In [164]:
# Show the layer-by-layer summary of the built model.
model.summary()

In [166]:
# Binary classification setup: cross-entropy loss on the sigmoid output,
# Adam optimizer, accuracy reported during training and evaluation.
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [168]:
# Train for 10 epochs in mini-batches of 64, holding back 20% of the training
# data for per-epoch validation metrics.
model.fit(
    X_train,
    Y_train,
    batch_size=64,
    epochs=10,
    validation_split=0.2,
)

Epoch 1/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 147ms/step - accuracy: 0.7225 - loss: 0.5249 - val_accuracy: 0.8357 - val_loss: 0.3895
Epoch 2/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 161ms/step - accuracy: 0.8349 - loss: 0.3897 - val_accuracy: 0.8506 - val_loss: 0.3613
Epoch 3/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 162ms/step - accuracy: 0.8581 - loss: 0.3442 - val_accuracy: 0.8431 - val_loss: 0.3646
Epoch 4/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 154ms/step - accuracy: 0.8882 - loss: 0.2836 - val_accuracy: 0.8648 - val_loss: 0.3241
Epoch 5/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 156ms/step - accuracy: 0.8974 - loss: 0.2560 - val_accuracy: 0.8620 - val_loss: 0.3493
Epoch 6/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 155ms/step - accuracy: 0.9161 - loss: 0.2148 - val_accuracy: 0.8705 - val_loss: 0.3309
Epoch 7/10

<keras.src.callbacks.history.History at 0x23e4029de80>

In [172]:
# Score the model on the held-out test split; the result is unpacked as
# (loss, accuracy), matching the loss and the single metric set in compile().
loss, accuracy = model.evaluate(X_test, Y_test)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 29ms/step - accuracy: 0.8685 - loss: 0.3579


In [174]:
# Test-set loss returned by model.evaluate.
print(loss)

0.35212358832359314


In [176]:
# Test-set accuracy returned by model.evaluate.
print(accuracy)


0.8712999820709229


In [211]:
def predictive_system(review: str) -> str:
    """Classify a single movie review as "positive" or "negative".

    The text is encoded with the notebook-level ``tokenizer``, padded to the
    same 200-token length used for the training data, and scored by the
    notebook-level ``model``.

    Args:
        review: Raw review text.

    Returns:
        "positive" when the model's output is above 0.5, otherwise "negative".
    """
    sequences = tokenizer.texts_to_sequences([review])
    padded_sequence = pad_sequences(sequences, maxlen=200)
    # verbose=0: this function is called once per request from the web UI, so
    # the per-call Keras progress bar is only noise.
    prediction = model.predict(padded_sequence, verbose=0)
    # One input row and one sigmoid output unit, so the score sits at [0][0].
    return "positive" if prediction[0][0] > 0.5 else "negative"

In [213]:
# Smoke test with an obviously negative review.
predictive_system("This movie was very bad")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step


'negative'

In [215]:
# Smoke test with an obviously positive review.
predictive_system("This movie was good and amazing performance by actors ")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step


'positive'

In [217]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.0.1-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0 (from gradio)
  Downloading fastapi-0.115.0-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.0 (from gradio)
  Downloading gradio_client-1.4.0-py3-none-any.whl.metadata (7.1 kB)
Collecting huggingface-hub>=0.25.1 (from gradio)
  Downloading huggingface_hub-0.25.2-py3-none-any.whl.metadata (13 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.7-cp312-none-win_amd64.whl.metadata (51 kB)
     ---------------------------------------- 0.0/51.7 kB ? eta -:--:--
     ---------------------------------------- 51.7/51.7 kB 1.3 MB/s eta 0:00:00
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipar

In [219]:
import gradio as gr

# Heading shown at the top of the web UI (typo "REVIW" corrected).
title = "MOVIE REVIEW SENTIMENT ANALYSIS"

# Single text box in, single text box out, backed by predictive_system.
app = gr.Interface(
    fn=predictive_system,
    inputs="textbox",
    outputs="textbox",
    title=title,
)

# NOTE(review): share=True also publishes the app on a temporary public
# *.gradio.live URL, so anyone with the link can reach it. Use share=False
# if the demo should stay local.
app.launch(share=True)



* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://f50aa7ab6b06e86d3b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


