In [1]:
! pip install kaggle



In [2]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/



In [3]:
# Download the IMDB 50k movie-reviews archive with the Kaggle CLI. This needs
# the kaggle.json credentials to already be in ~/.kaggle/.
! kaggle datasets download -d lakshmi25npathi/imdb-dataset-of-50k-movie-reviews

Dataset URL: https://www.kaggle.com/datasets/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews
License(s): other
Downloading imdb-dataset-of-50k-movie-reviews.zip to /content
 19% 5.00M/25.7M [00:00<00:00, 27.0MB/s]
100% 25.7M/25.7M [00:00<00:00, 98.1MB/s]


In [4]:
import zipfile

# Unpack the downloaded archive into /content. The context manager closes the
# zip file even if extraction fails part-way through.
with zipfile.ZipFile('/content/imdb-dataset-of-50k-movie-reviews.zip', 'r') as zip_ref:
    zip_ref.extractall('/content')

In [5]:
import numpy as np
import tensorflow
import pandas as pd

from tensorflow.keras.layers import Dense,Embedding,LSTM,Flatten
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import os

In [6]:
# Load the extracted reviews CSV into a DataFrame.
csv_path = '/content/IMDB Dataset.csv'
df = pd.read_csv(csv_path)

In [7]:
df.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive


In [8]:
df.shape

(50000, 2)

In [9]:
df['sentiment'].value_counts()

sentiment
positive    25000
negative    25000
Name: count, dtype: int64

In [10]:
# Encode the sentiment labels as integers: positive -> 1, negative -> 0.
# An explicit map + cast is used instead of replace(..., inplace=True): replace
# relies on implicit object -> int downcasting, which pandas 2.2 deprecates, so
# the column could stay object dtype on newer versions. Any label other than
# 'positive'/'negative' maps to NaN and makes the int64 cast fail loudly.
# The dtype guard makes the cell safe to re-run after the column is encoded.
if not pd.api.types.is_integer_dtype(df['sentiment']):
    df['sentiment'] = df['sentiment'].map({'positive': 1, 'negative': 0}).astype('int64')

In [11]:
df.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,1
1,A wonderful little production. <br /><br />The...,1
2,I thought this was a wonderful way to spend ti...,1
3,Basically there's a family where a little boy ...,0
4,"Petter Mattei's ""Love in the Time of Money"" is...",1


In [30]:
# Hold out 20% of the reviews as a test set; the fixed random_state keeps the
# split reproducible across runs.
train, test = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
)

In [31]:
print(train.shape)
print(test.shape)

(40000, 2)
(10000, 2)


# Data Preprocessing

In [32]:
# Build the vocabulary from the training reviews only, so that nothing from the
# test set leaks into preprocessing. num_words=5000 caps the word indices used
# when converting text to sequences.
tokonizer = Tokenizer(num_words=5000)
tokonizer.fit_on_texts(train['review'])

# Turn each review into a list of word indices ...
train_sequences = tokonizer.texts_to_sequences(train['review'])
test_sequences = tokonizer.texts_to_sequences(test['review'])

# ... and force every review to exactly 200 indices. Shorter reviews are
# zero-padded at the front, as the printed arrays below show.
X_train = pad_sequences(train_sequences, maxlen=200)
X_test = pad_sequences(test_sequences, maxlen=200)

In [33]:
print(X_train)

[[1935    1 1200 ...  205  351 3856]
 [   3 1651  595 ...   89  103    9]
 [   0    0    0 ...    2  710   62]
 ...
 [   0    0    0 ... 1641    2  603]
 [   0    0    0 ...  245  103  125]
 [   0    0    0 ...   70   73 2062]]


In [34]:
print(X_test)

[[   0    0    0 ...  995  719  155]
 [  12  162   59 ...  380    7    7]
 [   0    0    0 ...   50 1088   96]
 ...
 [   0    0    0 ...  125  200 3241]
 [   0    0    0 ... 1066    1 2305]
 [   0    0    0 ...    1  332   27]]


In [35]:
# Targets for training and evaluation: the 0/1 sentiment column of each split.
y_train, y_test = train['sentiment'], test['sentiment']

# Building the LSTM Model

In [46]:
# Sentiment classifier: word embeddings -> LSTM -> single sigmoid unit.
model = Sequential()
# Declare the input explicitly (200 token ids per review, matching the padded
# sequences) instead of passing Embedding(input_length=200). That argument is
# deprecated in Keras 3; an explicit Input builds the model immediately on both
# Keras 2 and Keras 3, so model.summary() can report shapes and parameters.
model.add(tensorflow.keras.Input(shape=(200,), dtype="int32"))
# input_dim=5000 matches the tokenizer's num_words; each id maps to a 64-d vector.
model.add(Embedding(input_dim=5000, output_dim=64))
# NOTE(review): per the Keras LSTM docs, a non-zero recurrent_dropout rules out
# the fused cuDNN kernel, so GPU training is slower - confirm this is intended.
model.add(LSTM(256, dropout=0.2, recurrent_dropout=0.2))
# One sigmoid unit: output is read as the probability of a positive review.
model.add(Dense(1, activation="sigmoid"))



In [47]:
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_4 (Embedding)     (None, 200, 64)           320000    
                                                                 
 lstm_4 (LSTM)               (None, 256)               328704    
                                                                 
 dense_4 (Dense)             (None, 1)                 257       
                                                                 
Total params: 648961 (2.48 MB)
Trainable params: 648961 (2.48 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [48]:
# Configure training: Adam optimizer, binary cross-entropy loss (the model ends
# in a single sigmoid unit), and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [49]:
# Train for 5 epochs in mini-batches of 64, holding out 20% of the training
# arrays for validation (validation_split=0.2).
# The returned History object is kept so the per-epoch loss/accuracy curves can
# be inspected later (history.history); assigning it also stops the cell from
# printing the bare History repr.
history = model.fit(
    X_train,
    y_train,
    epochs=5,
    batch_size=64,
    validation_split=0.2,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7e76e65f9060>

# Model Evaluation

In [50]:
# Score the trained model on the held-out test set. evaluate() returns the loss
# followed by the compiled metrics, which here is just accuracy.
test_metrics = model.evaluate(X_test, y_test)
loss, accuracy = test_metrics



In [51]:
# Report the held-out test metrics computed by model.evaluate().
print(f"Test loss {loss}")
print(f"Test accuracy {accuracy}")

Test loss 0.3073202967643738
Test accuracy 0.8845000267028809


# Building the Prediction System

In [52]:
def predict_sentiment(review):
    """Classify a single movie review as "positive" or "negative".

    The text is encoded with the notebook-level tokenizer (``tokonizer``),
    padded/truncated to 200 token ids like the training data, and scored by the
    notebook-level trained ``model``.

    Args:
        review: Raw review text. ``None`` is treated as an empty string so that
            an empty UI field does not raise; other values are passed through
            ``str()``.

    Returns:
        ``"positive"`` if the predicted probability is above 0.5, otherwise
        ``"negative"``.
    """
    text = "" if review is None else str(review)

    # Tokenize and pad the review exactly as the training data was prepared.
    sequence = tokonizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(sequence, maxlen=200)

    # verbose=0: this function is called once per UI request, so the Keras
    # progress bar would otherwise be printed for every single prediction.
    prediction = model.predict(padded_sequence, verbose=0)

    # The model has one sigmoid output; take it as a plain Python float.
    probability = float(prediction[0][0])
    return "positive" if probability > 0.5 else "negative"


In [53]:
! pip install gradio

Collecting gradio
  Downloading gradio-4.37.2-py3-none-any.whl (12.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.3/12.3 MB[0m [31m35.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.111.0-py3-none-any.whl (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.0/92.0 kB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ffmpy (from gradio)
  Downloading ffmpy-0.3.2.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gradio-client==1.0.2 (from gradio)
  Downloading gradio_client-1.0.2-py3-none-any.whl (318 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.2/318.2 kB[0m [31m40.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━

In [54]:
import gradio as gr


# Wrap predict_sentiment in a minimal web UI: one text box for the review,
# one text box showing the predicted label.
demo = gr.Interface(fn=predict_sentiment, inputs="textbox", outputs="textbox")

# Start the app. No share=True is passed here; the public gradio.live URL in the
# output below is presumably Gradio's automatic sharing when it detects Colab -
# confirm against the launch() docs before relying on it elsewhere.
demo.launch()

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://4e2f79228447b272a1.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


