In [34]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

In [4]:
data = pd.read_csv("data/IMDB_Dataset.csv")

In [5]:
data.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive


In [9]:
# Label encoding
data['sentiment'] = data['sentiment'].map({'positive': 1, 'negative': 0})

In [12]:
data['sentiment'].head()

0    1
1    1
2    1
3    0
4    1
Name: sentiment, dtype: int64

In [10]:
# Split the data into target and features
X = data['review']
y = data['sentiment']
# Do a train test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [18]:
tokenizer = Tokenizer(num_words = 5000)
tokenizer.fit_on_texts(X_train)

In [19]:
X_train = pad_sequences(tokenizer.texts_to_sequences(X_train), maxlen=200)
X_test = pad_sequences(tokenizer.texts_to_sequences(X_test), maxlen=200)

In [23]:
model = keras.Sequential([
    keras.layers.Embedding(input_dim=10000, output_dim=32, input_length=250),
    keras.layers.LSTM(64, return_sequences=True),
    keras.layers.LSTM(32),
    keras.layers.Dense(16, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [24]:
model.fit(X_train, y_train, epochs=5, batch_size=64)

Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 139ms/step - accuracy: 0.7328 - loss: 0.5088
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 134ms/step - accuracy: 0.8920 - loss: 0.2740
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 132ms/step - accuracy: 0.9095 - loss: 0.2265
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 134ms/step - accuracy: 0.9247 - loss: 0.1953
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 130ms/step - accuracy: 0.9351 - loss: 0.1725


<keras.src.callbacks.history.History at 0x1f4121f6240>

In [25]:
def predict_review(review):
    encoded_review = tokenizer.texts_to_sequences([review])
    padded_review = pad_sequences(encoded_review, maxlen=250, padding='post', truncating='post')
    prediction = model.predict(padded_review)[0][0]
    return "Positive" if prediction > 0.5 else "Negative"

In [26]:
sample_review = "This movie was absolutely fantastic! The storyline was engaging and the characters were well developed."
print(predict_review(sample_review))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 499ms/step
Positive


In [30]:
# gradio interface
import gradio as gr

func = gr.Interface(
    fn=predict_review, 
    inputs=gr.Textbox(
    label="Enter your review here",
    lines=3,
    max_lines=5,
    interactive=True  
), 
    outputs=gr.Textbox(label='Review')
)

In [33]:
func.launch(share=True)

Rerunning server... use `close()` to stop if you need to change `launch()` parameters.
----

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.


