In [1]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.6.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.5-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.3 (from gradio)
  Downloading gradio_client-1.4.3-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart==0.0.12 (from gradio)
  Downloading python_multipart-0.0.12-py3-none-any.whl.metadata (1.9 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.7.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metad

In [2]:
import pandas as pd
import numpy as np
import nltk
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import re
import pickle
import warnings
warnings.filterwarnings('ignore')

In [3]:
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


True

In [4]:
df = pd.read_json('http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz', lines=True, nrows=50000)

In [5]:
df['sentiment'] = df['overall'].apply(lambda x: 'positive' if x > 3 else 'negative' if x < 3 else 'neutral')

In [6]:
def process_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    tokens = word_tokenize(text)
    stop_words = set(stopwords.words("english"))
    tokens =  [token for token in tokens if token not in stop_words]
    lemma = WordNetLemmatizer()
    tokens =  [lemma.lemmatize(token) for token in tokens]
    return ' '.join(tokens)






In [7]:
df['preprocessedtext'] = df['reviewText'].apply(process_text)

In [8]:
label_map = {"negative":0, "positive":1, "neutral":2}

In [9]:
y = df['sentiment'].map(label_map)

In [22]:
from tensorflow.keras.utils import to_categorical

In [23]:
y = to_categorical(y)

In [26]:
y.shape

(50000, 3)

In [10]:
tfidf_vector = TfidfVectorizer(max_features=5000)

In [11]:
X = tfidf_vector.fit_transform(df['preprocessedtext']).toarray()

In [12]:
X.shape

(50000, 5000)

In [13]:
X = X.reshape(X.shape[0], 1,X.shape[1])

In [14]:
X[0][0]

array([0., 0., 0., ..., 0., 0., 0.])

In [27]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [16]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout

In [28]:
model = Sequential(
    [
        LSTM(64, input_shape = (1,5000), return_sequences=True),
        LSTM(32),
        Dense(16, activation="relu"),
        Dropout(0.5),
        Dense(3, activation="softmax")

    ]
)

In [29]:
model.compile(optimizer="adam", loss = "categorical_crossentropy", metrics=['accuracy'])

In [30]:
model.summary()

In [31]:
model.fit(X_train, y_train, epochs=5,validation_split=0.2)

Epoch 1/5
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - accuracy: 0.8184 - loss: 0.6745 - val_accuracy: 0.8526 - val_loss: 0.4188
Epoch 2/5
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - accuracy: 0.8644 - loss: 0.3975 - val_accuracy: 0.8535 - val_loss: 0.4199
Epoch 3/5
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 0.8726 - loss: 0.3626 - val_accuracy: 0.8531 - val_loss: 0.4355
Epoch 4/5
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 0.8810 - loss: 0.3334 - val_accuracy: 0.8506 - val_loss: 0.4606
Epoch 5/5
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.8878 - loss: 0.3033 - val_accuracy: 0.8485 - val_loss: 0.4973


<keras.src.callbacks.history.History at 0x78d59415e980>

In [32]:
test_accuracy, test_loss = model.evaluate(X_test, y_test)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.8457 - loss: 0.4865


In [48]:
def predict_sentiment(review):
  pro_text = process_text(review)
  vector = tfidf_vector.transform([pro_text]).toarray()
  vector = vector.reshape(1,1,5000)
  prediction_probs = model.predict(vector)[0]
  prediction_idx = np.argmax(prediction_probs)
  sentiment_map = {0:"negative", 1:"positive", 2:"neutral"}
  prediction = sentiment_map[prediction_idx]
  confidence = prediction_probs[prediction_idx]

  return {
        'Sentiment': prediction,
        'Confidence':confidence,
        'Processed Text': pro_text
    }

In [49]:
iface = gr.Interface(
    fn=predict_sentiment,
    inputs=gr.Textbox(label="Enter Product Review", lines=5),
    outputs=gr.JSON(label="Prediction Results"),
    title="Product Review Sentiment Analyzer",
    description="Enter a product review to analyze its sentiment using an LSTM neural network.",
    examples=[
        ["This product exceeded my expectations. Great value for money!"],
        ["The quality is poor and it stopped working after a week."],
        ["Decent product but a bit overpriced for what it offers."]
    ]
)

# Launch the interface
iface.launch(debug=True)

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://969ac880225e5e0ec0.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7861 <> https://969ac880225e5e0ec0.gradio.live


