In [2]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.26.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.9.0 (from gradio)
  Downloading gradio_client-1.9.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (

In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, f1_score
import nltk
import gradio as gr
import re
from nltk.corpus import stopwords
nltk.download('stopwords')

# Load CSV dataset
df = pd.read_csv('/content/IMDB Dataset.csv')  # make sure the path is correct

# Convert sentiment labels to binary
df['sentiment'] = df['sentiment'].map({'positive': 1, 'negative': 0})

# Preprocess function
def preprocess(text):
    text = str(text).lower()
    text = re.sub(r'<.*?>', '', text)  # remove HTML tags
    text = re.sub(r'[^a-z\s]', '', text)  # remove punctuation and numbers
    tokens = text.split()
    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word not in stop_words]
    return ' '.join(tokens)

# Apply preprocessing
df['clean_review'] = df['review'].apply(preprocess)

# Features and Labels
X = df['clean_review']
y = df['sentiment']

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# TF-IDF Vectorization
vectorizer = TfidfVectorizer(max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Train models
models = {
    "Logistic Regression": LogisticRegression(),
    "Naive Bayes": MultinomialNB(),
    "SVM": LinearSVC()
}

# Evaluate models
for name, model in models.items():
    model.fit(X_train_vec, y_train)
    preds = model.predict(X_test_vec)
    print(f"--- {name} ---")
    print("Accuracy:", accuracy_score(y_test, preds))
    print("F1 Score:", f1_score(y_test, preds), "\n")

# Use best model
best_model = models["Logistic Regression"]

# Gradio Interface
def predict_sentiment(review):
    review = preprocess(review)
    review_vec = vectorizer.transform([review])
    pred = best_model.predict(review_vec)[0]
    return "Positive 😊" if pred == 1 else "Negative 😞"

gr.Interface(fn=predict_sentiment,
             inputs="textbox",
             outputs="text",
             title="Movie Review Sentiment Analyzer",
             description="Enter a movie review to see if it's positive or negative.").launch()


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


--- Logistic Regression ---
Accuracy: 0.8876
F1 Score: 0.8899118511263467 

--- Naive Bayes ---
Accuracy: 0.8506
F1 Score: 0.8520792079207921 

--- SVM ---
Accuracy: 0.8797
F1 Score: 0.8818387191827914 

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://68b4bf77ae0c87865e.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [6]:
# Test reviews manually
sample_reviews = [
    "I absolutely loved this movie. It was brilliant and touching!",
    "This was the worst film I've ever seen. Complete waste of time.",
    "An average movie with some good moments, but mostly boring.",
    "Great performances and a solid storyline. Would watch again!",
    "Terrible acting and bad direction. Not recommended at all."
]

for review in sample_reviews:
    print(f"Review: {review}")
    print(f"Prediction: {predict_sentiment(review)}")
    print("-" * 50)


Review: I absolutely loved this movie. It was brilliant and touching!
Prediction: Positive 😊
--------------------------------------------------
Review: This was the worst film I've ever seen. Complete waste of time.
Prediction: Negative 😞
--------------------------------------------------
Review: An average movie with some good moments, but mostly boring.
Prediction: Negative 😞
--------------------------------------------------
Review: Great performances and a solid storyline. Would watch again!
Prediction: Positive 😊
--------------------------------------------------
Review: Terrible acting and bad direction. Not recommended at all.
Prediction: Negative 😞
--------------------------------------------------
