In [None]:
# Use the %pip magic (not !pip) so the package is installed into the
# environment of the running kernel rather than whichever pip is first on PATH.
%pip install nltk



In [4]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC

In [5]:
# Fetch the NLTK resources used below: the stop-word corpus and the
# punkt / punkt_tab tokenizer data (presumably what word_tokenize needs — confirm).
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('punkt_tab')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [6]:
# English stop words held in a set so the membership test in preprocess()
# is a hash lookup rather than a list scan.
stop_words = set(stopwords.words('english'))

In [7]:
def preprocess(text):
    """Normalise a raw review into a string of space-separated tokens.

    Missing values (as detected by ``pd.isnull``) yield an empty string.
    Otherwise angle-bracket tags and every character that is neither a word
    character nor whitespace are deleted, the text is lower-cased and split
    with ``word_tokenize``, and tokens present in the module-level
    ``stop_words`` set are discarded.

    Args:
        text: The raw review text, or a null value.

    Returns:
        The surviving tokens joined by single spaces ("" for null input).
    """
    if pd.isnull(text):
        return ""

    without_tags = re.sub(r'<[^>]+>', '', text)
    without_punct = re.sub(r'[^\w\s]', '', without_tags)
    kept_tokens = filter(
        lambda token: token not in stop_words,
        word_tokenize(without_punct.lower()),
    )
    return ' '.join(kept_tokens)

In [8]:
# Load the review and movie tables from CSV files; the paths are relative,
# so both files must sit in the notebook's working directory.
reviews = pd.read_csv("rotten_tomatoes_critic_reviews.csv")
movies = pd.read_csv("rotten_tomatoes_movies.csv")

In [9]:
# Attach the movie columns to each review via the shared 'rotten_tomatoes_link'
# key. No `how=` is given, so pandas uses its default inner join: rows whose
# key is absent from either table are dropped.
df = reviews.merge(movies, on='rotten_tomatoes_link')

In [10]:
# Keep only rows that have both review text and a review_type value,
# since the feature and label columns are derived from these two fields.
df = df.dropna(subset=['review_content', 'review_type'])

In [11]:
# Normalised review text used as the model input.
df['cleaned_review'] = df['review_content'].apply(preprocess)
# Binary target: 1 for 'Fresh', 0 for anything else. The vectorised comparison
# replaces a per-row Python lambda and produces the same int64 column.
df['label'] = (df['review_type'] == 'Fresh').astype('int64')

In [12]:
# Model input (cleaned review text) and target (1 = 'Fresh', 0 = otherwise).
X = df['cleaned_review']
y = df['label']


In [13]:
# TF-IDF features with the vocabulary capped at 3000 terms.
tfidf = TfidfVectorizer(max_features=3000)
X_tfidf = tfidf.fit_transform(X)
# Fixed seed so refits are reproducible whenever LinearSVC uses its randomised
# dual solver (the seed has no effect when the primal problem is solved).
# NOTE(review): the model is fit on every row of df, so predictions for reviews
# taken from df are training-set predictions, not a held-out evaluation.
model = LinearSVC(random_state=42)
model.fit(X_tfidf, y)

In [14]:
def predict_reviews_by_movie(movie_title):
    """Print the predicted sentiment for every review of one movie.

    Looks up rows of the module-level ``df`` whose ``movie_title`` equals
    ``movie_title`` (case-insensitive, surrounding whitespace in the argument
    ignored), runs each review through ``preprocess``, the fitted ``tfidf``
    vectoriser and ``model``, and prints the review with its predicted label.

    Args:
        movie_title: Title to look up, e.g. as typed by the user.

    Returns:
        None. Results, or a "not found" message, are written to stdout.
    """
    # Interactive input often carries stray leading/trailing spaces.
    wanted_title = movie_title.strip().lower()
    movie_reviews = df[df['movie_title'].str.lower() == wanted_title]
    if movie_reviews.empty:
        print("Movie not found or no reviews available.")
        return

    # Vectorise and classify all reviews in one batch instead of calling
    # transform/predict once per row inside the loop.
    processed = movie_reviews['review_content'].apply(preprocess)
    predictions = model.predict(tfidf.transform(processed))

    for review_text, prediction in zip(movie_reviews['review_content'], predictions):
        sentiment = "Fresh" if prediction == 1 else "Rotten"
        print(f"\n🔸 Review: {review_text}\n➡️ Predicted Sentiment: {sentiment}")

In [15]:
# Prompt for a movie title and print the predicted sentiment of each of its
# reviews. This cell needs interactive input, so it cannot complete unattended.
movie_input = input("Enter a movie name: ")
predict_reviews_by_movie(movie_input)

Enter a movie name: The Godfather

🔸 Review: One of the central American movies of the last 25 years, and one of very few to succed as both popular entertainment and high art.
➡️ Predicted Sentiment: Fresh

🔸 Review: Examining the meaning of family, violence, love, betrayal, and loyalty just to name a few, its quality set a standard that few films have matched.
➡️ Predicted Sentiment: Fresh

🔸 Review: The years have been kind to this timeless Mafia epic, which seems particularly rich now that studio blockbusters no longer demonstrate this kind of care with character, atmosphere and storytelling.
➡️ Predicted Sentiment: Fresh

🔸 Review: All of the filmic arts - design, cinematography, editing, music - come together to forge what is, by any measure, one of the great movies of all time.
➡️ Predicted Sentiment: Fresh

🔸 Review: Coppola was signed to direct what was thought would be a pulp gangster film, but created one of the greatest epics ever.
➡️ Predicted Sentiment: Fresh

🔸 Review: Th