In [2]:
import nltk
nltk.download('movie_reviews')
from nltk.corpus import movie_reviews
import random
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# Load the NLTK movie reviews as (token_list, category) pairs, one per corpus file.
documents = [
    (list(movie_reviews.words(fileid)), category)
    for category in movie_reviews.categories()
    for fileid in movie_reviews.fileids(category)
]

# The list above is built category by category, so shuffle it to interleave
# the classes. A dedicated, seeded generator makes the resulting order
# reproducible from run to run (an unseeded shuffle would change the row
# order, and therefore any seeded split made from it, on every execution)
# and leaves the global `random` state untouched.
random.Random(42).shuffle(documents)

# Flatten each (tokens, category) pair into a space-joined review string and
# its label, then collect both columns into a DataFrame.
reviews = [" ".join(tokens) for tokens, _ in documents]
labels = [category for _, category in documents]
df = pd.DataFrame(dict(review=reviews, sentiment=labels))

# Hold out 20% of the rows for evaluation; random_state pins the split's own shuffling.
X_train, X_test, y_train, y_test = train_test_split(
    df["review"],
    df["sentiment"],
    test_size=0.2,
    random_state=42,
)

# TF-IDF features: English stop words are removed, and max_df=0.7 discards
# terms whose document frequency exceeds 70% of the training reviews.
vectorizer = TfidfVectorizer(max_df=0.7, stop_words="english")

# The vocabulary and IDF weights are learned from the training reviews only;
# the test reviews are merely projected with the already-fitted vectorizer.
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Fit a logistic-regression classifier on the training TF-IDF matrix,
# allowing the solver up to 200 iterations. fit() returns the estimator itself.
model = LogisticRegression(max_iter=200).fit(X_train_tfidf, y_train)

# Predict labels for the held-out reviews.
y_pred = model.predict(X_test_tfidf)

# Report overall accuracy first, then the per-class precision/recall/F1 table.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
report = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report)

# Spot-check the fitted vectorizer and classifier on two hand-written reviews.
sample_texts = [
    "The movie was fantastic! I loved it.",
    "Worst film ever, waste of time.",
]
# Reuse the fitted vectorizer so the samples share the training vocabulary.
sample_tfidf = vectorizer.transform(sample_texts)
preds = model.predict(sample_tfidf)

# Echo each sample next to its predicted label.
for txt, pred in zip(sample_texts, preds):
    print(f"Review: {txt} --> Sentiment: {pred}")


[nltk_data] Downloading package movie_reviews to
[nltk_data]     C:\Users\shiva\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\movie_reviews.zip.


Accuracy: 0.8375

Classification Report:
               precision    recall  f1-score   support

         neg       0.84      0.82      0.83       191
         pos       0.84      0.85      0.85       209

    accuracy                           0.84       400
   macro avg       0.84      0.84      0.84       400
weighted avg       0.84      0.84      0.84       400

Review: The movie was fantastic! I loved it. --> Sentiment: pos
Review: Worst film ever, waste of time. --> Sentiment: neg
