In [None]:
!wget https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
!tar -xzf aclImdb_v1.tar.gz


In [None]:
import os
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
import joblib

In [None]:
def load_reviews(path):
    """Read the labelled review files stored under ``path``.

    ``path`` must contain a ``pos`` and a ``neg`` sub-directory, each holding
    one UTF-8 text file per review.

    Args:
        path: Directory containing the ``pos`` and ``neg`` sub-directories.

    Returns:
        A ``(data, labels)`` tuple of equal-length lists. ``data`` holds the
        review texts; ``labels`` holds 1 for ``pos`` reviews and 0 for ``neg``
        reviews. All ``pos`` reviews come first, and within each label the
        reviews are ordered by file name.

    Raises:
        OSError: If a label sub-directory is missing or a file cannot be read.
        UnicodeDecodeError: If a review file is not valid UTF-8.
    """
    data = []
    labels = []
    for label in ['pos', 'neg']:
        dir_path = os.path.join(path, label)
        label_id = 1 if label == 'pos' else 0
        # os.listdir returns entries in arbitrary, filesystem-dependent order;
        # sorting keeps the data order (and any seeded split of it) reproducible.
        for file in sorted(os.listdir(dir_path)):
            file_path = os.path.join(dir_path, file)
            # Skip sub-directories and other non-file entries instead of
            # crashing in open().
            if not os.path.isfile(file_path):
                continue
            with open(file_path, encoding='utf-8') as f:
                data.append(f.read())
            labels.append(label_id)
    return data, labels

In [None]:
# Location of the labelled training reviews inside the extracted archive.
train_data_path = "aclImdb/train"
# X: list of review texts; y: matching 1 (pos) / 0 (neg) labels.
X, y = load_reviews(path=train_data_path)

In [None]:
# Hold out 20% of the reviews for evaluation. The split is seeded so it is
# repeatable, and stratified on the labels so the train and test sets keep the
# same positive/negative ratio as the full data set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:

# Text-classification pipeline: TF-IDF features (capped at 10,000 terms)
# feeding a logistic-regression classifier.
model = Pipeline(
    steps=[
        ("tfidf", TfidfVectorizer(max_features=10_000)),
        ("clf", LogisticRegression()),
    ]
)


In [None]:
# Fit the pipeline on the training split, then report its score on the
# held-out split.
model.fit(X_train, y_train)
test_accuracy = model.score(X_test, y_test)
print("Accuracy:", test_accuracy)

In [None]:
def predict_review_sentiment(review=None, classifier=None):
    """Classify one review and print whether it is positive or negative.

    Args:
        review: Review text to classify. When omitted, the text is read
            interactively with ``input()``.
        classifier: Object with a ``predict(list_of_texts)`` method whose
            first result is 1 for a positive review. When omitted, the
            notebook-level ``model`` is used.

    Returns:
        None. The result is printed.
    """
    if review is None:
        review = input("Enter a review: ")
    # A blank review has no tokens to score, so any prediction for it would
    # be meaningless; report that instead of printing a sentiment.
    if not review.strip():
        print("No review text entered; nothing to classify.")
        return
    if classifier is None:
        # Fall back to the pipeline fitted earlier in the notebook.
        classifier = model
    prediction = classifier.predict([review])[0]
    sentiment = "Positive 😊" if prediction == 1 else "Negative 😞"
    print(f"Sentiment: {sentiment}")
# Interactive demo: prompts for a review on stdin and prints the predicted
# sentiment. This cell waits for user input when the notebook is run.
predict_review_sentiment()

In [None]:
# Persist the fitted pipeline to disk so it can be reloaded later with
# joblib.load. NOTE: joblib files are pickle-based; only load files you trust.
joblib.dump(value=model, filename='review_classifier.pkl')