In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [7]:
# Load the review dataset.
# NOTE: this is an absolute, machine-specific path; it must exist on the
# machine running the notebook.
data = pd.read_csv('C:/Users/MMP/anaconda3/IMDB Dataset.csv')

# Features are the raw review texts.
X = data['review']

# Binary target: 1 where the sentiment column equals 'positive', 0 for
# every other value.
y = data['sentiment'].eq('positive').astype('int64')

In [8]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [9]:
# TF-IDF text features: English stop words are removed and max_df is set
# to 0.7.
vectorizer = TfidfVectorizer(
    max_df=0.7,
    stop_words='english',
)

# The vectorizer is fitted on the training split only; the test split is
# transformed with that already-fitted vectorizer.
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


In [10]:
# Train a logistic-regression classifier (library defaults) on the TF-IDF
# training features. `fit` returns the estimator itself, so construction
# and fitting can be chained.
model = LogisticRegression().fit(X_train_tfidf, y_train)

# Keep the fitted estimator as the cell's last expression so it is displayed.
model


In [11]:
# Predict labels for the held-out reviews and report accuracy plus the
# per-class precision / recall / F1 table.
y_pred = model.predict(X_test_tfidf)
print(f'Accuracy: {accuracy_score(y_test, y_pred)}')

report_text = classification_report(y_test, y_pred)
print(report_text)


Accuracy: 0.8941
              precision    recall  f1-score   support

           0       0.91      0.88      0.89      4961
           1       0.88      0.91      0.90      5039

    accuracy                           0.89     10000
   macro avg       0.89      0.89      0.89     10000
weighted avg       0.89      0.89      0.89     10000



In [12]:
import joblib
# Persist the fitted classifier and the fitted vectorizer to the current
# working directory so they can be reloaded for later predictions.
joblib.dump(value=model, filename='sentiment_model.pkl')
joblib.dump(value=vectorizer, filename='tfidf_vectorizer.pkl')


['tfidf_vectorizer.pkl']

In [13]:
def predict_sentiment(review, model=None, vectorizer=None):
    """Classify a single review as 'Positive' or 'Negative'.

    Parameters
    ----------
    review : str
        Raw review text. It is wrapped in a one-element list before being
        passed to ``vectorizer.transform``.
    model : object, optional
        Fitted classifier exposing ``predict``. When omitted, it is loaded
        from 'sentiment_model.pkl' in the current working directory.
    vectorizer : object, optional
        Fitted vectorizer exposing ``transform``. When omitted, it is loaded
        from 'tfidf_vectorizer.pkl' in the current working directory.

    Returns
    -------
    str
        'Positive' when the first predicted label equals 1, otherwise
        'Negative'.

    Notes
    -----
    Passing already-loaded ``model`` / ``vectorizer`` objects avoids
    re-reading both files from disk on every call.
    """
    # NOTE(security): joblib files are pickle-based; only load artifacts
    # that come from a trusted source.
    if model is None:
        model = joblib.load('sentiment_model.pkl')
    if vectorizer is None:
        vectorizer = joblib.load('tfidf_vectorizer.pkl')

    # The vectorizer expects an iterable of documents, hence the list.
    review_tfidf = vectorizer.transform([review])
    prediction = model.predict(review_tfidf)
    return 'Positive' if prediction[0] == 1 else 'Negative'


In [20]:
# Ask for a review and report its predicted sentiment.
# Surrounding whitespace is stripped so a blank entry can be detected.
review = input("Enter a movie review: ").strip()

if review:
    sentiment = predict_sentiment(review)
    print(f"The sentiment of the review is: {sentiment}")
else:
    # With no text there is nothing to classify; do not report a
    # sentiment for a blank entry.
    sentiment = None
    print("No review text entered; nothing to classify.")


Enter a movie review:  titanic


The sentiment of the review is: Positive
