In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report

# Read the data
df = pd.read_csv("IMDB Dataset.csv")

# Create 'Category' column
df['Category'] = df['sentiment'].apply(lambda x: 1 if x == 'positive' else 0)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(df['review'], df['Category'], test_size=0.2)

# Exercise-1: Random Forest Classifier
rf_pipeline = make_pipeline(CountVectorizer(), RandomForestClassifier(n_estimators=50, criterion='entropy'))
rf_pipeline.fit(X_train, y_train)
rf_y_pred = rf_pipeline.predict(X_test)
print("Random Forest Classification Report:\n", classification_report(y_test, rf_y_pred))

# Exercise-2: K-Nearest Neighbors Classifier
knn_pipeline = make_pipeline(CountVectorizer(), KNeighborsClassifier(n_neighbors=10, metric='euclidean'))
knn_pipeline.fit(X_train, y_train)
knn_y_pred = knn_pipeline.predict(X_test)
print("\nK-Nearest Neighbors Classification Report:\n", classification_report(y_test, knn_y_pred))

# Exercise-3: Multinomial Naive Bayes Classifier
nb_pipeline = make_pipeline(CountVectorizer(), MultinomialNB())
nb_pipeline.fit(X_train, y_train)
nb_y_pred = nb_pipeline.predict(X_test)
print("\nMultinomial Naive Bayes Classification Report:\n", classification_report(y_test, nb_y_pred))

Random Forest Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.84      0.84      5071
           1       0.83      0.84      0.84      4929

    accuracy                           0.84     10000
   macro avg       0.84      0.84      0.84     10000
weighted avg       0.84      0.84      0.84     10000


K-Nearest Neighbors Classification Report:
               precision    recall  f1-score   support

           0       0.66      0.65      0.65      5071
           1       0.64      0.65      0.65      4929

    accuracy                           0.65     10000
   macro avg       0.65      0.65      0.65     10000
weighted avg       0.65      0.65      0.65     10000


Multinomial Naive Bayes Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.87      0.85      5071
           1       0.86      0.82      0.84      4929

    accuracy                           0.85     10