In [13]:
# !pip install pandas
# !pip install numpy
# !pip install scikit-learn




In [14]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

# Download necessary NLTK data
# Only the 'stopwords' resource is requested here; it backs the
# stopwords.words('english') calls in the preprocessing code further down.
# quiet=True is passed so the download does not print progress messages.
nltk.download('stopwords', quiet=True)

True

In [15]:
# Load the dataset
# NOTE(review): this is an absolute, machine-specific path, so the cell only
# runs where that exact file exists.
# The file is read as tab-separated; quoting=3 has the same value as
# csv.QUOTE_NONE, i.e. quote characters are treated as ordinary text.
reviews_path = 'D:/navvspace/Danger/IBM Project/Restaurant_Reviews.tsv'
dataset = pd.read_csv(
    reviews_path,
    delimiter='\t',
    quoting=3,
)
dataset.head()

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


In [16]:
# Text preprocessing
# The stop-word set and the stemmer are loop-invariant, so build them once
# up front instead of once per word / once per review.
# NOTE(review): NLTK's English stop-word list is believed to contain negations
# such as "not"; dropping them may blur sentiment -- confirm this is intended.
english_stopwords = set(stopwords.words('english'))
ps = PorterStemmer()

corpus = []
for raw_review in dataset['Review']:
    # Replace every non-letter with a space, lower-case, split on whitespace.
    review = re.sub('[^a-zA-Z]', ' ', raw_review).lower().split()
    # Drop stop words, then reduce each remaining token to its Porter stem.
    review = [ps.stem(word) for word in review if word not in english_stopwords]
    review = ' '.join(review)
    corpus.append(review)

# Creating the Bag of Words model
# max_features=1500 limits how many vocabulary terms the vectorizer keeps;
# .toarray() converts the fitted count matrix into a dense NumPy array.
cv = CountVectorizer(max_features=1500)
X = cv.fit_transform(corpus).toarray()
# Labels are taken by position from the second column
# (shown as 'Liked' in the dataset.head() output above).
y = dataset.iloc[:, 1].values


In [17]:
# Splitting the dataset into training and test set
# 20% of the rows are held out for evaluation; random_state=0 fixes the
# shuffle so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    """Score a set of predictions against the true labels.

    Parameters
    ----------
    y_true
        Ground-truth labels.
    y_pred
        Predicted labels, aligned with ``y_true``.

    Returns
    -------
    dict
        Maps 'Accuracy', 'Precision', 'Recall' and 'F1 Score' (in that
        order) to the value returned by the matching scikit-learn scorer,
        each called with its default arguments.
    """
    scorers = (
        ('Accuracy', accuracy_score),
        ('Precision', precision_score),
        ('Recall', recall_score),
        ('F1 Score', f1_score),
    )
    return {label: scorer(y_true, y_pred) for label, scorer in scorers}

# Dictionary to store results
# Keyed by model name; each value is the metrics dict that
# calculate_metrics returns for that model's test-set predictions.
results = {}

In [18]:
# Train and evaluate every model with one shared fit / predict / score loop
# instead of four copy-pasted stanzas. The estimators keep their own
# notebook-level names because later cells refer to them (e.g. nb_classifier).
nb_classifier = MultinomialNB()
lr_classifier = LogisticRegression(random_state=0)
svm_classifier = SVC(kernel='linear', random_state=0)
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=0)

# Insertion order defines both the training order and the row order of the
# results table: Naive Bayes, Logistic Regression, SVM, Random Forest.
classifiers = {
    'Naive Bayes': nb_classifier,
    'Logistic Regression': lr_classifier,
    'SVM': svm_classifier,
    'Random Forest': rf_classifier,
}

predictions = {}
for model_name, model in classifiers.items():
    model.fit(X_train, y_train)
    predictions[model_name] = model.predict(X_test)
    results[model_name] = calculate_metrics(y_test, predictions[model_name])

# Keep the per-model prediction variables the original cell exposed.
y_pred_nb = predictions['Naive Bayes']
y_pred_lr = predictions['Logistic Regression']
y_pred_svm = predictions['SVM']
y_pred_rf = predictions['Random Forest']

In [19]:
# Create metrics table
# pd.DataFrame(results) puts one column per model; transposing gives
# one row per model and one column per metric.
metrics_table = pd.DataFrame(results).transpose()
metrics_table

Unnamed: 0,Accuracy,Precision,Recall,F1 Score
Naive Bayes,0.765,0.764151,0.786408,0.77512
Logistic Regression,0.71,0.758621,0.640777,0.694737
SVM,0.72,0.764045,0.660194,0.708333
Random Forest,0.715,0.819444,0.572816,0.674286


In [20]:
# Function to preprocess and predict new reviews
def predict_sentiment(review, classifier):
    """Classify one raw review string as "positive" or "negative".

    The text is cleaned the same way as the training corpus (non-letters
    replaced by spaces, lower-cased, English stop words removed, Porter
    stemmed), vectorized with the notebook-level ``cv`` and then passed to
    ``classifier.predict``.

    Parameters
    ----------
    review : str
        Raw review text.
    classifier
        Object whose ``predict`` accepts the array produced by
        ``cv.transform``; presumably one of the models fitted earlier in
        the notebook.

    Returns
    -------
    str
        "positive" when the first predicted label equals 1, otherwise
        "negative".
    """
    # Build the stop-word set once per call rather than once per token.
    english_stopwords = set(stopwords.words('english'))
    ps = PorterStemmer()
    tokens = re.sub('[^a-zA-Z]', ' ', review).lower().split()
    cleaned = ' '.join(ps.stem(word) for word in tokens if word not in english_stopwords)
    # Relies on the already-fitted notebook-level vectorizer `cv`.
    features = cv.transform([cleaned]).toarray()
    prediction = classifier.predict(features)
    return "positive" if prediction[0] == 1 else "negative"


In [21]:
# Test the prediction function
# Quick check: classify one hand-written review with the Naive Bayes
# model and print the input next to the predicted label.
test_review = "The food was amazing and the service was excellent!"
result = predict_sentiment(test_review, nb_classifier)
print(
    f"Test review: '{test_review}'",
    f"Predicted sentiment: {result}",
    sep="\n",
)

Test review: 'The food was amazing and the service was excellent!'
Predicted sentiment: positive


In [26]:
# Interactive cell for user input
# NOTE(review): the printed result depends on what is typed, so this cell is
# not reproducible under Restart & Run All.
user_input = input("Enter a restaurant review (or 'quit' to exit): ")
if user_input.strip().lower() == 'quit':
    # The prompt promises that 'quit' exits, so the sentinel must not be
    # classified as if it were a review.
    print("Exiting without making a prediction.")
elif not user_input.strip():
    # Blank input contains no words to classify.
    print("No review entered.")
else:
    result = predict_sentiment(user_input, nb_classifier)
    print(f"Review: '{user_input}'")
    print(f"Predicted sentiment: {result}")

Review: 'I got home to see the driest damn wings ever!'
Predicted sentiment: negative
