# **Sentiment Analysis of Restaurant Reviews Project**


# Random Forest

**Importing required libraries**

In [26]:
import pandas as pd

**Importing dataset for the Project**

In [27]:
from google.colab import drive

# Mount Google Drive so the dataset stored there is readable from this runtime.
drive.mount('/content/drive/')

# Tab-separated review file kept in Drive.
# quoting=3 is csv.QUOTE_NONE: quote characters inside a review are treated as
# ordinary text instead of field delimiters.
dataset_path = "/content/drive/My Drive/data set/sentimentalreview_IBMpro.tsv"
data = pd.read_csv(dataset_path, sep='\t', quoting=3)

# Preview the first rows to confirm the file parsed into the expected columns.
data.head()

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


**Splitting the data into training and testing sets**

In [28]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. random_state pins the shuffle so the
# same rows land in each split on every run.
X_train, X_test, y_train, y_test = train_test_split(
    data['Review'],
    data['Liked'],
    test_size=0.2,
    random_state=2,
)

**Vectorize the text data using TF-IDF Vectorizer**

In [29]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Vocabulary is limited to at most 1000 terms.
tfidf_vectorizer = TfidfVectorizer(max_features=1000)

# The vectorizer is fitted on the training reviews only; the test reviews are
# then transformed with that already-fitted vectorizer (evaluated left to right).
X_train_tfidf, X_test_tfidf = (
    tfidf_vectorizer.fit_transform(X_train),
    tfidf_vectorizer.transform(X_test),
)

**Creating a Random Forest classifier**

In [30]:
from sklearn.ensemble import RandomForestClassifier

# 100 trees, with a fixed seed so the fitted forest is repeatable across runs.
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=0,
)

**Training the final classifier on the training data**

In [31]:
# Fit the forest on the TF-IDF training matrix and its labels. The call is left
# as the cell's last expression so the fitted estimator is displayed.
rf_classifier.fit(X=X_train_tfidf, y=y_train)

# Predictions

**Predicting sentiment labels on the training and test data using the final classifier**




In [35]:
# Predictions on the rows the model was trained on (used for the training scores).
y_train_pred = rf_classifier.predict(X=X_train_tfidf)

# Predictions on the held-out test rows.
y_pred = rf_classifier.predict(X=X_test_tfidf)

**Evaluating the model**

In [36]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

# The same three metrics, in this order, are computed for each split.
_metric_fns = (accuracy_score, precision_score, recall_score)

# Scores on the training rows.
training_accuracy, training_precision, training_recall = [
    metric(y_train, y_train_pred) for metric in _metric_fns
]

# Scores on the held-out test rows.
accuracy, precision, recall = [
    metric(y_test, y_pred) for metric in _metric_fns
]

# Report every score as a percentage with two decimals.
print(f'Training accuracy: {training_accuracy*100:.2f}%')
print(f'Training precision: {training_precision*100:.2f}%')
print(f'Training Recall: {training_recall*100:.2f}%')
print(f'Test accuracy: {accuracy*100:.2f}%')
print(f'Test precision: {precision*100:.2f}%')
print(f'Test Recall: {recall*100:.2f}%')

Training accuracy: 100.00%
Training precision: 100.00%
Training Recall: 100.00%
Test accuracy: 83.00%
Test precision: 80.21%
Test Recall: 83.70%


**Displaying a classification report for more detailed metrics**

In [37]:
from sklearn.metrics import classification_report

# Text report built from the test labels and the test-set predictions.
test_report = classification_report(y_test, y_pred)
print(test_report)

              precision    recall  f1-score   support

           0       0.86      0.82      0.84       108
           1       0.80      0.84      0.82        92

    accuracy                           0.83       200
   macro avg       0.83      0.83      0.83       200
weighted avg       0.83      0.83      0.83       200



# Support Vector Machine

In [38]:
from sklearn.svm import SVC

# NOTE: this cell rebinds X_train, X_test, y_train, y_test, tfidf_vectorizer,
# X_train_tfidf, X_test_tfidf, y_pred, accuracy, recall and precision, which
# the Random Forest cells above also use. After running it, re-run the Random
# Forest cells from their split onward before re-evaluating that model.
# The split here (10% test, seed 0) also differs from the Random Forest split
# (20% test, seed 2), so the two models are not scored on the same test rows.
# train_test_split, TfidfVectorizer and the metric functions come from the
# imports in the earlier cells.

# Features are the raw review text; the target is the Liked label.
X, y = data['Review'], data['Liked']

# Hold out 10% of the rows for testing, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=0,
)

# TF-IDF features: fitted on the training reviews, then applied to the test reviews.
tfidf_vectorizer = TfidfVectorizer(max_features=15000)
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

# Linear-kernel SVM, built and fitted in one expression.
# NOTE(review): per the scikit-learn docs, SVC's random_state only matters when
# probability=True, so it appears to have no effect here. Confirm before
# relying on it.
svm_classifier = SVC(kernel='linear', C=1.2, random_state=2).fit(X_train_tfidf, y_train)

# Score the held-out rows.
y_pred = svm_classifier.predict(X_test_tfidf)

accuracy, recall, precision = (
    accuracy_score(y_test, y_pred),
    recall_score(y_test, y_pred),
    precision_score(y_test, y_pred),
)

print(f"Accuracy: {accuracy*100:.2f} %")
print(f"Recall: {recall*100:.2f} %")
print(f"Precision: {precision*100:.2f} %")

Accuracy: 84.00 %
Recall: 89.80 %
Precision: 80.00 %
