In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import nltk
# nltk.download('all')
from nltk.corpus import stopwords
import string
import math
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, roc_curve
from sklearn.model_selection import learning_curve, GridSearchCV
from nltk.stem import PorterStemmer
from nltk.corpus import wordnet as wn
from nltk.corpus import sentiwordnet as swn

pd.set_option('display.max_columns', 10)
data = pd.read_csv('yelp.csv')
print(data.head())
print(data.columns)
print(data.dtypes)
print(data.describe(include='all'))
data['length'] = data['text'].apply(len)

x = data['text'][0:100]
y = data['stars'][0:100]

def text_process(review):
   nopunc = [word for word in review if word not in string.punctuation]
   nopunc = ''.join(nopunc)
   nonums = [word for word in nopunc if (word.isalpha()) | (word ==' ')]  ##
   nonums = ''.join(nonums)
   stop_words_english = stopwords.words('english')
   stop_words_english.extend(['use', 'us', 'make', 'also', 'tell'])
   no_stop_words = [word.lower() for word in nonums.split() if word.lower() not in stop_words_english]
   porter = PorterStemmer()
   stems = []
   for word in no_stop_words:
       stems.append(porter.stem(word))
   return stems

vector = CountVectorizer(analyzer=text_process).fit(x)
X = vector.transform(x)
print(X.toarray())
print("Shape of the sparse matrix: ", X.shape)
print("Non-Zero occurences: ", X.nnz)

# DENSITY OF THE MATRIX
density = (X.nnz/(X.shape[0]*X.shape[1]))*100
print("Density of the matrix = ", density)
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)
from sklearn.naive_bayes import MultinomialNB
mnb = MultinomialNB()
mnb.fit(x_train, y_train)
predmnb = mnb.predict(x_test)
print("Confusion Matrix for Multinomial Naive Bayes:")
print(confusion_matrix(y_test,predmnb))
print("\nScore:",round(accuracy_score(y_test,predmnb)*100,2))
print("\nClassification Report:\n",classification_report(y_test,predmnb))

# Decision Tree
from sklearn.tree import DecisionTreeClassifier
dt = DecisionTreeClassifier()
dt.fit(x_train,y_train)
preddt = dt.predict(x_test)
print("Confusion Matrix for Decision Tree:")
print(confusion_matrix(y_test,preddt))
print("\nScore:",round(accuracy_score(y_test,preddt)*100,2))
print("\nClassification Report:\n",classification_report(y_test,preddt))

# K Nearest Neighbour Algorithm
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=10)
knn.fit(x_train,y_train)
predknn = knn.predict(x_test)
print("Confusion Matrix for K Neighbors Classifier:")
print(confusion_matrix(y_test,predknn))
print("\nScore: ",round(accuracy_score(y_test,predknn)*100,2))
print("\nClassification Report:\n")
print(classification_report(y_test,predknn))

# XGBoost Classifier
import xgboost
from xgboost import XGBClassifier
xgb = XGBClassifier()
xgb.fit(x_train,y_train)
predxgb = xgb.predict(x_test)
print("Confusion Matrix for XGBoost Classifier:")
print(confusion_matrix(y_test,predxgb))
print("\nScore: ",round(accuracy_score(y_test,predxgb)*100,2))
print("\nClassification Report:\n")
print(classification_report(y_test,predxgb))

# POSITIVE REVIEW
pr = data['text'][0]
print(pr)
print("Actual Rating: ",data['stars'][0])
pr_t = vector.transform([pr])
print("Predicted Rating:")
print(knn.predict(pr_t)[0])

              business_id        date               review_id  stars  \
0  9yKzy9PApeiPPOUJEtnvkg  2011-01-26  fWKvX83p0-ka4JS3dc6E5A      5   
1  ZRJwVLyzEJq1VAihDhYiow  2011-07-27  IjZ33sJrzXqU-0X6U8NwyA      5   
2  6oRAC4uyJCsJl1X0WZpVSA  2012-06-14  IESLBzqUCLdSzSqm0eCSxQ      4   
3  _1QQZuf4zZOyFCvXc0o6Vg  2010-05-27  G-WvGaISbqqaMHlNnByodA      5   
4  6ozycU1RpktNG2-1BroVtw  2012-01-05  1uJFq2r5QfJG_6ExMRCaGw      5   

                                                text    type  \
0  My wife took me here on my birthday for breakf...  review   
1  I have no idea why some people give bad review...  review   
2  love the gyro plate. Rice is so good and I als...  review   
3  Rosie, Dakota, and I LOVE Chaparral Dog Park!!...  review   
4  General Manager Scott Petello is a good egg!!!...  review   

                  user_id  cool  useful  funny  
0  rLtl8ZkDX5vH5nAx9C3q5Q     2       5      0  
1  0a2KyEL0d3Yb1V6aivbIuQ     0       0      0  
2  0hT2KtfLiobPvh6cDC8JQg     0    

  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)


Confusion Matrix for XGBoost Classifier:
[[0 0 1]
 [0 6 5]
 [0 6 2]]

Score:  40.0

Classification Report:

              precision    recall  f1-score   support

           1       0.00      0.00      0.00         1
           4       0.50      0.55      0.52        11
           5       0.25      0.25      0.25         8

    accuracy                           0.40        20
   macro avg       0.25      0.27      0.26        20
weighted avg       0.38      0.40      0.39        20

My wife took me here on my birthday for breakfast and it was excellent.  The weather was perfect which made sitting outside overlooking their grounds an absolute pleasure.  Our waitress was excellent and our food arrived quickly on the semi-busy Saturday morning.  It looked like the place fills up pretty quickly so the earlier you get here the better.

Do yourself a favor and get their Bloody Mary.  It was phenomenal and simply the best I've ever had.  I'm pretty sure they only use ingredients from their g

In [3]:
import pandas as pd

pd.read_csv('yelp.csv').head()

Unnamed: 0,business_id,date,review_id,stars,text,type,user_id,cool,useful,funny
0,9yKzy9PApeiPPOUJEtnvkg,2011-01-26,fWKvX83p0-ka4JS3dc6E5A,5,My wife took me here on my birthday for breakf...,review,rLtl8ZkDX5vH5nAx9C3q5Q,2,5,0
1,ZRJwVLyzEJq1VAihDhYiow,2011-07-27,IjZ33sJrzXqU-0X6U8NwyA,5,I have no idea why some people give bad review...,review,0a2KyEL0d3Yb1V6aivbIuQ,0,0,0
2,6oRAC4uyJCsJl1X0WZpVSA,2012-06-14,IESLBzqUCLdSzSqm0eCSxQ,4,love the gyro plate. Rice is so good and I als...,review,0hT2KtfLiobPvh6cDC8JQg,0,1,0
3,_1QQZuf4zZOyFCvXc0o6Vg,2010-05-27,G-WvGaISbqqaMHlNnByodA,5,"Rosie, Dakota, and I LOVE Chaparral Dog Park!!...",review,uZetl9T0NcROGOyFfughhg,1,2,0
4,6ozycU1RpktNG2-1BroVtw,2012-01-05,1uJFq2r5QfJG_6ExMRCaGw,5,General Manager Scott Petello is a good egg!!!...,review,vYmM4KTsC8ZfQBg-j5MWkw,0,0,0
