# Sentimental Analysis

In [1]:
import pandas as pd
import numpy as np
import re
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
import warnings
warnings.filterwarnings('ignore')

[nltk_data] Downloading package stopwords to C:\Users\Harsha
[nltk_data]     S\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
#load data
data=pd.read_csv(r"D:\Excelr\ananya\NLP-food-review.csv")
data

Unnamed: 0,review,reaction
0,Service is friendly and inviting.,1
1,Awesome service and food.,1
2,Waitress was a little slow in service.,0
3,"Come hungry, leave happy and stuffed!",1
4,Horrible - don't waste your time and money.,0
...,...,...
995,This was my first time and I can't wait until ...,1
996,Great service and food.,1
997,I paid the bill but did not tip because I felt...,0
998,The one down note is the ventilation could use...,0


In [3]:
#check stop words in english
stopwords.words('english')

['i',
 'me',
 'my',
 'myself',
 'we',
 'our',
 'ours',
 'ourselves',
 'you',
 "you're",
 "you've",
 "you'll",
 "you'd",
 'your',
 'yours',
 'yourself',
 'yourselves',
 'he',
 'him',
 'his',
 'himself',
 'she',
 "she's",
 'her',
 'hers',
 'herself',
 'it',
 "it's",
 'its',
 'itself',
 'they',
 'them',
 'their',
 'theirs',
 'themselves',
 'what',
 'which',
 'who',
 'whom',
 'this',
 'that',
 "that'll",
 'these',
 'those',
 'am',
 'is',
 'are',
 'was',
 'were',
 'be',
 'been',
 'being',
 'have',
 'has',
 'had',
 'having',
 'do',
 'does',
 'did',
 'doing',
 'a',
 'an',
 'the',
 'and',
 'but',
 'if',
 'or',
 'because',
 'as',
 'until',
 'while',
 'of',
 'at',
 'by',
 'for',
 'with',
 'about',
 'against',
 'between',
 'into',
 'through',
 'during',
 'before',
 'after',
 'above',
 'below',
 'to',
 'from',
 'up',
 'down',
 'in',
 'out',
 'on',
 'off',
 'over',
 'under',
 'again',
 'further',
 'then',
 'once',
 'here',
 'there',
 'when',
 'where',
 'why',
 'how',
 'all',
 'any',
 'both',
 'each

In [4]:
stemmer=PorterStemmer()

In [5]:
' '.join(['i','hate','you'])

'i hate you'

In [6]:
def preprocessing(text):
    text=text.lower()#converting to lower case
    text=re.sub('[^a-z ]','',text) #filtering a-z
    text=text.split() #splitting the review
    text=[stemmer.stem(word) for word in text if word not in stopwords.words('english')] #removing stop words
    text=" ".join(text) #joining review
    return text

In [7]:
#preprocessed data
preprocessing("i love the pizza")

'love pizza'

In [8]:
#storing preprocessed data in review
data['review']=data['review'].apply(preprocessing)

In [9]:
data

Unnamed: 0,review,reaction
0,servic friendli invit,1
1,awesom servic food,1
2,waitress littl slow servic,0
3,come hungri leav happi stuf,1
4,horribl dont wast time money,0
...,...,...
995,first time cant wait next,1
996,great servic food,1
997,paid bill tip felt server terribl job,0
998,one note ventil could use upgrad,0


In [10]:
#train test split
x_train,x_test,y_train,y_test=train_test_split(data['review'],data['reaction'],test_size=.20)

In [11]:
c=CountVectorizer()
x_train=c.fit_transform(x_train).toarray()
x_test=c.transform(x_test).toarray()

In [12]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

In [13]:
models=[DecisionTreeClassifier(),LogisticRegression()]

for model in models:
    model.fit(x_train,y_train)
    pred=model.predict(x_test)
    print(str(model))
    print(classification_report(y_test,pred))
    print('='*50)

DecisionTreeClassifier()
              precision    recall  f1-score   support

           0       0.67      0.78      0.72        97
           1       0.76      0.63      0.69       103

    accuracy                           0.70       200
   macro avg       0.71      0.71      0.70       200
weighted avg       0.71      0.70      0.70       200

LogisticRegression()
              precision    recall  f1-score   support

           0       0.73      0.79      0.76        97
           1       0.79      0.72      0.75       103

    accuracy                           0.76       200
   macro avg       0.76      0.76      0.75       200
weighted avg       0.76      0.76      0.75       200



# inference

In [14]:
#Input Review
text='the food is good'
text=preprocessing(text)
vector=c.transform([text]).toarray()

In [15]:
#predicting review
review=model.predict(vector)
if review==1:
    print('Positive Review')
else:
    print('Negative Review')

Positive Review
