# Text to Sentiment value converter

## Read file

In [1]:
import os
import numpy as np
import pandas as pd

inputFile = 'not_reviews.csv'        # name of the original CSV file; it must include the 'review' text column
outputFile = 'not_reviews_NLP.csv'   # name of the output CSV file: the input data plus the new sentiment feature columns

In [4]:
# Load the reviews and add the (initially empty) sentiment feature columns
df = pd.read_csv(inputFile)
header_list_new = ['numSentence', 'numWords', 'totSentiment', 'avgSentiment', 'Sfreq0','Sfreq1','Sfreq2','Sfreq3','Sfreq4','Sfreq5']
for name in header_list_new:
    # avgSentiment receives a fractional mean later on, so it starts out as a
    # float column; writing a float into an integer column is deprecated in
    # recent pandas. All other features are integer-valued counts or sums.
    df[name] = 0.0 if name == 'avgSentiment' else 0

In [3]:
# Preview the first rows; the newly added feature columns are still zero-initialised here
df.head()

Unnamed: 0,date,friends,has_photo,localtion,photos,rating,restaurant_id,review,reviews,user_name,numSentence,numWords,totSentiment,avgSentiment,Sfreq0,Sfreq1,Sfreq2,Sfreq3,Sfreq4,Sfreq5
0,3/16/2018,0,False,"Frankfort, IL",0,1,617,"Well, if u just can't get enough of that ride ...",3,James E.,0,0,0,0,0,0,0,0,0,0
1,8/6/2017,0,False,"Wesley Chapel, FL",0,1,617,Ordered the Canarsie Chicken as a take-away. A...,1,Colin E.,0,0,0,0,0,0,0,0,0,0
2,11/11/2016,0,False,"Lakeland, FL",0,1,617,am sadden by my new pizza spot .ii am sad beca...,2,Yolanda B.,0,0,0,0,0,0,0,0,0,0
3,8/21/2016,24,True,"Clearwater, FL",0,1,617,"Never tried the pizza, the staff was overwhelm...",1,Luke B.,0,0,0,0,0,0,0,0,0,0
4,7/1/2016,1,True,"Naperville, IL",6,1,617,We arrived several hours after they opened and...,5,Heavenly K.,0,0,0,0,0,0,0,0,0,0


## Requirements to run Stanford Core NLP

#### Running Stanford Core NLP server
- Download Stanford CoreNLP from https://stanfordnlp.github.io/CoreNLP/index.html#license and unzip it
- Install Java

- Start the Stanford CoreNLP server by running the commands below in a command prompt (e.g. Anaconda Prompt); the `java` command must be run from the unzipped CoreNLP directory

cd Documents\Python Scripts\stanford-corenlp-full-2018-01-31

java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -annotators "tokenize,ssplit,pos,lemma,parse,sentiment" -port 9000 -timeout 30000


#### Package required
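Install a Python wrapper for the CoreNLP server, e.g. stanfordcorenlp https://pypi.python.org/pypi/stanfordcorenlp. Note: the code in this notebook imports `pycorenlp` (`from pycorenlp import StanfordCoreNLP`), so that package (`pip install pycorenlp`) must be installed to run the cells below as written.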

(base) C:\Users\ML\Documents\Python Scripts>pip install stanfordcorenlp-3.8.0.1-py2.py3-none-any.whl

Reference: https://www.khalidalnajjar.com/setup-use-stanford-corenlp-server-python/

## Sentiment analysis function

In [4]:
from pycorenlp import StanfordCoreNLP
# Client for the CoreNLP server; the server must already be listening on port 9000 of this machine
nlp = StanfordCoreNLP('http://localhost:9000')

# Function; Output = # sentences, # words, total sentimentValue, avg. sentimentValue, sentimentHist
def stanford_sentiment(text_str):
    """Score a text with the CoreNLP sentiment annotator and summarise it.

    The text is sent to the CoreNLP server through the module-level ``nlp``
    client, and the per-sentence ``sentimentValue`` results are aggregated.

    Args:
        text_str: The text (e.g. one review) to analyse.

    Returns:
        A tuple ``(numSentence, numWords, totSentiment, avgSentiment, freq)``:

        * numSentence -- number of sentences in the server response.
        * numWords -- number of whitespace-separated tokens in ``text_str``.
        * totSentiment -- sum of the sentiment values of all sentences.
        * avgSentiment -- mean of the sentiment values of all sentences
          (``nan`` when the response contains no sentences).
        * freq -- array of 6 counts: how many sentences have sentiment
          value 0, 1, 2, 3, 4 and 5 (the last bin also includes 6).

    Raises:
        RuntimeError: If the server reply is not a parsed JSON annotation
            containing a "sentences" entry.
    """
    res = nlp.annotate(text_str,
                       properties={
                           'annotators': 'sentiment',
                           'outputFormat': 'json',
                           'timeout': 30000,
                       })

    # With pycorenlp, a reply that is not valid JSON (e.g. a server error or
    # timeout message) comes back as a plain string rather than a dict. Fail
    # with the server's message instead of an opaque TypeError on indexing.
    if not isinstance(res, dict) or "sentences" not in res:
        raise RuntimeError(
            "CoreNLP did not return a JSON annotation: %r" % (str(res)[:200],))

    sentences = res["sentences"]
    numSentence = len(sentences)
    numWords = len(text_str.split())

    # data arrangement: one sentiment value per sentence, stored as floats
    arraySentVal = np.array([int(s["sentimentValue"]) for s in sentences],
                            dtype=float)

    # sum of sentiment values for all sentences in a text/review
    totSentiment = arraySentVal.sum()

    # avg. of sentiment values for all sentences in a text/review;
    # the mean of zero sentences is undefined, so return nan explicitly
    # (same value np.mean would give, without the RuntimeWarning)
    avgSentiment = arraySentVal.mean() if numSentence else float("nan")

    # frequency of sentimentValue in a text/review; CoreNLP scale:
    # {0 : Very negative, 1 : Negative, 2 : Neutral, 3 : Positive, 4 : Very positive}
    # Six bins are kept so the result always has entries freq[0]..freq[5].
    bins = [0, 1, 2, 3, 4, 5, 6]
    freq = np.histogram(arraySentVal, bins)[0]    # getting freq. only w/o bins

    return (numSentence, numWords, totSentiment, avgSentiment, freq)

## Text to Sentiment Score conversion

In [5]:
%%time

# sentiment score calculation
# input = review text with '\n' characters replaced by spaces (which does not affect the sentiment analysis much)
# output = number of sentences and words,
#          sum of the sentiment scores of all sentences in a review
#          avg of sentiment scores
#          hist. frequency of sentiment scores in bins 0 to 5; CoreNLP scale:
#          {0 : Very negative, 1 : Negative, 2 : Neutral, 3 : Positive, 4 : Very positive}

freq_columns = ['Sfreq0', 'Sfreq1', 'Sfreq2', 'Sfreq3', 'Sfreq4', 'Sfreq5']
dfLength = len(df)

for i in range(dfLength):
    try:
        numSentence, numWords, totSentiment, avgSentiment, freq = stanford_sentiment(df.review[i].replace('\n', " "))
        df.loc[i, 'numSentence'] = numSentence
        df.loc[i, 'numWords'] = numWords
        df.loc[i, 'totSentiment'] = totSentiment
        df.loc[i, 'avgSentiment'] = avgSentiment
        # freq[k] is the number of sentences with sentiment value k
        for column, count in zip(freq_columns, freq):
            df.loc[i, column] = count
    except Exception as err:
        # Best effort: a review that cannot be scored keeps its default
        # feature values and the loop continues. Catching Exception (not a
        # bare except) lets Ctrl-C still stop this long-running cell, and
        # printing the exception shows why the row failed.
        print("error where i =", i, "-", repr(err))

error where i = 9623
error where i = 10001
error where i = 15685
error where i = 16756
error where i = 23807
error where i = 25526
error where i = 25636
error where i = 26865
Wall time: 2h 6min 11s


In [6]:
# Show the text of a review that the conversion loop reported as failing (i = 9623)
df.review[9623]

"when you enter in side you see movie Star came here big list picture's on the wall wow\ndolly patron my favorite lady\nNicolas cage,\nJon voight ,\nPerter folk,\nLeonardo DiCaprio's,\nSorry LONG LIST  I don't remembers all  \nIf you like Indian food this is the place to enjoy real Indian food, that's by they had great Reviews all over the site's  \n\nFood is Excellent ( yes No greasy )\nChicken Tikka Masala is top of the Game\nLamb Rogan josh is very tender lamb Curry Dish\nGarlic Naan and Chili Naan very fresh\ndessert Gagar Halwa !!1\n\nIndian Movie playing on plasma TV ( Hindi Movie) Great !!!!\n\nStaff very helpful\nGo check out you self ( make sure friday and Satuday make a reservations get nice booth round Table)"

In [7]:
# Show the text of a review that the conversion loop reported as failing (i = 10001)
df.review[10001]

'Today we did a day trip from San Francisco to Carmel and stopped at Monterrey for lunch. We are a family of 5. Kids are 7, 4 and 2. After checking out all the different options at the Fisherman\'s Wharf... We decided to enter the "Old Fisherman\'s Grotto", not because of the outside presentation of the food but because at the entrance I saw crayons and paper and thought it would be a relaxed, family friendly restaurant... They sat us at the middle of the restaurant, which I thought not a good idea when you have children, but again, I thought they must be a very family friendly restaurant... Service was snappy and hurried, but we said nothing about it... We ordered food, kids were just drawing, and at some point the 2 year old wanted to play with his glass of water, we didn\'t let him do it and he cried loudly as he does... for about 1 minute... We did everything to settle him as soon as possible, we apologized to everyone around, and the hostess approached my husband asking him to sta

In [8]:
# Show the text of a review that the conversion loop reported as failing (i = 15685)
df.review[15685]

"This place is probably the best kbbq place in NorCal right now. I came on the 2nd day of their opening with 2 of my boys and it was pretty crowded and I had to wait for 45 minutes. The place is decorated nicely and has kind of a lounge feel to it. The staff was very attentive and like to check up on our orders a lot. Not sure if it's the norm but the way the meat came out was different than what I've ever done. They give you a menu and you order what you want and you get a portion enough for 2-3 people to be happy with and you continue that throughout until you're satisfied.\n\n  The good stuff :)\n\nGen premium steak: You should probably start out with this every time, it's a slab of steak with a thin layer of fat and if you cook it rare, it'll stay tender and delicious\n\nBeef bulgogi : Standard bulgogi, very good marinade\n\nMiso pork belly : super good, seems like they've marinaded it for a long time and it was super flavorful\n\nBlack angus brisket: very thinly sliced and is not 

In [9]:
# Show the text of a review that the conversion loop reported as failing (i = 16756)
df.review[16756]

'The Bear Rating: (out of 5)\n1) Taste: 4\n2) Presentation: 3\n3) Ambiance: 3 1/2\n4) Service: 1\n5) Creativity: 3 1/2\n\nWhat we ate:\nMongolian beef\n"Steak" sandwich\n\nPros:\n+A variety of food to select from (Thai, Chinese, Vietnamese, American, dessert, etc.)\n+Atmosphere is very calming\n\nCons:\n-It\'s always really quiet in restaurant, every time we eat there we feel like we\'re the only people talking\n-Staff is so so, have an uncaring air to them'

## Write output into a csv file

In [6]:
# Save the DataFrame, including the new sentiment feature columns, as a UTF-8 CSV without the index column
df.to_csv(outputFile, encoding='utf-8', index=False )