# Add Sentiment Analysis Scores to Data Frame

---

### Import Modules & Read in Data Frame

In [41]:
import pandas as pd
from random import randrange
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Import module from a parent folder
import os,sys,inspect
# A notebook cell has no source file on disk, so inspect.getfile() on the
# current frame yields a kernel-generated name (a temp-directory path on recent
# ipykernel releases) rather than this notebook's location. Resolve the parent
# folder from the working directory instead; this relies on the notebook being
# run from its own folder, the same assumption the relative '../csv/...' path
# used for the input data makes.
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
# Put the parent folder first so project_functions is found there
sys.path.insert(0, parent_dir)
import project_functions

In [21]:
# Load the comments table from the sibling csv folder
source_csv = '../csv/comments_metadata.csv'
df = pd.read_csv(source_csv)

---

In [26]:
# Row labels of the entries whose comment is missing
missing_comment = df['comment'].isna()
df.index[missing_comment]

Int64Index([2614, 6544, 6668, 9166, 13485], dtype='int64')

In [27]:
# Remove rows whose comment is missing. Selecting them by condition rather than
# by a pasted list of row labels keeps this cell correct if the CSV changes.
df = df.dropna(subset=['comment'])

In [28]:
# Renumber the rows from 0; the previous row labels are kept as a new column
df = df.reset_index(drop=False)

In [29]:
# Discard the two leftover index columns.
# NOTE(review): 'level_0' is only created by reset_index() when the frame
# already had an 'index' column — confirm against the CSV.
df = df.drop(columns=['level_0', 'index'])

### Get Polarity Scores

In [31]:
# Build one analyzer and reuse it for every comment
analyzer = SentimentIntensityAnalyzer()
# Score each comment; each entry of polarity_scores is the dictionary the analyzer returns
df['polarity_scores'] = [analyzer.polarity_scores(text) for text in df['comment']]

Because the polarity scores are returned as a dictionary, we create a separate column for each individual score.

In [34]:
# One new column per key of the score dictionary, added in this order
for score_key in ('pos', 'neg', 'neu', 'compound'):
    df[score_key] = [scores[score_key] for scores in df['polarity_scores']]

Remove polarity_scores column

In [35]:
# The dictionary column is redundant once its values have their own columns
df = df.drop(columns=['polarity_scores'])

---

### Change Compound Score to Binary (pos/neg)

In [36]:
def _to_binary(score):
    """Map a compound score to 1 when positive and 0 when negative.

    A score that is neither (exactly zero, or NaN) is returned unchanged,
    so an exact zero ends up with the value 0.
    """
    if score > 0:
        return 1
    if score < 0:
        return 0
    return score

df['compound_binary'] = df['compound'].apply(_to_binary)

---

### Save New Data Frame

In [50]:
# Write the scored frame to a csv file, leaving out the row index
output_csv = 'comments_metadata_sentiment.csv'
df.to_csv(output_csv, index=False)

In [7]:
# # Convert to csv
# df.to_csv('sqr_comments_sentiment.csv', index=False)

---

### Get a random school's stats

This uses the function from `project_functions.py`, defined again in the cell below. It displays a small snapshot of a randomly selected NYC K-8 school.

In [48]:
def random_school(df):
    """Print the school code, borough and sentiment scores of one random row.

    The row is drawn uniformly from every row of ``df`` and read by position,
    so the function works for any frame size and any index labels.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'dbn', 'borough', 'compound', 'pos', 'neu'
        and 'neg'.

    Raises
    ------
    ValueError
        If ``df`` has no rows.
    """
    row_count = len(df)
    if row_count == 0:
        raise ValueError('random_school() needs a data frame with at least one row')
    # Size the draw from the frame itself instead of a fixed row count, and use
    # positional access so a non-default index cannot cause a KeyError.
    row = df.iloc[randrange(row_count)]
    print('School: ' + str(row['dbn']))
    print('Borough: ' + str(row['borough']))
    print('Compound: ' + str(row['compound']))
    print('Pct Positive: ' + str(row['pos']))
    print('Pct Neutral: ' + str(row['neu']))
    print('Pct Negative: ' + str(row['neg']))

In [49]:
# Print the snapshot for one randomly drawn row of the scored data frame
random_school(df)

School: 01M015
Borough: manhattan
Compound: 0.3612
Pct Positive: 0.116
Pct Neutral: 0.884
Pct Negative: 0.0
