# Sentiment Analyzer for News Quotes

1. Read cleaned data into two pandas dataframes (quotes and non-quoted text)
2. Pass the text from each row into the sentiment analyzer
3. Save output into new columns 'negative', 'neutral', 'positive', 'compound'
4. Save output into a new Excel workbook with three sheets: one for quotes, one for non-quotes, and one comparing their compound scores

In [None]:
# run this code if connecting to a Google drive
# (only works inside Google Colab, which provides the google.colab package)
from google.colab import drive

# mount the Drive at /content/drive; the file paths used later in this
# notebook are all rooted under that directory
drive.mount('/content/drive')

In [None]:
!pip install vaderSentiment

In [3]:
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

## Extracting Data from Excel Files

In [22]:
# Path to the input workbook; point this at your own copy of quotes_input.xlsx
fp = '/content/drive/My Drive/evaluation_quotes/quotes_input.xlsx'

# Both dataframes are read from the same worksheet (read_excel's default, the
# first one) and share the text_id / text_name identifier columns.
quote_columns = ["text_id", "text_name", "quote", "speaker", "verb"]
non_quote_columns = ["text_id", "text_name", "non_quoted_text"]

quotes_df = pd.read_excel(fp, usecols=quote_columns)
non_quotes_df = pd.read_excel(fp, usecols=non_quote_columns)

In [23]:
# Give both dataframes the four score columns up front. Assigning an empty
# float Series aligns on the index, so every row starts out as NaN until the
# analyzer fills it in.
score_columns = ('negative', 'neutral', 'positive', 'compound')

for frame in (quotes_df, non_quotes_df):
  for column in score_columns:
    frame[column] = pd.Series(dtype='float')

## Running Quotes through VADER

Vader Documentation: https://github.com/cjhutto/vaderSentiment

Guide to using Vader: https://medium.com/@rslavanyageetha/vader-a-comprehensive-guide-to-sentiment-analysis-in-python-c4f1868b0d2e

The VADER sentiment analyzer returns a dictionary of sentiment intensity scores
for a given text input, with four keys: negative, neutral, positive, and
compound. The negative, neutral, and positive scores are the proportions of the
text that fall into each category, so each lies between 0 and 1. The compound
score is a normalized overall score from -1 to 1, with -1 indicating entirely
negative, 0 indicating entirely neutral, and 1 indicating entirely positive.


In [24]:
# helper function to extract scores for each story
# args: dataframe, column name of text to be analyzed as a string,
#       optional analyzer object to reuse across calls
def get_sentiment_score(df, col, analyzer=None):
  """Score the text in ``df[col]`` and store the results in ``df`` in place.

  Args:
    df: dataframe holding the text. Its 'negative', 'neutral', 'positive'
      and 'compound' columns are filled in row by row.
    col: name of the column in ``df`` that contains the text to analyze.
    analyzer: optional object exposing ``polarity_scores(text)``. When
      omitted, a single ``SentimentIntensityAnalyzer`` is created and used
      for every row.

  Returns:
    None; ``df`` is modified in place. Rows whose text is missing (NaN/None)
    are skipped, so their score cells keep whatever value they already had.
  """
  # Build the analyzer once rather than once per row: it does not depend on
  # the row being scored.
  if analyzer is None:
    analyzer = SentimentIntensityAnalyzer()

  # key in the polarity_scores() result -> dataframe column that receives it
  score_columns = {
    'neg': 'negative',
    'neu': 'neutral',
    'pos': 'positive',
    'compound': 'compound',
  }

  for index, text in df[col].items():
    # Empty spreadsheet cells arrive as NaN, which is not text and cannot be
    # scored; leave those rows unscored instead of failing the whole run.
    if pd.isna(text):
      continue

    score = analyzer.polarity_scores(str(text))

    for key, column in score_columns.items():
      df.loc[index, column] = score[key]

In [25]:
# score the 'quote' column; the four score columns of quotes_df are filled in place
get_sentiment_score(quotes_df, 'quote')

In [None]:
# preview the first few rows to check the quote scores
quotes_df.head()

In [27]:
# score the 'non_quoted_text' column; the four score columns of non_quotes_df are filled in place
get_sentiment_score(non_quotes_df, 'non_quoted_text')

In [None]:
# preview the first few rows to check the non-quote scores
non_quotes_df.head()

In [None]:
# Build a third dataframe that puts the compound score of the quotes next to
# the compound score of the non-quoted text for the same text.
key_columns = ['text_id', 'text_name']

# Name each side's score column before merging so no suffixes are needed.
quote_scores = quotes_df[key_columns + ['compound']].rename(
    columns={'compound': 'quote_score'})
non_quote_scores = non_quotes_df[key_columns + ['compound']].rename(
    columns={'compound': 'non_quote_score'})

# Inner join: only (text_id, text_name) pairs present on both sides are kept.
# NOTE(review): if a key pair occurs more than once on either side, the merge
# returns every pairing of those rows — confirm the keys are unique per row.
scores_comp_df = quote_scores.merge(non_quote_scores, on=key_columns)

scores_comp_df.head()

## Write Output to New Workbook

In [None]:
!pip install xlsxwriter

In [30]:
# Path of the workbook to create; change it to write the results elsewhere
output = '/content/drive/My Drive/evaluation_quotes/quotes_sentiment.xlsx'

# Open the workbook through a context manager so it is saved and closed when
# the block exits, including when one of the writes below raises. Closing it
# by hand afterwards would be skipped on an error and leave the writer open.
with pd.ExcelWriter(output, engine="xlsxwriter") as writer:
  # write each dataframe to its own worksheet, without the index column
  quotes_df.to_excel(writer, sheet_name="quotes", index=False)
  non_quotes_df.to_excel(writer, sheet_name="non_quotes", index=False)
  scores_comp_df.to_excel(writer, sheet_name="scores_comp", index=False)