# Sentiment Analyzer for News Quotes

1. Read cleaned data into a pandas dataframe
2. Pass the quotes and the non-quoted text from each article into the sentiment analyzer
3. Save output into new columns 'negative', 'neutral', 'positive', 'compound'
4. Save output into a new Excel workbook with two sheets, one for quotes and one for non-quotes

In [1]:
# Only needed when the input/output files live on Google Drive (Colab runtime).
from google.colab import drive

# directory under which the Drive contents are mounted
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [2]:
!pip install vaderSentiment

Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/126.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [23]:
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

## Extracting Data from Excel Files

In [4]:
# replace with quotes_input.xlsx
fp = '/content/drive/My Drive/evaluation_quotes/test_CBC_20.xlsx'

# identifier columns kept in both frames, followed by the frame-specific ones
id_columns = ["text_id", "text_name"]
quote_columns = id_columns + ["quote", "speaker", "verb"]
non_quote_columns = id_columns + ["non_quoted_text"]

# the same workbook is read twice, once per column subset
quotes_df = pd.read_excel(fp, usecols=quote_columns)
non_quotes_df = pd.read_excel(fp, usecols=non_quote_columns)

In [8]:
# Add the four (initially empty) sentiment columns to both dataframes.
sentiment_columns = ('negative', 'neutral', 'positive', 'compound')

for frame in (quotes_df, non_quotes_df):
  for column in sentiment_columns:
    # an empty Series leaves every row of the new column unset until it is scored
    frame[column] = pd.Series(dtype='str')

## Running Quotes through VADER

Vader Documentation: https://github.com/cjhutto/vaderSentiment

Guide to using Vader: https://medium.com/@rslavanyageetha/vader-a-comprehensive-guide-to-sentiment-analysis-in-python-c4f1868b0d2e


In [31]:
# running sentiment analysis on quotes
#
# Each 'quote' cell holds the quotes of one row as newline-separated text.
# Every non-blank line is scored on its own, and the scores are written back
# to the sentiment columns as newline-separated strings.

# sentiment column in quotes_df -> key in the dict returned by polarity_scores()
quote_score_keys = {
  'negative': 'neg',
  'neutral': 'neu',
  'positive': 'pos',
  'compound': 'compound',
}

# Text stored in place of a score for a blank line.
# NOTE(review): because the pieces are joined with '\n' as well, one blank
# input line ends up as three consecutive newlines in the output. Set this to
# '' if the score lines should line up one-to-one with the input lines.
blank_quote_placeholder = '\n'

# The analyzer does not depend on the row, so it is created once for all rows
# instead of once per row.
analyzer = SentimentIntensityAnalyzer()

for index, row in quotes_df.iterrows():
  raw_quotes = row['quote']

  # An empty Excel cell is read as NaN, which has no .split(); leave the
  # sentiment columns of such a row unset instead of crashing.
  if pd.isna(raw_quotes):
    continue

  # one list of per-line scores for each sentiment column
  scores_by_column = {column: [] for column in quote_score_keys}

  # str() guards against cells that were read as numbers rather than text
  for quote in str(raw_quotes).split('\n'):
    # to avoid analyzing empty lines
    if quote.strip():
      score = analyzer.polarity_scores(quote)
      for column, key in quote_score_keys.items():
        scores_by_column[column].append(str(score[key]))
    else:
      for column in quote_score_keys:
        scores_by_column[column].append(blank_quote_placeholder)

  # write list of scores as newline separated strings to sentiment columns
  for column, line_scores in scores_by_column.items():
    quotes_df.loc[index, column] = '\n'.join(line_scores)


In [32]:
# preview the first rows of quotes_df, including the sentiment columns filled in above
quotes_df.head()

Unnamed: 0,text_id,text_name,quote,speaker,verb,negative,neutral,positive,compound
0,225d6e59c7f2348678db2c7121a5809e,CBC_18079,"""On the issues raised by the employer in this ...",Justice Kathryn Gregory\nGregory\nthe statemen...,wrote\nsaid\nsaid\nsaid\naccording to\nargued\...,0.188\n0.0\n0.0\n\n\n0.232\n0.0\n0.0\n\n\n0.0\...,0.776\n0.831\n1.0\n\n\n0.768\n0.93\n1.0\n\n\n0...,0.036\n0.169\n0.0\n\n\n0.0\n0.07\n0.0\n\n\n0.2...,-0.6883\n0.2584\n0.0\n\n\n-0.6632\n0.128\n0.0\...
1,25cb38bd3b25a94903b6dd298423dbdf,CBC_14882,the vehicle located off Birch Point Road in So...,A news release\nthe release,said\naccording to,0.0\n0.0\n\n\n\n,1.0\n1.0\n\n\n\n,0.0\n0.0\n\n\n\n,0.0\n0.0\n\n\n\n
2,29bfccacecc572566db7aea886fe3c28,CBC_16613,he likes the concept of the pedestrian corrido...,Pettigrew\nMayor DiManno\nthe town.\n\n\nDarre...,said\nsaid\naccording to\nsaid\nhighlighted\ns...,0.0\n0.0\n0.0\n0.0\n\n\n0.0\n0.0\n0.0\n0.0\n0.0,0.714\n1.0\n1.0\n1.0\n\n\n0.825\n0.803\n1.0\n0...,0.286\n0.0\n0.0\n0.0\n\n\n0.175\n0.197\n0.0\n0...,0.4215\n0.0\n0.0\n0.0\n\n\n0.7003\n0.4019\n0.0...
3,332daa7b43c5a159eb89340d9af25334,CBC_12784,Shohei Ohtani can opt out of his $700 million ...,a person familiar with the agreement\nthe pers...,told\nsaid\nreported\nsaid\nsaid\nreported\nre...,0.0\n0.099\n0.0\n0.0\n0.05\n0.0\n\n\n\n\n0.101...,1.0\n0.901\n1.0\n0.901\n0.95\n1.0\n\n\n\n\n0.8...,0.0\n0.0\n0.0\n0.099\n0.0\n0.0\n\n\n\n\n0.0\n0...,0.0\n-0.5267\n0.0\n0.4939\n-0.296\n0.0\n\n\n\n...
4,3f87d6483ff30767ac0040771e0e18c5,CBC_6356,that the death was initially considered suspic...,"Jared Buhler, who was the lead investigator fo...",testified\nsaid\nsaid\nsaid\nsaid\ntold\nnoted...,0.561\n0.0\n0.0\n0.213\n0.295\n0.239\n0.0\n0.0...,0.439\n1.0\n1.0\n0.787\n0.705\n0.761\n1.0\n1.0...,0.0\n0.0\n0.0\n0.0\n0.0\n0.0\n0.0\n0.0\n0.0\n0...,-0.7506\n0.0\n0.0\n-0.3491\n-0.7717\n-0.5267\n...


In [33]:
# running sentiment analysis on non_quotes
#
# Each 'non_quoted_text' cell holds the non-quoted text of one row as
# newline-separated lines. Every non-blank line is scored on its own, and the
# scores are written back to the sentiment columns as newline-separated strings.

# sentiment column in non_quotes_df -> key in the dict returned by polarity_scores()
text_score_keys = {
  'negative': 'neg',
  'neutral': 'neu',
  'positive': 'pos',
  'compound': 'compound',
}

# Text stored in place of a score for a blank line.
# NOTE(review): because the pieces are joined with '\n' as well, one blank
# input line ends up as three consecutive newlines in the output. Set this to
# '' if the score lines should line up one-to-one with the input lines.
blank_text_placeholder = '\n'

# The analyzer does not depend on the row, so it is created once for all rows
# instead of once per row.
analyzer = SentimentIntensityAnalyzer()

for index, row in non_quotes_df.iterrows():
  raw_text = row['non_quoted_text']

  # An empty Excel cell is read as NaN, which has no .split(); leave the
  # sentiment columns of such a row unset instead of crashing.
  if pd.isna(raw_text):
    continue

  # one list of per-line scores for each sentiment column
  scores_by_column = {column: [] for column in text_score_keys}

  # str() guards against cells that were read as numbers rather than text
  for text in str(raw_text).split('\n'):
    # to avoid analyzing empty lines
    if text.strip():
      score = analyzer.polarity_scores(text)
      for column, key in text_score_keys.items():
        scores_by_column[column].append(str(score[key]))
    else:
      for column in text_score_keys:
        scores_by_column[column].append(blank_text_placeholder)

  # write list of scores as newline separated strings to sentiment columns
  for column, line_scores in scores_by_column.items():
    non_quotes_df.loc[index, column] = '\n'.join(line_scores)

In [34]:
# preview the first rows of non_quotes_df, including the sentiment columns filled in above
non_quotes_df.head()

Unnamed: 0,text_id,text_name,non_quoted_text,negative,neutral,positive,compound
0,225d6e59c7f2348678db2c7121a5809e,CBC_18079,he New Brunswick government has lost its bid t...,0.219\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\...,0.781\n\n\n1.0\n\n\n1.0\n\n\n0.775\n\n\n1.0\n\...,0.0\n\n\n0.0\n\n\n0.0\n\n\n0.225\n\n\n0.0\n\n\...,-0.7096\n\n\n0.0\n\n\n0.0\n\n\n0.4404\n\n\n0.0...
1,25cb38bd3b25a94903b6dd298423dbdf,CBC_14882,Cape Breton Regional Police found a body near ...,0.0\n\n\n0.0\n\n\n0.122\n\n\n0.0\n\n\n0.194,1.0\n\n\n1.0\n\n\n0.751\n\n\n0.845\n\n\n0.73,0.0\n\n\n0.0\n\n\n0.127\n\n\n0.155\n\n\n0.076,0.0\n\n\n0.0\n\n\n0.0258\n\n\n0.5106\n\n\n-0.5719
2,29bfccacecc572566db7aea886fe3c28,CBC_16613,"anff Avenue's temporary pedestrian corridor, w...",0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n0...,1.0\n\n\n0.928\n\n\n0.952\n\n\n0.847\n\n\n0.69...,0.0\n\n\n0.072\n\n\n0.048\n\n\n0.153\n\n\n0.30...,0.0\n\n\n0.4019\n\n\n0.2263\n\n\n0.4404\n\n\n0...
3,332daa7b43c5a159eb89340d9af25334,CBC_12784,if either of two key executives is no longer i...,0.128\n\n\n0.0\n\n\n0.106\n\n\n0.0\n\n\n\n\n\n...,0.872\n\n\n1.0\n\n\n0.894\n\n\n1.0\n\n\n\n\n\n...,0.0\n\n\n0.0\n\n\n0.0\n\n\n0.0\n\n\n\n\n\n\n0....,-0.296\n\n\n0.0\n\n\n-0.357\n\n\n0.0\n\n\n\n\n...
4,3f87d6483ff30767ac0040771e0e18c5,CBC_6356,ay Three of a fatality inquiry into the deaths...,0.198\n\n\n0.129\n\n\n0.11\n\n\n0.153\n\n\n0.1...,0.802\n\n\n0.871\n\n\n0.821\n\n\n0.847\n\n\n0....,0.0\n\n\n0.0\n\n\n0.069\n\n\n0.0\n\n\n0.105\n\...,-0.8555\n\n\n-0.6486\n\n\n-0.3818\n\n\n-0.5574...


## Write Output to New Workbook

In [27]:
!pip install xlsxwriter

Collecting xlsxwriter
  Downloading XlsxWriter-3.2.0-py3-none-any.whl.metadata (2.6 kB)
Downloading XlsxWriter-3.2.0-py3-none-any.whl (159 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/159.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m153.6/159.9 kB[0m [31m5.1 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m159.9/159.9 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: xlsxwriter
Successfully installed xlsxwriter-3.2.0


In [35]:
# replace with 'quotes_sentiment.xlsx'
output = '/content/drive/My Drive/evaluation_quotes/CBC_20_sentiment.xlsx'

# Create the workbook inside a context manager: the writer is closed when the
# block exits, even if one of the to_excel() calls raises, so the file handle
# is never left open.
with pd.ExcelWriter(output, engine="xlsxwriter") as writer:
  # write dataframes to different worksheets
  quotes_df.to_excel(writer, sheet_name="quotes")
  non_quotes_df.to_excel(writer, sheet_name="non_quotes")