<a href="https://colab.research.google.com/github/souradipta93/NLP/blob/main/sentiment_debates.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Sentiment analysis of debates

* Analyze the text of the 2016 US election debates for two speakers, Trump and Clinton
* Compute comparative sentiment scores for the two speakers


In [None]:
import numpy as np
import pandas as pd

import warnings
# Suppress every warning for the rest of the session. NOTE(review): this also
# hides deprecation notices from pandas/nltk; consider narrowing the filter.
warnings.filterwarnings('ignore')

In [None]:
from matplotlib import pyplot as plt
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from collections import defaultdict
import seaborn as sns

# Remove the cap on how many columns pandas shows when displaying a DataFrame
pd.set_option('display.max_columns', None)

In [None]:
## Load the debate lines from CSV and list the columns that came with them
debates = pd.read_csv(filepath_or_buffer='debates_small.csv')
debates.columns

Index(['Line', 'Speaker', 'Text', 'Date', 'Party', 'Location', 'URL'], dtype='object')

In [None]:
## Drop the columns that the sentiment analysis does not use
unused_columns = ['Line', 'Location', 'Date', 'URL']
debate1 = debates.drop(columns=unused_columns)

In [None]:
# Number of rows (spoken lines) attributed to each speaker
debate1.loc[:, 'Speaker'].value_counts()

Trump      878
Clinton    704
Name: Speaker, dtype: int64

In [None]:
## Function to extract sentiment scores
def score_speaker(speaker_df, scorer=None):
    """Score each row's ``Text`` and return one row of sentiment scores per input row.

    Parameters
    ----------
    speaker_df : pandas.DataFrame
        Frame with a ``Text`` column containing the strings to score.
    scorer : object, optional
        Any object exposing ``polarity_scores(text)`` that returns a mapping
        with ``neg``, ``neu``, ``pos`` and ``compound`` entries. When omitted,
        a single ``SentimentIntensityAnalyzer`` is created and reused for
        every row.

    Returns
    -------
    pandas.DataFrame
        Columns ``neg``, ``neu``, ``pos``, ``compound`` (in that order), with
        the same index as ``speaker_df`` so the result aligns row-for-row
        with the input. Empty input yields an empty frame that still carries
        the four columns.
    """
    if scorer is None:
        # Build the analyzer once instead of once per row: it does not depend
        # on the row being scored, so re-creating it inside the loop only
        # repeats its setup work.
        scorer = SentimentIntensityAnalyzer()

    # Collect scores positionally rather than in a dict keyed by index label;
    # a dict silently overwrites rows whenever the input index has duplicates.
    records = [scorer.polarity_scores(text) for text in speaker_df['Text']]

    return pd.DataFrame(
        records,
        index=speaker_df.index,
        columns=['neg', 'neu', 'pos', 'compound'],
    )

In [None]:
## Keep only the Text column (as a one-column DataFrame) for scoring
df1 = debate1.loc[:, ['Text']]
df1.head()

Unnamed: 0,Text
0,Thank you.
1,I'm running for president to knock down all th...
2,"Judy, I think that the best analysis that I've..."
3,I can only say that we both share the goal of ...
4,And why I am a staunch supporter of President ...


In [None]:
## Score every row of text with the helper defined above
df_sent = score_speaker(speaker_df=df1)

In [None]:
# Display the score table: one row per input row with neg / neu / pos / compound
df_sent

Unnamed: 0,neg,neu,pos,compound
0,0.000,0.286,0.714,0.3612
1,0.059,0.801,0.140,0.9522
2,0.000,0.924,0.076,0.8954
3,0.000,0.833,0.167,0.8225
4,0.052,0.852,0.097,0.8777
...,...,...,...,...
1577,0.000,1.000,0.000,0.0000
1578,0.000,0.870,0.130,0.2732
1579,0.000,0.783,0.217,0.3612
1580,0.000,1.000,0.000,0.0000


In [None]:
## Attach each row's speaker (aligned on the shared index) and put it first
column_order = ['Speaker', 'neg', 'neu', 'pos', 'compound']
df_sent = (
    df_sent
    .assign(Speaker=debate1['Speaker'])
    .reindex(columns=column_order)
)

In [None]:
# Preview the first rows now that the Speaker column leads the score columns
df_sent.head()

Unnamed: 0,Speaker,neg,neu,pos,compound
0,Clinton,0.0,0.286,0.714,0.3612
1,Clinton,0.059,0.801,0.14,0.9522
2,Clinton,0.0,0.924,0.076,0.8954
3,Clinton,0.0,0.833,0.167,0.8225
4,Clinton,0.052,0.852,0.097,0.8777


In [None]:
# Average of each sentiment score, grouped by speaker
df_sent.groupby('Speaker').agg('mean')

Unnamed: 0_level_0,neg,neu,pos,compound
Speaker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Clinton,0.054351,0.814574,0.131087,0.301563
Trump,0.088413,0.784694,0.12689,0.100825
