In [157]:
import os
import re
import unicodedata
import numpy as np
import pandas as pd
import warnings
import nltk
from nltk.corpus import stopwords
import en_core_web_md
import matplotlib.pyplot as plt
import plotly.express as px
from nrclex import NRCLex
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from collections import defaultdict
from textblob import TextBlob
from nltk.tokenize import TreebankWordTokenizer
from nltk.tokenize import sent_tokenize
from nltk.corpus import cmudict
nltk.download("punkt")
from nltk.stem import WordNetLemmatizer
nltk.download ('wordnet')

[nltk_data] Downloading package punkt to /home/muddy/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /home/muddy/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [4]:
paths = ['./Data/']

speeches = []

for path in paths:
    list_of_files = []
    for root, dirs, files in os.walk(path):
        for file in files:
            if file.endswith('.txt'):
                list_of_files.append(os.path.join(root,file))
   
    for file in list_of_files:
        with open(file, encoding='utf-8') as f:
            text = f.read()
        f.close()
        speeches.append(text)

#clean out goofy unicode  space characters 
speeches = [unicodedata.normalize("NFKD", speech) for speech in speeches if len(speech)>0 ]
#clean out xa0 space characters
[speech.replace(u'\xa0', '') for speech in speeches]; # ; supresses output
# remove [stuff] in between square brackets
def remove_bracket(text):
    return re.sub(r'(\[[^w]*\]\s)', '',text)
speeches = [remove_bracket(speech) for speech in speeches]

# # lemmatize text with spacy
# nlp = spacy.load("en_core_web_md", exclude=["parser", "ner"])
# for i, doc in enumerate(nlp.pipe(speeches)):
#     words_lemmas_list = [token.lemma_ for token in doc]
#     speeches[i]= ' '.join(words_lemmas_list)
    
# Remove punctuation - only care about words here
# speeches = [re.sub(r'[^\w\s]', '', speech) for speech in speeches]

# remove numbers
# speeches = [re.sub(r'\d', '', speech) for speech in speeches]

# Clean up whitespace
speeches = [re.sub('[\s+]', ' ', speech) for speech in speeches]

df = pd.DataFrame({'filepath' : list_of_files,
                   'text' : speeches})
datetitle = pd.read_csv('datetitle.csv')
#datetitle.url = [file.replace('Data/', './Data/') for file in datetitle.url]
datetitle.date = pd.to_datetime(datetitle.date, format='%Y-%m-%d')
datetitle = datetitle.drop('title', axis=1)
datetitle = datetitle.rename(columns={'url': 'filepath'})
df = pd.merge(df, datetitle, how='inner', on='filepath')
df = df.sort_values(by='date', ignore_index=True)
df = df[['date', 'filepath', 'text']]
df['source'] = 'oba'
df.set_index('date', inplace=True)
text_df = df.copy()
text_df.head()

Unnamed: 0_level_0,filepath,text,source
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2002-10-02,./Data/barackobamairaqwarspeechfederalplaza.txt,Good afternoon. Let begin by saying that altho...,oba
2005-01-06,./Data/barackobamasenatespeechonohioelectoralv...,"Thank you very much, Mr. President; Ladies and...",oba
2005-10-25,./Data/barackobamasenatespeechrosaparks.txt,"Mr. President, today the nation mourns a genui...",oba
2005-12-15,./Data/barackobamasenatefloorspeechpatriotact.txt,"Thank you very much, Mr. President. You know, ...",oba
2006-01-31,./Data/barackobamasenatespeechoncorettascottki...,"Thank you very much, Mr. President. Today, we ...",oba


In [5]:
text_df['TBsubjectivity']=[TextBlob(text).sentiment.subjectivity for text in text_df['text']]
text_df['TBpolarity']=[TextBlob(text).sentiment.polarity for text in text_df['text']]

In [8]:
text_df[text_df.TBsubjectivity == text_df.TBsubjectivity.max()]

Unnamed: 0_level_0,filepath,text,source,TBsubjectivity,TBpolarity
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2013-01-21,./Data/barackobamainauguralluncheontoast.txt,Michelle and the Speaker of the House came to ...,oba,0.601327,0.180204


In [9]:
text_df[text_df.TBsubjectivity == text_df.TBsubjectivity.min()]

Unnamed: 0_level_0,filepath,text,source,TBsubjectivity,TBpolarity
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2011-03-19,./Data/barackobamalibyaltdmilitaryaction.txt,"Good afternoon, everybody. Today I authorized...",oba,0.291015,0.179076


In [10]:
text_df[text_df.TBpolarity == text_df.TBpolarity.max()]

Unnamed: 0_level_0,filepath,text,source,TBsubjectivity,TBpolarity
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-11-10,./Data/barackobamadonaldtrumpmeeting.txt,"Well, I just had the opportunity to have an ex...",oba,0.506327,0.431944


In [11]:
text_df[text_df.TBpolarity == text_df.TBpolarity.min()]

Unnamed: 0_level_0,filepath,text,source,TBsubjectivity,TBpolarity
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2013-02-26,./Data/barackobamarosaparksstatue.txt,"Mr. Speaker, Leader Reid, Leader McConnell, Le...",oba,0.486788,-0.008304


In [15]:
fig = px.line(text_df, x=text_df.index, y="TBsubjectivity", hover_name=text_df.index,
             title='TextBlob subjectivity')
fig.show()

In [156]:
plot_df = text_df.iloc[15:]
fig = px.line(plot_df, x=plot_df.index, y="TBpolarity", hover_name=plot_df.index,
             title='TextBlob polarity')
fig.show()

In [21]:
fig.write_image("./plots/textblob_polarity_oba_amrhet.png", format='png', engine='kaleido')

In [47]:
text_df.sort_values(by='TBpolarity').tail(3)

Unnamed: 0_level_0,filepath,text,source,TBsubjectivity,TBpolarity
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-02-13,./Data/barackobamaantoninscaliapassing.txt,"Good evening, everybody. For almost 30 years,...",oba,0.576438,0.333186
2016-06-09,./Data/barackobamahillaryclintonpresidentialen...,"For more than a year now, across thousands of ...",oba,0.50619,0.358175
2016-11-10,./Data/barackobamadonaldtrumpmeeting.txt,"Well, I just had the opportunity to have an ex...",oba,0.506327,0.431944


In [52]:
most_pos = ['./Data/barackobamaantoninscaliapassing.txt',
            './Data/barackobamahillaryclintonpresidentialendorsement.txt',
            './Data/barackobamadonaldtrumpmeeting.txt']
# first count max number of sentences
count = []
for i, doc in enumerate(most_pos):
    sentences = sent_tokenize(text_df.query('filepath==@doc').text[0])
    count.append(len(sentences))
max_count = max(count)
speeches = pd.DataFrame(index=range(max_count))

for i, doc in enumerate(most_pos):
    sentences = sent_tokenize(text_df.query('filepath==@doc').text[0])
    col = 'polarity'+str(i)
    polarity = [TextBlob(text).sentiment.polarity for text in sentences]
    polarity = polarity + [0]*(max_count-len(sentences))
    speeches[col] = polarity

In [53]:
speeches.mean()

polarity0    0.212879
polarity1    0.228625
polarity2    0.141026
dtype: float64

In [54]:
speeches.std()

polarity0    0.299185
polarity1    0.309561
polarity2    0.282095
dtype: float64

In [58]:
fig = px.line(speeches, x=speeches.index, y="polarity1",
             title='TextBlob polarity')
fig.show()

polarity0    0.75
polarity1    0.80
polarity2    1.00
dtype: float64

In [85]:
most_pos = pd.DataFrame(text_df.filepath)
most_pos.set_index(text_df.index)
# first count max number of sentences
count = []
for i, doc in enumerate(most_pos.filepath):
    sentences = sent_tokenize(text_df.query('filepath==@doc').text[0])
    count.append(len(sentences))
max_count = max(count)
allspeeches = pd.DataFrame(index=range(max_count))

for i, doc in enumerate(most_pos.filepath):
    sentences = sent_tokenize(text_df.query('filepath==@doc').text[0])
    #col = 'polarity'+str(i)
    col = str(most_pos.index[i])[:10]
    polarity = [TextBlob(text).sentiment.polarity for text in sentences]
    polarity = polarity + [0]*(max_count-len(sentences))
    allspeeches[col] = polarity


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented fr


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented fr


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented fr


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented fr


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented fr


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented fr


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented fr


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented fr


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented fr


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented fr


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented fr

In [95]:
allspeeches.mean().sort_values()

2013-02-26   -0.001724
2002-10-02   -0.000784
2011-04-26    0.001042
2009-01-05    0.001369
2012-09-12    0.001476
                ...   
2014-01-29    0.096669
2011-12-06    0.105179
2015-01-21    0.106200
2013-07-24    0.124776
2015-07-28    0.127265
Length: 405, dtype: float64

In [94]:
top_3 = allspeeches.mean().sort_values().tail(3).index
top_3

Index(['2015-01-21', '2013-07-24', '2015-07-28'], dtype='object')

In [93]:
text_df.loc[top_3]

Unnamed: 0,filepath,text,source,TBsubjectivity,TBpolarity
2015-01-21,./Data/stateoftheunion2015.txt,"Mr. Speaker, Mr. Vice President, Members of Co...",oba,0.448952,0.158219
2013-07-24,./Data/barackobamaknoxcollegeeconomy.txt,"Hello, Galesburg! Well, it’s good to be home ...",oba,0.439958,0.165498
2015-07-28,./Data/barackobamaafricancontinentreps.txt,Thank you. Thank you so much. Madam Chairwoma...,oba,0.433907,0.198884


In [102]:
allspeeches['2015-07-28']

0.127264509947045

In [147]:
plot_this= pd.DataFrame(allspeeches.loc[0:342, '2015-07-28'])
fig = px.line(plot_this, x=plot_this.index, y="2015-07-28",
              labels={'2015-07-28':'polarity'},
             title='TextBlob polarity: 2015 African Union speech')
fig.show()

In [148]:
fig.write_image("./plots/textblob_polarity_african_union_speech.png", format='png', engine='kaleido')

In [120]:
allspeeches['2015-07-28'].sort_values()

24    -0.80
29    -0.50
166   -0.50
236   -0.50
263   -0.35
       ... 
287    0.85
253    0.85
235    0.85
3      1.00
114    1.00
Name: 2015-07-28, Length: 428, dtype: float64

In [117]:
sent_tokenize(text_df.query('filepath=="./Data/barackobamaafricancontinentreps.txt"').text[0])[24]

'Dignity was seen as a virtue reserved to those of rank and privilege, kings and elders.'

In [122]:
sent_tokenize(text_df.query('filepath=="./Data/barackobamaafricancontinentreps.txt"').text[0])[114]

'History shows that the nations that do best are the ones that invest in the education of their people.'

In [200]:
text_df['nrc'] = [NRCLex(speech).affect_frequencies for speech in text_df.text]
# remove 'anticip'
for i, dct in enumerate(text_df.nrc):
    dctn = dct.pop('anticip')
    text_df.nrc.loc[i] = dct

In [204]:
columns = ['fear', 'anger', 'trust', 'surprise', 'positive', 'negative',
           'sadness', 'disgust', 'joy', 'anticipation']

In [216]:
text_df[columns] = [list(dct.values()) for dct in text_df.nrc]

In [218]:
# Compare TextBlob to NRCLex
text_df.corr(numeric_only=True)

Unnamed: 0,TBsubjectivity,TBpolarity,fear,anger,trust,surprise,positive,negative,sadness,disgust,joy,anticipation
TBsubjectivity,1.0,0.307927,-0.134265,-0.039109,0.024463,0.137095,-0.078407,-0.125458,0.044761,0.095737,0.298766,0.020468
TBpolarity,0.307927,1.0,-0.513125,-0.503859,0.411718,0.290082,0.43646,-0.575041,-0.428331,-0.405011,0.552948,0.392072
fear,-0.134265,-0.513125,1.0,0.714736,-0.552972,-0.366343,-0.620553,0.670347,0.387517,0.468183,-0.654472,-0.668047
anger,-0.039109,-0.503859,0.714736,1.0,-0.63128,-0.279816,-0.714942,0.716355,0.499507,0.579551,-0.600012,-0.622592
trust,0.024463,0.411718,-0.552972,-0.63128,1.0,-0.065755,0.535617,-0.550722,-0.543114,-0.524052,0.30962,0.270503
surprise,0.137095,0.290082,-0.366343,-0.279816,-0.065755,1.0,0.020387,-0.381353,-0.197942,-0.052437,0.385424,0.441185
positive,-0.078407,0.43646,-0.620553,-0.714942,0.535617,0.020387,1.0,-0.771317,-0.686729,-0.644578,0.417397,0.421034
negative,-0.125458,-0.575041,0.670347,0.716355,-0.550722,-0.381353,-0.771317,1.0,0.704629,0.513586,-0.713103,-0.625609
sadness,0.044761,-0.428331,0.387517,0.499507,-0.543114,-0.197942,-0.686729,0.704629,1.0,0.481453,-0.424974,-0.456612
disgust,0.095737,-0.405011,0.468183,0.579551,-0.524052,-0.052437,-0.644578,0.513586,0.481453,1.0,-0.380394,-0.470532


In [219]:
text_df.mean(numeric_only=True)

TBsubjectivity    0.464998
TBpolarity        0.148944
fear              0.083926
anger             0.056321
trust             0.161268
surprise          0.043152
positive          0.259467
negative          0.116806
sadness           0.054008
disgust           0.025886
joy               0.091556
anticipation      0.107609
dtype: float64

In [235]:
text_df[['trust', 'joy', 'anticipation']].sum(axis=1).sort_values()

date
2014-09-24    0.171745
2002-10-02    0.174419
2012-12-19    0.219512
2011-03-18    0.220280
2013-09-22    0.220690
                ...   
2016-02-20    0.495238
2015-12-10    0.503778
2016-02-13    0.521739
2010-03-13    0.523438
2016-11-10    0.571429
Length: 433, dtype: float64

In [236]:
# top 3 most hopefull, assuming "hope" is a combiation of trust, joy and anticipation
top_3_h = text_df[['trust', 'joy', 'anticipation']].sum(axis=1).sort_values().tail(3).index
text_df.loc[top_3_h]

Unnamed: 0_level_0,filepath,text,source,TBsubjectivity,TBpolarity,nrc,fear,anger,trust,surprise,positive,negative,sadness,disgust,joy,anticipation
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2016-02-13,./Data/barackobamaantoninscaliapassing.txt,"Good evening, everybody. For almost 30 years,...",oba,0.576438,0.333186,"{'fear': 0.050724637681159424, 'anger': 0.0144...",0.050725,0.014493,0.246377,0.021739,0.304348,0.043478,0.043478,0.0,0.137681,0.137681
2010-03-13,./Data/barackobamanochildleftbehind.txt,Lost in the news of the week was a headline th...,oba,0.37685,0.138842,"{'fear': 0.03515625, 'anger': 0.01953125, 'tru...",0.035156,0.019531,0.21875,0.03125,0.304688,0.042969,0.023438,0.019531,0.128906,0.175781
2016-11-10,./Data/barackobamadonaldtrumpmeeting.txt,"Well, I just had the opportunity to have an ex...",oba,0.506327,0.431944,"{'fear': 0.0, 'anger': 0.0, 'trust': 0.2619047...",0.0,0.0,0.261905,0.095238,0.261905,0.047619,0.02381,0.0,0.142857,0.166667
