In [1]:
import glob
import json
import time
from src import tone_analyzer
import pandas as pd
from pandasql import sqldf

## Load text files and analyze the tone of each speech

In [82]:
# Read every speech transcript under ./state_of_the_unions into memory
speeches = {}
for path in glob.glob('./state_of_the_unions/*.txt'):
    with open(path) as handle:
        text = handle.read()
    # The slice skips the 22-character './state_of_the_unions/' prefix and keeps
    # the next 7 characters of the file name as the key; newlines in the speech
    # are flattened to spaces.
    speeches[path[22:29]] = text.replace('\n', ' ')

In [87]:
# Send each speech to the IBM Watson tone analyzer, one request at a time
tones = {}
for speech_key in speeches:
    tones[speech_key] = tone_analyzer.tone_analyzer(text=speeches[speech_key], prints=False)
    # Pause 10 seconds after every request (presumably to stay under the
    # service's rate limit -- TODO confirm)
    time.sleep(10)

In [91]:
# Re-key the tone results by the last four characters of each speech key
# (the year), which is handier for plotting later
tones_year = dict((speech_key[-4:], result) for speech_key, result in tones.items())

## Save speeches and tones as JSON files

In [94]:
# Write each intermediate result to its own JSON file under ./data
for out_path, payload in (
    ('./data/speeches.json', speeches),
    ('./data/tones.json', tones),
    ('./data/tones_year.json', tones_year),
):
    with open(out_path, 'w') as f:
        json.dump(payload, f)

## Save tones by year as csv

In [14]:
# Load the saved per-year tones and transpose the frame; the displayed result
# has one row per year and one column per tone
df = pd.read_json('./data/tones_year.json').T
df.head()

Unnamed: 0,Anger,Disgust,Fear,Joy,Sadness
1961,0.144487,0.499208,0.548613,0.508618,0.553114
1962,0.139498,0.147811,0.169758,0.599229,0.557545
1963,0.131624,0.149008,0.541204,0.470642,0.569984
1964,0.477675,0.181133,0.133369,0.506948,0.56623
1965,0.132977,0.162332,0.14959,0.537658,0.571627


In [15]:
# Write the wide (year x tone) frame to CSV
df.to_csv(path_or_buf='./data/tones_year.csv')

## Make tall dataframe and save as csv

In [16]:
# Turn the index into a regular column and label it 'Year'
df_ind = df.reset_index().rename(columns={'index': 'Year'})
df_ind.head()

Unnamed: 0,Year,Anger,Disgust,Fear,Joy,Sadness
0,1961,0.144487,0.499208,0.548613,0.508618,0.553114
1,1962,0.139498,0.147811,0.169758,0.599229,0.557545
2,1963,0.131624,0.149008,0.541204,0.470642,0.569984
3,1964,0.477675,0.181133,0.133369,0.506948,0.56623
4,1965,0.132977,0.162332,0.14959,0.537658,0.571627


In [17]:
# Reshape the wide frame (one column per tone) into a tall frame with one
# (Year, Tone, Score) row per year/tone pair.
#
# This replaces five copy-pasted SELECT ... UNION statements run through
# pandasql. That query had no ORDER BY, so its row order depended on how SQLite
# happens to evaluate UNION. Here the reshape is done natively with melt, and
# the de-duplication (UNION semantics) and the ordering are made explicit.
tone_columns = ['Anger', 'Disgust', 'Fear', 'Joy', 'Sadness']

df_tall = (
    df_ind
    .melt(id_vars='Year', value_vars=tone_columns, var_name='Tone', value_name='Score')
    # UNION (as opposed to UNION ALL) drops fully identical rows
    .drop_duplicates()
    # Same order the displayed query result shows: by year, then tone
    .sort_values(['Year', 'Tone', 'Score'])
    .reset_index(drop=True)
)
df_tall.head(10)

Unnamed: 0,Year,Tone,Score
0,1961,Anger,0.144487
1,1961,Disgust,0.499208
2,1961,Fear,0.548613
3,1961,Joy,0.508618
4,1961,Sadness,0.553114
5,1962,Anger,0.139498
6,1962,Disgust,0.147811
7,1962,Fear,0.169758
8,1962,Joy,0.599229
9,1962,Sadness,0.557545


In [18]:
# Write the tall (Year, Tone, Score) frame to CSV
df_tall.to_csv(path_or_buf='./data/tall_tones_year.csv')