In [2]:
import glob
import json
import os
import time

import pandas as pd
from pandasql import sqldf

from src import tone_analyzer, extract_tones

## Load speech text and run tone analysis on each speech section

In [10]:
# Load every speech transcript, keyed by the leading characters of its file name
def load_speeches(pattern='./state_of_the_unions/*.txt', key_length=7):
    """Read all text files matching ``pattern`` into a dict of one-line strings.

    Args:
        pattern: Glob pattern selecting the transcript files.
        key_length: Number of leading characters of each file name that are
            used as the dictionary key.

    Returns:
        dict mapping each file-name prefix to that file's text with every
        newline replaced by a single space. If two files share the same
        prefix, the one that sorts last wins.
    """
    loaded = {}
    # glob yields files in arbitrary order; sorting keeps reruns deterministic
    for path in sorted(glob.glob(pattern)):
        with open(path) as f:
            text = f.read().replace('\n', ' ')
        # Build the key from the file name itself instead of fixed offsets
        # into the path, so relocating the data directory cannot corrupt keys
        loaded[os.path.basename(path)[:key_length]] = text
    return loaded


speeches = load_speeches()

In [17]:
# Break speeches into 40 pieces for tone analysis
N_CHUNKS = 40


def split_into_chunks(text, n_chunks=N_CHUNKS):
    """Cut ``text`` into ``n_chunks`` consecutive slices that cover all of it.

    The first ``n_chunks - 1`` slices each hold ``len(text) // n_chunks``
    characters. The final slice starts where they end and runs to the end of
    the text, so the leftover characters are kept when the length is not an
    exact multiple of ``n_chunks``.

    Slices are cut by character position, so a word may be split across two
    neighbouring chunks. If ``text`` is shorter than ``n_chunks`` characters,
    every slice except the last is empty.

    Args:
        text: The string to split.
        n_chunks: How many slices to produce.

    Returns:
        list of ``n_chunks`` strings whose concatenation equals ``text``.
    """
    size = len(text) // n_chunks
    pieces = [text[i * size:(i + 1) * size] for i in range(n_chunks - 1)]
    # Open-ended slice: the last piece absorbs the integer-division remainder
    pieces.append(text[(n_chunks - 1) * size:])
    return pieces


speech_chunks = {name: split_into_chunks(text) for name, text in speeches.items()}

In [19]:
# Send every chunk of every speech to the IBM Watson tone analyzer
def _analyze_chunk(piece):
    """Analyze one chunk and then pause for two seconds before returning."""
    result = tone_analyzer.tone_analyzer(text=piece, prints=False)
    # Pause after each request (presumably to stay under an API rate limit -- TODO confirm)
    time.sleep(2)
    return result


chunk_tones = {}
for speech_id, pieces in speech_chunks.items():
    # The list is built completely before it is stored under the speech's key
    chunk_tones[speech_id] = [_analyze_chunk(piece) for piece in pieces]

In [28]:
# Re-key the tone results by year (the last four characters of each speech key)
# so they are easier to plot later
chunk_tones_year = dict(
    (speech_id[-4:], tones) for speech_id, tones in chunk_tones.items()
)

## Save chunk tones to json

In [31]:
# Write the per-year tone results to disk as a single JSON document
with open('./data/chunk_tones_year.json', 'w') as out_file:
    out_file.write(json.dumps(chunk_tones_year))

## Save chunk tones to csv

In [3]:
# Reload the saved tone results and transpose them, giving one row per year
# and one column per chunk
df = pd.read_json('./data/chunk_tones_year.json').transpose()

# Label the 40 chunk columns chunk1 ... chunk40
chunk_num = [position + 1 for position in range(40)]
cols = ['chunk{}'.format(num) for num in chunk_num]
df.columns = cols

# Preview the first rows
df.head()

Unnamed: 0,chunk1,chunk2,chunk3,chunk4,chunk5,chunk6,chunk7,chunk8,chunk9,chunk10,...,chunk31,chunk32,chunk33,chunk34,chunk35,chunk36,chunk37,chunk38,chunk39,chunk40
1961,"{'Anger': 0.08889899999999999, 'Disgust': 0.12...","{'Anger': 0.19658699999999998, 'Disgust': 0.49...","{'Anger': 0.173543, 'Disgust': 0.2350479999999...","{'Anger': 0.49676000000000003, 'Disgust': 0.15...","{'Anger': 0.13317199999999998, 'Disgust': 0.58...","{'Anger': 0.147623, 'Disgust': 0.167987, 'Fear...","{'Anger': 0.281902, 'Disgust': 0.275376, 'Fear...","{'Anger': 0.35187199999999996, 'Disgust': 0.15...","{'Anger': 0.16261599999999998, 'Disgust': 0.11...","{'Anger': 0.096089, 'Disgust': 0.1083000000000...",...,"{'Anger': 0.067941, 'Disgust': 0.2129039999999...","{'Anger': 0.549941, 'Disgust': 0.34425, 'Fear'...","{'Anger': 0.18562599999999999, 'Disgust': 0.20...","{'Anger': 0.563981, 'Disgust': 0.087225, 'Fear...","{'Anger': 0.12394799999999999, 'Disgust': 0.09...","{'Anger': 0.242272, 'Disgust': 0.114008, 'Fear...","{'Anger': 0.272648, 'Disgust': 0.1896459999999...","{'Anger': 0.200266, 'Disgust': 0.6296400000000...","{'Anger': 0.127914, 'Disgust': 0.417979, 'Fear...","{'Anger': 0.09537899999999999, 'Disgust': 0.13..."
1962,"{'Anger': 0.13712000000000002, 'Disgust': 0.45...","{'Anger': 0.102074, 'Disgust': 0.065832, 'Fear...","{'Anger': 0.517985, 'Disgust': 0.06081, 'Fear'...","{'Anger': 0.276896, 'Disgust': 0.27059, 'Fear'...","{'Anger': 0.44609099999999996, 'Disgust': 0.16...","{'Anger': 0.157693, 'Disgust': 0.1738529999999...","{'Anger': 0.19042, 'Disgust': 0.14618899999999...","{'Anger': 0.502047, 'Disgust': 0.254703, 'Fear...","{'Anger': 0.175252, 'Disgust': 0.192016, 'Fear...","{'Anger': 0.173117, 'Disgust': 0.478634, 'Fear...",...,"{'Anger': 0.14623, 'Disgust': 0.22111799999999...","{'Anger': 0.145502, 'Disgust': 0.145321, 'Fear...","{'Anger': 0.114594, 'Disgust': 0.529397, 'Fear...","{'Anger': 0.544439, 'Disgust': 0.591132, 'Fear...","{'Anger': 0.47235000000000005, 'Disgust': 0.08...","{'Anger': 0.24657099999999998, 'Disgust': 0.38...","{'Anger': 0.49203, 'Disgust': 0.18161000000000...","{'Anger': 0.204033, 'Disgust': 0.132291, 'Fear...","{'Anger': 0.105768, 'Disgust': 0.171016, 'Fear...","{'Anger': 0.087667, 'Disgust': 0.138453, 'Fear..."
1963,"{'Anger': 0.132646, 'Disgust': 0.4823949999999...","{'Anger': 0.152613, 'Disgust': 0.174865, 'Fear...","{'Anger': 0.159996, 'Disgust': 0.430592, 'Fear...","{'Anger': 0.102126, 'Disgust': 0.588295, 'Fear...","{'Anger': 0.187793, 'Disgust': 0.4751329999999...","{'Anger': 0.144062, 'Disgust': 0.5423009999999...","{'Anger': 0.183519, 'Disgust': 0.120636, 'Fear...","{'Anger': 0.109542, 'Disgust': 0.4727119999999...","{'Anger': 0.12218599999999999, 'Disgust': 0.16...","{'Anger': 0.553422, 'Disgust': 0.104276, 'Fear...",...,"{'Anger': 0.11157199999999999, 'Disgust': 0.15...","{'Anger': 0.569527, 'Disgust': 0.4281089999999...","{'Anger': 0.514794, 'Disgust': 0.28591, 'Fear'...","{'Anger': 0.54434, 'Disgust': 0.17609000000000...","{'Anger': 0.138288, 'Disgust': 0.131208, 'Fear...","{'Anger': 0.173096, 'Disgust': 0.157556, 'Fear...","{'Anger': 0.15884099999999998, 'Disgust': 0.12...","{'Anger': 0.11548000000000001, 'Disgust': 0.12...","{'Anger': 0.139936, 'Disgust': 0.157237, 'Fear...","{'Anger': 0.08893000000000001, 'Disgust': 0.12..."
1964,"{'Anger': 0.1342, 'Disgust': 0.458430000000000...","{'Anger': 0.260388, 'Disgust': 0.526565, 'Fear...","{'Anger': 0.290051, 'Disgust': 0.1085769999999...","{'Anger': 0.323909, 'Disgust': 0.424047, 'Fear...","{'Anger': 0.062148999999999996, 'Disgust': 0.2...","{'Anger': 0.057158, 'Disgust': 0.10273, 'Fear'...","{'Anger': 0.107134, 'Disgust': 0.2120989999999...","{'Anger': 0.47148799999999996, 'Disgust': 0.18...","{'Anger': 0.126195, 'Disgust': 0.226552, 'Fear...","{'Anger': 0.010147999999999999, 'Disgust': 0.3...",...,"{'Anger': 0.29257299999999997, 'Disgust': 0.51...","{'Anger': 0.154395, 'Disgust': 0.154137, 'Fear...","{'Anger': 0.32349, 'Disgust': 0.048561, 'Fear'...","{'Anger': 0.343831, 'Disgust': 0.086438, 'Fear...","{'Anger': 0.07946399999999999, 'Disgust': 0.19...","{'Anger': 0.483043, 'Disgust': 0.255602, 'Fear...","{'Anger': 0.128361, 'Disgust': 0.146402, 'Fear...","{'Anger': 0.212053, 'Disgust': 0.184279, 'Fear...","{'Anger': 0.19159299999999999, 'Disgust': 0.20...","{'Anger': 0.23374499999999998, 'Disgust': 0.05..."
1965,"{'Anger': 0.108288, 'Disgust': 0.139257, 'Fear...","{'Anger': 0.087406, 'Disgust': 0.1490179999999...","{'Anger': 0.115052, 'Disgust': 0.117863, 'Fear...","{'Anger': 0.16899899999999998, 'Disgust': 0.13...","{'Anger': 0.238305, 'Disgust': 0.206286, 'Fear...","{'Anger': 0.20449199999999998, 'Disgust': 0.15...","{'Anger': 0.537481, 'Disgust': 0.140686, 'Fear...","{'Anger': 0.450013, 'Disgust': 0.150955, 'Fear...","{'Anger': 0.120727, 'Disgust': 0.159585, 'Fear...","{'Anger': 0.449879, 'Disgust': 0.2151249999999...",...,"{'Anger': 0.164096, 'Disgust': 0.088202, 'Fear...","{'Anger': 0.26644599999999996, 'Disgust': 0.24...","{'Anger': 0.034342, 'Disgust': 0.0582769999999...","{'Anger': 0.14479799999999998, 'Disgust': 0.10...","{'Anger': 0.021792, 'Disgust': 0.0530659999999...","{'Anger': 0.210967, 'Disgust': 0.4643879999999...","{'Anger': 0.100862, 'Disgust': 0.484786, 'Fear...","{'Anger': 0.44388299999999997, 'Disgust': 0.45...","{'Anger': 0.12433799999999999, 'Disgust': 0.15...","{'Anger': 0.098493, 'Disgust': 0.4607569999999..."


In [4]:
# Build one dataframe per tone by applying the matching extractor to every cell.
# NOTE: DataFrame.applymap is deprecated from pandas 2.1 in favour of
# DataFrame.map; it is kept here because the installed pandas version is not
# known from this notebook.
df_anger, df_disgust, df_sadness, df_joy, df_fear = (
    df.applymap(extractor)
    for extractor in (
        extract_tones.anger,
        extract_tones.disgust,
        extract_tones.sadness,
        extract_tones.joy,
        extract_tones.fear,
    )
)

In [5]:
# Write each per-tone dataframe to its own CSV file
csv_targets = [
    (df_anger, './data/chunk_anger.csv'),
    (df_disgust, './data/chunk_disgust.csv'),
    (df_sadness, './data/chunk_sadness.csv'),
    (df_joy, './data/chunk_joy.csv'),
    (df_fear, './data/chunk_fear.csv'),
]
for tone_frame, csv_path in csv_targets:
    tone_frame.to_csv(csv_path)