In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import statsmodels.api as sm
import plotly
from plotly.graph_objs import Scatter, Layout
import plotly.plotly as py
import plotly.figure_factory as ff
import plotly.graph_objs as go
import colorlover as cl
#plotly.offline.init_notebook_mode(connected=True)
%matplotlib inline

  from pandas.core import datetools


In [2]:
#load ../for_images.csv, using its first column as the row index
df = pd.read_csv('../for_images.csv', index_col=0)

In [3]:
#remove the columns listed here before any plotting is done
unused_columns = [
    'date_diffs',
    'dates_in_wiki',
    'Wiki Content',
    'Inaugural',
    'clean_wiki',
    'clean_inaug',
]
df = df.drop(unused_columns, axis=1)

In [4]:
#keep only the rows that have a value in years_lived_after_pres
has_post_pres_years = df['years_lived_after_pres'].notnull()
dead_df = df[has_post_pres_years]

In [5]:
#show the column labels available on dead_df
dead_df.columns

Index(['First Year of Presidency', 'President Name', 'Num Wiki References',
       'Words on Page', 'rank', 'birth_year', 'death_year', 'years_lived',
       'age_become_pres', 'years_lived_after_pres', 'profile_dates_mean',
       'profile_dates_std', 'presidency_length', 'cos_similarity_wiki_inaug',
       'more_than_10000_deaths', 'sim_rank'],
      dtype='object')

In [6]:
#order the rows chronologically by the year each presidency began
dead_df = dead_df.sort_values('First Year of Presidency', ascending=True)

In [93]:
#dual-axis figure keyed on the first year of each presidency:
#  left axis  - scatter of profile_dates_mean
#  right axis - bars of years_lived_after_pres
trace1 = go.Scatter(
    x=dead_df['First Year of Presidency'],
    y=dead_df['profile_dates_mean'],
    name='Mean Difference in Years',
    mode='markers',
    marker=dict(
        size=10,
        color='rgba(20, 30, 200, .8)',
        line=dict(width=2),
    ),
    text=dead_df['President Name'],
    hoverinfo='text',  #hover label shows only the president's name
)

trace2 = go.Bar(
    x=dead_df['First Year of Presidency'],
    y=dead_df['years_lived_after_pres'],
    name='Years lived after Presidency',
    width=2.4,
    marker=dict(color='rgba(200,30,20,.8)'),
    opacity=0.32,
    yaxis='y2',  #bind the bars to the secondary axis defined in the layout
    text=dead_df['years_lived_after_pres'],
    hoverinfo='text',
)

#go.Data is deprecated; go.Figure accepts a plain list of traces
data = [trace1, trace2]

#each axis title is tinted with the colour of the trace it belongs to
layout = go.Layout(
    title='Average Date on Wikipedia Page',
    xaxis=dict(title='First year of Presidency'),
    yaxis=dict(
        title='Difference between average year and first year',
        titlefont=dict(color='rgba(20, 30, 200, .8)'),
    ),
    yaxis2=dict(
        title='Years lived after Presidency',
        overlaying='y',
        side='right',
        titlefont=dict(color='rgba(200,30,20,.8)'),
    ),
)
#both traces set hoverinfo='text', so hover labels show the `text` values
#rather than the raw x and y coordinates

fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

In [79]:
#heatmap of the similarity matrix stored in inaug_sims.csv
inaug = pd.read_csv('./inaug_sims.csv', index_col=0)
#axis labels: the last whitespace-separated token of every index entry
names = [name.split()[-1] for name in inaug.index.tolist()]
colors = cl.scales['9']['div']['RdYlBu']

#colour stops are packed into the 0-0.40 band, and the palette is applied in
#reverse order (colors[8] at 0 ... colors[0] at 1)
stops = [0, 0.10, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40, 1]
colorscale = [[stop, color] for stop, color in zip(stops, colors[::-1])]

trace = go.Heatmap(
    z=inaug.values.tolist(),
    colorscale=colorscale,
    colorbar=dict(
        tickmode='array',
        tickvals=[0.15, 0.95],
        ticktext=['Less Similar', 'More Similar'],
        ticks='outside',
    ),
)

#one tick per label; derived from the data instead of a hard-coded count so
#the labels stay aligned if the number of rows in the CSV changes
tick_positions = np.arange(len(names))

layout = go.Layout(
    title='Similarities in Inaugural Addresses',
    yaxis=dict(ticktext=names, tickvals=tick_positions),
    xaxis=dict(ticktext=names, tickvals=tick_positions),
    height=650,
    width=650,
)

data = [trace]

fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

In [7]:
#flip the ranking (|rank - 43|) so that a bigger number is better, then sort by it
dead_df = (
    dead_df
    .assign(rank=lambda frame: (frame['rank'] - 43).abs())
    .sort_values(by='rank')
)

In [8]:
#getting color scale for graph: the '3' / 'div' / 'PiYG' entry of colorlover's scales
#(rebinds `colors`; the scatter further down reads colors[0] and colors[2])
colors = cl.scales['3']['div']['PiYG']

In [115]:
#getting ols parameters for line of best fit
X = dead_df[['Words on Page']]
X = sm.add_constant(X)
ols = sm.OLS(X, dead_df['rank']).fit()
y_values = [ols.params.iloc[0, 0] + (ols.params.iloc[0, 1] * i) for i in range(0, 43)]

In [128]:
trace = go.Scatter(x=dead_df['rank'], y=dead_df['Words on Page'],
                  mode='markers',
                  marker=dict(size= 16,
                              line= dict(width=2, color=(0,0,0,1)),
                              color=dead_df['more_than_10000_deaths'],
                             colorscale=[[0, colors[2]],
                                        [0, colors[2]],
                                        [1,colors[0]],
                                        [1,colors[0]]],
                             showscale=True,
                           colorbar = dict(tickmode='array',
                                      tickvals=[0.15, 0.95],
                                     ticktext = ['Fewer than 10,000 war deaths', 'More than 10,000 war deaths'],
                                 ticks= 'outside')),
                  text = dead_df['President Name'],
                  hoverinfo='text')

trace1 = go.Scatter(x=np.arange(29), y=y_values[10:-4],
                   name='OLS line',
                   line=dict(color=('rgba(0, 0, 0, 0.7)'),
                             width=4,
                        dash='dash'))

data = go.Data([trace, trace1])
layout = go.Layout(title='Determining Page Length',
                   xaxis=dict(title='CSPAN Rank'),
                  yaxis=dict(title='Words on Wikipedia page'), 
                  plot_bgcolor='rgba(56, 240, 249, 0.3)',
                  showlegend=False,
                  annotations= [dict(x=2, y=32000, xref='x', yref='y', text='OLS line', showarrow=True,
                                    arrowhead=1, ax=40, ay=0)])

fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

In [21]:
#load the model results used by the density-contour figure in the next cell
models = pd.read_csv('./models_images.csv', index_col=0)

#NOT USED: earlier attempt, kept for reference only. For each length 1..12 it
#collected the adj_r values of the models with that length into one array.
# to_array = []
# for x in range(1, 13):
#     num = models.copy()
#     num = num.loc[num['length'] == x]
#     rs = num.adj_r.tolist()
#     to_array.append(np.array(rs))
# z = pd.DataFrame(to_array).values

In [77]:
#two 2D-histogram contour traces of adjusted R-squared against model length;
#a dropdown switches which of the two is visible
contour_style = dict(colorscale='Jet', showscale=True, hoverinfo="y")

trace0 = go.Histogram2dContour(
    x=models['length'],
    y=models['adj_r'],
    **contour_style
)

trace1 = go.Histogram2dContour(
    x=models['wo_length'],
    y=models['wo_adj_r'],
    name='no',
    **contour_style
)

data = [trace0, trace1]

#each button sends an 'update' that sets the traces' `visible` flags
buttons = [
    dict(
        label='With Number of Wiki References',
        method='update',
        args=[{'visible': [True, False]}],
    ),
    dict(
        label='Without Number of Wiki References',
        method='update',
        args=[{'visible': [False, True]}],
    ),
]
updatemenus = [dict(active=-1, buttons=buttons)]

layout = dict(
    title='Typical Model Performance',
    xaxis=dict(title='Number of variables in regression'),
    yaxis=dict(title='Adjusted R-squared'),
    updatemenus=updatemenus,
)

fig = dict(data=data, layout=layout)

py.iplot(fig)

In [187]:
#prepping for next figure: work on a copy limited to rows with a similarity score
df = dead_df.copy()
df = df.dropna(subset=['cos_similarity_wiki_inaug'])

#making line for OLS: sqrt(page length) regressed on similarity plus an intercept
X = df[['cos_similarity_wiki_inaug']]
X = sm.add_constant(X)
ols = sm.OLS(np.sqrt(df['Words on Page']), X).fit()
#evaluate the fit on the very grid it is plotted against; computing it at the
#sorted data values and then drawing it over a separate linspace grid does not
#produce the fitted line
x_values = np.linspace(0, 0.38, 33)
y_values = [ols.params.iloc[0] + (ols.params.iloc[1] * i) for i in x_values]

In [195]:
#scatter of sqrt(page length) against wiki/inaugural cosine similarity,
#coloured by presidency_length, with the line held in x_values / y_values on top
trace0 = go.Scatter(
    x=df['cos_similarity_wiki_inaug'],
    y=np.sqrt(df['Words on Page']),
    mode='markers',
    marker=dict(
        size=14,
        color=df['presidency_length'],
        colorscale='Electric',
        colorbar=dict(title='Length of Presidency (Years)'),
        line=dict(color='rgba(0,0,0,0.5)', width=2),
        reversescale=True,
        showscale=True,
    ),
    hoverinfo='text',
    text=df['President Name'],
)

trace1 = go.Scatter(
    x=x_values,
    y=y_values,
    line=dict(color='rgba(0,0,0,0.5)', width=4, dash='dash'),
)

#go.Data is deprecated; go.Figure accepts a plain list of traces
data = [trace0, trace1]

layout = go.Layout(
    xaxis=dict(
        title='Cosine Similarity Between Inaugural Addresses & Wikipedia Page',
        titlefont=dict(color='red'),
    ),
    yaxis=dict(title='(square root of) Wiki Page Length'),
    title='Setting simple goals',
    plot_bgcolor='rgba(18, 12, 18, 0.04)',
    annotations=[
        dict(x=0.0425, y=210, text='Less Similar', showarrow=False,
             font=dict(size=16, color='red')),
        dict(x=0.35, y=210, text='More Similar', showarrow=False,
             font=dict(size=16, color='red')),
        #anchor the arrow to a vertex of the plotted line (4th point from the
        #right end) so it stays on the line instead of at a hand-tuned y
        dict(x=x_values[-4], y=y_values[-4], text='OLS line', showarrow=True,
             xref='x', yref='y', arrowhead=1, ax=0, ay=-30),
    ],
    showlegend=False,
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

In [60]:
#read the Google Trends table and re-index it by parsed datetimes,
#then read the table that carries the mean trend per president
trends = pd.read_csv('./google_trends.csv', index_col=0)
trends = trends.set_index(pd.to_datetime(trends.index))
mean_trends = pd.read_csv('../with_mean_trend.csv', index_col=0)

In [50]:
from datetime import date

#one line trace per column of the trends table, named after that column
traces = [
    go.Scatter(x=trends.index, y=trends[name], name=name)
    for name in trends.columns
]

#go.Data is deprecated; go.Figure accepts a plain list of traces
data = list(traces)

#range-selector buttons: last year, last five years, or everything
range_buttons = [
    dict(count=1,
         label='Last year',
         step='year',
         stepmode='backward'),
    dict(count=5,
         label='Last five years',
         step='year',
         stepmode='backward'),
    dict(step='all'),
]

layout = go.Layout(
    title='Google Trends for U.S. Presidents',
    paper_bgcolor='rgba(253,245,253,1)',
    yaxis=dict(title='Google Trends (relative to 100 peak interest)'),
    xaxis=dict(
        title='Date',
        rangeselector=dict(buttons=range_buttons),
        rangeslider=dict(),
        type='date',
    ),
    #free-floating note (no arrow) placed at 2013-01-01 on the date axis
    annotations=[dict(x=date(2013, 1, 1), y=90, text="Reagan died in 2004 with the actual peak interest, I decided <br>to start his timeline in 2005, giving Washington top billing.", showarrow=False)],
)

fig = go.Figure(data=data, layout=layout)

py.iplot(fig)

In [None]:
#prepping for figure: drop incomplete rows, flip the rank against its own
#maximum (|rank - max rank|), and order the rows by mean_trend
mean_trends = (
    mean_trends
    .dropna()
    .reset_index(drop=True)
    .assign(rank=lambda frame: (frame['rank'] - frame['rank'].max()).abs())
    .sort_values(by='mean_trend')
)

In [87]:
#grouped bars per president: mean Google trend and rank on the left axis,
#page word count as faint bars on a secondary right-hand axis
trace0 = go.Bar(
    x=mean_trends['President Name'],
    y=mean_trends['mean_trend'],
    name='Avg. Google trend',
)
trace1 = go.Bar(
    x=mean_trends['President Name'],
    y=mean_trends['rank'],
    name='CSPAN rank',
)
trace2 = go.Bar(
    x=mean_trends['President Name'],
    y=mean_trends['Words on Page'],
    name='Words on Wiki page',
    yaxis='y2',  #bind to the secondary axis defined in the layout
    opacity=0.2,
)

#go.Data is deprecated; go.Figure accepts a plain list of traces
data = [trace0, trace1, trace2]

layout = go.Layout(
    height=550,
    barmode='group',
    xaxis=dict(tickangle=-45, title='President'),
    yaxis=dict(title='Rank and Avg. Google Trend'),
    yaxis2=dict(
        title='Words on Wiki page',
        titlefont=dict(color='rgba(63,151,63,1)'),
        overlaying='y',
        side='right',
        range=[30000, 110000],
    ),
    margin=dict(b=125),  #extra bottom margin below the plot (x tick labels are rotated -45)
    legend=dict(x=0, y=1),
    title='Rank & Trend vs. Wiki Length',
    paper_bgcolor='rgba(241, 74, 149, 0.045)',
)

fig = go.Figure(data=data, layout=layout)

py.iplot(fig)

PlotlyRequestError: Hi there, you've reached the threshold of 100 combined image exports and chart saves per day. If you need to raise your daily limit, consider upgrading to a Student or Personal Plan (see: https://plot.ly/products/cloud).