In [1]:
import numpy as np # we will use this later, so import it now
import pandas as pd
from bokeh.io import output_notebook, show
from bokeh.plotting import figure, output_file, show

In [2]:
# Configure Bokeh to render plots inline in the notebook when show() is called later.
output_notebook()

In [3]:
def get_and_clean_data(path):
    """Read a CSV and parse its ``date`` column into pandas datetimes.

    Parameters
    ----------
    path : str or file-like
        Anything accepted by ``pandas.read_csv``. The file must contain a
        ``date`` column.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with ``date`` converted via ``pd.to_datetime``.
        Values that cannot be parsed become ``NaT`` (``errors='coerce'``)
        instead of raising.
    """
    frame = pd.read_csv(path)
    # Cast to str first so every cell goes through the same string parser.
    date_text = frame['date'].astype(str)
    frame['date'] = pd.to_datetime(date_text, errors='coerce')
    return frame

In [4]:
def get_mean_scores_by_date(df):
    """Average ``score`` per calendar day.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a datetime-typed ``date`` column (the ``.dt`` accessor is
        used) and a numeric ``score`` column.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``date`` (``datetime.date`` objects, one row per day)
        and ``score`` (mean of that day's scores). Rows whose date is
        missing are excluded by the groupby.

    Notes
    -----
    The input frame is not modified. The day truncation is applied to a
    derived copy via ``assign``, so the caller's ``date`` column keeps its
    datetime dtype and the function can be called repeatedly on the same frame.
    """
    by_day = df.assign(date=df['date'].dt.date)
    return by_day.groupby('date')['score'].mean().reset_index()

In [5]:
def get_plot_data(path):
    """Load a headlines CSV and reduce it to per-day mean scores for plotting.

    Chains ``get_and_clean_data`` and ``get_mean_scores_by_date`` and returns
    only the ``date`` and ``score`` columns of the result.
    """
    daily_scores = get_mean_scores_by_date(get_and_clean_data(path))
    return daily_scores[['date', 'score']]

In [8]:
def plot_chart(rep_df, dem_df, rep_cand, dem_cand, title):
    """Draw both candidates' daily sentiment as lines with point markers.

    Parameters
    ----------
    rep_df, dem_df : pandas.DataFrame
        Frames with ``date`` and ``score`` columns; ``rep_df`` is drawn in
        red, ``dem_df`` in blue.
    rep_cand, dem_cand : str
        Candidate names used in the legend. Each legend entry is
        ``"<name> (<mean score rounded to 5 places>)"``, with the mean taken
        over that candidate's own frame.
    title : str
        Figure title.

    Returns
    -------
    None
        The figure is displayed with ``show``.
    """
    # NOTE(review): plot_height/plot_width are the pre-3.0 Bokeh spellings
    # (newer releases use height/width) — confirm against the installed version.
    p = figure(x_axis_type="datetime", title=title, plot_height=350, plot_width=900)
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_alpha = 0.5
    p.xaxis.axis_label = 'Date'
    p.yaxis.axis_label = 'Sentiment Score'

    series = (
        (rep_df, rep_cand, "red"),
        (dem_df, dem_cand, "blue"),
    )
    for frame, candidate, color in series:
        # The mean must come from the frame passed in, not from a notebook
        # global; the Democratic label previously read the global df_biden.
        label = candidate + ' (' + str(round(frame["score"].mean(), 5)) + ')'
        p.line(frame.date, frame.score, line_color=color,
               line_width=4, line_alpha=0.6,
               legend_label=label)
        p.circle(frame.date, frame.score, fill_color=color, size=5, color=color)

    p.legend.location = "bottom_right"

    show(p)

In [9]:
# Candidates topic: load each candidate's per-day mean scores, then chart them together.
df_trump = get_plot_data('data/processed/candidates/2020/trump/headlines.csv')
df_biden = get_plot_data('data/processed/candidates/2020/biden/headlines.csv')

plot_chart(
    rep_df=df_trump,
    dem_df=df_biden,
    rep_cand='Trump',
    dem_cand='Biden',
    title="Candidate Sentiment - 2020 - Last 90 Days",
)

In [10]:
# Economy topic: load each candidate's per-day mean scores, then chart them together.
df_trump = get_plot_data('data/processed/economy/2020/trump/headlines.csv')
df_biden = get_plot_data('data/processed/economy/2020/biden/headlines.csv')

plot_chart(
    rep_df=df_trump,
    dem_df=df_biden,
    rep_cand='Trump',
    dem_cand='Biden',
    title="Candidate Sentiment - 2020 - Last 90 Days",
)

In [12]:
# Environment topic: only Trump's data is loaded in this cell.
df_trump = get_plot_data('data/processed/environment/2020/trump/headlines.csv')
# NOTE(review): the Biden load below is commented out, so df_biden is NOT refreshed here.
# plot_chart therefore receives whatever an earlier cell left bound to df_biden (in
# top-to-bottom order, the economy frame), charted against Trump's environment data.
# Restore the load or plot Trump alone — TODO confirm the Biden file exists.
#df_biden = get_plot_data('data/processed/environment/2020/biden/headlines.csv')

plot_chart(df_trump, df_biden, 'Trump', 'Biden', "Candidate Sentiment - 2020 - Last 90 Days")

In [13]:
# Foreign-policy topic: load each candidate's per-day mean scores, then chart them together.
df_trump = get_plot_data('data/processed/foreign-policy/2020/trump/headlines.csv')
df_biden = get_plot_data('data/processed/foreign-policy/2020/biden/headlines.csv')

plot_chart(
    rep_df=df_trump,
    dem_df=df_biden,
    rep_cand='Trump',
    dem_cand='Biden',
    title="Candidate Sentiment - 2020 - Last 90 Days",
)

In [15]:
# Guns topic: only Trump's data is loaded in this cell.
df_trump = get_plot_data('data/processed/guns/2020/trump/headlines.csv')
# NOTE(review): the Biden load below is commented out, so df_biden is NOT refreshed here.
# plot_chart therefore receives whatever an earlier cell left bound to df_biden (in
# top-to-bottom order, the foreign-policy frame), charted against Trump's guns data.
# Restore the load or plot Trump alone — TODO confirm the Biden file exists.
#df_biden = get_plot_data('data/processed/guns/2020/biden/headlines.csv')

plot_chart(df_trump, df_biden, 'Trump', 'Biden', "Candidate Sentiment - 2020 - Last 90 Days")

In [17]:
# Health topic: only Trump's data is loaded in this cell.
df_trump = get_plot_data('data/processed/health/2020/trump/headlines.csv')
# NOTE(review): the Biden load below is commented out, so df_biden is NOT refreshed here.
# plot_chart therefore receives whatever an earlier cell left bound to df_biden (in
# top-to-bottom order, the foreign-policy frame), charted against Trump's health data.
# Restore the load or plot Trump alone — TODO confirm the Biden file exists.
#df_biden = get_plot_data('data/processed/health/2020/biden/headlines.csv')

plot_chart(df_trump, df_biden, 'Trump', 'Biden', "Candidate Sentiment - 2020 - Last 90 Days")

In [19]:
# Immigration topic: only Trump's data is loaded in this cell.
df_trump = get_plot_data('data/processed/immigration/2020/trump/headlines.csv')
# NOTE(review): the Biden load below is commented out, so df_biden is NOT refreshed here.
# plot_chart therefore receives whatever an earlier cell left bound to df_biden (in
# top-to-bottom order, the foreign-policy frame), charted against Trump's immigration data.
# Restore the load or plot Trump alone — TODO confirm the Biden file exists.
#df_biden = get_plot_data('data/processed/immigration/2020/biden/headlines.csv')

plot_chart(df_trump, df_biden, 'Trump', 'Biden', "Candidate Sentiment - 2020 - Last 90 Days")

In [20]:
# Party topic: load each candidate's per-day mean scores, then chart them together.
df_trump = get_plot_data('data/processed/party/2020/trump/headlines.csv')
df_biden = get_plot_data('data/processed/party/2020/biden/headlines.csv')

plot_chart(
    rep_df=df_trump,
    dem_df=df_biden,
    rep_cand='Trump',
    dem_cand='Biden',
    title="Candidate Sentiment - 2020 - Last 90 Days",
)