In [130]:
import os
import psycopg2
import datetime

import pandas as pd
import numpy as np

from bokeh.io import output_notebook, show, reset_output
from bokeh.plotting import figure
from bokeh.palettes import all_palettes
from bokeh.embed import file_html
from bokeh.resources import CDN
from bokeh.models import DatetimeTickFormatter, Range1d, WheelZoomTool, HoverTool, ColumnDataSource, CustomJS
from bokeh.models import Arrow, VeeHead, Label, Text, Legend, BoxAnnotation

# Make the downloads folder the working directory (machine-specific absolute path).
os.chdir(r'C:\users\barby\downloads')

# IPython magic: draw matplotlib figures inline in the notebook output.
%matplotlib inline

# Kilometres per mile; later cells divide (distance / 1000) by this to express distances in miles.
km_in_a_mile = 1.60934

def readConfig(key, path = r'c:\users\barby\documents\config.txt'):
    """Return the value stored under ``key`` in a ``key=value`` config file.

    Parameters
    ----------
    key : str
        Name on the left-hand side of the first ``=`` on a line.
    path : str, optional
        Location of the config file; defaults to the notebook's original
        hard-coded path so existing ``readConfig(key)`` calls are unchanged.

    Returns
    -------
    str
        Everything after the first ``=`` on the first line whose name matches
        ``key``. Values that themselves contain ``=`` are returned intact.

    Raises
    ------
    IndexError
        If no line defines ``key``.
    """
    # dtype/keep_default_na keep every line as a literal string, so a line
    # such as "NA" or "123" cannot be turned into NaN or a number.
    config = pd.read_table(path, header = None, dtype = str, keep_default_na = False)
    for line in config[0]:
        # partition splits on the FIRST '=' only, so secrets may contain '='.
        name, sep, value = line.partition('=')
        if sep and name == key:
            return value
    raise IndexError('key {!r} not found in {}'.format(key, path))

def dbGetQuery(q):
    """Run SQL query ``q`` against the ``kavdb`` Postgres database.

    Parameters
    ----------
    q : str
        SQL text handed to ``pandas.read_sql``.

    Returns
    -------
    pandas.DataFrame
        The query result.

    Notes
    -----
    The password is read with ``readConfig('pw')``. Connection parameters are
    passed as keyword arguments rather than concatenated into a DSN string, so
    a password containing quotes or spaces cannot corrupt the connection string.
    """
    conn = psycopg2.connect(
        host = 'kavdb.c9lrodma91yx.us-west-2.rds.amazonaws.com',
        dbname = 'kavdb',
        user = 'lkavenagh',
        password = readConfig('pw'))
    try:
        conn.autocommit = True
        return pd.read_sql(q, conn)
    finally:
        # Always release the connection, even when the query raises.
        conn.close()

In [2]:
# Clear any previously configured Bokeh output settings, then send plots to the notebook.
reset_output()
output_notebook()

# YTD activities

In [16]:
# Load every activity, oldest first.
dat = dbGetQuery("SELECT * FROM runs.activities")
dat = dat.sort_values('start_date')

# Add a zero-distance copy of the latest activity stamped "now" so the
# current year's cumulative line extends to today.
# (DataFrame.append was removed in pandas 2.0; pd.concat works on old and new versions.)
dat = pd.concat([dat, dat.tail(1)]).reset_index(drop = True)
dat.loc[len(dat)-1, 'start_date'] = datetime.datetime.now()
dat.loc[len(dat)-1, 'distance'] = 0

dat['year'] = [c.year for c in dat.start_date]
dat['doy'] = [c.timetuple().tm_yday for c in dat.start_date]

dat['distance_miles'] = [(c/1000)/km_in_a_mile for c in dat.distance]
# Cumulate only the mileage column; cumulating every column of the table
# breaks on non-numeric columns.
dat['ytd_distance'] = dat.groupby('year')['distance_miles'].cumsum()
# All years share a fictitious 1900 calendar so they can be overlaid on one axis.
# 1900 is not a leap year, so 29 Feb is folded onto 28 Feb instead of raising ValueError.
dat['generic_date'] = [
    datetime.date(1900, c.month, 28 if (c.month == 2 and c.day == 29) else c.day)
    for c in dat.start_date
]


In [49]:
# Year-end mileage goal (same unit as dat.ytd_distance, i.e. miles).
target = 800

# Pro-rate the goal by how far through the current calendar year we are.
# The real length of this year is used rather than the largest day-of-year
# seen in the data, which is wrong when the data covers only part of a year
# or mixes leap and non-leap years.
today = datetime.datetime.now().date()
year_start = datetime.date(today.year, 1, 1)
days_elapsed = (today - year_start).days + 1
days_in_year = (datetime.date(today.year + 1, 1, 1) - year_start).days
today_target = (target / days_in_year) * days_elapsed

# YTD mileage at the most recent timestamp; iloc[-1] tolerates several rows
# sharing that timestamp, where .item() would raise.
actual = dat.loc[dat.start_date == dat.start_date.max(), 'ytd_distance'].iloc[-1]

# Headline numbers for the chart title, each truncated to whole miles.
title_parts = [
    str(datetime.datetime.now().date()) + ' - YTD miles |',
    ' Target: ' + str(int(today_target)) + ' miles |',
    ' Actual: ' + str(int(actual)) + ' miles |',
    ' Surplus: ' + str(int(actual - today_target)) + ' miles',
]

p = figure(
    plot_width = 800,
    plot_height = 400,
    title = ''.join(title_parts),
    tools = "pan,wheel_zoom,box_zoom,reset",
    active_scroll = 'wheel_zoom',
    x_axis_type = 'datetime',
)

# Fix the x axis to the 1900 calendar year, with one day of padding on the left.
p.x_range = Range1d(datetime.date(1899,12,31), datetime.date(1900,12,31))

# Reshape to one row per generic calendar day and one column per year,
# each cell holding that year's YTD mileage on that day.
plot_dat = dat[['generic_date', 'year', 'ytd_distance']]
plot_dat = plot_dat.groupby(['generic_date', 'year'], as_index = False).max()
plot_dat = plot_dat.pivot_table(index = 'generic_date', columns = 'year')
plot_dat.columns = plot_dat.columns.droplevel().rename(None)
plot_dat = plot_dat.reset_index()
# Forward-fill days without an activity, but stop at each column's last real
# value so a year's line does not run past its final data point.
plot_dat = plot_dat.apply(lambda series: series.loc[:series.last_valid_index()].ffill())

# Straight-line target, computed from each row's calendar position rather than
# its row number, so days missing from the table do not bend the line.
days_in_generic_year = (datetime.date(1901, 1, 1) - datetime.date(1900, 1, 1)).days
plot_dat['target'] = [
    target * c.timetuple().tm_yday / days_in_generic_year
    for c in plot_dat['generic_date']
]

plot_dat = plot_dat.reset_index(drop = True)
plot_dat.columns = [str(c) for c in plot_dat.columns]

# One colour per plotted column. Category10 only ships sizes 3..10, so indexing
# it by the column count raises KeyError for fewer than 3 or more than 10 series;
# cycle through the 10-colour palette instead.
base_palette = all_palettes['Category10'][10]
n_series = len(plot_dat.columns) - 1
mypalette = [base_palette[k % len(base_palette)] for k in range(n_series)]

plot_dat['datestr'] = [c.strftime('%d %b') for c in plot_dat['generic_date']]

source = ColumnDataSource(data = plot_dat)

# Draw one line per column and collect (label, renderers) pairs for the legend.
legend_it = []
# Skip the first column (generic_date) and the last one (datestr).
to_plot = plot_dat.columns[1:-1]
series_count = len(to_plot)
for pos, col in enumerate(to_plot):
    if col == 'target':
        # Goal line: black and dashed.
        line_style = dict(line_color = 'black', line_dash = 'dashed', line_width = 2)
    elif pos == (series_count - 2):
        # Second-to-last column gets a thicker line.
        line_style = dict(line_color = mypalette[pos], line_width = 4)
    elif pos > (series_count - 5):
        # The columns just before it are shown at normal width.
        line_style = dict(line_color = mypalette[pos], line_width = 2)
    else:
        # Everything earlier starts hidden.
        line_style = dict(line_color = mypalette[pos], line_width = 2, visible = False)

    renderer = p.line(x = 'generic_date', y = col, source = source, **line_style)
    legend_it.append((col.title(), [renderer]))

# Tick labels: "Month day" at hour/day resolution, month name only at month/year resolution.
day_label = "%B %d"
month_label = "%B"
p.xaxis.formatter = DatetimeTickFormatter(
    hours = [day_label],
    days = [day_label],
    months = [month_label],
    years = [month_label],
)
p.xaxis.major_label_orientation = 3/4

# Legend sits to the right of the plot; clicking an entry hides its line.
legend = Legend(items = legend_it, location = (10, 120), click_policy = "hide")
p.add_layout(legend, 'right')

show(p)

# Export the chart as a standalone HTML page that loads Bokeh from the CDN.
html = file_html(p, CDN, "YTD totals")
# The with-block closes the file even if the write fails. Explicit UTF-8
# avoids encode errors from the platform default codec on non-ASCII output.
with open(r"C:\Users\barby\Documents\GitHub\runs\ytd_totals.html", "w", encoding = "utf-8") as text_file:
    text_file.write(html)

# Monthly average pace

In [68]:
# Pull the columns needed for pace, ordered oldest first.
dat = dbGetQuery("SELECT start_date, average_speed, distance FROM runs.activities").sort_values('start_date')

# Bucket each activity by the first day of its month.
dat['month'] = dat.start_date.map(lambda ts: datetime.date(ts.year, ts.month, 1))
# Convert speed to pace; the 0.00001 offset keeps a zero speed from dividing by zero.
dat['average_pace'] = dat.average_speed.map(lambda speed: (km_in_a_mile/60)/((0.00001+speed) / 1000))
# Rescale distance in place: divide by 1000, then by km per mile.
dat['distance'] = (dat.distance / 1000) / km_in_a_mile

In [79]:
# Distance-weighted average pace per month.
# Rows are collected in a list and turned into a DataFrame once. Growing a
# frame with DataFrame.append inside the loop is quadratic, and the method
# no longer exists in pandas 2.0.
monthly_rows = []
for m, this_dat in dat.groupby('month'):
    # skipna=False keeps the original behaviour: a missing value in the month
    # makes that month's result NaN instead of being silently ignored.
    total_distance = this_dat.distance.sum(skipna = False)
    weighted_pace = (this_dat.distance * this_dat.average_pace).sum(skipna = False)
    # A month with zero total distance has no defined weighted pace.
    avg_pace = weighted_pace / total_distance if total_distance else float('nan')
    monthly_rows.append((m, avg_pace))

out = pd.DataFrame(monthly_rows, columns = ['month', 'average_pace'])
out = out.sort_values('month')

In [168]:
# Line chart of the monthly weighted pace held in `out`.
p1 = figure(
    plot_width = 800,
    plot_height = 400,
    title = 'Monthly pace',
    tools = "pan,wheel_zoom,box_zoom,reset",
    active_scroll = 'wheel_zoom',
    x_axis_type = 'datetime',
)

# Use the same "Mon-YYYY" tick label at every zoom resolution.
tick_formats = {scale: ["%b-%Y"] for scale in ('hours', 'days', 'months', 'years')}
p1.xaxis.formatter = DatetimeTickFormatter(**tick_formats)
p1.xaxis.major_label_orientation = 3/4

# Fixed vertical window from 6 to 10.
p1.y_range = Range1d(6,10)

line = p1.line(
    x = out.month,
    y = out.average_pace,
    line_color = 'blue',
    line_width = 2,
    visible = True,
)

show(p1)

# Monthly elevation gain

In [160]:
# Total elevation gain per calendar month, in chronological order.
dat = dbGetQuery("SELECT start_date, total_elevation_gain FROM runs.activities")
dat = dat.sort_values('start_date')
dat['month'] = [datetime.date(c.year, c.month, 1) for c in dat.start_date]
# Sum only the elevation column: summing the whole frame also tries to add up
# the datetime start_date column, which raises TypeError on current pandas.
dat = dat.groupby('month', as_index = False)['total_elevation_gain'].sum()
dat = dat.sort_values('month')
# Turn the month into a short label such as "May-14" for the categorical x axis.
dat.month = [c.strftime('%b-%y') for c in dat.month]
dat = dat.reset_index(drop = True)

In [162]:
# Bar chart of monthly elevation gain on a categorical (month label) x axis.
month_labels = list(dat.month)

p1 = figure(plot_width=900, 
           plot_height=400, 
           title = 'Monthly elevation gain',
           tools="xpan,xwheel_zoom,box_zoom,reset", 
           active_scroll = 'xwheel_zoom',
           x_range=month_labels)

p1.xaxis.major_label_orientation = 3/4

line = p1.vbar(x = dat.month,
        top = dat.total_elevation_gain,
        line_color = 'blue',
        width = 0.75,
        visible = True)

p1.xgrid.grid_line_color = None
p1.y_range.start = 0

# Shade and label the May-14 .. Jun-17 span (presumably the San Francisco period).
# The annotation is skipped when either month has no data, instead of crashing
# on a failed lookup.
if 'May-14' in month_labels and 'Jun-17' in month_labels:
    # Position of each label within the categorical x range.
    l = month_labels.index('May-14')
    r = month_labels.index('Jun-17')
    mid_box = BoxAnnotation(left = l, 
                            right = r, 
                            fill_alpha=0.1, fill_color='green')
    p1.add_layout(mid_box)

    # Caption centred horizontally inside the shaded span.
    mytext = Label(x = l + (r-l)/2, 
                   y = 2000,
                   text = 'San Francisco',
                   text_align = 'center',
                   text_font_style = 'bold')

    p1.add_layout(mytext)

show(p1)

# Monthly mileage

In [163]:
# Total distance per calendar month, in chronological order.
dat = dbGetQuery("SELECT start_date, distance FROM runs.activities")
# Rescale distance: divide by 1000, then by km per mile.
dat.distance = [(c/1000)/km_in_a_mile for c in dat.distance]
dat = dat.sort_values('start_date')
dat['month'] = [datetime.date(c.year, c.month, 1) for c in dat.start_date]
# Sum only the distance column: summing the whole frame also tries to add up
# the datetime start_date column, which raises TypeError on current pandas.
dat = dat.groupby('month', as_index = False)['distance'].sum()
dat = dat.sort_values('month')
# Turn the month into a short label such as "May-14" for the categorical x axis.
dat.month = [c.strftime('%b-%y') for c in dat.month]
dat = dat.reset_index(drop = True)

In [164]:
# Bar chart of monthly distance on a categorical (month label) x axis.
month_labels = list(dat.month)

p1 = figure(plot_width=900, 
           plot_height=400, 
           title = 'Monthly distance (miles)',
           tools="xpan,xwheel_zoom,box_zoom,reset", 
           active_scroll = 'xwheel_zoom',
           x_range=month_labels)

p1.xaxis.major_label_orientation = 3/4

line = p1.vbar(x = dat.month,
        top = dat.distance,
        line_color = 'blue',
        width = 0.75,
        visible = True)

p1.xgrid.grid_line_color = None
p1.y_range.start = 0

# Shade and label the May-14 .. Jun-17 span (presumably the San Francisco period).
# The annotation is skipped when either month has no data, instead of crashing
# on a failed lookup.
if 'May-14' in month_labels and 'Jun-17' in month_labels:
    # Position of each label within the categorical x range.
    l = month_labels.index('May-14')
    r = month_labels.index('Jun-17')
    mid_box = BoxAnnotation(left = l, 
                            right = r, 
                            fill_alpha=0.1, fill_color='green')
    p1.add_layout(mid_box)

    # Caption centred horizontally inside the shaded span.
    mytext = Label(x = l + (r-l)/2, 
                   y = 150,
                   text = 'San Francisco',
                   text_align = 'center',
                   text_font_style = 'bold')

    p1.add_layout(mytext)

show(p1)