In [8]:
import numpy as np
import pandas as pd
import pyarrow.parquet as pq

import seaborn as sns
import matplotlib.pyplot as plt

from ipywidgets import *
from helper import autoFill

In [9]:
# Read the pre-cleaned parkrun results from parquet into a pandas DataFrame.
df = (
    pq.read_table('cleaned.parquet')
    .to_pandas()
)

# Select a parkrun and compare its stats with its own state or with Australia overall

In [10]:
# plot data
def _plot_run_vs_reference(ax, run_stat, ref_stat):
    """Draw one panel: the selected parkrun's statistic over faint reference lines.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw on.
    run_stat : pandas.Series
        Statistic for the selected parkrun, indexed by (gender, agegroup).
    ref_stat : pandas.Series
        Same statistic for the reference population, indexed the same way.
    """
    # seaborn wants a long-form frame; after reset_index the value column
    # carries the Series' own name, which is therefore used as the y column.
    sns.lineplot(
        ax = ax, data = run_stat.reset_index(), x = 'agegroup', y = run_stat.name,
        hue = 'gender', style = 'gender', marker = 'o', mew = 0
    )

    # Thin, semi-transparent reference lines, one per gender.
    genders_present = ref_stat.index.get_level_values(0)
    for gen, col, sty in zip(['F', 'M'], sns.color_palette('tab10')[:2], ['-', '--']):
        if gen not in genders_present:
            # Nothing to overlay for this gender in the reference population.
            continue
        ref_stat.loc[gen].plot(ax = ax, c = col, ls = sty, lw = 1, alpha = 0.5)


def standard_plots(df, run, state):
    """Show a 2x2 figure comparing one parkrun with a reference population.

    The panels are, per gender and age group: share of runners, mean of the
    ``n`` column, minimum ``time`` and mean ``time``. Marked lines show the
    selected parkrun; thin faint lines show the reference population.

    Parameters
    ----------
    df : pandas.DataFrame
        Data with the columns ``parkrun``, ``state``, ``gender``, ``agegroup``,
        ``name``, ``n`` and ``time``.
    run : str
        Value of the ``parkrun`` column to highlight. It must occur in the
        reference population, otherwise the first panel raises ``KeyError``.
    state : str
        Value of the ``state`` column used as reference population, or
        ``'Australia'`` to use all rows of ``df``.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    _, axes = plt.subplots(2, 2, figsize = (8,5))
    plt.suptitle(f'Statistics of {run} parkrun compared across {state}')

    # Rows of the selected parkrun, taken from the unfiltered data.
    df_run = df.query('parkrun == @run')

    # Reference population: one state, or every row when 'Australia' is requested.
    df_ref = df if state == 'Australia' else df.query('state == @state')

    groups = ['gender', 'agegroup']

    # distribution of parkrunners
    run_share = df_ref.groupby(['parkrun'] + groups)['name'].count()[run] / len(df_run)
    ref_share = df_ref.groupby(groups)['name'].count() / len(df_ref)
    _plot_run_vs_reference(axes[0,0], run_share, ref_share)
    axes[0,0].set(title = 'Distribution of total number of runners', ylabel = '')

    # mean number of runs (the run's rows are taken from the reference subset here)
    run_mean_n = df_ref.query('parkrun == @run').groupby(groups)['n'].mean()
    _plot_run_vs_reference(axes[0,1], run_mean_n, df_ref.groupby(groups)['n'].mean())
    axes[0,1].set(title = 'Mean number of runs per runner', ylim = [0,15])

    # best time
    _plot_run_vs_reference(
        axes[1,0],
        df_run.groupby(groups)['time'].min(),
        df_ref.groupby(groups)['time'].min(),
    )
    axes[1,0].set(ylim = [10,30], ylabel = 'time (min)', title = 'Best time')

    # average time
    _plot_run_vs_reference(
        axes[1,1],
        df_run.groupby(groups)['time'].mean(),
        df_ref.groupby(groups)['time'].mean(),
    )
    axes[1,1].set(ylim = [20,50], ylabel = 'time (min)', title = 'Average time')

    # Shared cosmetics: no x label, vertical tick labels.
    for ax_ in axes.flat:
        ax_.set(xlabel = '')
        ax_.tick_params(axis='x', rotation=90)
    plt.tight_layout()
    plt.show()

In [11]:
def select_parkrun():
    """Display an interactive selector that plots statistics for a chosen parkrun.

    The widget row consists of the input returned by ``autoFill`` (seeded with
    the unique values of ``df.parkrun``), a 'state'/'country' toggle and two
    buttons. 'Display stats' calls ``standard_plots`` for the entered parkrun,
    comparing it with its own state or with 'Australia'. 'Reset selection'
    clears the output and builds a fresh selector.

    Reads the notebook-level ``df``; returns None.
    """
    out = Output()

    autofill = autoFill(df.parkrun.unique().tolist(), placehold = 'Type name of parkrun here')

    @out.capture()
    def on_submit(b):
        """Redraw the controls, then plot the entered parkrun or report it as unknown."""
        out.clear_output()
        # NOTE(review): assumes the first grandchild of the autoFill widget is
        # the text input holding the typed name -- confirm against helper.autoFill.
        run = autofill.children[0].children[0].value
        display(boxes)

        # Validate before looking up the state: .iloc[0] on an empty selection
        # raises IndexError, which previously hid the friendly message below.
        run_rows = df.query('parkrun == @run')
        if run_rows.empty:
            display("We couldn't find this parkrun, please reset selection and try again")
            return

        if toggle_button.value == 'state':
            state = run_rows.state.iloc[0]
        else:
            state = 'Australia'
        standard_plots(df, run, state)

    @out.capture()
    def on_reset_clicked(b):
        """Clear the output and render a brand-new selector inside it."""
        out.clear_output()
        # Runs under out.capture(), so the new selector is displayed inside this Output.
        select_parkrun()

    toggle_button = ToggleButtons(value = 'state', options = ['state', 'country'], description = '', layout = dict(width = '200px'))
    display_button = Button(description = 'Display stats', style = dict(button_color = 'green', text_color = 'white'))
    display_button.on_click(on_submit)
    reset_button = Button(description = 'Reset selection', button_style = 'danger',style = dict(text_color = 'white'))
    reset_button.on_click(on_reset_clicked)

    boxes = HBox([autofill, toggle_button, VBox([display_button, reset_button])])

    with out:
        display(boxes)

    display(out)
    
# Render the selector; plots are drawn when the 'Display stats' button is clicked.
select_parkrun()

Output()