# Rank parkruns by relative difference with overall PB

In [1]:
import numpy as np
import pandas as pd
import pyarrow.parquet as pq

from ipywidgets import *
import plotly.express as px

In [2]:
# Load the cleaned results table into a pandas DataFrame.
df = pq.read_table('../data/cleaned.parquet').to_pandas()

# A runner is identified by the (name, gender, agegroup) triple.
# PB is the smallest `time` over all of that runner's rows, and rel_diff is
# how far each row's time lies above that PB as a fraction of the PB
# (0 means the row equals the PB).
# The string 'min' is passed instead of the builtin `min`: pandas deprecates
# builtin callables in transform() and emits a FutureWarning for them, while
# the string selects the same groupby reduction.
df['PB'] = df.groupby(['name', 'gender', 'agegroup']).time.transform('min').values
df['rel_diff'] = (df.time - df.PB) / df.PB

  df['PB'] = df.groupby(['name','gender','agegroup']).time.transform(min).values
  df['PB'] = df.groupby(['name','gender','agegroup']).time.transform(min).values


In [6]:
def update_df(df, state, measure, n_runs, n_attempts, min_rel_diff, *, min_total_attempts=100):
    """Aggregate the relative PB difference per parkrun for one state.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``state``, ``name``, ``gender``,
        ``agegroup``, ``parkrun``, ``n`` and ``rel_diff``.
        NOTE(review): ``n`` is presumably the number of attempts behind each
        row (one row per runner and parkrun) -- confirm against the data prep.
    state : str
        Only rows whose ``state`` equals this value are used.
    measure : str
        Name of the aggregation applied to ``rel_diff`` (e.g. ``'mean'`` or
        ``'median'``); it also becomes the name of the result column.
    n_runs : int
        A runner is kept only if they have at least this many rows in the
        state. The count is taken before the ``n_attempts`` and
        ``min_rel_diff`` filters are applied.
    n_attempts : int
        Rows with ``n`` below this value are dropped.
    min_rel_diff : float
        Rows with ``rel_diff`` above this value are dropped.
    min_total_attempts : int, keyword-only, default 100
        Parkruns whose summed ``n`` over the remaining rows is below this
        value are left out of the result.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``parkrun`` with two columns: ``measure`` (the aggregated
        ``rel_diff``) and ``n`` (the summed ``n``). Parkruns with a missing
        aggregate are excluded.
    """
    runner_keys = ['name', 'gender', 'agegroup']

    # restrict to the requested state
    in_state = df[df['state'] == state]

    # number of rows each runner has in this state, broadcast back onto the rows
    runs_per_runner = in_state.groupby(runner_keys, observed=True)['state'].transform('size')

    # keep rows of runners with enough runs, with enough attempts, and close
    # enough to the runner's overall PB
    eligible = in_state[
        (runs_per_runner >= n_runs)
        & (in_state['n'] >= n_attempts)
        & (in_state['rel_diff'] <= min_rel_diff)
    ]

    # observed=False keeps the original grouping behaviour for a categorical
    # `parkrun` column; unobserved categories are removed by the filters below
    by_parkrun = eligible.groupby('parkrun', observed=False)
    summary = by_parkrun['rel_diff'].agg([measure])
    summary['n'] = by_parkrun['n'].sum()

    # drop parkruns with too little data or without a defined aggregate
    enough_data = summary['n'] >= min_total_attempts
    has_value = summary[measure].notna()
    return summary[enough_data & has_value]

def plot_hover_graph(df, w1, w2, w3, w4, w5):
    """Draw a horizontal bar chart ranking the parkruns of one state.

    The five ``w*`` arguments are the current widget values and are forwarded
    to ``update_df`` in this order: state, measure, minimum runs per runner,
    minimum attempts per parkrun, maximum relative difference.
    The figure is shown directly; nothing is returned.
    """
    state = w1
    measure = w2

    ranking = update_df(df, state, measure, w3, w4, w5)

    # NSW, QLD and VIC get a wider figure than the other states
    width = 600 if state in ('NSW', 'QLD', 'VIC') else 500

    # sort ascending and reverse the rows, so the largest value comes first
    bars = ranking.sort_values(measure).iloc[::-1].reset_index()

    fig = px.bar(
        bars,
        x=measure,
        y="parkrun",
        hover_name="parkrun",
        width=width,
        height=200 + 15 * len(ranking),  # height grows with the number of bars
    )
    fig.update_layout(
        yaxis_title='',
        xaxis_title='',
        title=f'Parkruns in {state} ranked by {measure} relative difference',
        yaxis={'side': 'right'},
        xaxis={'side': 'top'},
    )
    fig.show()

In [7]:
# Input controls for the interactive ranking. Every description is left empty
# because a separate HTML label (see `labels`) is created for each control.

# w1: state to rank
w1 = Dropdown(
    options=df.state.unique().sort_values(),
    value='ACT',
    description='',
    layout={'width': '200px'},
)
# w2: aggregation applied to the relative difference
w2 = ToggleButtons(
    value='mean',
    options=['mean', 'median'],
    description='',
    layout={'width': '250px'},
)
# w3: minimum number of parkruns per runner
w3 = IntSlider(value=2, min=2, max=10, description='', layout={'width': '250px'})
# w4: minimum number of attempts per parkrun
w4 = IntSlider(value=1, min=1, max=5, description='', layout={'width': '250px'})
# w5: largest relative difference with the overall PB that still counts
w5 = FloatSlider(value=1, min=0.1, max=1, step=0.1, description='', layout={'width': '250px'})

# one HTML caption per control, in the same order as w1..w5
labels = [
    HTML(caption, layout={'display': "flex", 'justify_content': "flex-start"})
    for caption in (
        'State',
        'Aggregate relative difference as',
        'Minimum number of parkruns / runner',
        'Minimum number of attempts / parkrun',
        'Minimum relative difference with overall PB to count as a PB attempt',
    )
]

In [8]:
# Stack each caption on top of its control, then stack those pairs vertically.
controls = VBox([
    VBox([caption, control])
    for caption, control in zip(labels, [w1, w2, w3, w4, w5])
])

# Re-run plot_hover_graph whenever a control changes; `df` is passed through
# unchanged via fixed().
out = widgets.interactive_output(
    plot_hover_graph,
    {'df': fixed(df), 'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5},
)

# controls on the left, chart output on the right
display(HBox([controls, out]))

HBox(children=(VBox(children=(VBox(children=(HTML(value='State', layout=Layout(display='flex', justify_content…