# Preliminary Analysis/Plotting

In [None]:
from os.path import join
import numpy as np
import pandas as pd

from bokeh.plotting import figure, show
from bokeh.layouts import column
from bokeh.io import output_notebook, curdoc
from bokeh.models import ColumnDataSource, Span, FactorRange
from bokeh.transform import jitter
from bokeh.palettes import *

# Send Bokeh output to the notebook so show() renders figures inline.
output_notebook()
# Set the 'light_minimal' theme on the current Bokeh document.
curdoc().theme = 'light_minimal'

In [None]:
# Athlete attribute under analysis. It selects the statistic columns read below
# ('c_' + var, 'p_' + var, 'N_' + var) and is title-cased into the plot titles.
var = 'height'

### Load Data

In [None]:
# Load the workout statistics table.
wc = pd.read_csv(join('..', 'data', 'pro', 'workout_statistics.csv'))
# Take only Men's and Women's individual divisions (divisionNumber < 3) and
# workouts with more than 5 recorded values of the attribute named by `var`.
wc = wc[(wc.divisionNumber < 3) & (wc['N_' + var] > 5)]
# Ignore the filler workout. The trailing .copy() makes the result an
# independent frame, so adding columns to it later does not raise pandas'
# SettingWithCopyWarning.
wc = wc[wc.workoutName != 'Stage 1 Points'].copy()
display(wc)

In [None]:
# Fill colour for each competition/division label, taken from fixed positions
# of the Paired8 palette. Used by the plots that colour only by these
# Comp-Gender combinations.
competitionDivision2color = {
    label: Paired8[palette_index]
    for label, palette_index in (
        ('Games: Men', 1),
        ('Games: Women', 5),
        ('Open: Men', 3),
        ('Open: Women', 7),
    )
}
# Functions for computing marker size and alpha values from p-values.
def psize(p):
    """Return the marker size for p-value(s) `p`.

    The size is 10 at p = 0 and shrinks to 4 at p = 1, following the fourth
    root of `p`. `p` may be a scalar or anything `np.power` accepts (list,
    array, pandas Series); the result has the matching shape.
    """
    return 10 - 6*np.power(p, 1/4)


def palpha(p):
    """Return the marker opacity for p-value(s) `p`.

    The opacity is 1 at p = 0 and fades to 0.25 at p = 1, following the cube
    root of `p`. `p` may be a scalar or anything `np.power` accepts (list,
    array, pandas Series); the result has the matching shape.
    """
    return 1 - (3/4)*np.power(p, 1/3)

### Rank Correlations for Individual Workouts

In [None]:
# Derive the per-row styling columns used by the workout plots: fill colour
# from the competition/division label, marker size and opacity from the p-value.
# assign() returns a new frame instead of writing into wc, so no
# SettingWithCopyWarning is raised even though wc is a filtered selection.
wc = wc.assign(
    competitionDivisionColor=wc.competitionDivision.map(competitionDivision2color),
    p_size=psize(wc['p_' + var]),
    p_alpha=palpha(wc['p_' + var]),
)

In [None]:
# Jittered scatter of the per-workout rank correlations, with one categorical
# x position per competition/division. Marker size and opacity are read from
# the p_size / p_alpha columns of wc.
hover_fields = [
    ('year', '@year'),
    ('workout', '@workoutName'),
    ('correlation', f'@c_{var}'),
    ('p-value', f'@p_{var}'),
    ('N', f'@N_{var}'),
]
division_labels = sorted(wc.competitionDivision.unique())

p = figure(
    title=f'Rank Correlation—Athlete {var.title()} & Workout Placement',
    y_axis_label='Spearman Rank Correlation',
    width=1600,
    height=400,
    x_range=division_labels,
    active_drag='pan',
    active_scroll='wheel_zoom',
    tooltips=hover_fields,
)
# Invert the y axis direction.
p.y_range.flipped = True

p.scatter(
    source=ColumnDataSource(wc),
    # Spread the points horizontally inside each category so they overlap less.
    x=jitter('competitionDivision', width=0.6, range=p.x_range),
    y=f'c_{var}',
    size='p_size',
    fill_color='competitionDivisionColor',
    fill_alpha='p_alpha',
    line_color='black',
    line_alpha='p_alpha',
)

# Horizontal reference line at zero correlation, drawn beneath the markers.
span = Span(
    location=0,
    dimension='width',
    line_color='gray',
    line_alpha=0.75,
    line_width=2,
    level='underlay',
)
p.add_layout(span)

show(p)

In [None]:
# One bar chart per competition/division, with workouts ordered by their rank
# correlation. Bar width and opacity both come from the p_alpha column.
figs = []
for K, df in wc.groupby('competitionDivision'):

    # Sort ascending by correlation; x is then simply the position in that order.
    df = df.sort_values(['c_'+var]).reset_index(drop=True)
    source = {'x': np.arange(len(df))}
    # Expose every column to the glyph and the hover tooltips.
    source.update({col: df[col].values for col in df.columns})

    p = figure(
        title=f'Rank Correlation—Athlete {var.title()} & Workout Placement ({K})',
        y_axis_label='Spearman Rank Correlation',
        height=250,
        width=1600,
        tooltips=[
            ('year', '@year'),
            ('workout', '@workoutName'),
            ('correlation', '@c_'+var),
            ('p-value', '@p_'+var),
            ('N', '@N_'+var)
        ]
    )
    p.vbar(
        x='x',
        top='c_'+var,
        fill_color='competitionDivisionColor',
        fill_alpha='p_alpha',
        line_color='gray',
        line_alpha='p_alpha',
        width='p_alpha',
        source=ColumnDataSource(source)
    )

    # Horizontal reference line at zero correlation.
    zero_line = Span(
        dimension='width',
        location=0,
        line_color='gray',
        line_width=2,
        line_alpha=0.75
    )
    p.add_layout(zero_line)

    # Vertical line between the last negative bar and the first non-negative
    # one: with bars at x = 0..n-1 in ascending order, the count of negative
    # correlations minus 0.5 falls exactly between the two.
    sign_boundary = Span(
        dimension='height',
        location=(df['c_'+var] < 0).sum() - 0.5,
        line_color='gray',
        line_width=1
    )
    p.add_layout(sign_boundary)

    # Draw both reference lines beneath the bars. Each Span has its own name so
    # the level is applied to both of them, not only to the last one created.
    zero_line.level = 'underlay'
    sign_boundary.level = 'underlay'

    p.y_range.flipped = True
    figs.append(p)
show(column(figs))


In [None]:
# One bar chart per competition/division, with workouts in chronological order
# and grouped by year on a nested categorical axis. Bars are coloured by year.
figs = []
for K, df in wc.groupby('competitionDivision'):

    # Sort into a new frame rather than in place: the group handed out by
    # groupby is a selection of wc, and mutating it in place can raise
    # SettingWithCopyWarning.
    df = df.sort_values(['year', 'workoutNumber'])

    # Nested (year, workoutNumber) factors as strings, as needed by FactorRange.
    source = {
        'x': list(zip(df.year.astype(str), df.workoutNumber.astype(str)))
    }
    # Expose every column to the glyph and the hover tooltips.
    source.update({col: df[col].values for col in df.columns})

    # Cycle through the palette by year so the bars of one year share a colour.
    palette = Category10_7
    source['color'] = [palette[year % len(palette)] for year in df.year]

    p = figure(
        title=f'Rank Correlation—Athlete {var.title()} & Workout Placement ({K})',
        y_axis_label='Spearman Rank Correlation',
        height=350,
        width=1600,
        x_range=FactorRange(
            *source['x'],
            group_padding=2
        ),
        tooltips=[
            ('year', '@year'),
            ('workout', '@workoutName'),
            ('correlation', '@c_'+var),
            ('p-value', '@p_'+var),
            ('N', '@N_'+var)
        ]
    )
    p.vbar(
        x='x',
        top='c_'+var,
        width='p_alpha',
        fill_color='color',
        fill_alpha='p_alpha',
        line_color='gray',
        line_alpha='p_alpha',
        source=ColumnDataSource(source)
    )

    # Horizontal reference line at zero correlation, drawn beneath the bars.
    span = Span(
        dimension='width',
        location=0,
        line_color='gray',
        line_width=2,
        line_alpha=0.75
    )
    p.add_layout(span)
    span.level = 'underlay'

    # Make the major tick labels fully transparent.
    p.xaxis.major_label_text_alpha = 0
    p.y_range.flipped = True
    figs.append(p)
figs = column(figs)
show(figs)

### Rank Correlations for Entire Events

In [None]:
# Load the competition statistics table.
ec = pd.read_csv(join('..', 'data', 'pro', 'competition_statistics.csv'))
# Keep rows with divisionNumber < 3 and more than 5 recorded values of `var`.
# .copy() makes the filtered result an independent frame, so the column
# assignments below do not raise pandas' SettingWithCopyWarning.
ec = ec[(ec.divisionNumber < 3) & (ec['N_'+var] > 5)].copy()
# Bar opacity/width: 1 at p = 0, falling linearly to 0.5 at p = 1.
ec['p_alpha'] = 1 - ec['p_'+var]/2
ec['competitionDivisionColor'] = ec.competitionDivision.map(competitionDivision2color)

In [None]:
# Bar chart of the whole-event rank correlations, grouped by year with one bar
# per competition/division. Bar width and opacity both come from p_alpha.
df = ec.sort_values(['year', 'competitionDivision'])

# Nested (year, competitionDivision) factors for the categorical x axis.
source = {'x': list(zip(df.year.astype(str), df.competitionDivision))}
# Expose every column to the glyph, the legend and the hover tooltips.
source.update({col: df[col].values for col in df.columns})

p = figure(
    # Title spelling corrected: 'Athlete' (was 'Althete').
    title=f'Rank Correlation—Athlete {var.title()} & Event Placement',
    y_axis_label='Spearman Rank Correlation',
    height=400,
    width=1600,
    x_range=FactorRange(
        *source['x'],
        group_padding=3
    ),
    tooltips=[
        ('year', '@year'),
        ('division', '@divisionName'),
        ('competition', '@competitionType'),
        ('correlation', '@c_'+var),
        ('p-value', '@p_'+var),
        ('N', '@N_'+var)
    ]
)
p.vbar(
    x='x',
    top='c_'+var,
    width='p_alpha',
    fill_color='competitionDivisionColor',
    fill_alpha='p_alpha',
    line_color='gray',
    line_alpha='p_alpha',
    legend_group='competitionDivision',
    source=ColumnDataSource(source)
)

# Horizontal reference line at zero correlation, drawn beneath the bars.
span = Span(
    dimension='width',
    location=0,
    line_color='gray',
    line_width=2,
    line_alpha=0.75
)
p.add_layout(span)
span.level = 'underlay'

# Make the major tick labels fully transparent.
p.xaxis.major_label_text_alpha = 0
p.y_range.flipped = True
show(p)