# INTRODUCTION

## Imports

In [1]:
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

## Configuration constants

In [None]:
# Relative path of the OpenPowerlifting CSV export read by this notebook.
PATH = 'data/openpowerlifting-2020-10-16/openpowerlifting-2020-10-16.csv'
# Number of top entries to show (presumably the per-weight-class `n` of the ranking helpers; TODO confirm).
TOP_TO_SHOW = 10

In [137]:
# Imports
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go


# Functions
def get_weight_classes(federation, sex):
    """
    Get weight classes for a given sex and federation.

    The two lists are shaped for ``pandas.cut``: ``bins`` has one more element
    than ``labels``, starts at 0.0 and ends with 1000.0, which acts as an
    open-ended cap for the heaviest ("+") class.

    :param str federation: federation to take weight classes from. 'IPF' or 'WRPF'.
    :param str sex: sex to take weight classes from. 'M' or 'F'.
    :return list bins: bin edges as floats; empty if the federation/sex pair is unknown.
    :return list labels: class names, one per bin; empty if the federation/sex pair is unknown.
    """
    # Upper bodyweight limit of every capped class. Bins and labels are both
    # derived from this single table so they cannot drift apart (the WRPF
    # women's '67.5' label used to carry a stray trailing space).
    upper_limits = {
        ('IPF', 'M'): [59.0, 66.0, 74.0, 83.0, 93.0, 105.0, 120.0],
        ('IPF', 'F'): [47.0, 52.0, 57.0, 63.0, 72.0, 84.0],
        ('WRPF', 'M'): [56.0, 60.0, 67.5, 75.0, 82.5, 90.0, 100.0, 110.0, 125.0, 140.0],
        ('WRPF', 'F'): [44.0, 48.0, 52.0, 56.0, 60.0, 67.5, 75.0, 82.5, 90.0],
    }

    limits = upper_limits.get((federation, sex))
    if limits is None:
        # Unknown combination: keep the historical "two empty lists" result.
        return [], []

    bins = [0.0] + limits + [1000.0]
    # '%g' renders 59.0 as '59' and 67.5 as '67.5'; the last class is "<top limit>+".
    labels = ['%g' % limit for limit in limits] + ['%g+' % limits[-1]]

    return bins, labels


def download_data():
    """
    Placeholder: the name suggests a dataset download step, but nothing is fetched.

    :return int: always 1.
    """
    return 1


def load_data(path):
    """
    Read the meet results CSV and keep the complete full-power ('SBD') entries.

    The header row of the file is replaced by the short names below, assigned
    by position, so the result relies on the file's column order.

    :param str path: location of the CSV file, handed to ``pandas.read_csv``.
    :return pandas.DataFrame data: rows whose Event is 'SBD', whose Sex is not 'Mx' and
        that have a Squat, Bench, Deadlift and Total value. Only the columns selected
        below are loaded and the original row index is kept.
    """
    # Short names for every column of the file, in file order. Comments give the
    # original header where the short name differs from it.
    column_names = [
        'Name', 'Sex', 'Event', 'Equipment', 'Age', 'AgeClass', 'BirthYearClass', 'Division',
        'Bodyweight',
        'WeightClass',                                                  # 'WeightClassKg'
        'Squat1', 'Squat2', 'Squat3', 'Squat4', 'Squat',                # 'Squat<N>Kg', 'Best3SquatKg'
        'Bench1', 'Bench2', 'Bench3', 'Bench4', 'Bench',                # 'Bench<N>Kg', 'Best3BenchKg'
        'Deadlift1', 'Deadlift2', 'Deadlift3', 'Deadlift4', 'Deadlift',  # 'Deadlift<N>Kg', 'Best3DeadliftKg'
        'Total',                                                        # 'TotalKg'
        'Place', 'Dots', 'Wilks', 'Glossbrenner', 'Goodlift', 'Tested',
        'Country', 'Federation', 'ParentFederation', 'Date',
        'MeetCountry', 'MeetState', 'MeetTown',
        'Meet',                                                         # 'MeetName'
    ]

    # Columns actually loaded, grouped by the dtype they are read with.
    text_columns = ['Name', 'Country', 'Sex', 'WeightClass', 'Federation',
                    'ParentFederation', 'Meet', 'Event', 'Equipment']
    float_columns = ['Age', 'Bodyweight',
                     'Squat1', 'Squat2', 'Squat3', 'Squat',
                     'Bench1', 'Bench2', 'Bench3', 'Bench',
                     'Deadlift1', 'Deadlift2', 'Deadlift3', 'Deadlift',
                     'Total', 'Wilks']
    date_columns = ['Date']

    column_types = dict.fromkeys(text_columns, 'str')
    column_types.update(dict.fromkeys(float_columns, 'float'))

    raw = pd.read_csv(path,
                      header=0,
                      names=column_names,
                      usecols=text_columns + float_columns + date_columns,
                      dtype=column_types,
                      parse_dates=date_columns)

    # Universal cleaning: full-power events only, and no 'Mx' entries.
    # Equipment is deliberately not filtered at this stage.
    is_full_power = raw['Event'] == 'SBD'
    is_not_mx = raw['Sex'] != 'Mx'
    entries = raw.loc[is_full_power & is_not_mx]

    # An entry is only usable when all three lifts and the total are present.
    return entries.dropna(subset=['Squat', 'Bench', 'Deadlift', 'Total'])


def clean_data(data, federation, equipment):
    """
    Apply the user-selected filters to a copy of the meet data.

    WeightClass is recomputed from Bodyweight with the federation's classes
    (men and women separately), rows are optionally restricted to raw lifting,
    and the result is ordered from highest to lowest Wilks.

    :param pandas.DataFrame data: raw data from all the meets; it is copied, not modified.
        Needs the columns 'Sex', 'Bodyweight', 'WeightClass', 'Equipment' and 'Wilks'.
    :param str federation: federation whose weight classes are applied, as accepted by get_weight_classes.
    :param str equipment: 'Raw' keeps only rows whose Equipment is 'Raw'; any other value keeps every row.
    :return pandas.DataFrame df: cleaned copy sorted by Wilks, descending.
    """
    cleaned = data.copy()

    # Re-bin lifters into the federation's classes, one sex at a time.
    for sex in ('M', 'F'):
        class_edges, class_names = get_weight_classes(federation=federation, sex=sex)
        of_sex = cleaned['Sex'] == sex
        cleaned.loc[of_sex, 'WeightClass'] = pd.cut(cleaned.loc[of_sex, 'Bodyweight'],
                                                    bins=class_edges,
                                                    labels=class_names)

    # Only the 'Raw' selection narrows the equipment.
    if equipment == 'Raw':
        cleaned = cleaned.loc[cleaned['Equipment'] == 'Raw']

    return cleaned.sort_values(by='Wilks', ascending=False)


def get_best_lifts_per_weightclass(data, lift, sex, n=10):
    """
    Get n best lifts for weight class and sex.

    Each lifter contributes at most one row per weight class: the complete row
    of the entry where their ``lift`` value was highest. Rows without a
    WeightClass or a Name are ignored.

    :param pandas.DataFrame data: meet data with at least the columns 'Sex', 'WeightClass', 'Name' and ``lift``.
    :param str lift: lift to track (name of the column to rank by).
    :param str sex: sex to filter. 'M' or 'F'.
    :param int n: number of lifters to keep of each weight class.
    :return pandas.DataFrame df: data from n best lifts for weight class and sex,
        with 'WeightClass' and 'Name' as the first two columns.
    """
    lifter_keys = ['WeightClass', 'Name']
    other_columns = [column for column in data.columns if column not in lifter_keys]

    ranked = (
        data[data['Sex'] == sex]
        # groupby() silently dropped rows with a missing key; keep doing so explicitly.
        .dropna(subset=lifter_keys)
        .sort_values(by=lifter_keys + [lift], ascending=False)
    )

    # Keep each lifter's top row intact. GroupBy.first() takes the first
    # non-null value of every column independently, so a missing Bodyweight/Age
    # on the best entry used to be filled in from a different meet.
    best_per_lifter = ranked.drop_duplicates(subset=lifter_keys, keep='first')

    # Reproduce the layout groupby(...).first() returned: rows ordered by the
    # keys, a fresh 0..k-1 index and the key columns first.
    best_per_lifter = (
        best_per_lifter
        .sort_values(by=lifter_keys)
        .reset_index(drop=True)
    )
    best_per_lifter = best_per_lifter[lifter_keys + other_columns]

    # Rank lifters inside each weight class and keep the top n.
    df = best_per_lifter\
        .sort_values(by=['WeightClass', lift], ascending=False)\
        .groupby('WeightClass', as_index=False).head(n)

    return df


def get_lift_plot_per_weightclass(fig, data, lift, weight_classes, colors, row, col, showlegend=False):
    """
    Add one scatter trace per weight class to a subplot of ``fig``.

    Every trace plots Bodyweight (x) against ``lift`` (y) for the rows of one
    weight class, with the lifter's name and the date in the hover box.

    :param fig: figure with subplots; traces are added to it via ``fig.add_trace``.
    :param pandas.DataFrame data: rows to plot; needs the columns 'WeightClass', 'Bodyweight', 'Date', 'Name' and ``lift``.
    :param str lift: column plotted on the y axis, also used as its hover label.
    :param list weight_classes: weight class labels; one trace is added per label, in this order.
    :param list colors: marker colors; reused from the start when there are more classes than colors.
    :param int row: subplot row that receives the traces.
    :param int col: subplot column that receives the traces.
    :param bool showlegend: whether these traces get their own legend entries.
    :return: the same ``fig`` that was passed in.
    """
    for i, wc in enumerate(weight_classes):
        df = data[data['WeightClass'] == wc]
        fig.add_trace(
            go.Scatter(x=df['Bodyweight'],
                       y=df[lift],
                       customdata=df['Date'],
                       mode='markers',
                       name=wc,
                       # Wrap around the palette instead of failing with an
                       # IndexError when classes outnumber colors.
                       marker=dict(color=colors[i % len(colors)]),
                       hovertext=df['Name'],
                       hovertemplate='<b>%{hovertext}</b><br>Bodyweight: %{x}kg <br>' + lift + ': %{y}kg<br>Date: %{customdata|%Y-%m-%d}<extra></extra>',
                       # Group by weight class so a legend click toggles that
                       # class in every subplot. A single shared group made any
                       # click toggle all traces at once.
                       legendgroup=wc,
                       showlegend=showlegend
                       ),
            row=row,
            col=col
        )
    return fig


def plot_best_lifts_per_weightclass(data, sex, federation, n):
    """
    Build a one-row figure with the n best squats, benches, deadlifts and totals per weight class.

    :param pandas.DataFrame data: meet data the best lifts are taken from.
    :param str sex: sex to filter. 'M' or 'F'.
    :param str federation: federation to take weight classes from. 'IPF' or 'WRPF'.
    :param int n: number of lifters to keep of each weight class.
    :return: figure with four subplots (Squat, Bench, Deadlift, Total), one trace per weight class in each.
    """
    lifts = ('Squat', 'Bench', 'Deadlift', 'Total')

    # Labels decide which traces are drawn; the palette colors them.
    _, weight_classes = get_weight_classes(federation=federation, sex=sex)
    colors = px.colors.qualitative.Dark24

    # Rank the lifters once per lift before any drawing happens.
    best_by_lift = {}
    for lift in lifts:
        best_by_lift[lift] = get_best_lifts_per_weightclass(data, lift=lift, sex=sex, n=n)

    fig = make_subplots(rows=1,
                        cols=len(lifts),
                        subplot_titles=['<b>' + lift + '</b>' for lift in lifts])

    # One subplot per lift, left to right; only the first one feeds the legend.
    for position, lift in enumerate(lifts, start=1):
        fig = get_lift_plot_per_weightclass(fig,
                                            data=best_by_lift[lift],
                                            lift=lift,
                                            weight_classes=weight_classes,
                                            colors=colors,
                                            row=1,
                                            col=position,
                                            showlegend=(position == 1))

    return fig


In [136]:
# Best men's lifts per IPF weight class. Uses the configured TOP_TO_SHOW instead
# of repeating the number. NOTE: `data` is created by the load_data cell further
# down, so on a fresh kernel that cell has to run before this one.
plot_best_lifts_per_weightclass(data, 'M', 'IPF', TOP_TO_SHOW)

In [138]:
# Load the dataset from the configured PATH rather than a second copy of the literal.
data = load_data(PATH)
#data = clean_data(data, 'IPF', 'Raw')
#data.head()

In [140]:
# Number of loaded entries per equipment category.
data.loc[:, 'Equipment'].value_counts()

Single-ply    785573
Raw           401484
Wraps         130417
Multi-ply      43317
Unlimited         36
Name: Equipment, dtype: int64

In [141]:
# Inspect the few entries recorded with 'Unlimited' equipment.
data.loc[data['Equipment'].eq('Unlimited')]

Unnamed: 0,Name,Sex,Event,Equipment,Age,Bodyweight,WeightClass,Squat1,Squat2,Squat3,...,Deadlift2,Deadlift3,Deadlift,Total,Wilks,Country,Federation,ParentFederation,Date,Meet
264690,Chris Abney,M,SBD,Unlimited,46.0,102.97,110,,,,...,,,229.06,601.01,361.67,USA,SPF,,2019-07-13,Arkansas Strength Expo
270932,Kris Treadway,M,SBD,Unlimited,,97.89,100,,,,...,,,226.8,621.42,381.51,,SPF,,2019-03-30,Little Rock Classic
675124,Mariya Zhuravleva,F,SBD,Unlimited,24.0,67.1,67.5,135.0,147.5,-152.5,...,140.0,-155.0,140.0,397.5,407.43,Russia,WPC-RUS,WPC,2018-04-07,Siberian Bear
695225,Aleksandr Boytsevskiy,M,SBD,Unlimited,31.0,104.8,110,200.0,230.0,250.0,...,230.0,-250.0,230.0,710.0,424.55,,WPC-RUS,WPC,2018-12-09,CIS New Years Cup
1034520,Artem Bykhovets,M,SBD,Unlimited,35.0,88.6,90,300.0,320.0,340.0,...,260.0,280.0,280.0,900.0,579.23,Russia,NAP,IPA,2018-09-28,Golden Tiger Classic
1048259,Rob Forell,M,SBD,Unlimited,39.0,96.98,100,-328.85,328.85,365.14,...,-290.3,290.3,290.3,1009.24,622.08,USA,XPC,XPC,2020-03-07,Arnold
1048265,Bryan Beanland,M,SBD,Unlimited,39.0,99.34,100,385.55,-408.23,408.23,...,-317.51,-317.51,306.17,1031.92,629.69,USA,XPC,XPC,2020-03-07,Arnold
1048266,Trinton Golden,M,SBD,Unlimited,34.0,109.5,110,-453.59,453.59,,...,317.51,-353.8,317.51,1088.62,641.55,USA,XPC,XPC,2020-03-07,Arnold
1424904,Logan Shady,M,SBD,Unlimited,22.0,132.45,140,,,,...,,,215.46,714.41,402.75,,MM,,2020-08-08,National Championships
1424906,Chance Beatty,M,SBD,Unlimited,29.0,123.38,125,,,,...,,,238.14,807.39,461.33,USA,MM,,2020-08-08,National Championships
