# Powerlifting

In [37]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.offline as py
import plotly as pt
import plotly.express as px
from geopy.exc import GeocoderTimedOut 
from geopy.geocoders import Nominatim 
import plotly.graph_objs as go

In [38]:
#Processing libraries
from sklearn.preprocessing import StandardScaler,MinMaxScaler

# Model libraries
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, ExtraTreesClassifier, VotingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB

# Testing libraries
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold, GridSearchCV, learning_curve
from sklearn.metrics import accuracy_score, log_loss, confusion_matrix, classification_report, roc_curve, auc

# Ignore warnings
import warnings
warnings.filterwarnings('ignore')
sns.set(font_scale=1.8)

ModuleNotFoundError: No module named 'sklearn'

In [None]:
powerlifting = pd.read_csv("Powerlifting/openpowerlifting.csv")
powerlifting.head()

Unnamed: 0,MeetID,Name,Sex,Equipment,Age,Division,BodyweightKg,WeightClassKg,Squat4Kg,BestSquatKg,Bench4Kg,BestBenchKg,Deadlift4Kg,BestDeadliftKg,TotalKg,Place,Wilks
0,0,Angie Belk Terry,F,Wraps,47.0,Mst 45-49,59.6,60.0,,47.63,,20.41,,70.31,138.35,1,155.05
1,0,Dawn Bogart,F,Single-ply,42.0,Mst 40-44,58.51,60.0,,142.88,,95.25,,163.29,401.42,1,456.38
2,0,Dawn Bogart,F,Single-ply,42.0,Open Senior,58.51,60.0,,142.88,,95.25,,163.29,401.42,1,456.38
3,0,Dawn Bogart,F,Raw,42.0,Open Senior,58.51,60.0,,,,95.25,,,95.25,1,108.29
4,0,Destiny Dula,F,Raw,18.0,Teen 18-19,63.68,67.5,,,,31.75,,90.72,122.47,1,130.47


In [None]:
meets = pd.read_csv("Powerlifting/meets.csv")
meets.head()

Unnamed: 0,MeetID,MeetPath,Federation,Date,MeetCountry,MeetState,MeetTown,MeetName
0,0,365strong/1601,365Strong,2016-10-29,USA,NC,Charlotte,2016 Junior & Senior National Powerlifting Cha...
1,1,365strong/1602,365Strong,2016-11-19,USA,MO,Ozark,Thanksgiving Powerlifting Classic
2,2,365strong/1603,365Strong,2016-07-09,USA,NC,Charlotte,Charlotte Europa Games
3,3,365strong/1604,365Strong,2016-06-11,USA,SC,Rock Hill,Carolina Cup Push Pull Challenge
4,4,365strong/1605,365Strong,2016-04-10,USA,SC,Rock Hill,Eastern USA Challenge


In [None]:
openpowerlifting = pd.merge(left=powerlifting, right=meets, how='inner', on='MeetID')
openpowerlifting.head()

Unnamed: 0,MeetID,Name,Sex,Equipment,Age,Division,BodyweightKg,WeightClassKg,Squat4Kg,BestSquatKg,...,TotalKg,Place,Wilks,MeetPath,Federation,Date,MeetCountry,MeetState,MeetTown,MeetName
0,0,Angie Belk Terry,F,Wraps,47.0,Mst 45-49,59.6,60.0,,47.63,...,138.35,1,155.05,365strong/1601,365Strong,2016-10-29,USA,NC,Charlotte,2016 Junior & Senior National Powerlifting Cha...
1,0,Dawn Bogart,F,Single-ply,42.0,Mst 40-44,58.51,60.0,,142.88,...,401.42,1,456.38,365strong/1601,365Strong,2016-10-29,USA,NC,Charlotte,2016 Junior & Senior National Powerlifting Cha...
2,0,Dawn Bogart,F,Single-ply,42.0,Open Senior,58.51,60.0,,142.88,...,401.42,1,456.38,365strong/1601,365Strong,2016-10-29,USA,NC,Charlotte,2016 Junior & Senior National Powerlifting Cha...
3,0,Dawn Bogart,F,Raw,42.0,Open Senior,58.51,60.0,,,...,95.25,1,108.29,365strong/1601,365Strong,2016-10-29,USA,NC,Charlotte,2016 Junior & Senior National Powerlifting Cha...
4,0,Destiny Dula,F,Raw,18.0,Teen 18-19,63.68,67.5,,,...,122.47,1,130.47,365strong/1601,365Strong,2016-10-29,USA,NC,Charlotte,2016 Junior & Senior National Powerlifting Cha...


In [None]:
openpowerlifting.describe()

Unnamed: 0,MeetID,Age,BodyweightKg,Squat4Kg,BestSquatKg,Bench4Kg,BestBenchKg,Deadlift4Kg,BestDeadliftKg,TotalKg,Wilks
count,386414.0,147147.0,384012.0,1243.0,298071.0,1962.0,356364.0,2800.0,317847.0,363237.0,362194.0
mean,5143.015804,31.668237,86.934912,107.036404,176.569941,45.722905,118.347509,113.597193,195.040633,424.000249,301.080601
std,2552.099838,12.900342,23.140843,166.97662,69.222785,151.668221,54.84885,170.201657,61.580675,196.355147,116.360396
min,0.0,5.0,15.88,-440.5,-477.5,-360.0,-522.5,-461.0,-410.0,11.0,13.73
25%,2979.0,22.0,70.3,87.5,127.5,-90.0,79.38,110.0,147.5,272.16,237.38
50%,5960.0,28.0,83.2,145.0,174.63,90.25,115.0,157.5,195.0,424.11,319.66
75%,7175.0,39.0,100.0,212.5,217.72,167.5,150.0,219.99,238.14,565.0,379.29
max,8481.0,95.0,242.4,450.0,573.79,378.75,488.5,418.0,460.4,1365.31,779.38


In [None]:
openpowerlifting.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 386414 entries, 0 to 386413
Data columns (total 24 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   MeetID          386414 non-null  int64  
 1   Name            386414 non-null  object 
 2   Sex             386414 non-null  object 
 3   Equipment       386414 non-null  object 
 4   Age             147147 non-null  float64
 5   Division        370571 non-null  object 
 6   BodyweightKg    384012 non-null  float64
 7   WeightClassKg   382602 non-null  object 
 8   Squat4Kg        1243 non-null    float64
 9   BestSquatKg     298071 non-null  float64
 10  Bench4Kg        1962 non-null    float64
 11  BestBenchKg     356364 non-null  float64
 12  Deadlift4Kg     2800 non-null    float64
 13  BestDeadliftKg  317847 non-null  float64
 14  TotalKg         363237 non-null  float64
 15  Place           385322 non-null  object 
 16  Wilks           362194 non-null  float64
 17  MeetPath  

In [None]:
openpowerlifting.isnull().sum()
#openpowerlifting.isna().any()

MeetID                 0
Name                   0
Sex                    0
Equipment              0
Age               239267
Division           15843
BodyweightKg        2402
WeightClassKg       3812
Squat4Kg          385171
BestSquatKg        88343
Bench4Kg          384452
BestBenchKg        30050
Deadlift4Kg       383614
BestDeadliftKg     68567
TotalKg            23177
Place               1092
Wilks              24220
MeetPath               0
Federation             0
Date                   0
MeetCountry            0
MeetState          72143
MeetTown           94000
MeetName               0
dtype: int64

## Data cleaning

In [43]:
#arrotondamento dell'eta
openpowerlifting['Age']=openpowerlifting['Age'].apply(np.floor)

## Data visualization

#### Male vs Female distribution in the dataset

In [None]:
labels = ['Male','Female']
colors = ['#1e90ff', '#E1396C']
gender = openpowerlifting['Sex'].value_counts()
values = list(gender.values)

trace = go.Pie(labels=labels, values=values,hoverinfo='label+percent',marker=dict(colors=colors, 
                           line=dict(color='#000000', width=2)))

py.iplot([trace], filename='gender_chart')

#### Age distribution of powerlifters

In [44]:
age = openpowerlifting['Age'].value_counts()
x = age.index
y = age.values

layout = go.Layout(
    title='Age distribution of Powerlifters',
    xaxis=dict(
        title='Age',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f'
        )
    ),
    yaxis=dict(
        title='Number of Powerlifters',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f'
        )
    )
)

data = [go.Bar(
            x=x,
            y=y
    )]
py.iplot(go.Figure(data=data, layout=layout))

#### Distribution of Bodyweight of Powerlifters

In [46]:
val = openpowerlifting['BodyweightKg'].value_counts()
trace1 = go.Scatter(
    x = val.index,
    y = val.values,
    mode='markers',
    marker=dict(
        size=16,
        color = val.values, #set color equal to a variable
        colorscale='Viridis',
        showscale=True
    )
)

layout = go.Layout(
    title='Weight distribution of Powerlifters',
    xaxis=dict(
        title='Weight in Kg',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f'
        )
    ),
    yaxis=dict(
        title='Number of Powerlifters',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f'
        )
    )
)

data = [trace1]

py.iplot(go.Figure(data=data, layout=layout))

In [47]:
from plotly import tools
ag = openpowerlifting.groupby(['Age']).mean()
deadlift = go.Scatter(
    x = ag.index,
    y = ag['BestDeadliftKg'],
    mode='markers',
    name='Deadlift'
)
Squat = go.Scatter(
    x = ag.index,
    y = ag['BestSquatKg'],
    mode='markers',
    name='Squat'

)
Bench = go.Scatter(
    x = ag.index,
    y = ag['BestBenchKg'],
    mode='markers',
    name='Bench'
)

Total = go.Scatter(
    x = ag.index,
    y = ag['TotalKg'],
    mode='markers',
    name='Total'
 
)

fig = tools.make_subplots(rows=2, cols=2, subplot_titles=('Max Deadlift', 'Max Squat',
                                                          'Max Benchpress','Total Kgs'))

fig.append_trace(deadlift, 1, 1)
fig.append_trace(Squat, 1, 2)
fig.append_trace(Bench, 2, 1)
fig.append_trace(Total, 2, 2)


fig['layout'].update(height=800, width=1000, title='Age of Powerlifters' +
                                                  ' with respect to parameters')

py.iplot(fig, filename='sub')


plotly.tools.make_subplots is deprecated, please use plotly.subplots.make_subplots instead

