In [1]:
import polars as pl
# Imports for downloading the latest data source
import zipfile
from io import BytesIO
from urllib.request import urlopen
import shutil

### Reading files

In [15]:
import sys
from pathlib import Path

# Put the parent of the current working directory on the import path so that
# `steps` can be imported (presumably the package lives there — confirm).
# The membership check keeps re-runs of this cell from adding duplicate entries.
parent_dir = str(Path().resolve().parent)
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

from steps import conf


In [16]:
# HTTPS URL of the parquet object; the bucket name and object key come from `conf`.
s3_file_path = f"https://{conf.bucket_name}.s3.ap-southeast-2.amazonaws.com/{conf.parquet_file}"

# Read the file and drop exact duplicate rows in one expression.
df = pl.read_parquet(s3_file_path).unique()

In [32]:
def calculate_unique_fields(df):
    """Map every column name of ``df`` to a list of that column's distinct values.

    Args:
        df: A frame exposing ``.columns`` and column access via ``df[name]``,
            where each column supports ``.unique().to_list()``.

    Returns:
        dict: ``{column_name: [distinct values, ...]}`` with one entry per column.
    """
    return {name: df[name].unique().to_list() for name in df.columns}

In [33]:
# `df` was already de-duplicated when it was loaded, and duplicate rows cannot
# change the set of distinct values in any single column, so a second
# whole-frame `.unique()` pass is unnecessary here.
filter_mapping = calculate_unique_fields(df)

In [42]:
from datetime import datetime

# Index directly so a missing "Date" entry raises a clear KeyError instead of
# `sorted(None)` failing with a TypeError. Nulls are skipped because they can
# neither be ordered against strings nor parsed by strptime.
date_strings = sorted(date for date in filter_mapping["Date"] if date is not None)

# Parse the sorted YYYY-MM-DD strings into datetime objects (order is preserved).
all_dates = [datetime.strptime(date, "%Y-%m-%d") for date in date_strings]

In [47]:
# Distinct values of the "Name" column, shown as a one-column frame.
df.select(pl.col("Name")).unique()

Name
str
"""Alona Vladi"""
"""Galina Solovya…"
"""Daniil Voronin…"
"""Aleksey Krasov…"
"""Margarita Ples…"
"""Sergey Timoshe…"
"""Vladimir Karav…"
"""Tatyana Altuni…"
"""Ruslan Gasanov…"
"""Ekaterina Gluk…"


In [52]:
# Rows for "Nam Tonthat" dated after 2022-05-11. "Date" is a string column, so
# this is a lexicographic comparison; it matches calendar order only because
# the values are formatted as YYYY-MM-DD.
is_recent = pl.col("Date") > "2022-05-11"
is_target_name = pl.col("Name") == "Nam Tonthat"
df.filter(is_recent & is_target_name)

Name,Sex,Event,Equipment,Age,AgeClass,BirthYearClass,Division,BodyweightKg,WeightClassKg,Squat1Kg,Squat2Kg,Squat3Kg,Squat4Kg,Best3SquatKg,Bench1Kg,Bench2Kg,Bench3Kg,Bench4Kg,Best3BenchKg,Deadlift1Kg,Deadlift2Kg,Deadlift3Kg,Deadlift4Kg,Best3DeadliftKg,TotalKg,Place,Dots,Wilks,Glossbrenner,Goodlift,Tested,Country,State,Federation,ParentFederation,Date,MeetCountry,MeetState,MeetTown,MeetName
str,str,str,str,f64,str,str,str,f64,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,f64,f64,f64,f64,str,str,str,str,str,str,str,str,str,str
"""Nam Tonthat""","""M""","""SBD""","""Raw""",28.5,"""24-34""","""24-39""","""MR-O""",69.9,"""75""",-100.0,125.0,140.0,,140.0,90.0,100.0,105.0,,105.0,170.0,185.0,200.0,,200.0,445.0,"""1""",334.6,333.84,323.55,67.3,"""Yes""","""Australia""",,"""USAPL""",,"""2022-09-17""","""Australia""",,,"""Ethos Strength…"
"""Nam Tonthat""","""M""","""SBD""","""Raw""",28.5,"""24-34""","""24-39""","""MR-O""",66.6,"""67.5""",130.0,140.0,147.5,,147.5,102.5,105.0,107.5,,107.5,190.0,202.5,207.5,,207.5,462.5,"""4""",359.97,360.48,350.13,71.75,"""Yes""","""Australia""",,"""USAPL""",,"""2022-12-03""","""Australia""","""VIC""",,"""Australia Nati…"


In [17]:
# Filter inside polars before converting to a Python list, instead of
# materialising every distinct name just to keep a single one.
# The result is ["Taylor Atwood"] when the name occurs in `df`, otherwise [].
users = df.filter(pl.col("Name") == "Taylor Atwood")["Name"].unique().to_list()

In [24]:
# Columns kept for the per-user view.
progress_columns = ["Date", "Name", "TotalKg", "Event", "Best3SquatKg", "Best3BenchKg", "Best3DeadliftKg", "Wilks"]

selected = df.select(pl.col(progress_columns))
for_users = selected.filter(pl.col("Name").is_in(users))

# drop_nulls() without arguments removes any row that has a null in one of the
# selected columns.
user_df = for_users.sort(by="Date").drop_nulls()

user_df

Date,Name,TotalKg,Event,Best3SquatKg,Best3BenchKg,Best3DeadliftKg,Wilks
str,str,f64,str,f64,f64,f64,f64
"""2013-09-14""","""Taylor Atwood""",635.0,"""SBD""",212.5,172.5,250.0,453.11
"""2014-02-01""","""Taylor Atwood""",645.0,"""SBD""",215.0,170.0,260.0,462.19
"""2014-07-17""","""Taylor Atwood""",687.5,"""SBD""",235.0,180.0,272.5,496.92
"""2014-12-06""","""Taylor Atwood""",700.0,"""SBD""",250.0,185.0,265.0,493.37
"""2015-10-15""","""Taylor Atwood""",695.0,"""SBD""",237.5,185.0,272.5,501.86
"""2016-06-19""","""Taylor Atwood""",722.5,"""SBD""",255.0,190.0,277.5,521.01
"""2016-10-13""","""Taylor Atwood""",725.0,"""SBD""",260.0,187.5,277.5,523.52
"""2017-06-14""","""Taylor Atwood""",733.0,"""SBD""",265.5,185.0,282.5,528.12
"""2017-10-10""","""Taylor Atwood""",750.0,"""SBD""",262.5,192.5,295.0,542.2
"""2018-03-03""","""Taylor Atwood""",782.5,"""SBD""",280.0,192.5,310.0,554.01


In [25]:

# Wilks values alongside Date and Name, converted to pandas.
wilks_df = user_df.select(pl.col(["Date", "Name", "Wilks"])).to_pandas()

# Unpivot the three best-lift columns for graphing: each becomes its own row,
# with the source column name in "Lift" and its value in "Weight".
# NOTE(review): newer polars releases rename `melt` to `unpivot` — confirm the
# pinned version before changing this call.
lift_columns = ["Best3SquatKg", "Best3BenchKg", "Best3DeadliftKg"]
sbd_long = user_df.melt(
    id_vars=["Date", "Name", "Event"],
    value_vars=lift_columns,
    variable_name="Lift",
    value_name="Weight",
)
sbd_df = sbd_long.to_pandas()

In [26]:
# Display the pandas frame built from the Date, Name and Wilks columns.
wilks_df

Unnamed: 0,Date,Name,Wilks
0,2013-09-14,Taylor Atwood,453.11
1,2014-02-01,Taylor Atwood,462.19
2,2014-07-17,Taylor Atwood,496.92
3,2014-12-06,Taylor Atwood,493.37
4,2015-10-15,Taylor Atwood,501.86
5,2016-06-19,Taylor Atwood,521.01
6,2016-10-13,Taylor Atwood,523.52
7,2017-06-14,Taylor Atwood,528.12
8,2017-10-10,Taylor Atwood,542.2
9,2018-03-03,Taylor Atwood,554.01


In [4]:
# Column name -> polars dtype. Text columns are Utf8 and numeric columns are
# Float64; key order is kept exactly as originally written.
schema = {
    # Identity and classification
    "Name": pl.Utf8,
    "Sex": pl.Utf8,
    "Event": pl.Utf8,
    "Equipment": pl.Utf8,
    "Age": pl.Float64,
    "AgeClass": pl.Utf8,
    "BirthYearClass": pl.Utf8,
    "Division": pl.Utf8,
    "BodyweightKg": pl.Float64,
    "WeightClassKg": pl.Utf8,
    # Squat attempts
    "Squat1Kg": pl.Float64,
    "Squat2Kg": pl.Float64,
    "Squat3Kg": pl.Float64,
    "Squat4Kg": pl.Float64,
    "Best3SquatKg": pl.Float64,
    # Bench attempts
    "Bench1Kg": pl.Float64,
    "Bench2Kg": pl.Float64,
    "Bench3Kg": pl.Float64,
    "Bench4Kg": pl.Float64,
    "Best3BenchKg": pl.Float64,
    # Deadlift attempts
    "Deadlift1Kg": pl.Float64,
    "Deadlift2Kg": pl.Float64,
    "Deadlift3Kg": pl.Float64,
    "Deadlift4Kg": pl.Float64,
    "Best3DeadliftKg": pl.Float64,
    # Totals, placing and scores
    "TotalKg": pl.Float64,
    "Place": pl.Utf8,
    "Dots": pl.Float64,
    "Wilks": pl.Float64,
    "Glossbrenner": pl.Float64,
    "Goodlift": pl.Float64,
    # Remaining text columns
    "Tested": pl.Utf8,
    "Country": pl.Utf8,
    "State": pl.Utf8,
    "Federation": pl.Utf8,
    "ParentFederation": pl.Utf8,
    "Date": pl.Utf8,
    "MeetCountry": pl.Utf8,
    "MeetState": pl.Utf8,
    "MeetTown": pl.Utf8,
    "MeetName": pl.Utf8,
}

In [21]:
# Local CSV exports, kept as relative paths.
ipf_csv_path = 'data/openipf-2023-03-18/openipf-2023-03-18-16bd0ed1.csv'
op_csv_path = 'data/openpowerlifting-2023-03-18/openpowerlifting-2023-03-18-16bd0ed1.csv'

# infer_schema_length=None is passed to both reads; per the polars docs this
# lets schema inference scan the whole file rather than only the first rows.
data = pl.read_csv(ipf_csv_path, infer_schema_length=None)
op_data = pl.read_csv(op_csv_path, infer_schema_length=None)

In [22]:
# Persist the frame as parquet in the same directory as the CSV export.
op_parquet_path = 'data/openpowerlifting-2023-03-18/openpowerlifting-2023-03-18-16bd0ed1.parquet'
op_data.write_parquet(op_parquet_path)

In [23]:
# Rebind `op_data` to the frame read from the parquet file.
op_parquet_path = 'data/openpowerlifting-2023-03-18/openpowerlifting-2023-03-18-16bd0ed1.parquet'
op_data = pl.read_parquet(op_parquet_path)

In [39]:
# Column names to select, in output order.
op_cols = [
    "Name", "Sex", "Event",
    "Date", "MeetCountry", "MeetState",
    "Equipment",
    "Best3SquatKg", "Best3BenchKg", "Best3DeadliftKg",
    "TotalKg", "Wilks",
    "Tested", "Federation",
]

In [41]:
# Keep only the rows whose Name is "Nam Tonthat", then narrow to `op_cols`.
name_matches = pl.col('Name') == "Nam Tonthat"
filtered_data = op_data.filter(name_matches).select(op_cols)

In [43]:
# Display the filtered frame (rows for "Nam Tonthat", restricted to `op_cols`).
filtered_data

Name,Sex,Event,Date,MeetCountry,MeetState,Equipment,Best3SquatKg,Best3BenchKg,Best3DeadliftKg,TotalKg,Wilks,Tested,Federation
str,str,str,str,str,str,str,f64,f64,f64,f64,f64,str,str
"""Nam Tonthat""","""M""","""SBD""","""2022-09-17""","""Australia""",,"""Raw""",140.0,105.0,200.0,445.0,333.84,"""Yes""","""USAPL"""
"""Nam Tonthat""","""M""","""SBD""","""2022-12-03""","""Australia""","""VIC""","""Raw""",147.5,107.5,207.5,462.5,360.48,"""Yes""","""USAPL"""
"""Nam Tonthat""","""M""","""SBD""","""2021-04-11""","""Australia""","""VIC""","""Raw""",140.0,103.0,190.0,433.0,334.25,"""Yes""","""PA"""
