## ETL on `OpenPowerlifting` Data via `duckdb`

In [1]:
from IPython.display import display, Markdown
import polars as pl
from datetime import datetime as dt

# Read project configs.
# The parent of the current working directory is appended to sys.path so that
# the `steps` package can be imported (presumably it lives one level above
# this notebook's folder — confirm against the repo layout).
import sys
from pathlib import Path

sys.path.append(str(Path().resolve().parent))
from steps import conf

# HTTPS URL of the parquet file, assembled from the bucket name and object
# key held in the project config (region is fixed to ap-southeast-2).
s3_file_path = "https://{bucket}.s3.ap-southeast-2.amazonaws.com/{key}".format(
    bucket=conf.bucket_name,
    key=conf.parquet_file,
)

In [2]:
# Load the parquet file at `s3_file_path` into a polars DataFrame.
df = pl.read_parquet(source=s3_file_path)

In [3]:
import duckdb
import pandas as pd

# No need to import duckdb_engine explicitly:
# jupysql auto-detects the required driver from the connection string.

# The jupysql Jupyter extension (loaded in the next cell) enables SQL cells.

In [4]:
load_ext sql
%load_ext sql
%config SqlMagic.autopolars = True
%config SqlMagic.displaycon = False
%sql duckdb:// --alias duckdb-sqlalchemy
%sql duckdb:///:default:

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [5]:
%%sql
CREATE SCHEMA IF NOT EXISTS landing;
-- Land the raw OpenPowerlifting parquet file as-is in the landing schema.
-- CREATE OR REPLACE keeps this cell re-runnable: a plain CREATE TABLE fails
-- with "table already exists" the second time the cell is executed.
-- NOTE(review): the URL is hard-coded here; it looks like it should match
-- the path built from the project config earlier in the notebook — confirm.
CREATE OR REPLACE TABLE landing.openpowerlifting AS (
    SELECT
        *
    FROM read_parquet('https://powerlifting-ml-progress.s3.ap-southeast-2.amazonaws.com/openpowerlifting-latest.parquet')
);

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [None]:
%sql select * from landing.openpowerlifting limit 10;

Unnamed: 0,Name,Sex,Event,Equipment,Age,AgeClass,BirthYearClass,Division,BodyweightKg,WeightClassKg,...,Tested,Country,State,Federation,ParentFederation,Date,MeetCountry,MeetState,MeetTown,MeetName
0,Alona Vladi,F,SBD,Raw,33.0,24-34,24-39,O,58.3,60,...,Yes,Russia,,GFP,,2019-05-11,Russia,,Bryansk,Open Tournament
1,Galina Solovyanova,F,SBD,Raw,43.0,40-44,40-49,M1,73.1,75,...,Yes,Russia,,GFP,,2019-05-11,Russia,,Bryansk,Open Tournament
2,Daniil Voronin,M,SBD,Raw,15.5,16-17,14-18,T,67.4,75,...,Yes,Russia,,GFP,,2019-05-11,Russia,,Bryansk,Open Tournament
3,Aleksey Krasov,M,SBD,Raw,35.0,35-39,24-39,O,66.65,75,...,Yes,Russia,,GFP,,2019-05-11,Russia,,Bryansk,Open Tournament
4,Margarita Pleschenkova,M,SBD,Raw,26.5,24-34,24-39,O,72.45,75,...,Yes,Russia,,GFP,,2019-05-11,Russia,,Bryansk,Open Tournament
5,Sergey Timoshenko,M,SBD,Raw,15.5,16-17,14-18,T,78.8,85,...,Yes,Russia,,GFP,,2019-05-11,Russia,,Bryansk,Open Tournament
6,Vladimir Karavaev,M,SBD,Raw,57.5,55-59,50-59,M2,79.65,85,...,Yes,Russia,,GFP,,2019-05-11,Russia,,Bryansk,Open Tournament
7,Tatyana Altunina,F,SBD,Raw,26.0,24-34,24-39,O,96.5,105,...,Yes,Russia,,GFP,,2019-05-11,Russia,,Bryansk,Open Tournament
8,Ruslan Gasanov,M,SBD,Raw,31.5,24-34,24-39,O,102.55,105,...,,Russia,,GFP,,2019-05-11,Russia,,Bryansk,Open Tournament
9,Ekaterina Glukhova,F,SBD,Wraps,25.5,24-34,24-39,O,54.8,55,...,Yes,Russia,,GFP,,2019-05-11,Russia,,Bryansk,Open Tournament
