In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import csv
from powerlifting import models
from pydantic import ValidationError
from rich import pretty, print
import json

pretty.install()

# HI THERE COWBOY

In [2]:
# practicing scanning through directories to access target files
target_files = {"meet.csv", "entries.csv"}
folders = [x for x in Path('../data').iterdir() if x.is_dir()][:2]

file_paths = []
for folder in folders:
    for datafile in folder.iterdir():
        if datafile.name in target_files:
            file_paths.append(datafile.as_posix())

print(file_paths)

In [3]:
folders = [x for x in Path('../data').iterdir() if x.is_dir()]
columns = set()
not_found = 0
for folder in folders:
    fpath = folder.joinpath("entries.csv")
    try:
        with open(fpath, "r") as f:
            reader = csv.reader(f)
            for first_line in reader:
                for field in first_line:
                    columns.add(field)
                break
    except FileNotFoundError as e:
        not_found += 1
        continue

print(f"Folders without entries: {not_found}")

In [4]:
len(columns)

In [5]:
sorted(list(columns))

In [6]:
columns[:4]

TypeError: 'set' object is not subscriptable

In [None]:
folders[:2]

In [None]:
# getting parts of file paths, not sure if needed...
for fp in file_paths:
    parts = fp.split('/')
    print(parts)

In [None]:
practice_meet = "../data/2412/meet.csv"

meet = pd.read_csv(practice_meet)
meet = meet.to_dict(orient='records')[0]
meet

In [7]:
meet_info = models.Meet.New(meet)
meet_info

NameError: name 'meet' is not defined

In [8]:
practice_entries = "../data/2412/entries.csv"

entries = pd.read_csv(practice_entries)
entries = entries.replace(np.nan, None).to_dict(orient='records')
entries[0]

In [9]:
temp = {
    'Place': '1',
    'Name': 'Allison Wilson #1',
    'State': 'FL',
    'WeightClassKg': 'hi',
    'BodyweightKg': 60.7,
    'BirthDate': None,
    'Age': 17,
    'Squat1Kg': 110.0,
    'Squat2Kg': -115.0,
    'Squat3Kg': 115.0,
    'Best3SquatKg': 115.0,
    'Bench1Kg': 75.0,
    'Bench2Kg': 77.5,
    'Bench3Kg': -80.0,
    'Best3BenchKg': 77.5,
    'Deadlift1Kg': 122.5,
    'Deadlift2Kg': -130.0,
    'Deadlift3Kg': -130.0,
    'Best3DeadliftKg': 122.5,
    'TotalKg': 315.0,
    'Deadlift4Kg': None,
    'Event': 'SBD',
    'Equipment': 'Raw',
    'Sex': 'F',
    'Division': 'Juniors 16-17'
}

In [10]:
models.Athlete.New(entries[0])

In [11]:
def good_header(fpath: Path) -> bool:
    target_fields = ["Place","Name","State","WeightClassKg","BodyweightKg","BirthDate","Age","Squat1Kg","Squat2Kg","Squat3Kg","Best3SquatKg","Bench1Kg","Bench2Kg","Bench3Kg","Best3BenchKg","Deadlift1Kg","Deadlift2Kg","Deadlift3Kg","Best3DeadliftKg","TotalKg","Bench4Kg","Deadlift4Kg","Event","Equipment","Sex","Division"]
    with open(fpath.as_posix(), "r") as f:
        reader = csv.reader(f)
        header = next(reader)
    return True if  header == target_fields else False


In [12]:
valid_events = 0
athletes = []
for folder in folders:
    for file in folder.iterdir():
        if file.name == "entries.csv" and good_header(file):
            # read file into model
            valid_events += 1
            with open(file.as_posix(), "r") as f:
                reader = csv.DictReader(f)
                for row in reader:
                    athlete = models.Athlete.New(row)
                    athletes.append(athlete)

print(f"Total athletes collected: {len(athletes)} across {valid_events} valid events out of ({round((valid_events / len(folders)) * 100, 2)}%).")
athletes[:2]

In [13]:
max((athlete.best_bench for athlete in athletes if athlete.best_bench), key=lambda x: x*2)


In [14]:
sorted((a for a in athletes if a.best_bench), key=lambda x: x.best_bench, reverse=True)[:2]

In [15]:
final = [a for a in athletes if a.weight_class == 90.0]
len(final)

lst = models.AthleteList(athletes=final)

with open("90kilos_athletes.json", "w") as f:
    f.write(lst.json())

In [7]:
# practicing scanning through directories to access target files
target_files = "meet.csv"
folders = [x for x in Path('../data').iterdir() if x.is_dir()]

file_paths = []
for folder in folders:
    for datafile in folder.iterdir():
        if datafile.name == target_files:
            file_paths.append(datafile.as_posix())

print(file_paths[:4])

In [8]:
meets = []
for fpath in file_paths:
    with open(fpath, mode="r") as f:
        reader = csv.DictReader(f)
        for event in reader:
            meet = models.Meet.New(event)
            meets.append(meet)

meets[:2]

In [9]:
len(meets)