In [5]:
import polars as pl

In [100]:
# Load the workout-code tables, one CSV per discipline (read in the order listed).
cycle_df, run_df, swim_df, brick_df = map(
    pl.read_csv,
    (
        "./data/cycling_workout_codes.csv",
        "./data/running_workout_codes.csv",
        "./data/swimming_workout_codes.csv",
        "./data/brick_workout_codes.csv",
    ),
)

In [101]:
import re

def format_workout(input_str: str) -> str:
    """Reformat a one-line workout description into one step per line.

    Steps performed, in order:

    1. Replace the filler " in " and the separators "," and ";" with spaces.
    2. Turn a repeat marker "x" into "×". Only an "x" with no ASCII letter on
       either side is converted, so words such as "max" are left untouched.
    3. Insert line breaks outside of parentheses: after the character that
       follows a zone token ("Z1".."Z5") unless that character is "/",
       after every "rest", and after every parenthesised group. Text inside
       a parenthesised group is never split.
    4. Strip every resulting line and drop the empty ones.

    Args:
        input_str: Raw workout description, e.g.
            ``'10 min Z1 2 x (20 min Z2/5 min Z1) 10 min Z1'``.

    Returns:
        The description with one step per line; every line, including the
        last one, is terminated by a newline. An input that contains nothing
        but whitespace yields an empty string.
    """
    text = input_str.replace(" in ", " ")
    text = text.replace(",", " ").replace(";", " ")

    # A blanket replace of "x" would also rewrite words ("max" -> "ma×");
    # restrict it to an "x" that is not glued to a letter ("2 x (", "4x100").
    text = re.sub(r"(?<![A-Za-z])x(?![A-Za-z])", "×", text)

    # re.split with a capturing pattern keeps the separators: odd indices are
    # the parenthesised groups, even indices the text between them. Working
    # on the segments directly avoids textual placeholders that the zone
    # pattern could otherwise cut in half.
    pieces = []
    for index, segment in enumerate(re.split(r"(\([^)]+\))", text)):
        if index % 2:
            # Parenthesised group: keep verbatim, end the line after it.
            pieces.append(segment + "\n")
            continue
        # The zone pattern includes the following character so that
        # "Z3/120" (zone followed by "/") stays on one line.
        segment = re.sub(r"(Z[1-5][^/])", r"\1\n", segment)
        # Ensure a line break after every rest interval.
        segment = re.sub(r"([0-9]*.?rest)", r"\1\n", segment)
        pieces.append(segment)

    # Normalise spacing: trim each line and discard blank ones.
    stripped = (line.strip() for line in "".join(pieces).split("\n"))
    return "".join(f"{line}\n" for line in stripped if line)


if __name__ == '__main__':
    # Smoke-check the formatter on a handful of representative descriptions.
    sample_descriptions = (
        '10 min Z1 2 x (20 min Z2/5 min Z1) 10 min Z1',
        '5 min in Z1 15 min Z2',
        '15 min Z1, 5 x (2.5 min Z4/5 min Z1), 10 min Z1',
        '5 min Z1;5 min Z2;12 × (1 min Z5/2 min Z1);5 min Z1',
        '250 Z1 400 Z3/120" rest 200 Z3/120" rest 1000 Z1',
    )
    for description in sample_descriptions:
        print(format_workout(description))

10 min Z1
2 × (20 min Z2/5 min Z1)
10 min Z1

5 min Z1
15 min Z2

15 min Z1
5 × (2.5 min Z4/5 min Z1)
10 min Z1

5 min Z1
5 min Z2
12 × (1 min Z5/2 min Z1)
5 min Z1

250 Z1
400 Z3/120" rest
200 Z3/120" rest
1000 Z1



In [103]:
# One shared expression: run format_workout over DESCRIPTION and store the
# result back under the same column name.
formatted_description = (
    pl.col("DESCRIPTION")
    .map_elements(format_workout, return_dtype=str)
    .alias("DESCRIPTION")
)

cycle_df = cycle_df.with_columns([formatted_description])
run_df = run_df.with_columns([formatted_description])
swim_df = swim_df.with_columns([formatted_description])
brick_df = brick_df.with_columns([formatted_description])

# Spot-check one of the reformatted frames.
brick_df.glimpse()

Rows: 6
Columns: 8
$ CODE        <str> 'BR1', 'BR2', 'BR3', 'BR4', 'BR5', 'BR6'
$ DURATION    <i64> 90, 120, 61, 89, 112, 135
$ Z1          <i64> 30, 30, 15, 20, 20, 20
$ Z2          <i64> 60, 90, None, None, None, None
$ Z3          <i64> None, None, 46, 69, 92, 115
$ Z4          <str> None, None, None, None, None, None
$ Z5          <str> None, None, None, None, None, None
$ DESCRIPTION <str> 'Bike 20 min Z1\n2 × (Run 10 min Z2 → Bike 20 min Z2)\n10 min Z1\n', 'Bike 20 min Z1\n3 × (Run 10 min Z2 → Bike 20 min Z2)\n10 min Z1\n', 'Bike 15 min Z1\n2 × (Run 7 min Z3/3 min Z1 → Bike 10 min Z3/3 min Z1)\n', 'Bike 15 min Z1\n3 × (Run 7 min Z3/3 min Z1 → Bike 10 min Z3/3 min Z1)\n5 min Z1\n', 'Bike 15 min Z1\n4 × (Run 7 min Z3/3 min Z1 → Bike 10 min Z3/3 min Z1)\n5 min Z1\n', 'Bike 15 min Z1\n5 × (Run 7 min Z3/3 min Z1 → Bike 10 min Z3/3 min Z1)\n5 min Z1\n'



In [98]:
# for row in swim_df.rows(named=True):
#     if "rest" in row["DESCRIPTION"]:
#         print(row["CODE"], row["DESCRIPTION"], sep="\t")
#         print(row["CLEAN_DESCRIPTION"])

    

In [None]:
# Export each reformatted table to the backend data directory.
# NOTE(review): destinations are absolute, machine-specific paths.
export_targets = (
    (swim_df, "/Users/thomastone/elevated-training/backend/data/swim_workout_codes.csv"),
    (cycle_df, "/Users/thomastone/elevated-training/backend/data/cycle_workout_codes.csv"),
    (run_df, "/Users/thomastone/elevated-training/backend/data/run_workout_codes.csv"),
    (brick_df, "/Users/thomastone/elevated-training/backend/data/brick_workout_codes.csv"),
)
for frame, destination in export_targets:
    with open(destination, "w") as f:
        frame.write_csv(f)