# Prep

In [134]:
import os

import numpy as np
import pandas as pd

In [135]:
# Input (silver) and output (gold) folders.
# The defaults are the original machine-specific locations; set the
# AP_WORKOUTS_SILVER / AP_WORKOUTS_GOLD environment variables to run the
# notebook from another checkout without editing this cell.
SILVER_FOLDER = os.environ.get(
    "AP_WORKOUTS_SILVER",
    r'C:\Users\User\Documents\GitHub\ap_workouts\b2-silver',
)
GOLD_FOLDER = os.environ.get(
    "AP_WORKOUTS_GOLD",
    r'C:\Users\User\Documents\GitHub\ap_workouts\b3-gold',
)

# Functions

## Transform

In [136]:
def extract_day_or_week(value):
    """Return the number at the end of a day/week label.

    Parameters
    ----------
    value : str or missing
        Either the literal "Standalone workout" or a label whose last
        whitespace-separated token is a number.

    Returns
    -------
    int or None
        0 for "Standalone workout", the trailing number when the last token
        is numeric, and None for missing, blank or non-numeric input.
    """
    # Missing cells (NaN/None) have no .split(); report them as unparseable
    # instead of raising.
    if pd.isna(value):
        return None
    if value == "Standalone workout":
        return 0
    tokens = value.split()
    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as "²" that int() cannot convert.
    if tokens and tokens[-1].isdecimal():
        return int(tokens[-1])
    return None

In [137]:
def convert_to_minutes(duration):
    """Convert a textual workout duration to whole minutes.

    Parameters
    ----------
    duration : str or missing
        Text containing "hr" (either "H:MM hr" or "H hr") or "min" ("M min").

    Returns
    -------
    int or None
        The duration in minutes, or None when the value is missing or
        contains neither "hr" nor "min".

    Raises
    ------
    ValueError
        If the numeric part of the text cannot be parsed by int().
    """
    # Missing cells arrive as NaN/None; the `in` checks below would raise
    # TypeError on them, so treat them like any other unparseable value.
    if pd.isna(duration):
        return None
    if "hr" in duration:
        amount = duration.replace(" hr", "")
        if ':' in amount:
            hours, minutes = amount.split(":")
            return int(hours) * 60 + int(minutes)
        return int(amount) * 60
    if "min" in duration:
        return int(duration.replace(" min", ""))
    return None

In [138]:
def convert_to_seconds(duration):
    """Convert a minute-based duration string to seconds.

    Accepts either "M:SS" text or a plain whole number of minutes.
    Missing values (as detected by ``pd.isna``) yield 0.
    """
    if pd.isna(duration):
        return 0
    if ':' not in duration:
        # Plain minute count.
        return 60 * int(duration)
    whole_minutes, extra_seconds = duration.split(":")
    return 60 * int(whole_minutes) + int(extra_seconds)

In [139]:
def strip_measure(value):
    """Return the first whitespace-separated token of ``value``.

    Parameters
    ----------
    value : str or missing
        Text whose leading token is the part to keep.

    Returns
    -------
    str or float
        The first token, or ``np.nan`` when the value is missing, empty or
        contains only whitespace.
    """
    if pd.isna(value):
        return np.nan
    tokens = value.split()
    # A blank string has no first token; report it as missing instead of
    # raising IndexError.
    return tokens[0] if tokens else np.nan

# Extract

In [140]:
# Load the silver-layer CSVs. All three are semicolon-separated; the workouts
# and exercises-log files are additionally read with "," as the decimal mark.
df_workouts_orig = pd.read_csv(
    f"{SILVER_FOLDER}/workouts.csv",
    sep=";",
    decimal=',',
)
df_exercises_log_orig = pd.read_csv(
    f"{SILVER_FOLDER}/exercises_log.csv",
    sep=";",
    decimal=',',
)
df_exercises_ref_orig = pd.read_csv(
    f"{SILVER_FOLDER}/exercises_ref.csv",
    sep=";",
)

# Transform

## Workouts

In [141]:
# Work on an independent (deep) copy so the frame read from disk stays untouched.
df_workouts_copied = df_workouts_orig.copy(deep=True)

In [142]:
# Reduce the day/week labels to their trailing number and store it as text.
# The intermediate nullable Int64 cast matters when extract_day_or_week returns
# None for a row: without it the applied column becomes float and the string
# cast would produce "1.0" instead of "1".
df_workouts_copied['day_number'] = (
    df_workouts_copied['day_number']
    .apply(extract_day_or_week)
    .astype('Int64')
    .astype('string')
)
df_workouts_copied['week_number'] = (
    df_workouts_copied['week_number']
    .apply(extract_day_or_week)
    .astype('Int64')
    .astype('string')
)

# start_time is text in "YYYY-MM-DD HH:MM h" form (note the literal " h" suffix).
df_workouts_copied['start_time'] = pd.to_datetime(
    df_workouts_copied['start_time'],
    format="%Y-%m-%d %H:%M h",
)

# duration becomes a number of minutes; end_time is start_time plus that many minutes.
df_workouts_copied['duration'] = df_workouts_copied['duration'].apply(convert_to_minutes)
df_workouts_copied['end_time'] = (
    df_workouts_copied['start_time']
    + pd.to_timedelta(df_workouts_copied['duration'], unit='m')
)

In [143]:
# df_workouts_copied.iloc[:20]  # uncomment to preview the first 20 transformed workouts

## Exercises log

In [144]:
# Work on an independent (deep) copy so the frame read from disk stays untouched.
df_exercises_log_copied = df_exercises_log_orig.copy(deep=True)

In [145]:
# Normalise minute-based rows to seconds.
# The mask is taken once, before "measure" is rewritten, and reused below.
mask_mins = (df_exercises_log_copied["measure"] == "mins")

# Keep only the leading token of every goal value.
df_exercises_log_copied['goal'] = df_exercises_log_copied['goal'].apply(strip_measure)

# Convert both value columns for the minute-based rows, then relabel those rows.
for value_column in ("goal", "quantity"):
    minute_values = df_exercises_log_copied.loc[mask_mins, value_column]
    df_exercises_log_copied.loc[mask_mins, value_column] = minute_values.apply(convert_to_seconds)
df_exercises_log_copied.loc[mask_mins, "measure"] = "secs"

# Save

In [147]:
# Write both transformed tables to the gold folder using the same CSV dialect
# as the inputs (";" separator, "," decimal mark) and without the index column.
gold_outputs = (
    (df_workouts_copied, f"{GOLD_FOLDER}/gold_workouts.csv"),
    (df_exercises_log_copied, f"{GOLD_FOLDER}/gold_exerciseslog.csv"),
)
for gold_frame, destination in gold_outputs:
    gold_frame.to_csv(destination, sep=";", decimal=',', index=False)