# Prep

In [1]:
import os

import numpy as np
import pandas as pd

In [2]:
# Input (silver) and output (gold) data folders.
# The defaults are the original local checkout paths; set the
# AP_WORKOUTS_SILVER / AP_WORKOUTS_GOLD environment variables to run the
# notebook on another machine without editing this cell.
SILVER_FOLDER = os.environ.get(
    "AP_WORKOUTS_SILVER",
    r'C:\Users\User\Documents\GitHub\ap_workouts\b2-silver',
)
GOLD_FOLDER = os.environ.get(
    "AP_WORKOUTS_GOLD",
    r'C:\Users\User\Documents\GitHub\ap_workouts\b3-gold',
)

# Functions

## Transform

In [3]:
def extract_day_or_week(value):
    """Extract the trailing day/week number from a label.

    Parameters
    ----------
    value : str or missing (None / NaN)
        Label text. The exact text "Standalone workout" maps to 0; otherwise
        the last whitespace-separated token is parsed as an integer.

    Returns
    -------
    int or None
        The parsed number, 0 for a standalone workout, or None when the value
        is missing, empty, or does not end in a decimal number.
    """
    # Missing cells have no text to parse (a float NaN has no .split()).
    if pd.isna(value):
        return None
    if value == "Standalone workout":
        return 0

    tokens = value.split()
    # isdecimal() only accepts characters that int() can parse; isdigit()
    # would also accept e.g. superscript digits and then make int() raise.
    if tokens and tokens[-1].isdecimal():
        return int(tokens[-1])
    return None

In [4]:
def convert_to_minutes(duration):
    """Convert a duration label to a whole number of minutes.

    Recognised forms are "H:MM hr", "H hr" and "M min".

    Parameters
    ----------
    duration : str or missing (None / NaN)
        Duration text.

    Returns
    -------
    int or None
        Minutes, or None when the value is missing or contains neither
        "hr" nor "min".

    Raises
    ------
    ValueError
        If the numeric part of a recognised form cannot be parsed by int().
    """
    # A missing cell is a float NaN / None; the substring tests below would
    # raise TypeError on it, so report "unknown" instead.
    if pd.isna(duration):
        return None

    if "hr" in duration:
        hours_text = duration.replace(" hr", "")
        if ':' in hours_text:
            hours, minutes = hours_text.split(":")
            return int(hours) * 60 + int(minutes)
        return int(hours_text) * 60
    if "min" in duration:
        return int(duration.replace(" min", ""))
    return None

In [5]:
def convert_to_seconds(duration):
    """Convert a minutes value ("M" or "M:SS" text) to whole seconds.

    A missing value (as judged by ``pd.isna``) yields 0. Text without a colon
    is read as whole minutes; text with a colon is read as minutes:seconds.
    """
    if pd.isna(duration):
        return 0

    # Plain minutes: no seconds component to add.
    if ':' not in duration:
        return int(duration) * 60

    mins_text, secs_text = duration.split(":")
    return int(mins_text) * 60 + int(secs_text)

In [6]:
def strip_measure(value):
    """Return the first whitespace-separated token of ``value``.

    Missing values (as judged by ``pd.isna``) are returned as ``np.nan``.
    """
    if not pd.isna(value):
        # Only the leading token is kept; anything after it is dropped.
        return value.split(maxsplit=1)[0]
    return np.nan

# Extract

In [7]:
# Load the three semicolon-separated CSV files from the silver folder.
# The workouts and exercises-log files are read with a comma decimal mark;
# the exercises reference file is read with the default decimal mark.
workouts_csv = f"{SILVER_FOLDER}/workouts.csv"
exercises_log_csv = f"{SILVER_FOLDER}/exercises_log.csv"
exercises_ref_csv = f"{SILVER_FOLDER}/exercises_ref.csv"

df_workouts_orig = pd.read_csv(workouts_csv, sep=";", decimal=',')
df_exercises_log_orig = pd.read_csv(exercises_log_csv, sep=";", decimal=',')
df_exercises_ref_orig = pd.read_csv(exercises_ref_csv, sep=";")

# Transform

## Workouts

In [8]:
# Work on an independent (deep) copy so the loaded frame stays untouched.
df_workouts_copied = df_workouts_orig.copy(deep=True)

In [9]:
# Parse the raw text columns first, then write the results back.
day_numbers = df_workouts_copied['day_number'].apply(extract_day_or_week)
week_numbers = df_workouts_copied['week_number'].apply(extract_day_or_week)
start_times = pd.to_datetime(df_workouts_copied['start_time'], format="%Y-%m-%d %H:%M h")
durations_min = df_workouts_copied['duration'].apply(convert_to_minutes)

# Day and week numbers are stored with pandas' 'string' dtype.
df_workouts_copied['day_number'] = day_numbers.astype('string')
df_workouts_copied['week_number'] = week_numbers.astype('string')
df_workouts_copied['start_time'] = start_times
df_workouts_copied['duration'] = durations_min

# end_time = start_time + duration (duration is in minutes).
df_workouts_copied['end_time'] = start_times + pd.to_timedelta(durations_min, unit='m')

In [10]:
# df_workouts_copied.iloc[:20]  # uncomment to preview the first 20 transformed workouts

## Exercises log

In [16]:
# Work on an independent (deep) copy so the loaded frame stays untouched.
df_exercises_log_copied = df_exercises_log_orig.copy(deep=True)

In [13]:
# Rewrite rows measured in "mins" so that goal and quantity are in seconds.
# The mask is computed once, before any column is modified, and reused so
# every step targets the same rows.
mask_mins = df_exercises_log_copied["measure"].eq("mins")

# Keep only the leading token of each goal (all rows, not just "mins" ones).
df_exercises_log_copied['goal'] = df_exercises_log_copied['goal'].apply(strip_measure)

# NOTE(review): convert_to_seconds parses values with int(); a non-numeric
# placeholder in a "mins" row would raise here - confirm against the data.
for time_column in ("goal", "quantity"):
    converted = df_exercises_log_copied.loc[mask_mins, time_column].apply(convert_to_seconds)
    df_exercises_log_copied.loc[mask_mins, time_column] = converted

# The converted rows are now expressed in seconds.
df_exercises_log_copied.loc[mask_mins, "measure"] = "secs"

In [13]:
# Cast the cleaned columns to numeric dtypes.
# Missing goals become 0; a '-' placeholder in quantity or weight becomes 0.
df_exercises_log_copied['goal'] = df_exercises_log_copied['goal'].fillna(0).astype('int64')
df_exercises_log_copied['quantity'] = df_exercises_log_copied['quantity'].replace('-', 0).astype('int64')

# regex=False: "+" and "," are literal characters to remove/replace here.
# Without it the result depends on the pandas version's default for `regex`,
# and "+" on its own is not a valid regular expression.
weight_text = (
    df_exercises_log_copied['weight']
    .str.replace("+", "", regex=False)
    .str.replace(",", ".", regex=False)
)
df_exercises_log_copied['weight'] = weight_text.replace('-', 0).fillna(0).astype('float64')

In [14]:
# Volume per row = weight x quantity.
# NOTE(review): this uses 'weight', not the doubled dumbbell 'total_weight'
# computed in a later cell - confirm that is intended.
df_exercises_log_copied['volume'] = df_exercises_log_copied['weight'].mul(df_exercises_log_copied['quantity'])

In [15]:
# total_weight equals weight, except for rows whose equipment is "Dumbbells",
# where the weight is doubled to yield the total weight.
mask_dumbbells = df_exercises_log_copied["equipment"].eq("Dumbbells")

single_weight = df_exercises_log_copied['weight']
df_exercises_log_copied['total_weight'] = single_weight.mask(mask_dumbbells, single_weight * 2)

In [16]:
# Columns kept in the output exercises log, in output order.
exercises_log_columns = [
    'workout_id',
    'exercise_number',
    'exercise_name',
    'equipment',
    'goal',
    'measure',
    'set_number',
    'weight',
    'total_weight',
    'quantity',
    'volume',
]

df_exercises_log_copied = df_exercises_log_copied[exercises_log_columns]

# Save

In [17]:
# Write both transformed frames to the gold folder using the same CSV
# conventions as the input files (semicolon separator, comma decimal mark),
# without the index column.
csv_options = dict(sep=";", decimal=',', index=False)

df_workouts_copied.to_csv(f"{GOLD_FOLDER}/workouts.csv", **csv_options)
df_exercises_log_copied.to_csv(f"{GOLD_FOLDER}/exercises_log.csv", **csv_options)