# Prep

In [2]:
import pandas as pd
import numpy as np
import pprint
import re

## Functions

### Extract

In [3]:
def get_raw_workouts(fp):
  all_workouts = []
  counter = 0

  with open(fp, encoding='utf-8') as f:
    clean_strings = [line.strip() for line in f.readlines()]
    no_empty_lines = list(filter(None, clean_strings))
    no_empty_lines.pop(0) # tirar a primeira linha, que é vazia

    single_workout = []
    for line in no_empty_lines:
      if 'Week' in line or 'Standalone' in line:
        if counter > 0:
          all_workouts.append(single_workout)
          single_workout = []
        counter += 1
      single_workout.append(line)

  return all_workouts

### Transform

In [97]:
def get_workout_dict(raw_workout_list):
    workout_listdict = []

    workout_counter = 0
    for workout in raw_workout_list:
        #print(f"Workout: {workout_counter}") # use to find errors

        workout_description = workout[0].split(';')
        description_info = [item.strip() for item in workout_description[0].strip('"').split('·')]

        # 
        workout_dict = {
            #'workout_counter': workout_counter, # use to find errors
            #'raw_description': f'{workout[0]}', # use to check for data quality
            'description': {
                'day_name': description_info[0],
                'day_number': description_info[1],
                'week_number': '',
                'plan_name': '',
                'plan_function': '',
                'start_time': workout_description[1].strip('"'),
                'duration': workout_description[2].strip('"')
            },
            'exercises': [],
            'clean_exercises': []
        }

        # Standalone workouts follow a different structure and need specific treatment
        if 'Standalone' in workout[0]:
            workout_dict['description']['week_number'] = '0'
            workout_dict['description']['plan_name'] = 'Standalone'
            workout_dict['description']['plan_function'] = description_info[2]
        else:
            workout_dict['description']['week_number'] = description_info[2]
            workout_dict['description']['plan_name'] = description_info[3]
            workout_dict['description']['plan_function'] = description_info[4]

        # 
        raw_current_exercise = {
            'exercise': '',
            'data': []
        }
        raw_exercises = []
        for line in workout[1:]:
            if len(line) > 13:
                if len(raw_current_exercise['exercise']) > 0:
                    raw_exercises.append(raw_current_exercise)
                raw_current_exercise = {'exercise': line, 'data': []}
            else:
                raw_current_exercise['data'].append(line)
        raw_exercises.append(raw_current_exercise) # necessary to append the last dict

        # 
        clean_exercises = []
        for e in raw_exercises:
            exercise_sets = [] # será que é isso 
            #print(f"workout: {workout_counter}  {e}") # use to find errors
            exercise_info = [item.strip() for item in e['exercise'].strip('"').split('·')]
            order_and_name = exercise_info[0].split(". ")

            clean_exercise = {
                #'info': exercise_info, # use to check data quality
                'exercise_number': order_and_name[0],
                'exercise_name': order_and_name[1],
                'equipment': exercise_info[1],
                'reps_goal': ''
            }

            if (len(exercise_info) > 2):
                clean_exercise['reps_goal'] = exercise_info[2]
            
            # add set data to main dict
            exercise_set_header = e['data'][0].split(';')
            for set in e['data'][1:]:
                complete_set = clean_exercise.copy()
                
                set_info = set.split(';')
                #print(f"{clean_exercise['exercise_order']}, {set_info[0]}")
                
                #clean_exercise['test'] = exercise_set_header # use to check data quality
                complete_set['set_number'] = set_info[0]

                #print(complete_set)
                exercise_sets.append(complete_set) # mudei aqui
                
            clean_exercises.append(exercise_sets) # veremos
        

        workout_dict['exercises'] = raw_exercises
        workout_dict['clean_exercises'] = clean_exercises
        workout_counter += 1

        workout_listdict.append(workout_dict)
    
    return workout_listdict

# Extract

In [33]:
raw_workouts = get_raw_workouts(r'G:\My Drive\Projetos\workouts\2024_08_25 Workouts.csv')

In [43]:
#raw_workouts[1]

# Transform

In [108]:
workouts_dict = get_workout_dict(raw_workouts)
pprint.pprint(workouts_dict[2], sort_dicts=False)

{'description': {'day_name': 'CosBi',
                 'day_number': 'Day 1',
                 'week_number': 'Week 1',
                 'plan_name': 'Transição',
                 'plan_function': 'Deload',
                 'start_time': '2024-08-20 16:21 h',
                 'duration': '57 min'},
 'exercises': [{'exercise': '"1. Lat Pulldowns with Wide Overhand Grip · Cable '
                            '· 12 reps"',
                'data': ['#;KG;REPS', '1;60;12', '2;60;12', '3;60;8']},
               {'exercise': '"2. One-Arm Rows · Dumbbells · 12 reps"',
                'data': ['#;KG;REPS', '1;20;12', '2;20;12', '3;20;12']},
               {'exercise': '"3. Reverse Butterfly with Wide Grip · Machine · '
                            '12 reps"',
                'data': ['#;KG;REPS', '1;30;10', '2;30;10', '3;30;10']},
               {'exercise': '"4. Pullovers · Cable · 12 reps"',
                'data': ['#;KG;REPS', '1;47;12', '2;47;12', '3;47;11']},
               {'exercise': '"5

In [None]:
pprint.pprint(workouts_dict[32]['clean_exercises'][0]['info'], sort_dicts=False)

['1. Bench Press', 'Barbell', '12 reps']


# Tests

In [None]:
#raw_workouts[2]

In [None]:
#pprint.pprint(workouts_dict[2], sort_dicts=False)