# Prep

In [2]:
import pandas as pd
import numpy as np
import pprint
import re

In [112]:
# Location of the exported workouts file that the Extract step reads.
# NOTE(review): this is an absolute, machine-specific Windows path, so the
# notebook only runs where the file exists at exactly this location.
WORKOUTS_FILE = r'C:\Users\User\Documents\GitHub\ap_workouts\raw_data\uptodate_workouts.csv'

## Functions

### Extract

In [3]:
def get_raw_workouts(fp):
  """Read the workouts export and group its lines into one list per workout.

  Every line is stripped and blank lines are discarded. The first remaining
  line is dropped, and the rest are split into workouts: a line containing
  'Week' or 'Standalone' is treated as a workout header and starts a new
  group. Lines that appear before the first header are kept at the front of
  the first workout.

  Args:
    fp: Path of the UTF-8 encoded export file.

  Returns:
    A list of workouts, each a list of stripped, non-empty lines whose header
    line comes first. Returns an empty list when the file has no header line
    (including an empty file).
  """
  all_workouts = []

  with open(fp, encoding='utf-8') as f:
    # Strip surrounding whitespace and keep only the lines that are not blank.
    non_empty_lines = [text for text in (line.strip() for line in f) if text]

  # Drop the first remaining line. Slicing (instead of pop) keeps an empty
  # file from raising IndexError.
  # NOTE(review): the original note calls this line "empty", yet blank lines
  # were already removed above, so this drops whatever non-blank line comes
  # first (possibly a BOM or title row) - confirm against the export.
  body_lines = non_empty_lines[1:]

  single_workout = []
  header_seen = False
  for line in body_lines:
    if 'Week' in line or 'Standalone' in line:
      # A new header closes the workout collected so far.
      if header_seen:
        all_workouts.append(single_workout)
        single_workout = []
      header_seen = True
    single_workout.append(line)

  # The loop only flushes a workout when the *next* header shows up, so the
  # last workout has to be appended explicitly or it would be lost.
  if header_seen:
    all_workouts.append(single_workout)

  return all_workouts

### Transform

In [None]:
# Set-table lines ('#;KG;REPS', '1;26;12', ...) are short; any line longer than
# this many characters is treated as the start of a new exercise.
_MAX_SET_LINE_LENGTH = 13


def _group_exercise_lines(lines):
    """Group a workout's body lines into {'exercise': header, 'data': rows} dicts."""
    grouped = []
    current = None
    for line in lines:
        if len(line) > _MAX_SET_LINE_LENGTH:
            if current is not None:
                grouped.append(current)
            current = {'exercise': line, 'data': []}
        elif current is not None:
            # Short lines seen before any exercise line have no owner and are dropped.
            current['data'].append(line)
    if current is not None:
        grouped.append(current)  # the last exercise is not followed by another header
    return grouped


def _measure_from_header(header):
    """Return the unit named by a set-table header ('reps'/'secs'/'mins'), or None."""
    for column, measure in (('REPS', 'reps'), ('SECS', 'secs'), ('MINS', 'mins')):
        if column in header:
            return measure
    return None


def _parse_exercise_sets(raw_exercise, workout_id):
    """Turn one grouped exercise into a list of flat dicts, one per performed set."""
    # An exercise line may carry a free-text note as a second ';' field; only the
    # first field describes the exercise, so the note must not leak into it.
    exercise_field = raw_exercise['exercise'].split(';')[0]
    exercise_info = [part.strip() for part in exercise_field.strip('"').split('·')]
    # maxsplit=1 keeps names that themselves contain '. ' in one piece.
    order_and_name = exercise_info[0].split('. ', 1)

    base = {
        'workout_id': workout_id,
        'exercise_number': order_and_name[0],
        'exercise_name': order_and_name[1],
        'equipment': exercise_info[1],
        'reps_goal': exercise_info[2] if len(exercise_info) > 2 else '',
    }

    rows = raw_exercise['data']
    if not rows:
        return []  # exercise logged without a set table

    header = rows[0].split(';')
    has_weight = 'KG' in header
    measure = _measure_from_header(header)

    sets = []
    for row in rows[1:]:
        fields = row.split(';')
        complete_set = dict(base)
        complete_set['set_number'] = fields[0]
        # 'weigth' (sic) is the established output key; renaming it would break consumers.
        if has_weight:
            complete_set['weigth'] = fields[1]
            complete_set['quantity'] = fields[2]
            # A weighted table without a recognised unit column defaults to reps.
            complete_set['measure'] = measure or 'reps'
        elif measure is not None:
            complete_set['weigth'] = ''
            complete_set['quantity'] = fields[1]
            complete_set['measure'] = measure
        else:
            # Unknown header layout: emit a sentinel record instead of guessing.
            complete_set['weigth'] = 'error'
            complete_set['quantity'] = '-1'
            complete_set['measure'] = 'error'
        sets.append(complete_set)
    return sets


def get_workout_dict(raw_workout_list):
    """Parse raw workouts (lists of lines) into structured dictionaries.

    The first line of each workout is its header: ';'-separated fields holding
    the '·'-separated description, the start time and the duration. The
    remaining lines are exercise lines, each followed by a set table (a header
    row such as '#;KG;REPS' and one row per set).

    Args:
        raw_workout_list: List of workouts, each a non-empty list of strings
            whose first element is the header line.

    Returns:
        A list with one dict per workout, containing:
          'description': id (digits of the start time), day_name, day_number,
              week_number, plan_name, plan_function, start_time, duration;
          'exercises': the raw grouping, [{'exercise': line, 'data': rows}];
          'clean_exercises': a flat list with one dict per set (workout_id,
              exercise_number, exercise_name, equipment, reps_goal,
              set_number, weigth, quantity, measure). All values are strings.
        A workout without exercise lines yields empty 'exercises' and
        'clean_exercises' lists.

    Raises:
        IndexError: If a header or exercise line has fewer fields than the
            layout above requires.
    """
    workout_listdict = []

    for workout in raw_workout_list:
        header_fields = workout[0].split(';')
        description_info = [item.strip() for item in header_fields[0].strip('"').split('·')]
        start_time = header_fields[1].strip('"')

        # Standalone workouts belong to no plan, so their description has fewer parts.
        if 'Standalone' in workout[0]:
            week_number = '0'
            plan_name = 'Standalone'
            plan_function = description_info[2]
        else:
            week_number = description_info[2]
            plan_name = description_info[3]
            plan_function = description_info[4]

        workout_id = re.sub(r'\D', '', start_time)  # start time reduced to its digits
        raw_exercises = _group_exercise_lines(workout[1:])

        clean_exercises = []
        for raw_exercise in raw_exercises:
            clean_exercises.extend(_parse_exercise_sets(raw_exercise, workout_id))

        workout_listdict.append({
            'description': {
                'id': workout_id,
                'day_name': description_info[0],
                'day_number': description_info[1],
                'week_number': week_number,
                'plan_name': plan_name,
                'plan_function': plan_function,
                'start_time': start_time,
                'duration': header_fields[2].strip('"'),
            },
            'exercises': raw_exercises,
            'clean_exercises': clean_exercises,
        })

    return workout_listdict

# Extract

In [155]:
# Read the export file and split its lines into one list per workout.
raw_workouts = get_raw_workouts(WORKOUTS_FILE)

In [43]:
#raw_workouts[1]

# Transform

In [156]:
# Parse every raw workout; despite the name, the result is a list of dicts,
# each with 'description', 'exercises' and 'clean_exercises' keys.
workouts_dict = get_workout_dict(raw_workouts)

In [157]:
# Inspect the second parsed workout; sort_dicts=False keeps the keys in insertion order.
pprint.pprint(workouts_dict[1], sort_dicts=False) #['exercises']

{'description': {'id': '202408221648',
                 'day_name': 'PeiTriOm',
                 'day_number': 'Day 2',
                 'week_number': 'Week 1',
                 'plan_name': 'Transição',
                 'plan_function': 'Deload',
                 'start_time': '2024-08-22 16:48 h',
                 'duration': '1:09 hr'},
 'exercises': [{'exercise': '"1. Chest Press · Machine · 12 reps"',
                'data': ['#;KG;REPS', '1;26;12', '2;26;12', '3;26;15']},
               {'exercise': '"2. Incline Bench Press · Dumbbells · 12 reps"',
                'data': ['#;KG;REPS', '1;14;12', '2;14;12', '3;14;9']},
               {'exercise': '"3. Dips · Bodyweight · 12 reps";"Altura 15, uns '
                            '3 dedos acima do cotovelo. Descendo até encostar '
                            'a ponta do pé no chão"',
                'data': ['#;KG;REPS', '1;+0;10', '2;+0;8', '3;+0;5']},
               {'exercise': '"4. Lean Away Lateral Raises · Cable · 12 reps"',
  

In [None]:
# NOTE(review): get_workout_dict does not emit an 'info' key in its
# clean_exercises entries (it is a debug-only field there), so this lookup
# raises KeyError on a fresh run - re-enable that field or drop this cell.
pprint.pprint(workouts_dict[32]['clean_exercises'][0]['info'], sort_dicts=False)

['1. Bench Press', 'Barbell', '12 reps']


# Tests

In [None]:
#raw_workouts[2]

In [None]:
#pprint.pprint(workouts_dict[2], sort_dicts=False)