## Preprocessing 01 (Working with Sampling)
#### 1. Acquisition with Crawling
#### 2. Transformation, including Cleansing & Denoising
#### 3. Imputation

In [1]:
import sys
import ssl
from urllib.request import Request, urlopen
from datetime import datetime

def crawling(url='', encoding='utf-8', err=lambda e: print(f'{e} : {datetime.now()}', file=sys.stderr)):
    """Download ``url`` and return its body decoded as text.

    Parameters
    ----------
    url : str
        Address to request.
    encoding : str
        Codec used to decode the response bytes; bytes that cannot be
        decoded are replaced rather than raising.
    err : callable
        Called with the exception object when anything in the request fails.
        The default writes the error and the current time to stderr.

    Returns
    -------
    str or None
        The decoded body on success, ``None`` after ``err`` has been called.
    """
    try:
        req = Request(url)

        # NOTE(review): certificate verification is switched off, as it was
        # before. The context is now scoped to this single request instead of
        # overwriting ssl's process-wide default, so other HTTPS users in the
        # same kernel keep normal verification.
        ctx = ssl.create_default_context()
        ctx.check_hostname = False  # must be cleared before verify_mode can be CERT_NONE
        ctx.verify_mode = ssl.CERT_NONE

        # The context manager closes the connection even if read/decode fails.
        with urlopen(req, context=ctx) as resp:
            recv = resp.read().decode(encoding, errors='replace')

        print(f'{datetime.now()}: success for request [{url}]')
        return recv
    except Exception as e:
        # Best-effort by design: report through the callback and signal failure with None.
        err(e)
        return None

In [2]:
from functools import reduce
from itertools import count

# Collect every markdown heading line ('#...') from the weekly record pages,
# requesting week 1, 2, 3, ... until a request yields no heading lines
# (a failed request returns None and therefore also ends the loop).
# NOTE(review): 'Week0%02d' produces 'Week0100' for week 100 - confirm the
# file naming before the record grows that far.
resfetch = []
for week in count(start=1):
    page = crawling(
        'https://raw.githubusercontent.com/kickscar/HELLCHANG-PRACTICES/main/training-record/Week0%02d.md' % week,
        err=lambda e: None
    )

    raw_lines = [] if page is None else page.splitlines()
    headings = [line for line in map(str.strip, raw_lines) if line.startswith('#')]

    if not headings:
        break
    resfetch.extend(headings)

print(resfetch)

2022-08-29 18:05:59.026935: success for request [https://raw.githubusercontent.com/kickscar/HELLCHANG-PRACTICES/main/training-record/Week001.md]
2022-08-29 18:05:59.575483: success for request [https://raw.githubusercontent.com/kickscar/HELLCHANG-PRACTICES/main/training-record/Week002.md]
2022-08-29 18:05:59.998913: success for request [https://raw.githubusercontent.com/kickscar/HELLCHANG-PRACTICES/main/training-record/Week003.md]
2022-08-29 18:06:00.451756: success for request [https://raw.githubusercontent.com/kickscar/HELLCHANG-PRACTICES/main/training-record/Week004.md]
2022-08-29 18:06:00.854885: success for request [https://raw.githubusercontent.com/kickscar/HELLCHANG-PRACTICES/main/training-record/Week005.md]
2022-08-29 18:06:01.208116: success for request [https://raw.githubusercontent.com/kickscar/HELLCHANG-PRACTICES/main/training-record/Week006.md]
2022-08-29 18:06:01.676415: success for request [https://raw.githubusercontent.com/kickscar/HELLCHANG-PRACTICES/main/training-reco

In [3]:
import re

# Turn the fetched heading lines into (date, name, *record tokens) tuples.
#   '## <YYYY/MM/DD>'          sets the date for the records that follow
#   '#### <name>: <record>'    one exercise record for the current date
# Lines with any other heading level are ignored.
# NOTE(review): a '###' line also matches the '##' branch and would be parsed as a date.

# Compiled once instead of on every iteration.
# Removes every digit, '+', backslash and '.' from the name.
NAME_NOISE = re.compile(r'[\d+\\.]')
# 'name [tool]' where both parts consist of letters, whitespace, apostrophes and commas.
NAME_WITH_TOOL = re.compile(r'([a-zA-Z\s\',]+)\s*\[([a-zA-Z\s\',]+)\]')

restransform = []
date = None  # reset here so a value left over from an earlier run of this cell is never reused
for res in resfetch:
    if res.startswith('####'):
        if date is None:
            raise ValueError(f'exercise record found before any date heading: {res!r}')

        # 1. split name and record
        # maxsplit=1: only the first ':' separates name from record, so a
        # record that itself contains ':' no longer breaks the unpacking.
        name, record = (s.strip() for s in res.replace('####', '').strip().split(':', 1))

        # 2. cleansing name
        name = NAME_NOISE.sub('', name).strip()
        matched = NAME_WITH_TOOL.match(name)
        if matched is not None:
            # Normalise to 'Name[Tool]' in title case; names without a tool are left as they are.
            name, tool = matched.groups()
            name = ''.join([name.strip(), f'[{tool.strip()}]']).title()

        # The record is split on whitespace; the tokens become trailing tuple fields.
        restransform.append((date, name, *re.split(r'\s+', record.strip())))
    elif res.startswith('##'):
        date = datetime.strptime(res.replace('##', '').strip(), '%Y/%m/%d')

print(restransform)

[(datetime.datetime(2021, 6, 24, 0, 0), 'Flat Bench Press[Machine, Smith]', '40kg', '12reps', '5sets'), (datetime.datetime(2021, 6, 24, 0, 0), 'Seated Chest Press[Machine]', '20kg', '12reps', '5sets'), (datetime.datetime(2021, 6, 24, 0, 0), 'Lat Pulldown[Machine]', '20kg', '12reps', '5sets'), (datetime.datetime(2021, 6, 24, 0, 0), 'Seated Low Row[Cable]', '15kg', '12reps', '3sets'), (datetime.datetime(2021, 6, 24, 0, 0), 'Assisted Pullup[Machine]', '-50kg', '12reps', '3sets'), (datetime.datetime(2021, 6, 24, 0, 0), 'Shoulder Press[Machine]', '5kg', '12reps', '5sets'), (datetime.datetime(2021, 6, 25, 0, 0), 'Low Pulley[Machine]', '20kg', '12reps', '5sets'), (datetime.datetime(2021, 6, 25, 0, 0), 'Assisted Pullup[Machine]', '-50kg', '12reps', '5sets'), (datetime.datetime(2021, 6, 25, 0, 0), 'Shoulder Press[Machine]', '15kg', '12reps', '5sets(5th', 'failed)'), (datetime.datetime(2021, 6, 25, 0, 0), 'Leg Extension[Machine]', '20kg', '12reps', '3sets,', '25kg', '12reps', '2sets,', '30kg', '

In [4]:
# Normalization
import pickle
import os

# Location of the pickled exercise dictionary, relative to the current working directory.
dictfile = os.path.join(os.getcwd(), '../dataset/', 'exercise-dict.pkl')

# NOTE(review): unpickling can execute arbitrary code - only load a file you trust.
exercisedict = None
with open(dictfile, 'rb') as f:
    payload = f.read()
    exercisedict = pickle.loads(payload)

print(exercisedict)

{'equipments-dialects': {'Dumbbell': [], 'Machine': ['Machin']}, 'exercises': {'Arnold Press': {'dialects': [], 'muscle-groups': 'Chest', 'record-type': ['Weight', 'Repetion', 'Set']}, 'Assisted Dip': {'dialects': [], 'equipment-dialects': {'Machine': ['']}, 'muscle-groups': 'Chest', 'recrod-type': ['Weight', 'Repetition', 'Set']}, 'Assisted Pull-Up': {'dialects': ['Assisted Pullup'], 'muscle-groups': 'Lats', 'recrod-type': ['Weight', 'Repetition', 'Set']}, 'Back Extension': {'dialects': ['Back Extensions'], 'muscle-groups': 'Back', 'recrod-type': ['Repetition', 'Set']}, 'Body-Weight Squat': {'dialects': ['Basic Squat', 'Basic Squats', 'Air Squat'], 'muscle-groups': 'Quadriceps', 'recrod-type': ['Repetition', 'Set']}}}


In [5]:
# Which Exercise?
from collections import Counter

# The exercise name is the second field of every transformed record.
exs = [t[1] for t in restransform]

# One pass over the names instead of a list.count() call per unique name.
# Counter keeps first-seen order, so the (name, count) pairs come out in the
# same order on every run, unlike iterating a set of strings.
dataset_exs = list(Counter(exs).items())

# Alphabetical listing of each distinct name with its number of occurrences.
sorted_by_ex = sorted(dataset_exs, key=lambda t: t[0])
for d in sorted_by_ex:
    print(d)

('Arnold Press[Dumbbell]', 16)
('Assisted Dip[Machine]', 14)
('Assisted Dips[Machine]', 25)
('Assisted Pullup[Machine]', 55)
('Back Extension', 36)
('Basic Squat', 4)
('Basic Squats', 1)
('Behind The Neck Press[Machine, Smith]', 3)
('Bent Over Lateral Raise[Dumbbell]', 2)
('Bent Over Row[Barbell]', 5)
('Bent Over Row[Dumbbell]', 14)
('Biceps Curls[Ezbar]', 1)
('Biceps Curls[Fixed Barbell]', 1)
('Biceps Curls[Machine]', 13)
('Cable Chest Fly[Cable]', 1)
('Cable Cross-Over[Cable]', 5)
('Cable Triceps Pushdown[Cable]', 8)
('Chest Fly[Machine, Pec Deck]', 39)
('Chest Press[Machine]', 2)
('Cross Lunges', 1)
('Dead Bug', 2)
('Deadlift[Barbell]', 84)
('Deadlift[Dumbbell]', 8)
('Deadlift[Kettlebell]', 12)
('Decline Bench Press[Barbell]', 10)
('Dips', 18)
('Face Pulls[Cable]', 1)
('Flat Bench Press[Barbell]', 53)
('Flat Bench Press[Dumbbel]', 1)
('Flat Bench Press[Dumbbell]', 2)
('Flat Bench Press[Free]', 1)
('Flat Bench Press[Machine, Smith]', 7)
('Flat Chest Press[Barbell]', 1)
('Flat Chest P

In [6]:
# and How Often?
# Copy the (name, count) pairs and order them by count, largest first;
# pairs with equal counts keep their order from dataset_exs.
sorted_by_count = list(dataset_exs)
sorted_by_count.sort(key=lambda pair: pair[1], reverse=True)
for entry in sorted_by_count:
    print(entry)

('Lying Leg Curls[Machine]', 112)
('Lat Pulldown[Machine]', 98)
('Plank', 93)
('Seated Chest Press[Machine]', 86)
('Deadlift[Barbell]', 84)
('Hip Adduction[Machine]', 81)
('Hip Abduction[Machine]', 74)
('Shoulder Press[Machine]', 73)
('Leg Extension[Machine]', 62)
('Seated Row[Machine]', 62)
('Side Lateral Raise[Dumbbell]', 60)
('Assisted Pullup[Machine]', 55)
('Flat Bench Press[Barbell]', 53)
('Power Leg Press[Machine]', 50)
('Straight Arm Pulldown[Cable]', 39)
('Chest Fly[Machine, Pec Deck]', 39)
('One Leg Deadlift[Kettlebell]', 36)
('Back Extension', 36)
('Running', 33)
('Seated Low Row[Cable]', 33)
('Wide Pulldown Rear[Machine]', 33)
('Side-lying Hip Abduction', 32)
('Incline Bench Press[Machine, Smith]', 25)
('Assisted Dips[Machine]', 25)
('Wrist Roller', 22)
('Long Pull Row[Cable]', 19)
('Incline Chest Press[Machine]', 19)
('Dips', 18)
('Incline Bench Press[Barbell]', 17)
('Reverse Pec Deck Fly[Machine, Pec Deck]', 16)
('Incline Bench Press[Machine]', 16)
('Arnold Press[Dumbbell]