## 1. Scrape Exercises

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import copy
import os

### 1.1 Function to extract: title, body part, difficulty, equipment

In [2]:
def save_exercise_images(soup, title):
    link1 = soup.find('div', {'class': 'exercise-card__image'}).attrs['style'].split("'")[1]
    link2 = link1.strip('1.jpg') + '2.jpg'
    
    file_names = []
    for link in [link1, link2]:
        file_name = '{}_{}'.format(title.replace(" ", "").replace("/", "_").replace("\\", "_").replace(",", "_").replace("'", "_" ), link.split('/')[-1])
#         file_name = link.split('/')[-1]
        file_names.append(file_name)
        with open(os.path.join('images', file_name), "wb") as f:
            f.write(requests.get(link).content)
    
    return file_names
    

In [3]:
def scrape_exercise_page(soup, exercises_df):

    for card in soup.find_all("a", {"class": "exercise-card"}):
        title = card.find("h2", {"class": "exercise-card__title"}).get_text()

        exercise_info_text_base = "exercise-info__term exercise-info__term--"
        body_part = card.find("div", {"class": exercise_info_text_base + "body-part"}).dd.get_text().split(', ')

        difficulty = card.find("div", {"class": exercise_info_text_base + "difficulty"}).span.get_text()

        equipment = card.find("div", {"class": exercise_info_text_base + "equipment"}).dd.get_text().split(', ')

        # save images
        image_names = save_exercise_images(soup, title)
        
        exercises_df = exercises_df.append({"title": title, 
                                            "body_part": body_part, 
                                            "difficulty": difficulty, 
                                            "equipment": equipment, 
                                            "image1": image_names[0], 
                                            "image2": image_names[1]}, ignore_index=True)
        
    return exercises_df

### 1.2 Scrape data from website to dataframe

In [4]:
exercises_df = pd.DataFrame()

for difficulty in ['beginner', 'intermediate', 'advanced']:
    url = "https://www.acefitness.org/education-and-resources/lifestyle/exercise-library/experience/{}/".format(difficulty)

    page_number = 1
    while True:
        page = requests.get("{}/?page={}".format(url, page_number))
        soup = BeautifulSoup(page.content, 'html.parser')
        extracted_page_number = int(soup.find_all("script", {"type": "text/javascript"})[-4].contents[0].split("exerciseFilter.selectedPage = ")[-1].split(";")[0])
        if page_number == extracted_page_number:
            exercises_df = scrape_exercise_page(soup, exercises_df)
            page_number += 1
        else:
            break

In [5]:
# Move title column to front
exercises_df = exercises_df[['title'] + [col for col in exercises_df.columns if col != 'title']]
exercises_df

Unnamed: 0,title,body_part,difficulty,equipment,image1,image2
0,Triceps Pressdown,[Arms],Beginner,[Resistance Bands/Cables],TricepsPressdown_3-1.jpg,TricepsPressdown_3-2.jpg
1,Chest Press,"[Arms, Chest, Shoulders]",Beginner,"[Barbell, Bench]",ChestPress_3-1.jpg,ChestPress_3-2.jpg
2,Supine Pelvic Tilts,[Abs],Beginner,[No Equipment],SupinePelvicTilts_3-1.jpg,SupinePelvicTilts_3-2.jpg
3,Supermans,"[Back, Butt/Hips, Shoulders]",Beginner,[No Equipment],Supermans_3-1.jpg,Supermans_3-2.jpg
4,Bent Knee Push-up,"[Arms, Chest, Shoulders]",Beginner,[No Equipment],BentKneePush-up_3-1.jpg,BentKneePush-up_3-2.jpg
...,...,...,...,...,...,...
325,Waiter's Carry,[Full Body/Integrated],Advanced,[Kettlebells],Waiter_sCarry_374-1.jpg,Waiter_sCarry_374-2.jpg
326,Single Arm Swing,[Full Body/Integrated],Advanced,[Kettlebells],SingleArmSwing_392-1.jpg,SingleArmSwing_392-2.jpg
327,Single Arm Overhead Squat,"[Back, Full Body/Integrated, Legs - Thighs, Sh...",Advanced,[Kettlebells],SingleArmOverheadSquat_392-1.jpg,SingleArmOverheadSquat_392-2.jpg
328,Halo,"[Back, Butt/Hips, Chest, Full Body/Integrated,...",Advanced,[Kettlebells],Halo_392-1.jpg,Halo_392-2.jpg


### 1.3 Save data

In [6]:
exercises_df.to_csv(os.path.join('data', 'exercises.csv'), index=False)

## 2. Filtering Data

In [7]:
df = copy.deepcopy(exercises_df)
df = df.join(exercises_df['body_part'].str.join(',').str.get_dummies(sep=',').astype(bool).add_prefix('body_part__'))
df = df.join(exercises_df['equipment'].str.join(',').str.get_dummies(sep=',').astype(bool).add_prefix('equipment__'))

In [8]:
body_parts = [col[len('body_part__'):] for col in df.columns if 'body_part__' in col]
body_parts

['Abs',
 'Arms',
 'Back',
 'Butt/Hips',
 'Chest',
 'Full Body/Integrated',
 'Legs - Calves and Shins',
 'Legs - Thighs',
 'Neck',
 'Shoulders']

In [9]:
equipments = [col[len('equipment__'):] for col in df.columns if 'equipment__' in col]
equipments

['BOSU Trainer',
 'Barbell',
 'Bench',
 'Cones',
 'Dumbbells',
 'Heavy Ropes',
 'Hurdles',
 'Kettlebells',
 'Ladder',
 'Medicine Ball',
 'No Equipment',
 'Pull up bar',
 'Raised Platform/Box',
 'Resistance Bands/Cables',
 'Stability Ball',
 'TRX',
 'Weight Machines / Selectorized']

In [10]:
difficulties = list(exercises_df['difficulty'].unique())
difficulties

['Beginner', 'Intermediate', 'Advanced']

In [11]:
df.to_csv(os.path.join('data', 'exercises_expanded.csv'), index=False)

In [12]:
df[df['difficulty'] == 'Beginner'].title.values

array(['Triceps Pressdown', 'Chest Press', 'Supine Pelvic Tilts',
       'Supermans', 'Bent Knee Push-up', 'Cat-Cow', 'Cobra Exercise',
       'Ankle Flexion ', 'Incline Chest Press', 'Lateral Raise',
       'Step-up', 'Wrist Curl - Extension', 'Wrist Curl - Flexion',
       'Wrist Supination & Pronation Exercises', 'Hip Hinge',
       'Incline Reverse Fly', 'Side Lying Hip Abduction',
       'Side Lying Hip Adduction', 'Seated Biceps Curl',
       'Seated High Back Row ', 'Seated Medicine Ball Trunk Rotations',
       'Seated Row ', 'Glute Bridge Exercise', 'Crunch',
       'Contralateral Limb Raises', 'Front Raise', 'Triceps Kickback',
       'Stability Ball Sit-ups / Crunches', 'Stability Ball Wall Squats',
       'Standing Calf Raises - Wall', 'Half-kneeling Hay Baler',
       'Side Plank - modified', 'Dirty Dog',
       'Forward Stepping over Cones ', 'Single Leg Stand', 'Squat Jumps',
       'Supine Hollowing with Lower Extremity Movements',
       'Supine Rotator Cuff ', 'Supine