In [1]:
import pandas as pd
import re
# import data sets 
# fitness exercises 
fitness_df = pd.read_csv("fit_exercises.csv")
# activities 
act_df = pd.read_csv("activities.csv")

In [2]:
# some last minute clean up before merging 
fitness_df = fitness_df.iloc[: , 1:] # delete unnecessary index column 
act_df = act_df.iloc[: , 1:] # delete unnecessary index column 
# add additional columns, and imputations needed in fitness df  
# assume fitness (with specific equipment) exercises will build muscle strength (upper and/or lower)
fitness_df["Build Muscle Strength"] = 1
fitness_df 

Unnamed: 0,Bodypart,Equipment,Exercise,Target,Gym,Strengthen Core,Upper Body Gains,Bigger Booty,Stronger Legs,Build Muscle Strength
0,waist,body weight,3/4 sit-up,abs,0,1,0,0,0,1
1,waist,body weight,45° side bend,abs,0,1,0,0,0,1
2,waist,body weight,air bike,abs,0,1,0,0,0,1
3,upper legs,body weight,all fours squad stretch,quads,0,0,0,1,1,1
4,waist,body weight,alternate heel touchers,abs,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...
1322,chest,body weight,wide-grip chest dip on high parallel bars,pectorals,0,0,1,0,0,1
1323,waist,body weight,wind sprints,abs,0,1,0,0,0,1
1324,upper legs,body weight,world greatest stretch,hamstrings,0,0,0,1,1,1
1325,lower arms,body weight,wrist circles,forearms,0,0,1,0,0,1


In [3]:
act_df

Unnamed: 0,Exercise,Intensity,Gym,Daily Tasks,Water Sports,Racing,Horse,Dance,Target,Combat,Net,Ball or Puck,Improve Endurance,Strengthen Core,Improve Flexibility,Improve Balance,Build Muscle Strength,Upper Body Gains,Bigger Booty,Stronger Legs
0,"Running, 10.9 mph (5.5 min mile)",High,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1
1,"Cross country skiing, uphill",High,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1
2,"Running, 10 mph (6 min mile)",High,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1
3,"Cycling, >20 mph, racing",High,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1
4,"Skin diving, fast",High,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
243,"Sit, playing with animals",Low,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
244,Pushing stroller or walking with children,Low,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
245,Croquet,Low,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
246,"Walking, under 2.0 mph, very slow",Low,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1


In [4]:
# merge data sets 
frames = [fitness_df, act_df]

df = pd.concat(frames)
df

Unnamed: 0,Bodypart,Equipment,Exercise,Target,Gym,Strengthen Core,Upper Body Gains,Bigger Booty,Stronger Legs,Build Muscle Strength,...,Water Sports,Racing,Horse,Dance,Combat,Net,Ball or Puck,Improve Endurance,Improve Flexibility,Improve Balance
0,waist,body weight,3/4 sit-up,abs,0,1,0,0,0,1,...,,,,,,,,,,
1,waist,body weight,45° side bend,abs,0,1,0,0,0,1,...,,,,,,,,,,
2,waist,body weight,air bike,abs,0,1,0,0,0,1,...,,,,,,,,,,
3,upper legs,body weight,all fours squad stretch,quads,0,0,0,1,1,1,...,,,,,,,,,,
4,waist,body weight,alternate heel touchers,abs,0,1,0,0,0,1,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
243,,,"Sit, playing with animals",0,0,0,0,0,1,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
244,,,Pushing stroller or walking with children,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
245,,,Croquet,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
246,,,"Walking, under 2.0 mph, very slow",0,0,0,0,0,1,1,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [5]:
df.columns

Index(['Bodypart', 'Equipment', 'Exercise', 'Target', 'Gym', 'Strengthen Core',
       'Upper Body Gains', 'Bigger Booty', 'Stronger Legs',
       'Build Muscle Strength', 'Intensity', 'Daily Tasks', 'Water Sports',
       'Racing', 'Horse', 'Dance', 'Combat', 'Net', 'Ball or Puck',
       'Improve Endurance', 'Improve Flexibility', 'Improve Balance'],
      dtype='object')

In [6]:
# columns that will be considered as numeric features in model later on 
feature_columns = ['Strengthen Core',
       'Upper Body Gains', 'Bigger Booty', 'Stronger Legs',
       'Build Muscle Strength', 'Daily Tasks', 'Water Sports',
       'Racing', 'Horse', 'Dance', 'Combat', 'Net', 'Ball or Puck',
       'Improve Endurance', 'Improve Flexibility', 'Improve Balance']
# if NaN entry --> impute it to be zero for data processing in model
for col in feature_columns:
    df[col] = df[col].fillna(0)

## Data Pre-Processing 

### Biometric 
- if age is greater than 60, no high intensity exercises recommended to user
- if BMI is greater than 30, no high intensity exercises recommended to user because they are considered obese and may have physical limitations from completing these types of exercises 

### Fitness Goals 
- if want to loose weight/fat —> recommend medium or high intensity  
- if want to gain weight —> recommend low or medium intensity
- if want to maintain weight --> no sub-filters 

### Gym Access 
- if have access to a gym --> no filtration 
- if no access to a gym --> only recommend activities that do not involve a gym, filter out gym activities 

### Current Exercise 
- If > 6 – Highly regular: only high intensity  
- If 2-6 – Regular : low-high intensity 
- < 2 – Sporadic : low-medium intensity 
- 0 – No exercise : low intensity 

# User Questionnaire 

## Biometric Questions

In [7]:
# Question 1
gender = input("What is your sex (M / F)?")
# Question 2
age = int(input("What is your age? (years)"))
# Question 3
weight = float(input("What is your weight (lbs)?"))
# Question 4
height = input("What is your height? (feet (ft), inches (in))")

What is your sex (M / F)?F
What is your age? (years)30
What is your weight (lbs)?120
What is your height? (feet (ft), inches (in))5,5


In [8]:
# calculate user height 
hlist = height.split(",") # split feet and remaining inches given for height 
feet = int(hlist[0]) # feet portion 
inch = int(hlist[1]) # remaining inches portion 
# calculate total height in inches 
tot_height = feet*12 + inch 
tot_height

65

In [9]:
# calculate user BMI 
BMI = (weight / (tot_height * tot_height)) * 703
BMI

19.966863905325443

## Fitness Goal Questions 

In [10]:
# Question 5
print("What is your body weight goal? \n")
weight_goals = input("\n A) Lose weight/fat \n B) Gain weight \n C) Maintain weight")

## Subfiltering section ##
# BMI and age based intensity filtration 
if BMI > 30 or age >= 60: 
    # filter out high intensity exercise 
    df = df[df['Intensity'] != 'High']
# search through response
# if they are not above 60 or overweight -- filter by loose weight 'A' selection: 
elif re.search("A|a", weight_goals):
    # they want to loose weight, time to filter out low intensity activities 
    # we want to recommend intense activities that will burn more calories per kg 
    df = df[df['Intensity'] != 'Low']
# if they don't put A and instead put B (gain weight):
elif re.search("B|b", weight_goals):
    # they want to gain weight, time to filter out high intensity
    df = df[df['Intensity'] != 'High']
# if they want to maintain weight, no change or filtration 

What is your body weight goal? 


 A) Lose weight/fat 
 B) Gain weight 
 C) Maintain weightA


In [11]:
# Question 6 
## Subfiltering section ## 
gym = input("Do you have access to a gym (Y / N)?")
if re.search("N|n", gym):
    # filter out gym based activities 
    df = df[df['Gym'] != 1] # Gym Required (1); Gym not required (0)

Do you have access to a gym (Y / N)?Y


In [12]:
# Question 7 
print("How many times per a week do you exercise?")
print("\n A) Greater than 6 times a week, very regular")
print("\n B) About 2-6 times a week, regular")
print("\n C) Less than 2 times a week, sporadic")
print("\n D) Don't really exercise at all")
excise_amt = input(" ")
## subfiltering section ## 
## response evaluation ## - no age or BMI dependence at this stage
if re.search("A|a", excise_amt):
    # time to filter out low intensity 
    df = df[df['Intensity'] != 'Low']
elif re.search("C|c", excise_amt):
    # time to filter out high intensity 
    df = df[df['Intensity'] != 'High']
elif re.search("D|d", excise_amt):
    # time to filter out medium and high intensity 
    df = df[df['Intensity'] != 'High']
    df = df[df['Intensity'] != 'Medium']

How many times per a week do you exercise?

 A) Greater than 6 times a week, very regular

 B) About 2-6 times a week, regular

 C) Less than 2 times a week, sporadic

 D) Don't really exercise at all
 B


In [13]:
user_dict = {} # user dictionary of attributes 
# User's feature data collected 
print("What are your fitness goals? \n Please enter the letters of all that apply:")
print("\n A) Strengthen core \n B) Upper body gains \n C) Bigger booty")
print("\n D) Stronger legs \n E) Improve flexibility \n F) Improve endurance")
fit_goals = input(" ")
# search through response 
# ----- Strengthen Core? ------------------
if re.search("A|a", fit_goals):
    # strengthen core dict key --> value = 1 
    user_dict["Strengthen Core"] = 1
else:
    user_dict["Strengthen Core"] = 0 
# ----- Upper Body Gains? -----------------
if re.search("B|b", fit_goals):
    # upper body gains dict key --> value = 1 
    user_dict["Upper Body Gains"] = 1
    user_dict["Build Muscle Strength"] = 1
else: 
    user_dict["Upper Body Gains"] = 0 
    user_dict["Build Muscle Strength"] = 0 
# ------ Bigger Booty? ----------------------
if re.search("C|c", fit_goals):
    # bigger booty dict key  --> value = 1
    user_dict["Bigger Booty"] = 1
else:
    user_dict["Bigger Booty"] = 0 
# ------ Stronger Legs? ----------------------   
if re.search("D|d", fit_goals):
    # stronger legs dict key --> value = 1 
    user_dict["Stronger Legs"] = 1
else:
    user_dict["Stronger Legs"] = 0
# ------ Improve Flexibility? ---------------------- 
if re.search("E|e", fit_goals):
    # improve flexibility dict key --> value = 1 
    user_dict["Improve Flexibility"] = 1
else:
    user_dict["Improve Flexibility"] = 0
if re.search("F|f", fit_goals):
    # improve endurance dict key --> value = 1 
    user_dict["Improve Endurance"] = 1
else:
    user_dict["Improve Endurance"] = 0

What are your fitness goals? 
 Please enter the letters of all that apply:

 A) Strengthen core 
 B) Upper body gains 
 C) Bigger booty

 D) Stronger legs 
 E) Improve flexibility 
 F) Improve endurance
 BC


In [14]:
user_dict

{'Strengthen Core': 0,
 'Upper Body Gains': 1,
 'Build Muscle Strength': 1,
 'Bigger Booty': 1,
 'Stronger Legs': 0,
 'Improve Flexibility': 0,
 'Improve Endurance': 0}

## Random Forest Classifier

In [30]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt

In [31]:
#partition the data
feature_columns = ['Strengthen Core',
       'Upper Body Gains', 'Bigger Booty', 'Stronger Legs',
       'Build Muscle Strength', 'Daily Tasks', 'Water Sports',
       'Racing', 'Horse', 'Dance', 'Combat', 'Net', 'Ball or Puck',
       'Improve Endurance', 'Improve Flexibility', 'Improve Balance']
X   = df[feature_columns] #get the input features
y   = df['Exercise']              #get the target

X_train, X_test, y_train, y_test = train_test_split(X,              #the input features
                                                    y,              #the label
                                                    test_size=0.3,  #set aside 30% of the data as the test set
                                                    random_state=7, #reproduce the results
                                                   )

In [32]:
#build the classifier
rf = RandomForestClassifier(criterion='entropy', n_estimators = 50, random_state = 7)
rf.fit(X_train, y_train)

RandomForestClassifier(criterion='entropy', n_estimators=50, random_state=7)

In [33]:
#predict the labels for the test set
y_pred   = rf.predict(X_test)

# Prediction accuracy
print('The accuracy of the model is: {}'.format(rf.score(X_test, y_test)))

The accuracy of the model is: 0.0


In [34]:
#get the classification report for the decision tree
y_pred = rf.predict(X_test)
y_pred

array(['lever high row ', 'lunge with twist', 'lever high row ',
       'lever high row ', 'lever high row ',
       'dumbbell seated one leg calf raise - palm up', 'lever high row ',
       'lunge with twist', 'lever high row ',
       'self assisted inverse leg curl', 'lever high row ',
       'lever high row ', 'lever high row ', 'lunge with twist',
       'self assisted inverse leg curl', 'lever high row ',
       'lunge with twist', 'lunge with twist', 'lunge with twist',
       'lunge with twist', 'lever high row ', 'lever high row ',
       'lever high row ', 'lever high row ', 'lever high row ',
       'Whitewater rafting, kayaking, canoeing', 'lever high row ',
       'lever high row ', 'lever high row ', 'Soccer, competitive',
       'dumbbell seated one leg calf raise - palm up', 'lunge with twist',
       'Cycling, 12-13.9 mph, moderate', 'lever high row ',
       'lever high row ', 'lever high row ', 'lever high row ',
       'lever high row ', 'lever high row ',
       'S

## SVM Model 

In [20]:
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV

In [21]:
df.columns

Index(['Bodypart', 'Equipment', 'Exercise', 'Target', 'Gym', 'Strengthen Core',
       'Upper Body Gains', 'Bigger Booty', 'Stronger Legs',
       'Build Muscle Strength', 'Intensity', 'Daily Tasks', 'Water Sports',
       'Racing', 'Horse', 'Dance', 'Combat', 'Net', 'Ball or Puck',
       'Improve Endurance', 'Improve Flexibility', 'Improve Balance'],
      dtype='object')

In [22]:
#partition the data
X   = df[feature_columns] #get the input features
y   = df['Exercise']              #get the target

print(X)
print(y)

X_train, X_test, y_train, y_test = train_test_split(X,              #the input features
                                                    y,              #the label
                                                    test_size=0.3,  #set aside 30% of the data as the test set
                                                    random_state=7 #reproduce the results    
                                                   )

     Strengthen Core  Upper Body Gains  Bigger Booty  Stronger Legs  \
0                  1                 0             0              0   
1                  1                 0             0              0   
2                  1                 0             0              0   
3                  0                 0             1              1   
4                  1                 0             0              0   
..               ...               ...           ...            ...   
154                0                 0             0              0   
155                0                 0             0              0   
156                1                 1             0              1   
157                0                 0             1              1   
158                0                 1             1              1   

     Build Muscle Strength  Daily Tasks  Water Sports  Racing  Horse  Dance  \
0                        1          0.0           0.0     0.0    0.0

In [23]:
X_train = X_train.copy()
X_test  = X_test.copy()

In [24]:
X_train.head()

Unnamed: 0,Strengthen Core,Upper Body Gains,Bigger Booty,Stronger Legs,Build Muscle Strength,Daily Tasks,Water Sports,Racing,Horse,Dance,Combat,Net,Ball or Puck,Improve Endurance,Improve Flexibility,Improve Balance
983,0,0,0,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1026,0,0,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
473,0,1,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
498,0,1,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
336,0,1,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
#normalize the numeric features
#scaler = StandardScaler()
#scaler.fit(X_train[feature_columns])

#X_train[feature_columns] = scaler.transform(X_train[feature_columns]) #scale the training data

In [26]:
# take a look at the normalized, prepared data
X_train.head(10)

Unnamed: 0,Strengthen Core,Upper Body Gains,Bigger Booty,Stronger Legs,Build Muscle Strength,Daily Tasks,Water Sports,Racing,Horse,Dance,Combat,Net,Ball or Puck,Improve Endurance,Improve Flexibility,Improve Balance
983,0,0,0,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1026,0,0,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
473,0,1,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
498,0,1,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
336,0,1,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
232,0,0,1,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1050,0,0,1,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
157,0,0,1,1,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1064,0,0,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
728,0,0,1,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [27]:
#initialize the classifier

svm = SVC() #the default kernel is rbf
svm.fit(X_train, y_train) #fit the data

SVC()

In [28]:
X_test

Unnamed: 0,Strengthen Core,Upper Body Gains,Bigger Booty,Stronger Legs,Build Muscle Strength,Daily Tasks,Water Sports,Racing,Horse,Dance,Combat,Net,Ball or Puck,Improve Endurance,Improve Flexibility,Improve Balance
432,0,1,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
58,1,0,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1020,0,1,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
677,0,1,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1118,0,1,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1311,0,1,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
642,0,1,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1063,0,1,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
602,0,1,0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [29]:
#encode & scale the new/test data
#X_test[feature_columns] = scaler.transform(X_test[feature_columns]) #scale the test data
#X_test['Exercise'] = encoder.transform(X_test[['Exercise']]) #encode the test data

# we want like a set of recommendations -- get the nearest neighbors basically 

#predict the labels for the test set
y_pred   = svm.predict(X_test)

print('The predicted exercise is: {}'.format(y_pred))

The predicted exercise is: ['wrist rollerer' 'wind sprints' 'wrist rollerer' 'wrist rollerer'
 'wrist rollerer' 'self assisted inverse leg curl' 'wrist rollerer'
 'wind sprints' 'wrist rollerer' 'self assisted inverse leg curl'
 'wrist rollerer' 'wrist rollerer' 'wrist rollerer' 'wind sprints'
 'self assisted inverse leg curl' 'wrist rollerer' 'wind sprints'
 'wind sprints' 'wind sprints' 'wind sprints' 'wrist rollerer'
 'wrist rollerer' 'wrist rollerer' 'wrist rollerer' 'wrist rollerer'
 'Whitewater rafting, kayaking, canoeing' 'wrist rollerer'
 'wrist rollerer' 'wrist rollerer' 'Soccer, competitive'
 'self assisted inverse leg curl' 'wind sprints' 'Walking 5.0 mph'
 'wrist rollerer' 'wrist rollerer' 'wrist rollerer' 'wrist rollerer'
 'wrist rollerer' 'wrist rollerer' 'Swimming synchronized' 'wind sprints'
 'wind sprints' 'wrist rollerer' 'wrist rollerer'
 'self assisted inverse leg curl' 'wrist rollerer' 'Softball, pitching'
 'wrist rollerer' 'wrist rollerer' 'wrist rollerer' 'wrist 