In [1]:
import joblib
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [6]:
# Load the gym recommendation spreadsheet into a DataFrame.
dataset = pd.read_excel("gym_recommendation_dataset.xlsx")


In [7]:
# List the column names of the freshly loaded data.
dataset.columns


Index(['ID', 'Sex', 'Age', 'Height', 'Weight', 'Hypertension', 'Diabetes',
       'BMI', 'Level', 'Fitness Goal', 'Fitness Type', 'Exercises',
       'Equipment', 'Diet', 'Recommendation'],
      dtype='object')

In [8]:
# Drop the 'Fitness Type' and 'ID' columns.
# errors="ignore" skips columns that are already absent, so re-running this
# cell does not raise KeyError; the result is rebound instead of mutating
# the frame in place.
dataset = dataset.drop(columns=["Fitness Type", "ID"], errors="ignore")

In [9]:
# Confirm that 'Fitness Type' and 'ID' are no longer among the columns.
dataset.columns

Index(['Sex', 'Age', 'Height', 'Weight', 'Hypertension', 'Diabetes', 'BMI',
       'Level', 'Fitness Goal', 'Exercises', 'Equipment', 'Diet',
       'Recommendation'],
      dtype='object')

In [10]:
# Preview the first rows before any scaling or encoding is applied.
dataset.head()


Unnamed: 0,Sex,Age,Height,Weight,Hypertension,Diabetes,BMI,Level,Fitness Goal,Exercises,Equipment,Diet,Recommendation
0,Male,18,1.68,47.5,No,No,16.83,Underweight,Weight Gain,"Squats, deadlifts, bench presses, and overhead...",Dumbbells and barbells,"Vegetables: (Carrots, Sweet Potato, and Lettuc...",Follow a regular exercise schedule. Adhere to ...
1,Male,18,1.68,47.5,Yes,No,16.83,Underweight,Weight Gain,"Squats, deadlifts, bench presses, and overhead...","Light athletic shoes, resistance bands, and li...","Vegetables: (Tomatoes, Garlic, leafy greens, b...",Follow a regular exercise schedule. Adhere to ...
2,Male,18,1.68,47.5,No,Yes,16.83,Underweight,Weight Gain,"Squats, yoga, deadlifts, bench presses, and ov...","Dumbbells, barbells and Blood glucose monitor","Vegetables: (Garlic, Roma Tomatoes, Capers and...",Follow a regular exercise schedule. Adhere to ...
3,Male,18,1.68,47.5,Yes,Yes,16.83,Underweight,Weight Gain,"Squats, yoga, deadlifts, bench presses, and ov...","Light athletic shoes, resistance bands, light ...","Vegetables: (Garlic, Roma Tomatoes, Capers, Gr...",Follow a regular exercise schedule. Adhere to ...
4,Male,18,1.68,47.5,No,No,16.83,Underweight,Weight Gain,"Squats, deadlifts, bench presses, and overhead...",Dumbbells and barbells,"Vegetables: (Carrots, Sweet Potato, Lettuce); ...",Follow a regular exercise schedule. Adhere to ...


First, we need to scale the numerical columns.

In [11]:
# Standardize the numeric features; the fitted scaler stays bound to `scaler`
# so later cells can reuse it.
numeric_cols = ['Age', 'Height', 'Weight', 'BMI']
scaler = StandardScaler()
scaled_values = scaler.fit_transform(dataset[numeric_cols])
dataset[numeric_cols] = scaled_values
dataset.head()

Unnamed: 0,Sex,Age,Height,Weight,Hypertension,Diabetes,BMI,Level,Fitness Goal,Exercises,Equipment,Diet,Recommendation
0,Male,-1.63391,-0.202298,-1.14858,No,No,-1.121606,Underweight,Weight Gain,"Squats, deadlifts, bench presses, and overhead...",Dumbbells and barbells,"Vegetables: (Carrots, Sweet Potato, and Lettuc...",Follow a regular exercise schedule. Adhere to ...
1,Male,-1.63391,-0.202298,-1.14858,Yes,No,-1.121606,Underweight,Weight Gain,"Squats, deadlifts, bench presses, and overhead...","Light athletic shoes, resistance bands, and li...","Vegetables: (Tomatoes, Garlic, leafy greens, b...",Follow a regular exercise schedule. Adhere to ...
2,Male,-1.63391,-0.202298,-1.14858,No,Yes,-1.121606,Underweight,Weight Gain,"Squats, yoga, deadlifts, bench presses, and ov...","Dumbbells, barbells and Blood glucose monitor","Vegetables: (Garlic, Roma Tomatoes, Capers and...",Follow a regular exercise schedule. Adhere to ...
3,Male,-1.63391,-0.202298,-1.14858,Yes,Yes,-1.121606,Underweight,Weight Gain,"Squats, yoga, deadlifts, bench presses, and ov...","Light athletic shoes, resistance bands, light ...","Vegetables: (Garlic, Roma Tomatoes, Capers, Gr...",Follow a regular exercise schedule. Adhere to ...
4,Male,-1.63391,-0.202298,-1.14858,No,No,-1.121606,Underweight,Weight Gain,"Squats, deadlifts, bench presses, and overhead...",Dumbbells and barbells,"Vegetables: (Carrots, Sweet Potato, Lettuce); ...",Follow a regular exercise schedule. Adhere to ...


Now we need to encode the categorical columns.

In [12]:
# Label-encode each categorical column with its own LabelEncoder and keep the
# fitted encoders in `label_encoders`, keyed by column name. A single shared
# encoder would be re-fit on every iteration, so only the last column's
# classes_ would remain available for inverse_transform or for encoding new
# data consistently.
label_encoders = {}
for col in ['Sex', 'Hypertension', 'Diabetes', 'Level', 'Fitness Goal']:
    # A fresh encoder per column; after the loop `label_encode` refers to the
    # encoder fitted on the last column ('Fitness Goal').
    label_encode = LabelEncoder()
    dataset[col] = label_encode.fit_transform(dataset[col])
    label_encoders[col] = label_encode

In [14]:
# Preview the first rows after scaling and label encoding.
dataset.head()

Unnamed: 0,Sex,Age,Height,Weight,Hypertension,Diabetes,BMI,Level,Fitness Goal,Exercises,Equipment,Diet,Recommendation
0,1,-1.63391,-0.202298,-1.14858,0,0,-1.121606,3,0,"Squats, deadlifts, bench presses, and overhead...",Dumbbells and barbells,"Vegetables: (Carrots, Sweet Potato, and Lettuc...",Follow a regular exercise schedule. Adhere to ...
1,1,-1.63391,-0.202298,-1.14858,1,0,-1.121606,3,0,"Squats, deadlifts, bench presses, and overhead...","Light athletic shoes, resistance bands, and li...","Vegetables: (Tomatoes, Garlic, leafy greens, b...",Follow a regular exercise schedule. Adhere to ...
2,1,-1.63391,-0.202298,-1.14858,0,1,-1.121606,3,0,"Squats, yoga, deadlifts, bench presses, and ov...","Dumbbells, barbells and Blood glucose monitor","Vegetables: (Garlic, Roma Tomatoes, Capers and...",Follow a regular exercise schedule. Adhere to ...
3,1,-1.63391,-0.202298,-1.14858,1,1,-1.121606,3,0,"Squats, yoga, deadlifts, bench presses, and ov...","Light athletic shoes, resistance bands, light ...","Vegetables: (Garlic, Roma Tomatoes, Capers, Gr...",Follow a regular exercise schedule. Adhere to ...
4,1,-1.63391,-0.202298,-1.14858,0,0,-1.121606,3,0,"Squats, deadlifts, bench presses, and overhead...",Dumbbells and barbells,"Vegetables: (Carrots, Sweet Potato, Lettuce); ...",Follow a regular exercise schedule. Adhere to ...


In [15]:
# Write the processed frame to CSV; index=False leaves the row index out of the file.
dataset.to_csv('encoded_dataset.csv', index=False)


In [16]:
# Persist the fitted StandardScaler to disk so it can be reloaded later
# (e.g. with joblib.load) instead of being re-fit. joblib is imported in the
# notebook's top import cell.
joblib.dump(scaler, 'trained_scaler.pkl')


['trained_scaler.pkl']