In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import RFE
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import pickle

In [2]:
def data(path="gym_members_exercise_tracking.csv"):
    """Load the gym-members CSV, encode categoricals, and drop IQR outliers.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV file. Defaults to the file name the notebook
        originally hard-coded, so existing ``data()`` calls are unaffected.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with ``Gender`` and ``Workout_Type`` replaced by
        integer codes and with rows removed whose ``Weight (kg)``,
        ``Calories_Burned`` or ``BMI`` lies outside the 1.5 * IQR fences.
        The filters are applied one after another, so each column's
        quartiles are computed on the rows that survived the previous filter.
    """
    df = pd.read_csv(path)

    # Series.map with a dict turns any label missing from the dict into NaN.
    df["Gender"] = df["Gender"].map({"Male": 1, "Female": 2})
    df["Workout_Type"] = df["Workout_Type"].map({"Strength": 1, "Cardio": 2, "Yoga": 3, "HIIT": 4})

    def outlier(df, column):
        """Return the rows of ``df`` whose ``column`` is within the IQR fences."""
        q1 = df[column].quantile(0.25)
        q3 = df[column].quantile(0.75)
        iqr = q3 - q1
        lower = q1 - 1.5 * iqr
        upper = q3 + 1.5 * iqr
        # Inclusive bounds: a value sitting exactly on a fence is not an
        # outlier, and a column with IQR == 0 (lower == upper) must keep its
        # rows instead of being emptied by a strict comparison.
        return df[(df[column] >= lower) & (df[column] <= upper)]

    for column in ("Weight (kg)", "Calories_Burned", "BMI"):
        df = outlier(df, column)

    return df


In [3]:
def create_model(random_state=42):
    """Train a random-forest regressor that predicts ``Calories_Burned``.

    The cleaned frame from ``data()`` is split 80/20 into train and test
    rows. Both scalers are fitted on the training rows only, so no test-set
    statistics leak into preprocessing or into the reported error.

    Parameters
    ----------
    random_state : int, optional
        Seed used for both the train/test split and the forest, making the
        printed MSE and the returned model reproducible. Defaults to 42,
        the seed the split already used.

    Returns
    -------
    tuple
        ``(scaler_X, scaler_y, model)``: the fitted feature scaler, the
        fitted target scaler, and the trained ``RandomForestRegressor``.
        The model is trained on scaled features and a scaled target, so its
        predictions must go through ``scaler_y.inverse_transform``.

    Side effects
    ------------
    Prints the test-set mean squared error in the target's original units.
    """
    df = data()

    # Define features and target
    X = df[['Age', 'Gender', 'Avg_BPM', 'Session_Duration (hours)', 'Fat_Percentage']]
    y = df["Calories_Burned"].to_numpy().reshape(-1, 1)  # scalers expect 2D input

    # Split BEFORE scaling so the held-out rows stay unseen by the scalers.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.20, random_state=random_state
    )

    # Separate scalers for features and target, fitted on training data only
    scaler_X = StandardScaler()
    scaler_y = StandardScaler()
    X_train_scaled = scaler_X.fit_transform(X_train)
    X_test_scaled = scaler_X.transform(X_test)
    y_train_scaled = scaler_y.fit_transform(y_train)

    # Seed the forest as well; otherwise every run yields a different model.
    model = RandomForestRegressor(random_state=random_state)
    model.fit(X_train_scaled, y_train_scaled.ravel())

    # Predictions come out in scaled units; map them back before scoring.
    y_pred_scaled = model.predict(X_test_scaled)
    y_pred_original = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1))

    # y_test was never scaled, so it is already in original units.
    mse = mean_squared_error(y_test, y_pred_original)
    print(f"Mean Squared Error: {mse:.2f}")

    # Return the scalers and the trained model
    return scaler_X, scaler_y, model



In [5]:
def main():
    """Train the model and pickle the scalers and model to the working directory.

    Writes three files: ``scaler_X.pkl`` (feature scaler), ``scaler_y.pkl``
    (target scaler) and ``model.pkl`` (trained regressor). Existing files
    with those names are overwritten.
    """
    # create_model() loads and cleans the data itself, so no separate
    # data() call is needed here.
    scaler_X, scaler_y, model = create_model()

    artifacts = {
        "scaler_X.pkl": scaler_X,  # feature scaler
        "scaler_y.pkl": scaler_y,  # target scaler
        "model.pkl": model,        # trained model
    }
    for filename, obj in artifacts.items():
        with open(filename, "wb") as f:
            pickle.dump(obj, f)
# Run the training/export pipeline only when this file is executed directly.
if __name__=="__main__":
    main()


Mean Squared Error: 1000.23
