# In [6]:
import os
import pickle

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# --- Configuration ---
EXCEL_SOURCE_PATH = "gym recommendation.xlsx"  # Source spreadsheet; reading it needs the optional 'openpyxl' package
DATASET_PATH = "gym_recommendation.csv"  # CSV copy of the spreadsheet, kept in the same directory
MODEL_EXPORT_PATH = "fitness_coach_agent.pkl"

# --- 1. Load Data ---
# Prefer the spreadsheet and refresh the CSV copy from it. When the spreadsheet
# cannot be read (file missing, or the optional Excel engine is not installed),
# fall back to a previously exported CSV instead of aborting the whole run.
try:
    df = pd.read_excel(EXCEL_SOURCE_PATH)
except (ImportError, FileNotFoundError) as excel_error:
    if not os.path.exists(DATASET_PATH):
        # No fallback available: surface the original, more informative error.
        raise
    print(f"Could not read '{EXCEL_SOURCE_PATH}' ({excel_error}). Loading '{DATASET_PATH}' instead.")
    df = pd.read_csv(DATASET_PATH)
else:
    df.to_csv(DATASET_PATH, index=False)


print("Dataset loaded successfully. Shape:", df.shape)
print("Columns:", df.columns.tolist())
print("\nFirst 5 rows:")
print(df.head())
print("\nInfo:")
df.info()

# --- 2. Data Preprocessing and Feature Engineering ---
print("\nPreprocessing data...")

# Standardize column names (strip surrounding whitespace, lowercase) so the
# lookups below do not depend on how the source file spelled its headers.
df.columns = df.columns.str.strip().str.lower()
print("Standardized columns:", df.columns.tolist())

# Check for missing values
print("\nMissing values before handling:")
print(df.isnull().sum())

# Simple imputation: categorical columns get their most frequent value,
# numerical columns get their median. Columns absent from the file are skipped.
categorical_cols = ['sex', 'hypertension', 'diabetes', 'level', 'fitness goal',
                    'fitness type', 'exercises', 'equipment', 'diet', 'recommendation']
numerical_cols = ['age', 'height', 'weight', 'bmi']

for col in categorical_cols:
    if col in df.columns and df[col].isnull().any():
        modes = df[col].mode()
        # mode() is empty when every value in the column is missing; there is
        # nothing to fill with in that case, so leave the column untouched
        # rather than failing on modes[0].
        if not modes.empty:
            df[col] = df[col].fillna(modes.iloc[0])

for col in numerical_cols:
    if col in df.columns and df[col].isnull().any():
        df[col] = df[col].fillna(df[col].median())

print("\nMissing values after handling:")
print(df.isnull().sum())


# Feature Engineering: recompute BMI as weight / height^2 so the column is
# always consistent with the height and weight actually stored in the row.
# Any 'bmi' column already present is overwritten; it is created otherwise.
# NOTE(review): the formula assumes weight in kg and height in meters, as the
# original notes for this dataset state - confirm before using other data.
if 'height' in df.columns and 'weight' in df.columns:
    df['bmi'] = df['weight'] / (df['height'] ** 2)


# Define BMI Levels - this ensures consistency if 'level' column is problematic
def get_bmi_level(bmi):
    """Return the weight category label for a BMI value.

    The categories are contiguous half-open ranges, so every finite value
    falls into exactly one of them:

    * ``bmi < 18.5``        -> ``'Underweight'``
    * ``18.5 <= bmi < 25``  -> ``'Normal weight'``
    * ``25 <= bmi < 30``    -> ``'Overweight'``
    * ``bmi >= 30``         -> ``'Obese'``

    Args:
        bmi: Body mass index as a number.

    Returns:
        One of the four category labels above. A NaN input compares false
        against every threshold and therefore also yields ``'Obese'``;
        impute or drop missing BMI values before calling this function.
    """
    if bmi < 18.5:
        return 'Underweight'
    if bmi < 25:
        return 'Normal weight'
    if bmi < 30:
        return 'Overweight'
    return 'Obese'

# Derive the weight category from BMI so 'level' always agrees with the 'bmi'
# column. Any 'level' column already present is overwritten; it is created
# otherwise.
if 'bmi' in df.columns:
    df['level'] = df['bmi'].apply(get_bmi_level)


print("\nSample data after preprocessing:")
# Every step above treats these columns as optional, so only preview the ones
# that actually exist instead of raising KeyError on a missing column.
preview_cols = [
    col
    for col in ['age', 'sex', 'height', 'weight', 'bmi', 'level', 'fitness goal']
    if col in df.columns
]
print(df[preview_cols].head())

# --- 3. Define the Fitness Coach Agent (CBR/k-NN like approach) ---
class FitnessCoachAgent:
    """Case-based recommender: answers with the plan of the most similar stored case.

    The agent keeps its own copy of the dataset, adds min-max scaled versions of
    ``age`` and ``bmi`` to it, and scores every stored case against a user
    profile. The best-scoring case supplies the workout, diet and general
    recommendation.

    Attributes:
        dataset: Private copy of the input frame plus the ``age_scaled`` and
            ``bmi_scaled`` columns.
        feature_cols: Columns describing a person (used for the matched profile).
        target_cols: Columns holding the recommended plan.
        scaler_age, scaler_bmi: Fitted ``MinMaxScaler`` instances used to scale
            the user's age and BMI the same way as the dataset.
    """

    # Points added when the user and a stored case agree on a categorical
    # feature. Fitness goal and sex carry the most weight. The dict order is
    # also the order in which the terms are added to the score.
    _CATEGORICAL_WEIGHTS = {
        'sex': 1.5,
        'fitness goal': 2.0,
        'hypertension': 0.5,
        'diabetes': 0.5,
    }

    def __init__(self, dataset):
        """Copy ``dataset`` and fit the age/BMI scalers on it.

        Args:
            dataset: DataFrame that contains at least the ``age`` and ``bmi``
                columns; the other feature/target columns are read later by
                ``recommend`` and ``get_unique_values``.
        """
        self.dataset = dataset.copy()
        self.feature_cols = ['age', 'bmi', 'sex', 'fitness goal', 'hypertension', 'diabetes']
        self.target_cols = ['exercises', 'equipment', 'diet', 'fitness type', 'recommendation']

        # Scale the numerical features so age and BMI differences are comparable.
        self.scaler_age = MinMaxScaler()
        self.scaler_bmi = MinMaxScaler()

        self.dataset['age_scaled'] = self.scaler_age.fit_transform(self.dataset[['age']])
        self.dataset['bmi_scaled'] = self.scaler_bmi.fit_transform(self.dataset[['bmi']])

    def _calculate_similarity(self, user_profile, case):
        """Score how closely ``case`` resembles ``user_profile`` (higher is closer).

        Each numerical feature contributes ``1 - |scaled difference|`` and each
        matching categorical feature contributes its weight from
        ``_CATEGORICAL_WEIGHTS``, so a perfect match scores 6.5.

        Args:
            user_profile: Mapping with ``age_scaled``, ``bmi_scaled`` and the
                four categorical features.
            case: Either a single dataset row, or the whole dataset frame to
                score every case at once.

        Returns:
            A number for a single row, or a Series of scores (one per row)
            when ``case`` is a DataFrame.
        """
        score = 0

        # Numerical features: similarity is 1 minus the scaled distance.
        score += 1 - abs(user_profile['age_scaled'] - case['age_scaled'])
        score += 1 - abs(user_profile['bmi_scaled'] - case['bmi_scaled'])

        # Categorical features: a match is worth its weight, a mismatch 0.
        # Multiplying by the boolean works for a scalar and for a whole column.
        for column, weight in self._CATEGORICAL_WEIGHTS.items():
            score += weight * (case[column] == user_profile[column])

        return score

    def recommend(self, age, weight, height, sex, fitness_goal, hypertension, diabetes):
        """Return the plan of the stored case most similar to the given user.

        Args:
            age: User age, in the same unit as the dataset's ``age`` column.
            weight: Body weight; BMI is computed as ``weight / height ** 2``.
            height: Body height (must be non-zero).
            sex: Value compared against the dataset's ``sex`` column.
            fitness_goal: Value compared against the ``fitness goal`` column.
            hypertension: Value compared against the ``hypertension`` column.
            diabetes: Value compared against the ``diabetes`` column.

        Returns:
            A dict with ``matched_user_profile``, ``similarity_score``,
            ``workout_plan``, ``diet_plan`` and ``general_recommendation``,
            or ``{"error": ...}`` when no case can be scored (empty dataset
            or only NaN scores). Ties go to the earliest case.
        """
        # 1. Build the user profile, including BMI derived from the inputs.
        user_bmi = weight / (height ** 2)

        user_profile = {
            'age': age,
            'bmi': user_bmi,
            'sex': sex,
            'fitness goal': fitness_goal,
            'hypertension': hypertension,
            'diabetes': diabetes,
        }

        # Scale the user's numerical features with the scalers fitted on the dataset.
        user_profile['age_scaled'] = self.scaler_age.transform(np.array([[age]]))[0, 0]
        user_profile['bmi_scaled'] = self.scaler_bmi.transform(np.array([[user_bmi]]))[0, 0]

        # 2. Score every stored case in one vectorized pass.
        scores = np.asarray(self._calculate_similarity(user_profile, self.dataset), dtype=float)

        # Nothing to choose from: empty dataset, or no case produced a usable score.
        if scores.size == 0 or np.isnan(scores).all():
            return {"error": "Could not find a suitable match."}

        # nanargmax yields a *position* (first maximum, NaN ignored), which is
        # what iloc expects regardless of how the dataset is indexed. Scores may
        # legitimately be negative for users far outside the dataset's range,
        # so there is no lower bound a match has to exceed.
        best_position = int(np.nanargmax(scores))
        best_case = self.dataset.iloc[best_position]

        return {
            'matched_user_profile': best_case[self.feature_cols + ['level']].to_dict(),
            'similarity_score': float(scores[best_position]),
            'workout_plan': {
                'fitness_type': best_case['fitness type'],
                'exercises': best_case['exercises'],
                'equipment': best_case['equipment'],
            },
            'diet_plan': best_case['diet'],
            'general_recommendation': best_case['recommendation'],
        }

    def get_unique_values(self):
        """Return the distinct dataset values per categorical feature (e.g. for UI dropdowns)."""
        return {
            column: self.dataset[column].unique().tolist()
            for column in self._CATEGORICAL_WEIGHTS
        }

# --- 4. Instantiate and Test the Agent ---
import json

print("\nInstantiating and testing the agent...")
agent = FitnessCoachAgent(df)

# Test with a sample user
sample_user_inputs = {
    "age": 30,
    "weight": 70,  # kg
    "height": 1.75, # meters
    "sex": "Male", # From dataset: 'Male', 'Female'
    "fitness_goal": "Weight Loss", # From dataset: e.g., 'Weight Loss', 'Muscle Gain', etc.
    "hypertension": "No", # 'Yes', 'No'
    "diabetes": "No" # 'Yes', 'No'
}

# Check every categorical test value against the values present in the dataset.
# A value the dataset never uses can never match a case, so replace it with the
# first available option and say so. Keys are recommend() argument names,
# values are the corresponding dataset columns.
unique_vals = agent.get_unique_values()
input_to_column = {
    "sex": "sex",
    "fitness_goal": "fitness goal",
    "hypertension": "hypertension",
    "diabetes": "diabetes",
}
for input_key, column in input_to_column.items():
    options = unique_vals[column]
    # An empty option list leaves nothing to substitute, so skip the check.
    if options and sample_user_inputs[input_key] not in options:
        print(f"Warning: Sample {column} '{sample_user_inputs[input_key]}' not in dataset options: {options}. Using first option.")
        sample_user_inputs[input_key] = options[0]


recommendation = agent.recommend(**sample_user_inputs)

print("\nRecommendation for sample user:")
print(json.dumps(recommendation, indent=2))

print("\nUnique values for UI dropdowns:")
print(agent.get_unique_values())

# --- 5. Export the Agent ---
# NOTE: pickle stores the agent's class by reference (module and class name),
# not its code. Whatever loads this file must be able to import
# FitnessCoachAgent under the same module path it has here, and should only
# unpickle files from a trusted source.
print(f"\nExporting the agent to {MODEL_EXPORT_PATH}...")
with open(MODEL_EXPORT_PATH, 'wb') as f:
    pickle.dump(agent, f)
print("Agent exported successfully.")

# Output of the run above:
# ImportError: Missing optional dependency 'openpyxl'.  Use pip or conda to install openpyxl.