In [5]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
import joblib
import os

# Make sure the output folder for the serialized models is present;
# an already existing folder is accepted as-is.
os.makedirs(name="model", exist_ok=True)

# Read both source tables. A failed read is reported on stdout and leaves the
# corresponding frame set to None, which the training steps check for.
try:
    diet_df = pd.read_csv("../data/diet_recommendations_dataset.csv")
except Exception as err:
    print(f"Error loading diet dataset: {err}")
    diet_df = None
else:
    print(f"Diet dataset loaded successfully with {len(diet_df)} rows")

try:
    gym_df = pd.read_excel("../data/gym recommendation.xlsx")
except Exception as err:
    print(f"Error loading gym dataset: {err}")
    gym_df = None
else:
    print(f"Gym dataset loaded successfully with {len(gym_df)} rows")

# Fit and persist the diet recommender (skipped when the dataset did not load)
if diet_df is not None:
    try:
        # Columns used as model inputs
        diet_features = [
            "Age",
            "Gender",
            "Weight_kg",
            "Height_cm",
            "BMI",
            "Disease_Type",
            "Severity",
            "Physical_Activity_Level",
            "Dietary_Restrictions",
            "Allergies",
            "Preferred_Cuisine",
        ]

        # Rows without a label cannot be used for supervised training
        diet_df = diet_df.dropna(subset=["Diet_Recommendation"])
        print(f"Diet dataset after dropping NAs: {len(diet_df)} rows")

        # Expand the categorical inputs into indicator (one-hot) columns
        X_diet = pd.get_dummies(diet_df[diet_features])

        # Encode the recommendation labels as integer class ids
        diet_label_encoder = LabelEncoder()
        y_diet = diet_label_encoder.fit_transform(diet_df["Diet_Recommendation"])

        # Fit a random forest with a fixed seed
        diet_model = RandomForestClassifier(random_state=42)
        diet_model.fit(X_diet, y_diet)
        print("Diet model trained successfully")

        # Persist the model, the label encoder and the training column names
        # together as a single tuple
        joblib.dump(
            (diet_model, diet_label_encoder, X_diet.columns.tolist()),
            "model/diet_model.pkl",
        )
        print("Diet model saved successfully")

        # Read the artifact back and report what it contains
        loaded_model = joblib.load("model/diet_model.pkl")
        print(f"Loaded diet model type: {type(loaded_model)}")
        if isinstance(loaded_model, tuple):
            print(f"Diet model components: {list(map(type, loaded_model))}")
    except Exception as err:
        # Any failure in this step is reported and the cell keeps running
        print(f"Error training diet model: {err}")

# Fit and persist the gym recommender (skipped when the dataset did not load)
if gym_df is not None:
    try:
        # Columns used as model inputs
        gym_features = [
            "Sex",
            "Age",
            "Height",
            "Weight",
            "Hypertension",
            "Diabetes",
            "BMI",
            "Fitness Goal",
        ]

        # Rows without a label cannot be used for supervised training
        gym_df = gym_df.dropna(subset=["Recommendation"])
        print(f"Gym dataset after dropping NAs: {len(gym_df)} rows")

        # Expand the categorical inputs into indicator (one-hot) columns
        X_gym = pd.get_dummies(gym_df[gym_features])

        # Encode the recommendation labels as integer class ids
        gym_label_encoder = LabelEncoder()
        y_gym = gym_label_encoder.fit_transform(gym_df["Recommendation"])

        # Fit a random forest with a fixed seed
        gym_model = RandomForestClassifier(random_state=42)
        gym_model.fit(X_gym, y_gym)
        print("Gym model trained successfully")

        # Persist the model, the label encoder and the training column names
        # together as a single tuple
        joblib.dump(
            (gym_model, gym_label_encoder, X_gym.columns.tolist()),
            "model/gym_model.pkl",
        )
        print("Gym model saved successfully")

        # Read the artifact back and report what it contains
        loaded_model = joblib.load("model/gym_model.pkl")
        print(f"Loaded gym model type: {type(loaded_model)}")
        if isinstance(loaded_model, tuple):
            print(f"Gym model components: {list(map(type, loaded_model))}")
    except Exception as err:
        # Any failure in this step is reported and the cell keeps running
        print(f"Error training gym model: {err}")

# Printed unconditionally, i.e. also when a step above reported an error
print("Training and saving models completed!")



Diet dataset loaded successfully with 1000 rows
Gym dataset loaded successfully with 14589 rows
Diet dataset after dropping NAs: 1000 rows
Diet model trained successfully
Diet model saved successfully
Loaded diet model type: <class 'tuple'>
Diet model components: [<class 'sklearn.ensemble._forest.RandomForestClassifier'>, <class 'sklearn.preprocessing._label.LabelEncoder'>, <class 'list'>]
Gym dataset after dropping NAs: 14589 rows
Gym model trained successfully
Gym model saved successfully
Loaded gym model type: <class 'tuple'>
Gym model components: [<class 'sklearn.ensemble._forest.RandomForestClassifier'>, <class 'sklearn.preprocessing._label.LabelEncoder'>, <class 'list'>]
Training and saving models completed!
