In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.naive_bayes import GaussianNB
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score
import joblib

# Load dataset: read the Excel workbook at this hard-coded absolute path into
# the module-level DataFrame `df`, which is later passed to train_all_models().
# NOTE(review): the path points into one user's Downloads folder — confirm the
# file exists on the machine running this cell.
df = pd.read_excel('/Users/rohitnagula/Downloads/ninja1234_height_updated_quantities.xlsx')

# Input columns selected from the dataset for every model, in selection order.
features = [
    'Sex',
    'Age',
    'Height',
    'Weight',
    'Hypertension',
    'Diabetes',
    'BMI',
    'Level',
    'Fitness Goal',
    'Fitness Type',
    'Diet Type',
]

# Maps a short target name (used for model file names and result keys) to a
# (dataset column, kind) pair, where kind is 'categorical' or 'numerical'.
target_columns = {
    name: (column, kind)
    for name, column, kind in [
        ('Exercises', 'Exercises', 'categorical'),
        ('Equipment', 'Equipment', 'categorical'),
        ('Recommendation', 'Recommendation', 'categorical'),
        ('Weeks to Reach Goal', 'Weeks to Reach Goal', 'numerical'),
        ('Total Protein Intake', 'Total Protein Intake (grams)', 'numerical'),
        ('BMR', 'BMR', 'numerical'),
        ('Total Calorie Intake', 'Total Calorie Intake', 'numerical'),
        ('Breakfast', 'Breakfast', 'categorical'),
        ('Lunch', 'Lunch', 'categorical'),
        ('Snacks', 'Evening Snacks', 'categorical'),
        ('Dinner', 'Dinner', 'categorical'),
    ]
}

# Preprocessing function
def get_preprocessor():
    """Build a new, unfitted ColumnTransformer for the model input columns.

    Numeric columns ('Age', 'Height', 'Weight', 'BMI') are mean-imputed and
    then standardized. Categorical columns ('Sex', 'Hypertension', 'Diabetes',
    'Level', 'Fitness Goal', 'Fitness Type', 'Diet Type') have missing values
    replaced by the constant string 'missing' and are then one-hot encoded;
    categories not seen during fitting are ignored at transform time.

    Returns:
        ColumnTransformer: an unfitted transformer whose transformed output
        is always a dense array.
    """
    categorical_features = ['Sex', 'Hypertension', 'Diabetes', 'Level', 'Fitness Goal', 'Fitness Type', 'Diet Type']
    numeric_features = ['Age', 'Height', 'Weight', 'BMI']

    numeric_pipeline = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler())
    ])
    categorical_pipeline = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ])

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_pipeline, numeric_features),
            ('cat', categorical_pipeline, categorical_features)
        ],
        # OneHotEncoder produces sparse output, and by default the stacked
        # result stays sparse when its density is below 0.3. GaussianNB does
        # not accept sparse input, so force dense output regardless of how
        # many one-hot columns the data produces.
        sparse_threshold=0
    )
    return preprocessor

# Prepare data
def prepare_data(df, target):
    """Split the feature columns and one target column into train/test sets.

    Args:
        df: DataFrame containing every column named in the module-level
            `features` list plus the `target` column.
        target: Name of the column in `df` to use as the prediction target.

    Returns:
        The result of train_test_split: X_train, X_test, y_train, y_test,
        with 20% of the rows held out for testing and a fixed random seed
        (42) so the split is reproducible.
    """
    inputs = df[features]
    labels = df[target]
    splits = train_test_split(
        inputs,
        labels,
        test_size=0.2,
        random_state=42,
    )
    return splits

# Train and evaluate model
def train_and_evaluate(X_train, X_test, y_train, y_test, model_type):
    """Fit a preprocessing + model pipeline and score it on the test split.

    Args:
        X_train: Training feature rows.
        X_test: Held-out feature rows used for scoring.
        y_train: Training target values.
        y_test: Held-out target values used for scoring.
        model_type: 'numerical' selects a RandomForestRegressor; any other
            value is treated as a categorical target and uses GaussianNB.

    Returns:
        A (pipeline, metrics) tuple. For 'numerical' targets, metrics holds
        'MAE', 'MSE' and 'R²'; otherwise it holds 'Accuracy'.
    """
    is_regression = model_type == 'numerical'

    # Regression targets get a random forest; everything else uses Naïve Bayes.
    estimator = (
        RandomForestRegressor(n_estimators=100, random_state=42)
        if is_regression
        else GaussianNB()
    )

    fitted = Pipeline(steps=[
        ('preprocessor', get_preprocessor()),
        ('model', estimator),
    ])
    fitted.fit(X_train, y_train)
    predictions = fitted.predict(X_test)

    if not is_regression:
        return fitted, {'Accuracy': accuracy_score(y_test, predictions)}

    regression_metrics = {
        'MAE': mean_absolute_error(y_test, predictions),
        'MSE': mean_squared_error(y_test, predictions),
        'R²': r2_score(y_test, predictions),
    }
    return fitted, regression_metrics

# Train all models
def train_all_models(df):
    """Train, save and score one model per entry in `target_columns`.

    Each fitted pipeline is written to 'model_<target name>.pkl' in the
    current working directory with joblib. A failure for one target is
    printed and that target is skipped, so the remaining targets still train.

    Args:
        df: DataFrame holding the feature columns and every target column.

    Returns:
        A (models, metrics) tuple of dicts keyed by target name; targets
        that raised an error appear in neither dict.
    """
    trained = {}
    metrics_by_target = {}

    for target_name, spec in target_columns.items():
        target_column, model_type = spec
        try:
            train_X, test_X, train_y, test_y = prepare_data(df, target_column)
            pipeline, metrics = train_and_evaluate(
                train_X, test_X, train_y, test_y, model_type
            )
            joblib.dump(pipeline, f'model_{target_name}.pkl')
            trained[target_name] = pipeline
            metrics_by_target[target_name] = metrics
            print(f"Model for {target_name} - Metrics: {metrics}")
        except Exception as exc:
            # Best effort: report the failure and move on to the next target.
            print(f"Error for target '{target_name}': {exc}")

    return trained, metrics_by_target

# Train models: runs training for every entry in target_columns when this cell
# executes. `models` maps target name -> fitted pipeline and `accuracies` maps
# target name -> metrics dict; targets that failed are absent from both.
models, accuracies = train_all_models(df)

# Define a function to make recommendations
def recommend(user_input):
    """Predict every target in `target_columns` for a single user.

    The saved pipeline for each target is loaded from
    'model_<target name>.pkl' on every call.

    Args:
        user_input: One record of feature values, used to build a one-row
            DataFrame with the columns listed in `features`.

    Returns:
        A dict keyed by target name. Each value is the model's prediction
        for the row, or the string 'Error: <message>' if loading the model
        or predicting raised an exception.
    """
    frame = pd.DataFrame([user_input], columns=features)
    predictions = {}

    for target_name, (_column, _kind) in target_columns.items():
        try:
            pipeline = joblib.load(f'model_{target_name}.pkl')
            outcome = pipeline.predict(frame)[0]
        except Exception as exc:
            # Best effort: record the failure in place of a prediction.
            outcome = f"Error: {exc}"
        predictions[target_name] = outcome

    return predictions


Model for Exercises - Metrics: {'Accuracy': 0.9969156956819739}
Model for Equipment - Metrics: {'Accuracy': 0.46504455106237147}
Model for Recommendation - Metrics: {'Accuracy': 0.5332419465387251}
Model for Weeks to Reach Goal - Metrics: {'MAE': 0.0, 'MSE': 0.0, 'R²': 1.0}
Model for Total Protein Intake - Metrics: {'MAE': 0.0005260452364706301, 'MSE': 0.00015720313570939331, 'R²': 0.9999996966793514}
Model for BMR - Metrics: {'MAE': 0.12425426233722182, 'MSE': 0.25874147734696323, 'R²': 0.9999951297236711}
Model for Total Calorie Intake - Metrics: {'MAE': 0.21324555078444743, 'MSE': 0.782501438861629, 'R²': 0.9999950501219965}
Model for Breakfast - Metrics: {'Accuracy': 1.0}
Model for Lunch - Metrics: {'Accuracy': 1.0}
Model for Snacks - Metrics: {'Accuracy': 1.0}
Model for Dinner - Metrics: {'Accuracy': 1.0}
