In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn import metrics
import pickle
import warnings
from warnings import filterwarnings
import tensorflow as tf
from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
import shap
from sklearn.preprocessing import StandardScaler

# Silence all warnings for the rest of the session (no category filter is given).
# NOTE(review): this also hides deprecation/convergence warnings raised by the ML libraries.
filterwarnings("ignore")
# Apply seaborn's default styling to the figures created below.
sns.set()

In [None]:
def load_data(calories_path="calories.csv", exercise_path="exercise.csv"):
    """Load the exercise features and attach the calorie target.

    Parameters
    ----------
    calories_path : str or path-like, default "calories.csv"
        CSV file that provides the ``Calories`` column.
    exercise_path : str or path-like, default "exercise.csv"
        CSV file that provides the feature columns and ``User_ID``.

    Returns
    -------
    pandas.DataFrame
        All exercise columns except ``User_ID``, followed by ``Calories``.
    """
    calories = pd.read_csv(calories_path)
    exercise = pd.read_csv(exercise_path)
    # NOTE(review): the target is attached by row index, not by key, so both
    # files are assumed to list the same records in the same order -- confirm.
    combined = pd.concat([exercise, calories["Calories"]], axis=1)
    # Return a new frame instead of mutating in place.
    return combined.drop(columns=["User_ID"])

def process_features(df):
    """Build the modelling frame: gender dummies followed by the numeric columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``Gender`` column; every numeric column is kept as is.

    Returns
    -------
    pandas.DataFrame
        One-hot encoded ``Gender`` (first category dropped) concatenated with
        the numeric columns of ``df``. Every column has a numeric dtype.
    """
    # The dtype is pinned on purpose: pandas >= 2.0 returns bool dummies by
    # default, and a bool column next to numeric ones turns `DataFrame.values`
    # into an object array, which the neural-network code cannot consume.
    gender_dummies = pd.get_dummies(df["Gender"], drop_first=True, dtype=np.uint8)
    numeric_columns = df.select_dtypes(include=np.number)
    return pd.concat([gender_dummies, numeric_columns], axis=1)

In [None]:
# Feature analysis visualizations
def create_visualizations(data):
    """Save distribution and correlation figures for ``data`` as PNG files.

    Writes ``numerical_distributions.png`` (histogram + KDE for up to eight
    columns, skipping the first column) and ``correlation_heatmap.png``
    (annotated correlation matrix of all columns). Returns nothing.
    """
    # Distributions: the first column is skipped (the gender dummy when the
    # frame comes from process_features); at most eight panels are drawn.
    plt.figure(figsize=(20, 15))
    plotted_columns = data.columns[1:][:8]
    for panel, column in enumerate(plotted_columns, start=1):
        plt.subplot(3, 3, panel)
        sns.histplot(data[column], kde=True)
        plt.xlabel(column, fontsize=12)
    plt.tight_layout()
    plt.savefig('numerical_distributions.png')
    plt.close()

    # Pairwise correlations across every column.
    plt.figure(figsize=(10, 8))
    correlations = data.corr()
    sns.heatmap(correlations, cmap='Blues', annot=True)
    plt.title('Feature Correlation')
    plt.savefig('correlation_heatmap.png')
    plt.close()

In [None]:
def train_body_temp_model(X_train, X_test, y_train, y_test):
    """Fit several regressors for body temperature and return the best one.

    Every candidate is fitted on ``(X_train, y_train)``; its R2, MAE and RMSE
    on ``(X_test, y_test)`` are printed, and the candidate with the highest R2
    is returned (the first one wins on ties).

    Parameters
    ----------
    X_train, y_train
        Features and target used for fitting.
    X_test, y_test
        Features and target used for scoring and model selection.

    Returns
    -------
    The fitted estimator with the highest R2 on the scoring split.
    """
    def predict(ml_model, model_name):
        """Fit ``ml_model``, print its metrics and return ``(model, r2)``."""
        model = ml_model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        r2 = metrics.r2_score(y_test, y_pred)

        print(f'\n{model_name} Performance (Body Temp Prediction):')
        print(f'R2 Score: {r2:.4f}')
        print(f'MAE: {metrics.mean_absolute_error(y_test, y_pred):.2f}')
        print(f'RMSE: {np.sqrt(metrics.mean_squared_error(y_test, y_pred)):.2f}')
        return model, r2

    print("\n" + "="*50)
    print("Training Body Temperature Models")
    print("="*50)

    # Seeded so that repeated runs select the same model; the seed matches the
    # random_state=1 used for the data splits elsewhere in this file.
    models = {
        'XGBRegressor': XGBRegressor(random_state=1),
        'LinearRegression': LinearRegression(),
        'DecisionTree': DecisionTreeRegressor(random_state=1),
        'RandomForest': RandomForestRegressor(random_state=1)
    }

    best_model = None
    best_score = -np.inf

    for name, model in models.items():
        # The R2 computed while reporting is reused, so each candidate is
        # predicted only once.
        current_model, score = predict(model, name)
        if score > best_score:
            best_score = score
            best_model = current_model

    return best_model


In [None]:
def train_calorie_model(X_train, X_test, y_train, y_test):
    """Fit several regressors for calories and return the best one.

    Every candidate is fitted on ``(X_train, y_train)``; its R2, MAE and RMSE
    on ``(X_test, y_test)`` are printed, and the candidate with the highest R2
    is returned (the first one wins on ties).

    Parameters
    ----------
    X_train, y_train
        Features and target used for fitting.
    X_test, y_test
        Features and target used for scoring and model selection.

    Returns
    -------
    The fitted estimator with the highest R2 on the scoring split.
    """
    def predict(ml_model, model_name):
        """Fit ``ml_model``, print its metrics and return ``(model, r2)``."""
        model = ml_model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        r2 = metrics.r2_score(y_test, y_pred)

        print(f'\n{model_name} Performance (Calorie Prediction):')
        print(f'R2 Score: {r2:.4f}')
        print(f'MAE: {metrics.mean_absolute_error(y_test, y_pred):.2f}')
        print(f'RMSE: {np.sqrt(metrics.mean_squared_error(y_test, y_pred)):.2f}')
        return model, r2

    print("\n" + "="*50)
    print("Training Calorie Models")
    print("="*50)

    # Seeded so that repeated runs select the same model; the seed matches the
    # random_state=1 used for the data splits elsewhere in this file.
    models = {
        'XGBRegressor': XGBRegressor(random_state=1),
        'LinearRegression': LinearRegression(),
        'DecisionTree': DecisionTreeRegressor(random_state=1),
        'RandomForest': RandomForestRegressor(random_state=1)
    }

    best_model = None
    best_score = -np.inf

    for name, model in models.items():
        # The R2 computed while reporting is reused, so each candidate is
        # predicted only once.
        current_model, score = predict(model, name)
        if score > best_score:
            best_score = score
            best_model = current_model

    return best_model


In [None]:
# Neural Network Model Definitions
def build_body_temp_nn(input_shape):
    """Return a compiled dense network (64 -> 32 -> 1) for body temperature.

    ``input_shape`` is the number of input features. The model is compiled
    with the Adam optimizer, MSE loss and MAE as the reported metric.
    """
    dense = tf.keras.layers.Dense
    network = tf.keras.Sequential()
    network.add(dense(64, activation='relu', input_shape=(input_shape,)))
    network.add(dense(32, activation='relu'))
    network.add(dense(1))  # single linear output for regression
    network.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return network

def build_calorie_nn(input_shape):
    """Return a compiled dense network (128 -> 64 -> 32 -> 1) for calories.

    ``input_shape`` is the number of input features. The model is compiled
    with the Adam optimizer, MSE loss and MAE as the reported metric.
    """
    dense = tf.keras.layers.Dense
    network = tf.keras.Sequential()
    network.add(dense(128, activation='relu', input_shape=(input_shape,)))
    network.add(dense(64, activation='relu'))
    network.add(dense(32, activation='relu'))
    network.add(dense(1))  # single linear output for regression
    network.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return network

def train_nn_body_temp(X_train, X_val, y_train, y_val):
    """Train the body-temperature network with early stopping.

    Parameters
    ----------
    X_train, y_train
        Training features and target.
    X_val, y_val
        Validation features and target monitored by early stopping.

    Returns
    -------
    The trained Keras model.
    """
    model = build_body_temp_nn(X_train.shape[1])
    # restore_best_weights: without it the returned model carries the weights
    # of the last epoch, i.e. `patience` epochs past the best validation loss.
    early_stop = tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)
    model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=200,
        batch_size=32,
        callbacks=[early_stop],
        verbose=0
    )
    return model

def train_nn_calorie(X_train, X_val, y_train, y_val):
    """Train the calorie network with early stopping and save its loss curves.

    Parameters
    ----------
    X_train, y_train
        Training features and target.
    X_val, y_val
        Validation features and target monitored by early stopping.

    Returns
    -------
    The trained Keras model. As a side effect, the training/validation loss
    per epoch is plotted to ``training_history.png``.
    """
    model = build_calorie_nn(X_train.shape[1])
    # restore_best_weights: without it the returned model carries the weights
    # of the last epoch, i.e. `patience` epochs past the best validation loss.
    early_stop = tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=200,
        batch_size=32,
        callbacks=[early_stop],
        verbose=0
    )

    # Loss curves for visual inspection of convergence / over-fitting.
    plt.figure(figsize=(10, 6))
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title('Training and Validation Loss')
    plt.legend()
    plt.grid(True)
    plt.savefig('training_history.png')
    plt.close()

    return model

In [None]:
def explain_with_shap(model, X_data, feature_names, model_type='traditional', title_suffix=""):
    """Save a SHAP summary plot and a single-row force plot for ``model``.

    Parameters
    ----------
    model
        Fitted estimator (``model_type='traditional'``) or Keras model
        (``model_type='neural_net'``).
    X_data : pandas.DataFrame
        Rows to explain; at most 100 are sampled (``random_state=1``).
    feature_names : list of str
        Names shown on the plots, in column order.
    model_type : {'traditional', 'neural_net'}
        Selects the SHAP explainer family.
    title_suffix : str
        Inserted into the output file names
        ``shap_summary_<suffix>.png`` and ``shap_force_<suffix>.png``.

    Notes
    -----
    Best effort: any failure is reported as ``SHAP Error: ...`` on stdout and
    the function returns normally. Figures created here are always closed.
    """
    print(f"\nGenerating SHAP explanations ({model_type})...")
    sample = X_data.sample(min(100, len(X_data)), random_state=1)

    # Remember the figures that already exist so only ours are closed below.
    figures_before = set(plt.get_fignums())

    try:
        sample_array = sample.values.astype(np.float32)

        if model_type == 'traditional':
            # The selected "traditional" model may be linear; TreeExplainer
            # only supports tree ensembles, so pick the explainer by the
            # attributes the fitted model exposes.
            is_linear = hasattr(model, 'coef_') and not hasattr(model, 'feature_importances_')
            if is_linear:
                explainer = shap.LinearExplainer(model, sample_array)
            else:
                explainer = shap.TreeExplainer(model)
        elif model_type == 'neural_net':
            bg_sample_size = min(50, sample_array.shape[0])
            # Seeded permutation so the background set is reproducible.
            indices = np.random.default_rng(1).permutation(sample_array.shape[0])[:bg_sample_size]
            background = sample_array[indices]
            explainer = shap.DeepExplainer(model, background)
        else:
            raise ValueError(f"Unknown model_type: {model_type!r}")

        shap_values = explainer.shap_values(sample_array)
        if isinstance(shap_values, list):
            shap_values = shap_values[0]
        shap_values = np.asarray(shap_values)
        # Some SHAP versions return (rows, features, outputs) for a
        # single-output model; drop the trailing output axis.
        if shap_values.ndim == 3 and shap_values.shape[-1] == 1:
            shap_values = shap_values[..., 0]

        plt.figure(figsize=(10, 6))
        shap.summary_plot(shap_values, sample_array, feature_names=feature_names, show=False)
        plt.savefig(f'shap_summary_{title_suffix}.png', bbox_inches='tight')
        plt.close()

        expected_value = explainer.expected_value
        if isinstance(expected_value, list):
            expected_value = expected_value[0]
        # np.asarray also unwraps tensor-like values; a single element is
        # reduced to a plain Python scalar for the force plot.
        expected_value = np.asarray(expected_value)
        if expected_value.size == 1:
            expected_value = expected_value.item()

        force_instance = sample_array[0]
        plt.figure()
        shap.force_plot(expected_value, shap_values[0], force_instance,
                        feature_names=feature_names, matplotlib=True, show=False)
        plt.savefig(f'shap_force_{title_suffix}.png', bbox_inches='tight')
        plt.close()

    except Exception as e:
        print(f"SHAP Error: {str(e)}")
    finally:
        # Close whatever this call opened and did not close (error paths, or
        # figures created internally by the plotting helpers).
        for fig_num in set(plt.get_fignums()) - figures_before:
            plt.close(fig_num)

In [None]:
def train_models(X_body_train, X_body_val, y_body_train, y_body_val, y_cal_train, y_cal_val):
    """Train the body-temperature and calorie models in two stages.

    Stage 1 fits a traditional regressor and a neural network for body
    temperature. Their predictions are appended to the features as
    ``Predicted_Body_Temp_Traditional`` and ``Predicted_Body_Temp_NN``.
    Stage 2 fits the calorie models on the augmented features; the traditional
    calorie model does not see the ``Predicted_Body_Temp_NN`` column.

    Returns
    -------
    tuple
        ``(body_temp_model, body_temp_nn, calorie_model, calorie_nn)``.
    """
    # Stage 1: body temperature.
    body_temp_model = train_body_temp_model(X_body_train, X_body_val, y_body_train, y_body_val)
    body_temp_nn = train_nn_body_temp(X_body_train, X_body_val, y_body_train, y_body_val)

    def augment(features):
        """Return ``features`` plus both body-temperature prediction columns."""
        traditional_pred = body_temp_model.predict(features)
        nn_pred = body_temp_nn.predict(features).flatten()
        return features.assign(
            Predicted_Body_Temp_Traditional=traditional_pred,
            Predicted_Body_Temp_NN=nn_pred
        )

    X_cal_train = augment(X_body_train)
    X_cal_val = augment(X_body_val)

    # Stage 2: calories.
    nn_only = ['Predicted_Body_Temp_NN']
    calorie_model = train_calorie_model(
        X_cal_train.drop(columns=nn_only),
        X_cal_val.drop(columns=nn_only),
        y_cal_train,
        y_cal_val
    )
    calorie_nn = train_nn_calorie(X_cal_train, X_cal_val, y_cal_train, y_cal_val)

    return body_temp_model, body_temp_nn, calorie_model, calorie_nn

def test_models(models, X_body_test, y_body_test, X_cal_test_raw, y_cal_test):
    """Evaluate models and return test predictions"""
    body_temp_model, body_temp_nn, calorie_model, calorie_nn = models

    # Body temperature predictions
    body_temp_test_preds = {
        'traditional': body_temp_model.predict(X_body_test),
        'neural_net': body_temp_nn.predict(X_body_test).flatten()
    }

    # Create calorie features for test set
    X_cal_test = X_cal_test_raw.assign(
        Predicted_Body_Temp_Traditional=body_temp_test_preds['traditional'],
        Predicted_Body_Temp_NN=body_temp_test_preds['neural_net']
    )

    # Calorie predictions
    calorie_preds = {
        'traditional': calorie_model.predict(X_cal_test.drop(columns=['Predicted_Body_Temp_NN'])),
        'neural_net': calorie_nn.predict(X_cal_test).flatten()
    }

    return body_temp_test_preds, calorie_preds, X_cal_test

def generate_shap(models, X_body_sample, X_cal_sample):
    """Produce SHAP plots for the four models.

    ``models`` is ``(body_temp_model, body_temp_nn, calorie_model,
    calorie_nn)``. The body-temperature models are explained on
    ``X_body_sample``; the calorie models on ``X_cal_sample``, with the
    ``Predicted_Body_Temp_NN`` column removed for the traditional model.
    """
    body_temp_model, body_temp_nn, calorie_model, calorie_nn = models

    def explain(model, frame, kind, suffix):
        # Feature names always mirror the columns of the frame being explained.
        explain_with_shap(
            model,
            frame,
            feature_names=frame.columns.tolist(),
            model_type=kind,
            title_suffix=suffix
        )

    # Body temperature explanations
    explain(body_temp_model, X_body_sample, 'traditional', "Body_Temp_Traditional")
    explain(body_temp_nn, X_body_sample, 'neural_net', "Body_Temp_NeuralNet")

    # Calorie explanations
    X_cal_traditional = X_cal_sample.drop(columns=['Predicted_Body_Temp_NN'])
    explain(calorie_model, X_cal_traditional, 'traditional', "Calorie_Traditional")
    explain(calorie_nn, X_cal_sample, 'neural_net', "Calorie_NeuralNet")


In [None]:
def plot_predictions_heatmap(y_true, y_pred, model_label, task):
    """Save a 30x30 2-D histogram of true vs. predicted values as a heatmap.

    The counts are transposed before plotting, so heatmap columns follow the
    true-value bins and rows follow the predicted-value bins. The figure is
    written to ``heatmap_<task>_<model_label>.png``.
    """
    # Only the counts are needed; the bin edges are discarded.
    counts = np.histogram2d(y_true, y_pred, bins=30)[0]

    plt.figure(figsize=(8, 6))
    sns.heatmap(counts.T, cmap="coolwarm", annot=False)
    plt.xlabel(f"True {task}")
    plt.ylabel(f"Predicted {task}")
    plt.title(f"Heatmap of {task} Predictions ({model_label})")
    plt.savefig(f"heatmap_{task}_{model_label}.png", bbox_inches='tight')
    plt.close()

def plot_feature_importance(model, feature_names, model_label, task):
    """Save a bar plot of the model's feature importances or coefficients.

    ``feature_importances_`` is preferred, ``coef_`` is the fallback. When the
    model has neither, a message is printed and nothing is plotted. The
    figure goes to ``feature_importance_<task>_<model_label>.png``. Any error
    while plotting is printed instead of raised.
    """
    try:
        # First attribute the model exposes wins.
        for attribute in ('feature_importances_', 'coef_'):
            if hasattr(model, attribute):
                importances = getattr(model, attribute)
                break
        else:
            print(f"No feature importance available for {model_label}")
            return

        ranking = pd.DataFrame({'Feature': feature_names, 'Importance': importances})
        ranking = ranking.sort_values(by='Importance', ascending=False)

        plt.figure(figsize=(10, 6))
        sns.barplot(x='Importance', y='Feature', data=ranking)
        plt.title(f'Feature Importance for {model_label} ({task})')
        plt.tight_layout()
        plt.savefig(f'feature_importance_{task}_{model_label}.png', bbox_inches='tight')
        plt.close()
    except Exception as e:
        print(f"Error plotting feature importance for {model_label}: {e}")

def print_metrics(y_true, y_pred):
    """Print R2, MAE and RMSE of ``y_pred`` against ``y_true``."""
    r2 = metrics.r2_score(y_true, y_pred)
    mae = metrics.mean_absolute_error(y_true, y_pred)
    # RMSE is the square root of the mean squared error.
    rmse = np.sqrt(metrics.mean_squared_error(y_true, y_pred))

    print(f"R2 Score: {r2:.4f}")
    print(f"MAE: {mae:.2f}")
    print(f"RMSE: {rmse:.2f}")

In [None]:
# To train and test
def main():
    """Run the full pipeline: load, visualise, train, evaluate, save, explain.

    Side effects: reads the two input CSV files, prints training and test
    metrics, and writes the figure PNGs, two pickled models
    (``body_temp_model.pkl``, ``calorie_model.pkl``) and two Keras models
    (``body_temp_nn.h5``, ``calorie_nn.h5``) to the working directory.
    """
    # Data processing
    df = load_data()
    processed_data = process_features(df)

    create_visualizations(processed_data)

    # Hold out 20% as the final test set; the remainder is split 75/25 below
    # into training and validation parts.
    temp_data, final_test_data = train_test_split(processed_data, test_size=0.2, random_state=1)
    X_body = temp_data.drop(columns=['Calories', 'Body_Temp'])
    y_body = temp_data['Body_Temp']

    # Train/val split
    X_body_train, X_body_val, y_body_train, y_body_val = train_test_split(
        X_body, y_body, test_size=0.25, random_state=1)

    # Train models; calorie targets are looked up by the index of each split.
    models = train_models(
        X_body_train, X_body_val,
        y_body_train, y_body_val,
        temp_data.loc[X_body_train.index, 'Calories'],
        temp_data.loc[X_body_val.index, 'Calories']
    )

    # Prepare test data
    X_body_test = final_test_data.drop(columns=['Calories', 'Body_Temp'])
    y_body_test = final_test_data['Body_Temp']
    y_cal_test = final_test_data['Calories']

    # Test models. The same feature frame serves as the base of the calorie
    # features; test_models extends a copy and leaves its input untouched.
    body_temp_preds, calorie_preds, X_cal_test = test_models(
        models, X_body_test, y_body_test,
        X_body_test,
        y_cal_test
    )

    # Print results
    print("\n" + "="*50)
    print("Final Test Set Evaluation")
    print("="*50)

    # Body temperature metrics
    for model_type in ['traditional', 'neural_net']:
        print(f"\nBody Temperature ({model_type}):")
        print_metrics(y_body_test, body_temp_preds[model_type])

    # Calorie metrics
    for model_type in ['traditional', 'neural_net']:
        print(f"\nCalorie Prediction ({model_type}):")
        print_metrics(y_cal_test, calorie_preds[model_type])

    # Save models
    with open('body_temp_model.pkl', 'wb') as f:
        pickle.dump(models[0], f)
    with open('calorie_model.pkl', 'wb') as f:
        pickle.dump(models[2], f)
    models[1].save('body_temp_nn.h5')
    models[3].save('calorie_nn.h5')

    # SHAP explanations
    print("\n" + "="*50)
    print("Generating SHAP Explanations")
    print("="*50)
    # Cap the sample size: DataFrame.sample raises when asked for more rows
    # than the frame holds (without replacement).
    generate_shap(
        models,
        X_body_test.sample(min(100, len(X_body_test)), random_state=1),
        X_cal_test.sample(min(100, len(X_cal_test)), random_state=1)
    )

# NOTE(review): this re-defines print_metrics with a body identical to the
# definition earlier in this file. Being the later one, it is the version in
# effect when main() is invoked below. Consider keeping a single copy.
def print_metrics(y_true, y_pred):
    """Print R2, MAE and RMSE of ``y_pred`` against ``y_true``."""
    print(f"R2 Score: {metrics.r2_score(y_true, y_pred):.4f}")
    print(f"MAE: {metrics.mean_absolute_error(y_true, y_pred):.2f}")
    # RMSE is the square root of the mean squared error.
    print(f"RMSE: {np.sqrt(metrics.mean_squared_error(y_true, y_pred)):.2f}")

# Run the whole pipeline when this code is executed directly (not imported).
if __name__ == "__main__":
    main()