In [7]:
# run this first to load the original data files -- must run at beginning of each Kaggle session

import pandas as pd

# using the 4.12.16-5.12.16 database for simplicity in data wrangling
daily_activity_path = "/kaggle/input/fitbit/mturkfitbit_export_4.12.16-5.12.16/Fitabase Data 4.12.16-5.12.16/dailyActivity_merged.csv"
sleep_data_path = "/kaggle/input/fitbit/mturkfitbit_export_4.12.16-5.12.16/Fitabase Data 4.12.16-5.12.16/sleepDay_merged.csv"
weight_data_path = "/kaggle/input/fitbit/mturkfitbit_export_4.12.16-5.12.16/Fitabase Data 4.12.16-5.12.16/weightLogInfo_merged.csv"

daily_activity = pd.read_csv(daily_activity_path)
sleep_data = pd.read_csv(sleep_data_path)
weight_data = pd.read_csv(weight_data_path)
processed_data = daily_activity.merge(sleep_data, how='outer', on='Id')
processed_data = processed_data.merge(weight_data, how='outer', on='Id')

# saved data to csv before running models
processed_data.to_csv("/kaggle/working/processed_data.csv", index=False)

In [8]:
# user inputs their personal metrics HERE
very_active_minutes = 50
fairly_active_minutes = 60
lightly_active_minutes = 120
sedentary_minutes = 760
total_steps = 12000
calories = 2400
total_minutes_asleep = 450
weight_kg = 73

In [9]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/kaggle-python

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.cluster import KMeans
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import warnings
import joblib

data_path = "/kaggle/working/processed_data.csv"
data = pd.read_csv(data_path)
data.to_csv("/kaggle/working/personalized_insights.csv", index=False)

def train_regression_model(features, target, n=0.2):
    # function to train regression model that predicts a target based on provided features
    X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=n, random_state=42)
    model = RandomForestRegressor(random_state=42)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    return model, mse, predictions

def cluster_users(features, n=4):
    # function performing k-means clustering based on the selected features
    kmeans = KMeans(n_clusters=n, random_state=42, n_init=10)
    clusters = kmeans.fit_predict(features)
    return kmeans, clusters

# predicting optimal steps & active minutes
steps_features = data[["VeryActiveMinutes", "FairlyActiveMinutes", "LightlyActiveMinutes"]]
steps_target = data["TotalSteps"]
steps_model, steps_mse, steps_predictions = train_regression_model(steps_features, steps_target)
# print(f"Steps Prediction Model MSE: {steps_mse:.2f}")

# suggesting sleep improvements
data["TotalMinutesAsleep"] = data["TotalMinutesAsleep"].fillna(data["TotalMinutesAsleep"].mean())
# chose to replace NaN with the mean value of total minutes sleep to account for outliers
sleep_features = data[["SedentaryMinutes", "Calories", "VeryActiveMinutes"]]
sleep_target = data["TotalMinutesAsleep"]
sleep_model, sleep_mse, sleep_predictions = train_regression_model(sleep_features, sleep_target)
# print(f"Sleep Prediction Model MSE: {sleep_mse:.2f}")

# personalizing caloric targets
calories_features = data[["TotalSteps", "VeryActiveMinutes", "LightlyActiveMinutes"]]
calories_target = data["Calories"]
calories_model, calories_mse, calories_predictions = train_regression_model(calories_features, calories_target)
# print(f"Calories Prediction Model MSE: {calories_mse:.2f}")

# recommending actionable steps for target weight/BMI
data = data.dropna(subset=["WeightKg"])
# chose to drop null weight values due to a small number of null values in the data
weight_features = data[["TotalSteps", "Calories", "VeryActiveMinutes"]]
weight_target = data["WeightKg"]
weight_model, weight_mse, weight_predictions = train_regression_model(weight_features, weight_target)
# print(f"Weight Prediction Model MSE: {weight_mse:.2f}")

cluster_features = data[["SedentaryMinutes", "VeryActiveMinutes", "LightlyActiveMinutes"]]
kmeans_model, user_clusters = cluster_users(cluster_features)
data["UserCluster"] = user_clusters
# print(f"Cluster Centers:\n{kmeans_model.cluster_centers_}")

# visualize clustering
# plt.scatter(data["SedentaryMinutes"], data["LightlyActiveMinutes"], c=data["UserCluster"], cmap="viridis")
# plt.colorbar(label="Cluster")
# plt.xlabel("Sedentary Minutes")
# plt.ylabel("Lightly Active Minutes")
# plt.title("User Clusters")
# plt.show()

# data.to_csv("/kaggle/working/personalized_insights.csv", index=False)

# print("Personalized insights saved to /kaggle/working/personalized_insights.csv")

joblib.dump(steps_model, '/kaggle/working/steps_model.pkl')
joblib.dump(sleep_model, '/kaggle/working/sleep_model.pkl')
joblib.dump(calories_model, '/kaggle/working/calories_model.pkl')
joblib.dump(weight_model, '/kaggle/working/weight_model.pkl')

steps_model = joblib.load('/kaggle/working/steps_model.pkl')
sleep_model = joblib.load('/kaggle/working/sleep_model.pkl')
calories_model = joblib.load('/kaggle/working/calories_model.pkl')
weight_model = joblib.load('/kaggle/working/weight_model.pkl')

def generate_recommendations(user_data):
    # function to generate the recommendations for the user
    # takes in the user data (dictionary with user inputs of metrics like active minutes, steps, etc.)
    # returns dictionary with predictions & recommendations

    # recommendation for steps -- based on active minutes
    user_features_steps = np.array([[
        user_data['very_active_minutes'],
        user_data['fairly_active_minutes'],
        user_data['lightly_active_minutes']
    ]])

    # recommendation for sleep time -- based on active minutes and caloric intake
    user_features_sleep = np.array([[
        user_data['sedentary_minutes'],
        user_data['calories'],
        user_data['very_active_minutes']
    ]])

    # recommendation for caloric intake -- based on steps and active minutes
    user_features_calories = np.array([[
        user_data['total_steps'],
        user_data['very_active_minutes'],
        user_data['lightly_active_minutes']
    ]])

    # recommendation for target weight change -- based on steps, calories, active minutes
    user_features_weight = np.array([[
        user_data['total_steps'],
        user_data['calories'],
        user_data['very_active_minutes']
    ]])

    # 12.2.24 revised predicted steps so the target step count is never lower than the users' step count
    predicted_steps = max(steps_model.predict(user_features_steps)[0], user_data['total_steps'])
    
    predicted_steps = round(predicted_steps)  # rounded to the nearest whole number
    predicted_sleep = round(sleep_model.predict(user_features_sleep)[0])  # rounded to whole number
    predicted_calories = round(calories_model.predict(user_features_calories)[0])  # rounded to whole number
    predicted_weight = round(weight_model.predict(user_features_weight)[0], 1)  # rounded to first decimal

    # empty dictionary to ultimately include the recommendations
    recommendations = {}

    # steps
    if user_data['total_steps'] < predicted_steps:
        recommendations['steps'] = f"Increase daily steps by {int(predicted_steps - user_data['total_steps'])} to meet optimal levels."
    else:
        recommendations['steps'] = "Your step count is on target!"

    # sleep 
    if user_data['total_minutes_asleep'] < predicted_sleep:
        recommendations['sleep'] = f"Increase sleep duration by {int(predicted_sleep - user_data['total_minutes_asleep'])} minutes for better recovery."
    else:
        recommendations['sleep'] = "Your sleep duration is sufficient."

    # caloric intake
    if user_data['calories'] > predicted_calories:
        recommendations['calories'] = f"Reduce daily calorie intake by {int(user_data['calories'] - predicted_calories)} to maintain balance."
    else:
        recommendations['calories'] = "Your calorie intake aligns with your activity levels."

    # weight
    if abs(user_data['weight_kg'] - predicted_weight) > 1:
        recommendations['weight'] = f"Adjust lifestyle to achieve a weight closer to {predicted_weight:.1f} kg."
    else:
        recommendations['weight'] = "Your weight is within a healthy range."

    return {
        "predicted_steps": predicted_steps,
        "predicted_sleep": predicted_sleep,
        "predicted_calories": predicted_calories,
        "predicted_weight": predicted_weight,
        "recommendations": recommendations
    }

# user's inputted personal metrics
user_input = {
    'very_active_minutes': very_active_minutes,
    'fairly_active_minutes': fairly_active_minutes,
    'lightly_active_minutes': lightly_active_minutes,
    'sedentary_minutes': sedentary_minutes,
    'total_steps': total_steps,
    'total_minutes_asleep': total_minutes_asleep,
    'calories': calories,
    'weight_kg': weight_kg
}

# added to remove unnecessary warning that would appear after the correct result
import warnings
warnings.filterwarnings("ignore", message="X does not have valid feature names")

# for organization when printing result
result = generate_recommendations(user_input)

# first showing the user's inputted metrics
print("USER METRICS:")
print(f"Very active time: {user_input['very_active_minutes']} minutes")
print(f"Fairly active time: {user_input['fairly_active_minutes']} minutes")
print(f"Lightly active time: {user_input['lightly_active_minutes']} minutes")
print(f"Sedentary time: {user_input['sedentary_minutes']} minutes")
print(f"Steps: {user_input['total_steps']} steps")
print(f"Sleep: {user_input['total_minutes_asleep']} minutes")
print(f"Weight: {user_input['weight_kg']} kg")

# second showing the key metrics the user should focus on and use as the target metric
print("\nTARGET METRICS:")
print(f"Steps: {result['predicted_steps']} steps")
print(f"Sleep: {result['predicted_sleep']} minutes")
print(f"Calories: {result['predicted_calories']} kcal")
print(f"Weight: {result['predicted_weight']} kg")

# last showing the recommendations the user should implement in their routine to achieve the target metrics
print("\nRECOMMENDATIONS:")
for key, recommendation in result['recommendations'].items():
    print(f"{key.capitalize()}: {recommendation}")

USER METRICS:
Very active time: 50 minutes
Fairly active time: 60 minutes
Lightly active time: 120 minutes
Sedentary time: 760 minutes
Steps: 12000 steps
Sleep: 450 minutes
Weight: 73 kg

TARGET METRICS:
Steps: 12000 steps
Sleep: 450 minutes
Calories: 1631 kcal
Weight: 67.7 kg

RECOMMENDATIONS:
Steps: Your step count is on target!
Sleep: Your sleep duration is sufficient.
Calories: Reduce daily calorie intake by 769 to maintain balance.
Weight: Adjust lifestyle to achieve a weight closer to 67.7 kg.
