In [161]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

df = pd.read_csv("data.csv")

In [162]:
# Create a mapping dictionary to convert existing IDs to new IDs starting from 1
id_mapping = {old_id: new_id for new_id, old_id in enumerate(df['Id'].unique(), start=1)}

# Replace existing IDs with new IDs
df['Id'] = df['Id'].map(id_mapping)

In [163]:
# Convert ActivityDate to datetime format
df['ActivityDate'] = pd.to_datetime(df['ActivityDate'])

In [164]:
df.isna().sum()

Id                             0
ActivityDate                   0
TotalSteps                     0
TotalDistance               1880
TrackerDistance             1880
LoggedActivitiesDistance    1880
VeryActiveDistance          1880
ModeratelyActiveDistance    1880
LightActiveDistance         1880
SedentaryActiveDistance     1880
VeryActiveMinutes           2820
FairlyActiveMinutes         2820
LightlyActiveMinutes        2820
SedentaryMinutes            2820
Calories                    2820
TotalSleepRecords           3347
TotalMinutesAsleep          3347
TotalTimeInBed              3347
dtype: int64

In [165]:
df.fillna(df.mean(), inplace=True)
df

Unnamed: 0,Id,ActivityDate,TotalSteps,TotalDistance,TrackerDistance,LoggedActivitiesDistance,VeryActiveDistance,ModeratelyActiveDistance,LightActiveDistance,SedentaryActiveDistance,VeryActiveMinutes,FairlyActiveMinutes,LightlyActiveMinutes,SedentaryMinutes,Calories,TotalSleepRecords,TotalMinutesAsleep,TotalTimeInBed
0,1,2016-04-12,13162,8.500000,8.500000,0.000000,1.880000,0.550000,6.060000,0.000000,25.000000,13.000000,328.000000,728.000000,1985.000000,1.000000,327.000000,346.000000
1,1,2016-04-13,10735,6.970000,6.970000,0.000000,1.570000,0.690000,4.710000,0.000000,21.000000,19.000000,217.000000,776.000000,1797.000000,2.000000,384.000000,407.000000
2,1,2016-04-14,10460,6.740000,6.740000,0.000000,2.440000,0.400000,3.910000,0.000000,30.000000,11.000000,181.000000,1218.000000,1776.000000,1.000000,412.000000,442.000000
3,1,2016-04-15,9762,6.280000,6.280000,0.000000,2.140000,1.260000,2.830000,0.000000,29.000000,34.000000,209.000000,726.000000,1745.000000,2.000000,340.000000,367.000000
4,1,2016-04-16,12669,8.160000,8.160000,0.000000,2.710000,0.410000,5.040000,0.000000,36.000000,10.000000,221.000000,773.000000,1863.000000,1.000000,700.000000,712.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3755,33,2016-05-08,10686,99.151234,9.520122,10.636532,0.752144,1.954181,1.954181,0.752144,21.164894,13.564894,192.812766,991.210638,2303.609574,1.118644,419.467312,458.639225
3756,33,2016-05-09,20226,99.151234,9.520122,10.636532,0.752144,1.954181,1.954181,0.752144,21.164894,13.564894,192.812766,991.210638,2303.609574,1.118644,419.467312,458.639225
3757,33,2016-05-10,10733,99.151234,9.520122,10.636532,0.752144,1.954181,1.954181,0.752144,21.164894,13.564894,192.812766,991.210638,2303.609574,1.118644,419.467312,458.639225
3758,33,2016-05-11,21420,99.151234,9.520122,10.636532,0.752144,1.954181,1.954181,0.752144,21.164894,13.564894,192.812766,991.210638,2303.609574,1.118644,419.467312,458.639225


In [166]:
# Descriptive statistics for steps & distance
steps_stats = df['TotalSteps'].describe()
distance_stats = df['TotalDistance'].describe()

print("Descriptive Statistics for Steps:")
print(steps_stats)
print("\nDescriptive Statistics for Distance (in miles):")
print(distance_stats)

# Descriptive statistics
fig, axs = plt.subplots(2, 1, figsize=(10, 8))
df['TotalSteps'].plot(ax=axs[0], kind='hist', bins=20, title='Histogram of Total Steps')
df['TotalDistance'].plot(ax=axs[1], kind='hist', bins=20, title='Histogram of Total Distance (in miles)')
plt.tight_layout()
plt.show()

Descriptive Statistics for Steps:
count     3760.000000
mean      4642.660372
std       4719.110284
min          0.000000
25%       1219.000000
50%       2524.500000
75%       7402.250000
max      36019.000000
Name: TotalSteps, dtype: float64

Descriptive Statistics for Distance (in miles):
count    3760.000000
mean       99.151234
std        85.840878
min         0.000000
25%         9.815000
50%        99.151234
75%        99.151234
max       518.000000
Name: TotalDistance, dtype: float64


  plt.show()


In [168]:
# Average length of a step for each person
df['AvgStepLength'] = df['TotalDistance'] / df['TotalSteps']
print(df.groupby('Id')['AvgStepLength'].mean())

# Average step length for each person
avg_step_length = df.groupby('Id')['AvgStepLength'].mean()
avg_step_length.plot(kind='bar', figsize=(10, 6))
plt.title('Average Step Length for Each Person')
plt.xlabel('Person ID')
plt.ylabel('Average Step Length (in miles)')
plt.xticks(rotation=45)
plt.show()

Id
1          inf
2     0.055050
3     0.055638
4          inf
5          inf
6     0.071357
7     0.132405
8     0.064497
9     0.168181
10    0.088661
11    0.093544
12    0.101682
13         inf
14         inf
15         inf
16    0.085781
17    0.084592
18    0.082760
19         inf
20    0.099586
21         inf
22         inf
23         inf
24         inf
25    0.114421
26         inf
27         inf
28    0.045654
29         inf
30    0.066155
31         inf
32         inf
33    0.063225
Name: AvgStepLength, dtype: float64


  plt.show()


In [151]:
print("\nCalories Burned per Step for Each Individual:")
print(df.groupby('Id')['Calories'].mean())

# Calories burned per step for each individual
calories_per_step = df.groupby('Id')['Calories'].mean()
calories_per_step.plot(kind='bar', figsize=(10, 6))
plt.title('Calories Burned per Step for Each Individual')
plt.xlabel('Person ID')
plt.ylabel('Calories Burned per Step')
plt.xticks(rotation=45)
plt.show()


Calories Burned per Step for Each Individual:
Id
1     2181.812020
2     2098.545891
3     2430.532181
4     2121.078149
5     2270.908794
6     2355.199116
7     2112.868471
8     2158.747503
9     2238.568292
10    2206.949116
11    2210.982181
12    2106.123848
13    2324.158794
14    2221.144681
15    2237.126536
16    2501.174923
17    2274.255568
18    2236.021697
19    2469.094278
20    2196.626536
21    2567.615514
22    2292.992895
23    2377.612353
24    2260.649489
25    2223.215245
26    2363.707181
27    2369.295891
28    2464.158794
29    2174.707181
30    2586.852342
31    2410.715245
32    2218.284767
33    2582.771697
Name: Calories, dtype: float64


  plt.show()


In [169]:
correlation_light = df['LightActiveDistance'].corr(df['VeryActiveMinutes'])
correlation_moderate = df['ModeratelyActiveDistance'].corr(df['VeryActiveMinutes'])
correlation_very = df['VeryActiveDistance'].corr(df['VeryActiveMinutes'])

print("Correlation between Light Activity Distance and Very Active Minutes:", correlation_light)
print("Correlation between Moderate Activity Distance and Very Active Minutes:", correlation_moderate)
print("Correlation between Very Active Distance and Very Active Minutes:", correlation_very)

Correlation between Light Activity Distance and Very Active Minutes: 0.10663481177875173
Correlation between Moderate Activity Distance and Very Active Minutes: 0.06717610672692782
Correlation between Very Active Distance and Very Active Minutes: 0.7677102898409658


In [153]:
# relevant columns for correlation analysis
cols_for_correlation = ['VeryActiveMinutes', 'ModeratelyActiveDistance', 'LightActiveDistance', 'SedentaryActiveDistance', 
                        'TotalSteps', 'TotalDistance', 'Calories']

corr_matrix = df[cols_for_correlation].corr()

plt.figure(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f", annot_kws={"size": 10})
plt.title('Correlation Map')
plt.show()

  plt.show()


In [171]:
# Read the CSV file
df = pd.read_csv("data.csv")

# Drop rows with missing values in both X and y
df.dropna(subset=['TotalSteps', 'Calories', 'VeryActiveMinutes', 
                  'FairlyActiveMinutes', 'LightlyActiveMinutes', 
                  'SedentaryMinutes', 'TotalMinutesAsleep'], inplace=True)

# Feature Engineering
# Creating additional features or transformations
df['TotalActivityMinutes'] = df['VeryActiveMinutes'] + df['FairlyActiveMinutes'] + df['LightlyActiveMinutes']
df['HighIntensityActivity'] = (df['VeryActiveMinutes'] > 30) | (df['FairlyActiveMinutes'] > 30)

# Select relevant features and target variable
X = df[['TotalSteps', 'Calories', 'VeryActiveMinutes', 'FairlyActiveMinutes', 
        'LightlyActiveMinutes', 'SedentaryMinutes', 'TotalActivityMinutes', 'HighIntensityActivity']]
y = df['TotalMinutesAsleep']  # Assuming 'TotalMinutesAsleep' is the target variable

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the Random Forest Regressor model
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
mae = mean_absolute_error(y_test, y_pred)
print("Mean Absolute Error (Random Forest):", mae)

# Convert continuous predictions to intervals
def convert_to_intervals(minutes):
    intervals = []
    for minute in minutes:
        if minute < 240:
            intervals.append('Less than 4 hours')
        elif minute < 420:
            intervals.append('4-7 hours')
        elif minute < 600:
            intervals.append('7-10 hours')
        else:
            intervals.append('More than 10 hours')
    return intervals

# Convert predictions to intervals
y_pred_intervals = convert_to_intervals(y_pred)

# Evaluate intervals
y_test_intervals = convert_to_intervals(y_test)
accuracy = sum(1 for pred, true in zip(y_pred_intervals, y_test_intervals) if pred == true) / len(y_test_intervals)
print("Accuracy of interval predictions:", accuracy)



Mean Absolute Error (Random Forest): 85.85415034166144
Accuracy of interval predictions: 0.39759036144578314


The mean absolute error (MAE) of approximately 85.85 indicates the average absolute difference between the predicted and actual values of sleep duration in minutes. In this case, a MAE of 85.85 means that, on average, the predictions of the Random Forest model are off by approximately 85.85 minutes from the true sleep duration.

The accuracy of interval predictions of approximately 39.76% indicates the proportion of correctly predicted sleep duration intervals out of all predictions made. In other words, out of all the sleep duration predictions made by converting continuous predictions into intervals, approximately 39.76% of them match the true intervals present in the test data.

Interpreting these results:

The MAE value indicates the model's overall performance in predicting sleep duration in minutes. Lower MAE values indicate better performance, as they indicate smaller discrepancies between predicted and actual values.
The accuracy of interval predictions provides an understanding of how well the model performs when sleep duration is categorized into intervals. Higher accuracy values indicate better performance, as they indicate a higher proportion of correctly predicted intervals.
In this case, the MAE value suggests that the model's predictions of sleep duration in minutes have a moderate level of error, while the accuracy of interval predictions indicates that the model's performance in predicting sleep duration intervals is relatively low. Further improvements to the model, such as feature engineering, hyperparameter tuning, or trying different algorithms, may help enhance prediction accuracy.

In [160]:
def calculate_sleep_quality_and_time(total_minutes_asleep, total_time_in_bed):
    # Calculate sleep duration in hours
    sleep_duration_hours = total_minutes_asleep / 60
    
    # Calculate time in bed in hours
    time_in_bed_hours = total_time_in_bed / 60

    # Convert time in bed to hours and minutes
    time_in_bed_minutes = int((time_in_bed_hours % 1) * 60)  # Extract minutes
    time_in_bed_hours = int(time_in_bed_hours)  # Extract hours
    
    # Predict sleep quality based on sleep duration
    if sleep_duration_hours >= 7:
        sleep_quality = "Average"
    elif 5 <= sleep_duration_hours < 7:
        sleep_quality = "Good"
    else:
        sleep_quality = "Bad"
    
    # Output the results
    result = f"Sleep quality: {sleep_quality}\nTotal time in bed: {time_in_bed_hours} hours and {time_in_bed_minutes} minutes"
    return result

def calculate_caloric_expenditure_and_health(calories):
    # Calculate healthy calorie burn (e.g., based on BMR and activity level)
    healthy_calories_burn = 2000  # Example: Assume a healthy calorie burn goal of 2000 calories
    
    # Determine if calorie burn is healthy or not
    if calories >= healthy_calories_burn:
        health_status = "Healthy"
    else:
        health_status = "Unhealthy"
    
    # Output the results
    result = f"Calories burned: {calories}\nHealthy calorie burn goal: {healthy_calories_burn} calories\nHealth status: {health_status}"
    return result

def analyze_activity_trends(total_steps, total_distance):
    # Analyze trends in Total Steps and Total Distance
    if total_steps >= 10000:
        steps_trend = "High"
    elif 5000 <= total_steps < 10000:
        steps_trend = "Moderate"
    else:
        steps_trend = "Low"
    
    if total_distance >= 8:
        distance_trend = "High"
    elif 4 <= total_distance < 8:
        distance_trend = "Moderate"
    else:
        distance_trend = "Low"
    
    # Output the results
    result = f"Total Steps Trend: {steps_trend}\nTotal Distance Trend: {distance_trend}"
    return result

def calculate_all_functions(total_minutes_asleep, total_time_in_bed, calories, total_steps, total_distance):
    sleep_result = calculate_sleep_quality_and_time(total_minutes_asleep, total_time_in_bed)
    calorie_result = calculate_caloric_expenditure_and_health(calories)
    activity_result = analyze_activity_trends(total_steps, total_distance)
    
    combined_result = f"Sleep Quality and Time:\n{sleep_result}\n\nCaloric Expenditure and Health:\n{calorie_result}\n\nActivity Trends:\n{activity_result}"
    return combined_result

# Create the Gradio interface with tabs
with gr.Blocks() as demo:
    gr.Markdown("# Health and Fitness Analysis")
    
    with gr.Tab("All Functions"):
        
        with gr.Tab("All Functions"):
                with gr.Row():
                    with gr.Column():
                        all_inputs = [
                            gr.Number(label="Total Minutes Asleep"),
                            gr.Number(label="Total Time in Bed (minutes)"),
                            gr.Number(label="Total Calories Burned"),
                            gr.Number(label="Total Steps"),
                            gr.Number(label="Total Distance (in miles)")
                        ]
                    with gr.Column():
                        all_output = gr.Textbox(label="Combined Results")
                        all_button = gr.Button("Calculate All")
                        all_button.click(calculate_all_functions, inputs=all_inputs, outputs=all_output)
                    
        # with gr.Row():
        #     with gr.Column():
        #         sleep_inputs_all = [
        #             gr.Number(label="Total Minutes Asleep"),
        #             gr.Number(label="Total Time in Bed (minutes)")
        #         ]
        #         sleep_output_all = gr.Textbox(label="Sleep Quality and Time")
        #         sleep_button_all = gr.Button("Calculate Sleep")
        #         sleep_button_all.click(calculate_sleep_quality_and_time, inputs=sleep_inputs_all, outputs=sleep_output_all)
        #     
        #     with gr.Column():
        #         calorie_input_all = gr.Number(label="Total Calories Burned")
        #         calorie_output_all = gr.Textbox(label="Caloric Expenditure and Health")
        #         calorie_button_all = gr.Button("Calculate Calories")
        #         calorie_button_all.click(calculate_caloric_expenditure_and_health, inputs=calorie_input_all, outputs=calorie_output_all)
        #     
        #     with gr.Column():
        #         activity_inputs_all = [
        #             gr.Number(label="Total Steps"),
        #             gr.Number(label="Total Distance (in miles)")
        #         ]
        #         activity_output_all = gr.Textbox(label="Activity Trends")
        #         activity_button_all = gr.Button("Analyze Activity")
        #         activity_button_all.click(analyze_activity_trends, inputs=activity_inputs_all, outputs=activity_output_all)
    
    with gr.Tab("Sleep Quality"):
        sleep_inputs = [
            gr.Number(label="Total Minutes Asleep"),
            gr.Number(label="Total Time in Bed (minutes)")
        ]
        sleep_output = gr.Textbox(label="Sleep Quality and Time")
        sleep_button = gr.Button("Calculate")
        sleep_button.click(calculate_sleep_quality_and_time, inputs=sleep_inputs, outputs=sleep_output)
    
    with gr.Tab("Caloric Expenditure"):
        calorie_input = gr.Number(label="Total Calories Burned")
        calorie_output = gr.Textbox(label="Caloric Expenditure and Health")
        calorie_button = gr.Button("Calculate")
        calorie_button.click(calculate_caloric_expenditure_and_health, inputs=calorie_input, outputs=calorie_output)
    
    with gr.Tab("Activity Trends"):
        activity_inputs = [
            gr.Number(label="Total Steps"),
            gr.Number(label="Total Distance (in miles)")
        ]
        activity_output = gr.Textbox(label="Activity Trends")
        activity_button = gr.Button("Analyze")
        activity_button.click(analyze_activity_trends, inputs=activity_inputs, outputs=activity_output)

demo.launch()

Running on local URL:  http://127.0.0.1:7908

To create a public link, set `share=True` in `launch()`.




In [172]:
def calculate_sleep_quality_and_time(total_minutes_asleep, total_time_in_bed):
    # Calculate sleep duration in hours
    sleep_duration_hours = total_minutes_asleep / 60
    
    # Calculate time in bed in hours
    time_in_bed_hours = total_time_in_bed / 60

    # Convert time in bed to hours and minutes
    time_in_bed_minutes = int((time_in_bed_hours % 1) * 60)  # Extract minutes
    time_in_bed_hours = int(time_in_bed_hours)  # Extract hours
    
    # Predict sleep quality based on sleep duration
    if sleep_duration_hours >= 7:
        sleep_quality = "Average"
    elif 5 <= sleep_duration_hours < 7:
        sleep_quality = "Good"
    else:
        sleep_quality = "Bad"
    
    # Output the results
    result = f"Sleep quality: {sleep_quality}\nTotal time in bed: {time_in_bed_hours} hours and {time_in_bed_minutes} minutes"
    return result

# Sample sleep data (assuming each entry is in minutes)
sleep_data = [
    {"total_minutes_asleep": 360, "total_time_in_bed": 420},  # 6 hours of sleep in 7 hours in bed
    {"total_minutes_asleep": 480, "total_time_in_bed": 480},  # 8 hours of sleep in 8 hours in bed
    {"total_minutes_asleep": 300, "total_time_in_bed": 360},  # 5 hours of sleep in 6 hours in bed
]

# Calculate total minutes in sleep and total time in bed
total_minutes_in_sleep = sum(entry["total_minutes_asleep"] for entry in sleep_data)
total_time_in_bed = sum(entry["total_time_in_bed"] for entry in sleep_data)

# Display results
print("Total minutes in sleep:", total_minutes_in_sleep)
print("Total time in bed:", total_time_in_bed)

interface = gr.Interface(
    fn=calculate_sleep_quality_and_time,
    inputs=["number", "number"],
    outputs="text",
    title="Sleep Quality Predictor",
    description="Enter your sleep data to predict sleep quality and total time in bed in hours."
)

# interface.launch()


Total minutes in sleep: 1140
Total time in bed: 1260


In [175]:

def calculate_caloric_expenditure_and_health(calories):
    # Calculate healthy calorie burn (e.g., based on BMR and activity level)
    healthy_calories_burn = 2000  # Example: Assume a healthy calorie burn goal of 2000 calories
    
    # Determine if calorie burn is healthy or not
    if calories >= healthy_calories_burn:
        health_status = "Healthy"
    else:
        health_status = "Unhealthy"
    
    # Output the results
    result = f"Calories burned: {calories}\nHealthy calorie burn goal: {healthy_calories_burn} calories\nHealth status: {health_status}"
    return result

interface = gr.Interface(
    fn=calculate_caloric_expenditure_and_health,
    inputs=gr.Number(label="Enter total calories burned..."),
    outputs="text",
    title="Caloric Expenditure & Health Predictor",
    description="Enter total calories burned to predict health status based on a healthy calorie burn goal."
)

# interface.launch()


In [173]:
import gradio as gr
import pandas as pd


def analyze_activity_trends(total_steps, total_distance):
    # Analyze trends in Total Steps and Total Distance
    if total_steps >= 10000:
        steps_trend = "High"
    elif 5000 <= total_steps < 10000:
        steps_trend = "Moderate"
    else:
        steps_trend = "Low"
    
    if total_distance >= 8:
        distance_trend = "High"
    elif 4 <= total_distance < 8:
        distance_trend = "Moderate"
    else:
        distance_trend = "Low"
    
    # Output the results
    result = f"Total Steps Trend: {steps_trend}\nTotal Distance Trend: {distance_trend}"
    return result

interface = gr.Interface(
    fn=analyze_activity_trends,
    inputs=[
        gr.Number(label="Total Steps"),
        gr.Number(label="Total Distance (in miles)")
    ],
    outputs="text",
    title="Activity Trends Analyzer",
    description="Analyze trends in Total Steps and Total Distance to understand daily activity levels and patterns."
)

# interface.launch()
