In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
import seaborn as sns
from sklearn.feature_selection import SelectKBest, f_regression
import pickle

In [3]:
# Load the raw export. This cell uses the `type`, `start_date`, `end_date`
# and `source_name` columns of the file.
data = pd.read_csv("steps_count.csv")

# Record types to keep: the five predictor metrics plus the step count.
types = [
    "HKQuantityTypeIdentifierBasalEnergyBurned",
    "HKQuantityTypeIdentifierActiveEnergyBurned",
    "HKQuantityTypeIdentifierDistanceWalkingRunning",
    "HKQuantityTypeIdentifierAppleStandTime",
    "HKQuantityTypeIdentifierAppleExerciseTime",
    "HKQuantityTypeIdentifierStepCount"
]

ex_data = data[data["type"].isin(types)]

# drop() returns a new frame, so the column rewrite below never touches `data`.
df = ex_data.drop(columns=['end_date', 'source_name'])

# Reduce each timestamp to its calendar date so records can be grouped per day.
# The column is replaced with a plain assignment on purpose: since pandas 2.0,
# `df.loc[:, col] = ...` writes into the existing object-dtype column in place,
# so the parsed values would keep dtype `object` and the `.dt` accessor would
# raise AttributeError.
# NOTE(review): assumes every timestamp parses to a single datetime dtype
# (e.g. no mixed UTC offsets) - confirm against the CSV.
df['start_date'] = pd.to_datetime(df['start_date']).dt.date

# Collapse the records to one row per (day, metric): values are summed and
# the first unit seen for the group is kept.
df_final = (
    df.groupby(['start_date', 'type'])
    .agg(value=('value', 'sum'), unit=('unit', 'first'))
    .reset_index()
)

# Swap the long HealthKit identifiers for short snake_case column names.
short_names = {
    "HKQuantityTypeIdentifierBasalEnergyBurned": "basal_energy_burned",
    "HKQuantityTypeIdentifierActiveEnergyBurned": "active_energy_burned",
    "HKQuantityTypeIdentifierDistanceWalkingRunning": "walking_running_distance",
    'HKQuantityTypeIdentifierAppleStandTime': "stand_time",
    'HKQuantityTypeIdentifierAppleExerciseTime': "exercise_time",
    'HKQuantityTypeIdentifierStepCount': 'step_count',
}
df_final['type'] = df_final['type'].replace(short_names)

# Long -> wide: one row per day, one column per metric. Each (day, metric)
# pair is already unique after the groupby, so pivot_table's default mean
# aggregation leaves the summed values unchanged. Metrics with no record on
# a given day are filled with 0.
pivot_df = pd.pivot_table(
    df_final,
    index='start_date',
    columns='type',
    values='value',
    fill_value=0,
).reset_index()

# Predictor columns: every daily metric except the step count itself.
feature_columns = [
    'active_energy_burned',
    'basal_energy_burned',
    'exercise_time',
    'stand_time',
    'walking_running_distance',
]
X = pivot_df[feature_columns]

# Target: the daily step count.
y = pivot_df['step_count']

# Hold out 33% of the days for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# Ordinary least-squares fit on all five features (fit() returns the estimator).
model = LinearRegression().fit(X_train, y_train)

# Score the held-out days with mean absolute error.
y_pred_all_features = model.predict(X_test)
model_af_mae = mean_absolute_error(y_true=y_test, y_pred=y_pred_all_features)

# Persist the fitted estimator for reuse outside the notebook.
with open("step_predict_model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)