In [None]:
from google.colab import drive
import json

# Mount Google Drive so the project folder is reachable from the Colab VM.
drive.mount('/content/drive')

# Single root for all project files. The data paths are derived from it so
# that relocating the folder only requires editing this one line.
base_path = '/content/drive/MyDrive/ActivityForecastML'
timeseries_path = f'{base_path}/timeseries-data.json'
categorical_path = f'{base_path}/categorical-data.json'

# Pass the encoding explicitly instead of relying on the platform default.
with open(timeseries_path, 'r', encoding='utf-8') as f:
    timeseries_json = json.load(f)
with open(categorical_path, 'r', encoding='utf-8') as f:
    categorical_json = json.load(f)

# Sanity check: first activity record and the available categorical fields.
print(timeseries_json[0])
print(categorical_json.keys())




Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
{'metric': 'STEPS', 'count': 55, 'start': '2025-10-21T04:02:20.402Z', 'end': '2025-10-21T04:04:13.068Z'}
dict_keys(['gender', 'isSmoker', 'birthYear', 'birthMonth', 'disease', 'diagnoses', 'events', 'sideEffects', 'therapies', 'molecularAnalysisSelection'])


In [None]:
import pandas as pd

# Build one row per calendar day with the total step count for that day.
ts_df = pd.DataFrame(timeseries_json)

# Parse the interval start as UTC, then drop the timezone so the values are
# naive UTC timestamps; days are therefore bucketed by UTC calendar day.
ts_df['start'] = pd.to_datetime(ts_df['start'], utc=True).dt.tz_convert(None)

# normalize() keeps a datetime64 column (midnight of each day) instead of
# object-dtype datetime.date values, so the reindex against a DatetimeIndex
# below matches keys directly rather than via implicit type promotion.
ts_df['date'] = ts_df['start'].dt.normalize()

# Each record carries a 'metric' label; only STEPS rows belong in a step total.
# NOTE(review): confirm 'STEPS' is the only spelling used for step records.
steps_df = ts_df[ts_df['metric'] == 'STEPS']

daily_steps = steps_df.groupby('date')['count'].sum().rename('daily_steps')

# Insert the calendar days that have no records so the series is contiguous.
# Missing days are recorded as 0 steps.
# NOTE(review): a 0 here is indistinguishable from "no data collected" - confirm
# that is the intended treatment.
full_dates = pd.date_range(daily_steps.index.min(), daily_steps.index.max(), freq='D')
daily_steps = (
    daily_steps
    .reindex(full_dates)
    .fillna(0)
    .rename_axis('date')
    .reset_index()
)

# Static per-patient feature (number of therapy entries) and weekday index
# (Monday=0 ... Sunday=6).
therapy_count = len(categorical_json['therapies'])
daily_steps['therapy_count'] = therapy_count
daily_steps['day_of_week'] = daily_steps['date'].dt.dayofweek



In [None]:
# Attach static patient attributes to every daily row.
# Age is the difference in calendar years only (birth month is not considered).
current_year = pd.Timestamp.today().year
age = current_year - categorical_json['birthYear']

daily_steps = daily_steps.assign(
    age=age,
    is_smoker=int(categorical_json['isSmoker']),
    # 1 when at least one therapy entry exists, otherwise 0.
    is_on_therapy=1 if len(categorical_json['therapies']) > 0 else 0,
)

In [None]:
# Prophet's input frame: timestamp column 'ds' and target column 'y'.
prophet_df = (
    daily_steps[['date', 'daily_steps']]
    .rename(columns={'date': 'ds', 'daily_steps': 'y'})
)

In [None]:
!pip install prophet




In [None]:
from prophet import Prophet

# Model trained on the full history; it is the one used later to produce the
# 365-day forecast. Daily seasonality is switched on explicitly.
model = Prophet(daily_seasonality=True)
model.fit(prophet_df)

<prophet.forecaster.Prophet at 0x7e690c54cdd0>

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Chronological hold-out: the first 80% of days train the model and the
# remaining 20% are kept for evaluation.
split_idx = int(0.8 * len(prophet_df))
train_df, test_df = prophet_df.iloc[:split_idx], prophet_df.iloc[split_idx:]

In [None]:
# Validation model: same configuration as the full-history model
# (daily_seasonality=True) so that the hold-out RMSE/MAE describe the model
# that actually produces the exported forecast. Fitted on the training split only.
model_val = Prophet(daily_seasonality=True)
model_val.fit(train_df)

# Predict exactly the held-out dates.
future_test = test_df[['ds']]
forecast_test = model_val.predict(future_test)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


In [None]:
# Hold-out error of the Prophet validation model, in steps per day.
y_true = test_df['y'].to_numpy()
y_pred = forecast_test['yhat'].to_numpy()

mae = mean_absolute_error(y_true, y_pred)
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
(rmse, mae)

(np.float64(11476.444160156336), 8698.287976470034)

In [None]:
!pip install interpret



In [None]:
# Lag-1 feature: the previous day's step total. The first day has no
# predecessor, so its lag is NaN and the row is removed by dropna().
daily_steps['steps_t_1'] = daily_steps['daily_steps'].shift(periods=1)
model_df = daily_steps.dropna(axis=0, how='any').reset_index(drop=True)

# Feature matrix and regression target for the tabular model.
X = model_df.loc[:, ['steps_t_1', 'age', 'is_smoker', 'is_on_therapy']]
y = model_df.loc[:, 'daily_steps']

In [None]:
# Chronological 80/20 cut of the feature matrix (no shuffling).
split_idx = int(0.8 * len(X))
X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]


In [None]:
# Chronological 80/20 cut of the target; y comes from the same frame as X,
# so the cut position lines up with the feature split.
split_idx = int(0.8 * len(y))
y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]

In [None]:
# Sanity check: container type and length of the held-out target.
(type(y_test), y_test.shape)


(pandas.core.series.Series, (295,))

In [None]:
from interpret.glassbox import ExplainableBoostingRegressor

# Fit an Explainable Boosting Machine on the training split; the fixed seed
# keeps the fit repeatable.
ebm = ExplainableBoostingRegressor(random_state=42)
ebm.fit(X_train, y_train)

# Hold-out error, in steps per day.
y_pred_ebm = ebm.predict(X_test)
mae_ebm = mean_absolute_error(y_test, y_pred_ebm)
rmse_ebm = np.sqrt(mean_squared_error(y_test, y_pred_ebm))
(rmse_ebm, mae_ebm)

(np.float64(7646.359503640046), 5544.163731707657)

In [None]:
from interpret import show

# explain_global() only builds the explanation object; leaving it as the last
# expression prints its repr. show() renders the interactive view of the
# per-feature contributions.
ebm_global = ebm.explain_global()
show(ebm_global)


<interpret.glassbox._ebm._ebm.EBMExplanation at 0x7e690ba9e030>

In [None]:
# Forecast horizon in days; used for the future frame, the row selection and
# the output file name so the three can never drift apart.
FORECAST_DAYS = 365

future = model.make_future_dataframe(periods=FORECAST_DAYS)
forecast = model.predict(future)

# The future frame holds the history followed by the new dates, so the last
# FORECAST_DAYS rows are the out-of-sample predictions.
forecast_out = (
    forecast[['ds', 'yhat', 'trend']]
    .tail(FORECAST_DAYS)
    .rename(columns={'ds': 'Date', 'yhat': 'Predicted_Steps', 'trend': 'Trend_Component'})
)
forecast_out = forecast_out.assign(
    # Prophet's prediction is unbounded, but a step count cannot be negative.
    Predicted_Steps=forecast_out['Predicted_Steps'].clip(lower=0),
    # No exogenous regressors are in the model, so their contribution is zero.
    Exogenous_Impact=0,
)
forecast_out = forecast_out[
    ['Date', 'Predicted_Steps', 'Trend_Component', 'Exogenous_Impact']
]

# Write next to the input data, under the shared project root.
forecast_out.to_csv(f'{base_path}/{FORECAST_DAYS}_day_forecast.csv', index=False)