# Calories Restriction & Weight Loss

## Imports

In [1]:
import os

import pandas as pd
import numpy as np
from scipy import stats
import statsmodels.api as sm

## Data

### Load

In [9]:
data_path = './data'
data_files = os.listdir(data_path)
data = pd.DataFrame(columns=['burned', 'steps', 'weight', 'consumed'])
dates = None
for fname in data_files:
    if 'activity' in fname:
        activity_data = pd.read_csv(os.path.join(data_path, fname))
        data.loc[:, 'burned'] = activity_data['Calories Burned']
        data.loc[:, 'steps'] = activity_data['Steps']
        dates = activity_data['Date']
    elif 'body' in fname:
        data_body = pd.read_csv(os.path.join(data_path, fname))
        data.loc[:, 'weight'] = data_body['Weight']
    elif 'food' in fname:
        data_food = pd.read_csv(os.path.join(data_path, fname))
        data.loc[:, 'consumed'] = data_food['Calories In']
data.index = dates
data['deficite'] = data['burned'] - data['consumed']
data['weight_delta'] = data['weight'].diff(periods=1).shift(-1)
data = data.dropna()

In [10]:
data

Unnamed: 0_level_0,burned,steps,weight,consumed,deficite,weight_delta
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-01-01,2873,7065,67.7,0,2873,0.0
2019-01-02,2163,3715,67.7,1561,602,0.0
2019-01-03,2670,8142,67.7,1981,689,0.0
2019-01-04,2500,7549,67.7,2090,410,0.0
2019-01-05,2885,14040,67.7,3112,-227,0.0
2019-01-06,2044,2765,67.7,378,1666,0.0
2019-01-07,2594,7827,67.7,2010,584,1.15
2019-01-08,2270,7715,68.85,1916,354,1.15
2019-01-09,3016,12465,70.0,3254,-238,0.0
2019-01-10,2812,10632,70.0,2130,682,-0.2


### Clean

In [11]:
data = data.loc[~(data == 0).any(1)]  # remove rows containing zero value
stats = data.describe().loc[['mean', 'std']]
outliers = (np.abs(stats.loc['mean'] - data) > stats.loc['std'] * 2).any(1)
data = data.loc[~outliers]

In [12]:
data

Unnamed: 0_level_0,burned,steps,weight,consumed,deficite,weight_delta
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-01-07,2594,7827,67.7,2010,584,1.15
2019-01-08,2270,7715,68.85,1916,354,1.15
2019-01-10,2812,10632,70.0,2130,682,-0.2
2019-01-11,2966,11243,69.8,2419,547,0.4
2019-01-12,2700,7714,70.2,2378,322,-0.12
2019-01-13,2827,10108,70.08,2668,159,-0.12
2019-01-14,2104,3952,69.96,2102,2,-0.12
2019-01-15,2377,8035,69.84,1921,456,-0.12
2019-01-16,2320,4194,69.72,1593,727,-0.12
2019-01-17,2355,5668,69.6,1765,590,-0.6


## Model

In [13]:
X = data['deficite'].values.reshape(-1, 1)
y = data['weight_delta'].values
X_const = sm.add_constant(X)
model = sm.OLS(y, X_const)
res = model.fit()

In [14]:
loss_500 = res.params[1] * 500
loss_500

-0.326366203781185

In [15]:
conf_int = loss_500 + (res.conf_int(.05)[1] - res.params[1])
conf_int

array([-0.32674504, -0.32598737])

The model predicts a loss of 326 grams (-0.326, 95% CI [-0.327, -.326]) for a day of 500 calories deficit.