In [13]:
import pickle
from collections import defaultdict
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, KFold
from sklearn.pipeline import make_pipeline

from functions import (
    datespace, parse_datetime, import_concat, stitch_drop_append,
    expand_input_time, get_input_bed, 
    get_delta, get_delta_scale, get_p_day, get_diff, get_avg, get_var, 
    get_top5, get_bottom5,
    estimator_cv_scores, estimator_cv_scores2)

from classes import (TimeScaler, AvgRatioFiller, MatrixPipeline, OneHotEncoder, 
                    ZeroFiller, ChainTransformer, StandardScaler, AvgFiller)


# Raise both pandas display limits to 500 so wide/long frames are not truncated
# when inspected in the notebook.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, 500)

# import sleep

In [4]:
# raw_all_sleep: the sleep archive as a de-duplicated frame ordered by start
# time, with a fresh 0..n-1 index.
#
# Column labels are assigned positionally and replace whatever header the CSV
# carries. The 'rem' label used to be ' rem' (stray leading space), which made
# the column unreachable under its natural name.
# NOTE(review): if any helper in `functions` addresses the column as ' rem',
# update it alongside this cell.
SLEEP_COLUMNS = ['start', 'end', 'asleep', 'awake', 'awakening', 'bed', 'rem', 'light', 'deep']

raw_all_sleep = pd.read_csv('data/sleep_archive.csv')
raw_all_sleep.columns = SLEEP_COLUMNS
for time_col in ('start', 'end'):
    raw_all_sleep[time_col] = raw_all_sleep[time_col].apply(parse_datetime)

# Chained, non-inplace form so re-running the cell always starts from the CSV.
raw_all_sleep = (
    raw_all_sleep
    .drop_duplicates()
    .sort_values('start')
    .reset_index(drop=True)
)

# raw_all_sleep['dr'] = (raw_all_sleep['deep'] / raw_all_sleep['bed']).apply(lambda x: round(x,2))


# import hr 

In [5]:
# raw_hr: heart-rate archive with a parsed 'date_time' column and a 'date'
# column holding the calendar date of each timestamp
raw_hr = pd.read_csv('data/hr_archive.csv')
hr_timestamps = raw_hr['date_time'].apply(parse_datetime)
raw_hr['date_time'] = hr_timestamps
raw_hr['date'] = hr_timestamps.apply(lambda stamp: stamp.date())

KeyboardInterrupt: 

In [None]:
# exp_hr: heart-rate samples right-joined onto each exp_sleep row's 'date',
# bringing along that row's 'start' and 'wake' times.
#
# NOTE: `exp_sleep` is built in a later cell of this notebook; that cell has to
# be run before this one.
#
# 'wake' for a row is the 'end' of the previous row. shift(1) expresses that
# directly and leaves the first row as missing. The previous implementation
# copied the values into an np.empty float buffer, which left the first entry
# uninitialised and pushed the timestamps through a float conversion.
exp_sleep['wake'] = exp_sleep['end'].shift(1)
exp_hr = raw_hr.merge(exp_sleep[['date', 'start', 'wake']], on='date', how='right')

In [None]:
# fil_hr: keep the samples whose timestamp lies in [wake, start), i.e. at or
# after the row's 'wake' and strictly before its 'start'
at_or_after_wake = exp_hr['date_time'].ge(exp_hr['wake'])
before_start = exp_hr['date_time'].lt(exp_hr['start'])
fil_hr = exp_hr.loc[at_or_after_wake & before_start]

In [None]:
# agg_hr: one row per date with three bpm aggregates (the project helpers
# get_top5 and get_bottom5, plus the plain mean); 'date' is turned back into a
# regular column
bpm_by_date = fil_hr.groupby('date')['bpm']
agg_hr = bpm_by_date.agg([get_top5, 'mean', get_bottom5]).reset_index()

# import am

In [None]:
# raw_am: activity archive with 'date' converted to datetime.date objects, the
# same kind of value the other frames build for 'date' via `.date()`.
#
# `parse_date` is neither defined nor imported in this notebook, so the
# original call raised NameError; pandas' own parser is used instead.
# NOTE(review): the date format in am_archive.csv is not visible here —
# confirm pd.to_datetime infers it correctly (pass format= if it does not).
raw_am = pd.read_csv('data/am_archive.csv')
raw_am['date'] = pd.to_datetime(raw_am['date']).dt.date

# separate sleep & nap 

In [6]:
sti_all_sleep = stitch_drop_append(raw_all_sleep)

# Classify every session along two axes:
#   * length: 'asleep' >= LONG_SESSION_ASLEEP is "sleep", below it is "nap"
#   * start time-of-day: inside the expected window ("sync") or outside ("async")
# The async masks are the exact complement of the sync windows. Previously they
# used strict '>' on the lower bound, so a session starting at exactly 05:00
# (sleep) or 10:00 (nap) matched no mask at all.
LONG_SESSION_ASLEEP = 180  # same unit as the 'asleep' column


def _clock(hour):
    """Return `hour`:00 as a datetime.time for time-of-day comparisons."""
    return datetime(1, 1, 1, hour, 0).time()


# Time-of-day of each session start, computed once and shared by all masks.
start_clock = sti_all_sleep['start'].apply(lambda x: x.time())
is_long = sti_all_sleep['asleep'] >= LONG_SESSION_ASLEEP
is_short = sti_all_sleep['asleep'] < LONG_SESSION_ASLEEP

# Both windows wrap around midnight, hence the OR.
in_sleep_window = (start_clock >= _clock(17)) | (start_clock < _clock(5))   # 17:00-05:00
in_nap_window = (start_clock >= _clock(22)) | (start_clock < _clock(10))    # 22:00-10:00

# The bare numbers below are the notebook's original annotations
# (presumably row counts at the time — TODO confirm).
sync_sleep_mask = in_sleep_window & is_long

# 4
sync_nap_mask = in_nap_window & is_short

# 7
async_sleep_mask = (~in_sleep_window) & is_long

# 31
async_nap_mask = (~in_nap_window) & is_short

# check 120 as divide

# nap 

In [7]:
# raw_nap: the sessions selected by async_sleep_mask, re-indexed from 0
# NOTE(review): this uses async_sleep_mask, not async_nap_mask — confirm that
# long out-of-window sessions are what the 'nap' feature is meant to capture.
raw_nap = sti_all_sleep.loc[async_sleep_mask].reset_index(drop=True)

# sel_nap: calendar date each selected session started on, plus its 'asleep'
# value under the name 'nap'
sel_nap = pd.DataFrame({
    'date': raw_nap['start'].apply(lambda started: started.date()),
    'nap': raw_nap['asleep'],
})

# sleep  

In [8]:
# raw_sleep: the sessions selected by sync_sleep_mask, re-indexed from 0
raw_sleep = sti_all_sleep.loc[sync_sleep_mask].reset_index(drop=True)
# sel_sleep: only the four columns the feature-building cells read
kept_sleep_columns = ['start', 'end', 'bed', 'deep']
sel_sleep = raw_sleep[kept_sleep_columns]

In [9]:
# Import the user-supplied start and end time and append them to the sleep
# history as one extra row.
def _load_pickle(path):
    """Return the object stored in the pickle file at `path`.

    pickle.load can execute arbitrary code while loading, so only point this
    at files produced by this project.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Paths are relative to the working directory, like the CSV archives this
# notebook reads from 'data/', instead of an absolute user-specific location.
raw_input_start = _load_pickle('data/start.pkl')
raw_input_end = _load_pickle('data/end.pkl')

# apn_sleep
input_start = expand_input_time(raw_input_start)
input_end = expand_input_time(raw_input_end)
input_bed = get_input_bed(input_start, input_end)
input_list = [input_start, input_end, input_bed]
# zip stops at the shorter sequence: only the first three sel_sleep columns
# receive a value, and the remaining column is left missing in the new row.
input_dict = dict(zip(sel_sleep.columns, input_list))

# The new row takes the next integer label after the existing history.
input_df = pd.DataFrame(input_dict, index=[len(sel_sleep)])
apn_sleep = pd.concat([sel_sleep, input_df], axis=0, sort=False)

In [10]:
def get_sum(x):
    """Mean of 'bed' over the exp_sleep rows dated strictly between x - 3 days and x.

    Both bounds are exclusive, so for daily dates the window holds the two
    days before `x`. Despite the name, the result is a mean, not a sum.
    Reads the module-level `exp_sleep` frame at call time.
    """
    window_open = x - timedelta(days=3)
    in_window = (exp_sleep.date > window_open) & (exp_sleep.date < x)
    return exp_sleep.loc[in_window, 'bed'].mean()

# exp_sleep: one row per apn_sleep row, extended with calendar and history features
# 'date' is the calendar date of 'end' moved back by one day.
night_date = apn_sleep['end'].apply(lambda ended: ended.date()) - timedelta(days=1)
exp_sleep = pd.DataFrame({'date': night_date})
exp_sleep['day'] = exp_sleep['date'].apply(lambda d: d.weekday())
for column in ('start', 'end', 'bed', 'deep'):
    exp_sleep[column] = apn_sleep[column]
exp_sleep['delta'] = get_delta_scale(apn_sleep)

# p1 .. p7 come from the project helper get_p_day, called with 1 .. 7
for lag in range(1, 8):
    exp_sleep[f'p{lag}'] = get_p_day(exp_sleep, lag)
exp_sleep['p1_diff'] = get_diff(exp_sleep, 'p1')
# Columns are created in the same order as before: avg, then var, then diff.
for window in (3, 7):
    exp_sleep[f'p{window}_avg'] = get_avg(exp_sleep, window)
for window in (3, 7):
    exp_sleep[f'p{window}_var'] = get_var(exp_sleep, window)
for window in (3, 7):
    exp_sleep[f'p{window}_diff'] = get_diff(exp_sleep, f'p{window}_avg')
exp_sleep['p3_sum'] = exp_sleep['date'].apply(get_sum)


# merge + trim

In [11]:
# Left-join the nap feature onto the sleep features by date.
merged = exp_sleep.merge(sel_nap, on='date', how='left')
# Disabled joins of the heart-rate and activity features:
# merged = merged_sleep.merge(agg_hr, on='date', how='left')\
# .merge(raw_am, on='date', how='left')\


# Columns to trim before transforming. The list is currently empty, so nothing
# is dropped; the commented entries are the disabled candidates.
trimmed_columns = [
#                    'start','end','delta',
#                    'p1_diff','p3_diff','p7_diff','p3_var','p7_var',
#                    'p1','p2','p3','p4','p5','p6','p7','p3_avg','p7_avg'
]
merged = merged.drop(columns=trimmed_columns)

# transform

In [12]:
# Leading rows removed after transforming.
# presumably the rows without a full p1..p7 history — TODO confirm
N_LEADING_ROWS_DROPPED = 7


def make_time_stdsc():
    """Build a fresh TimeScaler -> StandardScaler chain."""
    return ChainTransformer([TimeScaler(), StandardScaler()])


# Kept under their original names for any cell that still refers to them.
time_stdsc = make_time_stdsc()
zero_stdsc = ChainTransformer([ZeroFiller(), StandardScaler()])
avg_stdsc = ChainTransformer([AvgFiller(), StandardScaler()])
# deep_standard_scaler = ChainTransformer([AvgRatioFiller(merged_sleep['bed']), StandardScaler()])

# Each time-valued column gets its OWN transformer instance. One shared
# instance used to be listed for all eleven of them; MatrixPipeline's internals
# are not visible here, but if fitting stores state on the transformer, a
# shared instance would end up transforming every column with whatever was
# fitted last. Separate instances are correct either way.
branches = [
    ('start', make_time_stdsc()),
    ('end', make_time_stdsc()),
    ('bed', StandardScaler()),
    ('day', OneHotEncoder()),
    ('p1', make_time_stdsc()),
    ('p2', make_time_stdsc()),
    ('p3', make_time_stdsc()),
    ('p4', make_time_stdsc()),
    ('p5', make_time_stdsc()),
    ('p6', make_time_stdsc()),
    ('p7', make_time_stdsc()),
    ('delta', StandardScaler()),
    ('p3_avg', make_time_stdsc()),
    ('p7_avg', make_time_stdsc()),
    ('p3_var', StandardScaler()),
    ('p7_var', StandardScaler()),
    ('p1_diff', StandardScaler()),
    ('p3_diff', StandardScaler()),
    ('p7_diff', StandardScaler()),
    ('nap', ZeroFiller()),
#     ('get_top5', avg_stdsc),
#     ('get_bottom5', avg_stdsc),
#     ('mean', avg_stdsc),
#     ('sedentary', StandardScaler()),
#     ('lightly', StandardScaler()),
#     ('moderately', StandardScaler()),
#     ('very', StandardScaler()),
    ('p3_sum', avg_stdsc),
    ('deep', AvgRatioFiller(merged['bed']))
]

mp = MatrixPipeline(branches)
mp.fit(merged)
Xy = mp.transform(merged)
Xy = Xy[N_LEADING_ROWS_DROPPED:].copy()
Xy.reset_index(inplace=True, drop=True)

# Split off the target and discard the date column.
y = Xy.pop('deep')
Xy.pop('date')
# Xy.pop('bed')
# X = Xy.pop('bed')
# X and Xy are the same object from here on.
X = Xy


In [None]:
# Scatter the transformed p3_sum feature against the 'deep' target.
# The original referenced `df`, which is not defined anywhere in this notebook;
# the target was popped into `y` in the transform cell, so that is what gets
# plotted. X and y share the same row index.
fig, ax = plt.subplots(figsize=(10,10))
ax.scatter(X['p3_sum'], y)
ax.set_xlabel('p3_sum')
ax.set_ylabel('deep')
ax.set_title('deep vs p3_sum');

# build Lasso 

In [73]:
# get regularization strength: sweep 500 log-spaced values and keep the one
# whose first returned score (named mse here) is lowest
a_space = np.logspace(np.log10(0.000001), np.log10(1000000), num=500)
mses, trainmses = [], []
for a in a_space:
    lasso_scores = estimator_cv_scores(X, y, Lasso, a, max_iter=1000)
    mse, r2, trainmse, trainr2, coef, intercept = lasso_scores
    mses += [round(mse, 3)]
    trainmses += [round(trainmse, 3)]

# Scores are compared after rounding to 3 decimals; np.argmin returns the first
# minimum, so ties resolve to the smallest value in a_space.
best_position = np.argmin(mses)
tuned_a = a_space[best_position]
print(tuned_a)



























































1.213859262690629


In [74]:
# Evaluate Lasso at the tuned strength: call estimator_cv_scores 500 times and
# average the returned test/train scores.
cvmses = []
cvr2s = []
traincvmses = []
traincvr2s = []
for _ in range(500):
    cvmse, cvr2, traincvmse, traincvr2, coef, intercept = estimator_cv_scores(X, y, Lasso, tuned_a, max_iter=1000)
    cvmses.append(cvmse)
    cvr2s.append(cvr2)
    traincvmses.append(traincvmse)
    traincvr2s.append(traincvr2)


# print model score
# These are Lasso scores; the labels previously said 'ridge' (copy-paste slip).
print(f'lasso test CVMSE_500: {np.mean(cvmses):.2f}, CVR2_500: {np.mean(cvr2s):.2f}\n'
  f'lasso train CVMSE_500: {np.mean(traincvmses):.2f}, CVR2_500: {np.mean(traincvr2s):.2f}')


ridge test CVMSE_500: 157.70, CVR2_500: 0.27
ridge train CVMSE_500: 154.65, CVR2_500: 0.28


# build Ridge 

In [76]:
# get regularization strength
a_space = np.logspace(np.log10(0.000001), np.log10(1000000), num=500)
mses = []
trainmses = []
for a in a_space:
    mse, r2, trainmse, trainr2, coef, intercept = estimator_cv_scores(X, y, Ridge, a)
    mses.append(round(mse, 3))
    trainmses.append(round(trainmse, 3))
    
tuned_a = a_space[np.argmin(mses)]
print(tuned_a)

120.27083347685095


# evaluate Ridge

In [77]:
# Evaluate Ridge at the tuned strength: call estimator_cv_scores 500 times,
# collect the four scores of each run, then split them into per-metric lists.
ridge_runs = []
for _ in range(500):
    cvmse, cvr2, traincvmse, traincvr2, coef, intercept = estimator_cv_scores(X, y, Ridge, tuned_a)
    ridge_runs.append((cvmse, cvr2, traincvmse, traincvr2))

cvmses, cvr2s, traincvmses, traincvr2s = map(list, zip(*ridge_runs))

# print model score
test_line = f'ridge test CVMSE_500: {np.mean(cvmses):.2f}, CVR2_500: {np.mean(cvr2s):.2f}\n'
train_line = f'ridge train CVMSE_500: {np.mean(traincvmses):.2f}, CVR2_500: {np.mean(traincvr2s):.2f}'
print(test_line + train_line)


ridge test CVMSE_500: 164.08, CVR2_500: 0.23
ridge train CVMSE_500: 147.67, CVR2_500: 0.32


BW: 
trimmed scaled 0.03   
trimmed not scaled 0.06  
expanded scaled -0.09  
expanded not scaled -0.09

BL:
trimmed scaled 0.24  
trimmed not scaled 0.23  
expanded scaled   
expanded not scaled 

SE:
trimmed scaled 0.39   
trimmed not scaled   
expanded scaled   
expanded not scaled 
br trimmed -0.11


In [75]:
# print beta coef
# `coef` is whatever the most recently executed estimator_cv_scores call
# returned, so the model shown depends on which evaluation cell ran last.
# NOTE(review): rounding to 99 decimals leaves the values effectively
# unchanged — a smaller precision was presumably intended.
beta = [(feature, round(weight, 99)) for feature, weight in zip(X.columns, coef)]
for feature, weight in beta:
    print(f'{feature:10} : {weight}')

is_1       : 0.0
is_3       : 0.0
is_4       : 0.0
is_5       : 0.0
is_6       : -0.0
is_2       : 0.0
is_0       : -0.0
start_sin  : -0.0
start_cos  : -0.0
end_sin    : 0.0
end_cos    : -0.0
bed        : 6.998826020107766
delta      : 0.0
p1_sin     : -0.0
p1_cos     : -0.0
p2_sin     : 0.0
p2_cos     : 0.0
p3_sin     : -0.0
p3_cos     : -0.0
p4_sin     : -0.0
p4_cos     : -0.0
p5_sin     : -0.0
p5_cos     : -0.2643110087328162
p6_sin     : -0.0
p6_cos     : -0.0
p7_sin     : 0.0
p7_cos     : 0.0
p1_diff    : -0.0
p3_avg_sin : 0.0
p3_avg_cos : -0.0
p7_avg_sin : -0.0
p7_avg_cos : -0.0
p3_var     : -0.0
p7_var     : 0.0
p3_diff    : -0.0
p7_diff    : -0.0
p3_sum     : -0.0
nap        : 0.0


# build neural network 

In [None]:
import keras
from keras.models import Sequential, Model, Input
from keras.layers import Dense, Dropout, Activation
from sklearn.model_selection import train_test_split
import tensorflow as tf
import numpy as np
import scipy.stats as scs
import matplotlib.pyplot as plt

# predict today 

In [None]:
# predict today
# Fit on every row except the last and predict the last row.
# (Xtest / ytest keep their original names although they hold the rows the
# model is fitted on.)
# presumably the last row is the appended user-input row — TODO confirm
Xtest = X.iloc[:-1, :].values
ytest = y.iloc[:-1].values
Xtoday = X.iloc[-1, :].values.reshape(1,-1)
# Use the tuned strength. The original passed `a`, the leftover loop variable
# of the alpha sweep, i.e. the last (largest) value of a_space.
ridge = Ridge(alpha=tuned_a)
ridge.fit(Xtest, ytest)
y_ = ridge.predict(Xtoday)[0]
y_