In [2]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pmdarima import auto_arima
from sklearn.metrics import mean_squared_error
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tools.eval_measures import rmse
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.statespace.sarimax import SARIMAX
warnings.filterwarnings("ignore")


In [3]:
# Load the appended CGM readings; the preview below shows the columns
# 'Display Time', 'subjectId' and 'GlucoseValue'.
cgm_csv_path = 'GlucoCheck/Data/CGM/CGM_Analyzer_Appended.csv'
df = pd.read_csv(cgm_csv_path)
df.head()

Unnamed: 0,Display Time,subjectId,GlucoseValue
0,2016-06-25 09:14:00,ID01,108.0
1,2016-06-25 09:17:00,ID01,108.0
2,2016-06-25 09:20:00,ID01,108.0
3,2016-06-25 09:23:00,ID01,107.658
4,2016-06-25 09:26:00,ID01,107.496


In [4]:
# Parse the timestamps, use them as the index and discard the subject label,
# leaving a single 'GlucoseValue' column keyed by 'Display Time'.
# NOTE(review): dropping 'subjectId' treats all rows as one continuous series;
# confirm that is intended if the file holds more than one subject.
df = (
    df.assign(**{'Display Time': pd.to_datetime(df['Display Time'])})
      .set_index("Display Time")
      .drop(columns=['subjectId'])
)
df.head()

Unnamed: 0_level_0,GlucoseValue
Display Time,Unnamed: 1_level_1
2016-06-25 09:14:00,108.0
2016-06-25 09:17:00,108.0
2016-06-25 09:20:00,108.0
2016-06-25 09:23:00,107.658
2016-06-25 09:26:00,107.496


In [5]:
# Hold out the final 500 rows for evaluation; everything before them is training data.
split_at = len(df) - 500
train_data = df.iloc[:split_at]
test_data = df.iloc[split_at:]

In [6]:
# Search seasonal ARIMA orders on the first 1000 glucose readings only.
# NOTE(review): m=12 is the seasonal period in samples; the rationale for 12
# is not shown in this notebook — confirm.
glucose_sample = df['GlucoseValue'].iloc[:1000]
order_search = auto_arima(
    glucose_sample,
    seasonal=True,
    m=12,
    max_p=3,
    max_d=3,
    max_q=3,
    max_P=4,
    max_D=4,
    max_Q=4,
)
order_search.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,1000.0
Model:,"SARIMAX(2, 0, 0)x(0, 0, [1, 2], 12)",Log Likelihood,-1735.879
Date:,"Thu, 20 Aug 2020",AIC,3483.757
Time:,14:12:58,BIC,3513.204
Sample:,0,HQIC,3494.949
,- 1000,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,0.4777,0.149,3.202,0.001,0.185,0.770
ar.L1,1.8128,0.011,166.263,0.000,1.791,1.834
ar.L2,-0.8168,0.010,-78.562,0.000,-0.837,-0.796
ma.S.L12,0.0158,0.044,0.361,0.718,-0.070,0.102
ma.S.L24,0.0429,0.034,1.255,0.209,-0.024,0.110
sigma2,1.8703,0.027,69.704,0.000,1.818,1.923

0,1,2,3
Ljung-Box (Q):,48.2,Jarque-Bera (JB):,49818.64
Prob(Q):,0.18,Prob(JB):,0.0
Heteroskedasticity (H):,5.99,Skew:,1.34
Prob(H) (two-sided):,0.0,Kurtosis:,37.47


In [8]:
# Fit an ARIMA(5, 2, 2) (no seasonal terms) on the training glucose series.
glucose_train = train_data['GlucoseValue']
arima_model = SARIMAX(glucose_train, order=(5, 2, 2))
arima_result = arima_model.fit()
arima_result.summary()

0,1,2,3
Dep. Variable:,GlucoseValue,No. Observations:,31291.0
Model:,"SARIMAX(5, 2, 2)",Log Likelihood,-91520.705
Date:,"Thu, 20 Aug 2020",AIC,183057.411
Time:,14:38:15,BIC,183124.219
Sample:,0,HQIC,183078.804
,- 31291,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ar.L1,-0.7062,0.024,-29.793,0.000,-0.753,-0.660
ar.L2,0.2358,0.007,33.625,0.000,0.222,0.250
ar.L3,-0.0497,0.003,-15.830,0.000,-0.056,-0.044
ar.L4,0.0277,0.003,8.876,0.000,0.022,0.034
ar.L5,0.0390,0.002,15.850,0.000,0.034,0.044
ma.L1,-0.0141,0.024,-0.597,0.550,-0.061,0.032
ma.L2,-0.9857,0.024,-41.319,0.000,-1.033,-0.939
sigma2,20.3246,0.028,728.008,0.000,20.270,20.379

0,1,2,3
Ljung-Box (Q):,148.35,Jarque-Bera (JB):,9165280104.68
Prob(Q):,0.0,Prob(JB):,0.0
Heteroskedasticity (H):,0.86,Skew:,24.18
Prob(H) (two-sided):,0.0,Kurtosis:,2654.0


In [9]:
# Forecast one value per held-out row: positions len(train_data) .. len(df) - 1.
forecast_start = len(train_data)
forecast_end = len(df) - 1
arima_pred = arima_result.predict(
    start=forecast_start,
    end=forecast_end,
    typ="levels",
).rename("ARIMA Predictions")
arima_pred

31291    86.418329
31292    85.789368
31293    85.046078
31294    84.387435
31295    84.164698
           ...    
31786    82.873820
31787    82.871456
31788    82.869094
31789    82.866729
31790    82.864367
Name: ARIMA Predictions, Length: 500, dtype: float64

In [12]:
# Forecast values as a plain Python list (the "simulated" series).
s = arima_pred.to_list()

In [13]:
# Held-out glucose readings as a plain Python list (the "observed" series).
o = test_data['GlucoseValue'].to_list()

In [14]:
def index_agreement(o, s):
    """Compute the index of agreement between observed and simulated values.

    Evaluates::

        1 - sum((o - s)**2) / sum((|s - mean(o)| + |o - mean(o)|)**2)

    which is the form known as Willmott's index of agreement.

    Parameters
    ----------
    o : array-like
        Observed values.
    s : array-like
        Simulated / predicted values; must be broadcast-compatible with ``o``.

    Returns
    -------
    float
        1.0 when ``s`` equals ``o`` element-wise. ``nan`` when the
        denominator is zero (for example empty input, or every value of both
        series equal to the observed mean).
    """
    # Coerce up front: plain Python lists do not support `o - s`, and
    # unsigned-integer arrays would wrap around on subtraction.
    o = np.asarray(o, dtype=float)
    s = np.asarray(s, dtype=float)

    o_mean = np.mean(o)
    squared_error = np.sum((o - s) ** 2)
    potential_error = np.sum((np.abs(s - o_mean) + np.abs(o - o_mean)) ** 2)

    ia = 1 - squared_error / potential_error

    return ia

In [15]:
# Score the forecast against the held-out observations.
index_agreement(o=np.asarray(o), s=np.asarray(s))

0.43892552984254984

In [None]:
# Plot the held-out glucose readings against the ARIMA forecast.
# This cell previously read `test_df['Display Time']`, `test_df['GlucoseValue']`
# and `test_df['Preds']`, but no `test_df` is defined in this notebook; the
# hold-out frame is `test_data` and the forecast is `arima_pred`.
fig, ax = plt.subplots(figsize=(36, 18))

# `test_data` is indexed by Display Time while `arima_pred` carries an integer
# index, so the forecast values are plotted positionally on the test timestamps.
ax.plot(test_data.index, test_data['GlucoseValue'], color='red', label='Actual')
ax.plot(test_data.index, arima_pred.to_numpy(), color='green', label='ARIMA Predictions')

ax.set_xlabel('Display Time')
ax.set_ylabel('Glucose Values')
ax.legend()
plt.show()