In [10]:
import warnings
import itertools
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import datetime
import pandas as pd


# Load the raw export; the first CSV column is used as the row index.
df = pd.read_csv("dataset_mood_smartphone.csv", index_col=0)

# Keep only the mood records and the three columns used below.
df = df.loc[df["variable"] == "mood", ["id", "time", "value"]]

ids = df["id"].unique()  # unique id values, in order of first appearance

# Index by timestamp so the records can be resampled per calendar day.
df = df.set_index("time")
df.index = pd.to_datetime(df.index)

# Daily mean mood per id, indexed by (id, time).
# "value" is selected before resampling so the string "id" column never reaches
# mean(); newer pandas versions raise on non-numeric columns instead of
# silently dropping them.
groups = df.groupby("id")["value"].resample("D").mean().to_frame()

# Backward fill: a day without any mood record takes the value of the NEXT day
# that has one (not the previous day).
groups = groups.bfill()

In [9]:
# Candidate orders for the grid search: p, d and q are each tried at 0 and 1.
p = d = q = range(2)

# Every non-seasonal (p, d, q) triple.
pdq = [combo for combo in itertools.product(p, d, q)]

# The same triples, each extended with a seasonal period of 12.
seasonal_pdq = [(*combo, 12) for combo in pdq]

print('Examples of parameter combinations for Seasonal ARIMA...')
# (index into pdq, index into seasonal_pdq) for the four sample lines.
for order_pos, seasonal_pos in ((1, 1), (1, 2), (2, 3), (2, 4)):
    print('SARIMAX: {} x {}'.format(pdq[order_pos], seasonal_pdq[seasonal_pos]))

Examples of parameter combinations for Seasonal ARIMA...
SARIMAX: (0, 0, 1) x (0, 0, 1, 12)
SARIMAX: (0, 0, 1) x (0, 1, 0, 12)
SARIMAX: (0, 1, 0) x (0, 1, 1, 12)
SARIMAX: (0, 1, 0) x (1, 0, 0, 12)


In [12]:
# Grid search: fit one SARIMAX per (order, seasonal_order) pair, print its AIC,
# and remember the pair with the lowest AIC.

# Silences all warnings from here on, not only inside this loop.
warnings.filterwarnings("ignore")

best_aic = float("inf")
best_order = None
best_seasonal_order = None

for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            # NOTE(review): `groups` is passed whole, so the model sees every id
            # as one series - confirm that a pooled fit is intended.
            mod = sm.tsa.statespace.SARIMAX(groups,
                                            order=param,
                                            seasonal_order=param_seasonal,
                                            enforce_stationarity=False,
                                            enforce_invertibility=False)

            results = mod.fit()
        except Exception as err:
            # Best effort: a combination that cannot be fitted is skipped, but the
            # reason is reported. KeyboardInterrupt is deliberately not caught so
            # the search can still be stopped by hand.
            print('ARIMA{}x{} - skipped ({}: {})'.format(
                param, param_seasonal, type(err).__name__, err))
            continue

        # param_seasonal already ends with the seasonal period, so it is not
        # appended a second time.
        print('ARIMA{}x{} - AIC:{}'.format(param, param_seasonal, results.aic))

        # A NaN AIC never compares lower, so it cannot become the best.
        if results.aic < best_aic:
            best_aic = results.aic
            best_order = param
            best_seasonal_order = param_seasonal

if best_order is None:
    print('No parameter combination could be fitted.')
else:
    print('Lowest AIC: ARIMA{}x{} - AIC:{}'.format(
        best_order, best_seasonal_order, best_aic))

ARIMA(0, 0, 0)x(0, 0, 0, 12)12 - AIC:8960.286569516597
ARIMA(0, 0, 0)x(0, 0, 1, 12)12 - AIC:7373.884993738262
ARIMA(0, 0, 0)x(0, 1, 0, 12)12 - AIC:3617.5720090735686
ARIMA(0, 0, 0)x(0, 1, 1, 12)12 - AIC:2934.3343964747132
ARIMA(0, 0, 0)x(1, 0, 0, 12)12 - AIC:3615.60703026882
ARIMA(0, 0, 0)x(1, 0, 1, 12)12 - AIC:2942.615173574901
ARIMA(0, 0, 0)x(1, 1, 0, 12)12 - AIC:3307.1696240463752
ARIMA(0, 0, 0)x(1, 1, 1, 12)12 - AIC:2919.4716057228015
ARIMA(0, 0, 1)x(0, 0, 0, 12)12 - AIC:7362.860971522184
ARIMA(0, 0, 1)x(0, 0, 1, 12)12 - AIC:6251.06697954523
ARIMA(0, 0, 1)x(0, 1, 0, 12)12 - AIC:3486.433716914553
ARIMA(0, 0, 1)x(0, 1, 1, 12)12 - AIC:2721.5355906699215
ARIMA(0, 0, 1)x(1, 0, 0, 12)12 - AIC:3483.4088084300106
ARIMA(0, 0, 1)x(1, 0, 1, 12)12 - AIC:2728.867811416741
ARIMA(0, 0, 1)x(1, 1, 0, 12)12 - AIC:3148.131138940932
ARIMA(0, 0, 1)x(1, 1, 1, 12)12 - AIC:2721.1135173626863
ARIMA(0, 1, 0)x(0, 0, 0, 12)12 - AIC:2990.425721882768
ARIMA(0, 1, 0)x(0, 0, 1, 12)12 - AIC:2974.4573814607625
ARIM

In [17]:
predictions = []
dates = []

# For every id: fit an ARIMA(1,1,1) on that id's daily series and forecast the
# day after its last recorded day.
for i in ids:
    series = groups.loc[i]

    # The forecast date: one day past the last entry of this id's index.
    next_day = series.index[-1] + datetime.timedelta(days=1)

    mod = sm.tsa.statespace.SARIMAX(series, order=(1, 1, 1),
                                    enforce_stationarity=False,
                                    enforce_invertibility=False)

    results = mod.fit()

    # Prediction starting at the first out-of-sample date.
    pred = results.get_prediction(start=next_day, dynamic=False)

    # predicted_mean is indexed by date, so take the first entry by position
    # explicitly; a bare [0] on a non-integer index is deprecated in pandas.
    forecast = pred.predicted_mean.iloc[0]

    predictions.append(forecast)
    dates.append(next_day)
    print(i, " ", forecast, "\n")





AS14.01   7.834514906130194 

AS14.02   7.504816304113891 

AS14.03   7.513481452890998 

AS14.05   6.5571364379582056 

AS14.06   7.113826074853779 

AS14.07   5.841057428214179 

AS14.08   6.67961378566821 

AS14.09   6.685952124511344 

AS14.12   5.400660917049814 

AS14.13   7.789603599128519 

AS14.14   6.862297311939507 

AS14.15   6.951073074038945 

AS14.16   7.205689324575409 

AS14.17   7.282160814862077 

AS14.19   6.897603694295951 

AS14.20   6.98484274673681 

AS14.23   8.168795538472565 

AS14.24   6.9270384431983665 

AS14.25   6.847464002491281 

AS14.26   6.801714400953187 

AS14.27   7.303066322992101 

AS14.28   6.815851405709267 

AS14.29   6.957449176939651 

AS14.30   7.813362478449303 

AS14.31   6.998431154448346 

AS14.32   7.199583055064071 

AS14.33   6.689765458109524 



In [18]:
# One row per id: the id, the forecast date, and the forecast value.
# The lists are cut to len(ids) instead of a hard-coded 27, so the columns
# always have the same length as `ids` whatever the number of ids is.
n_ids = len(ids)
prediction_vals = pd.DataFrame({
    'Ids': ids,
    'Dates': dates[:n_ids],
    'predictions': predictions[:n_ids],
})
prediction_vals

Unnamed: 0,Ids,Dates,predictions
0,AS14.01,2014-05-05,7.834515
1,AS14.02,2014-04-26,7.504816
2,AS14.03,2014-05-08,7.513481
3,AS14.05,2014-05-06,6.557136
4,AS14.06,2014-05-09,7.113826
5,AS14.07,2014-05-06,5.841057
6,AS14.08,2014-05-06,6.679614
7,AS14.09,2014-05-06,6.685952
8,AS14.12,2014-05-06,5.400661
9,AS14.13,2014-05-05,7.789604
