In [1]:
import configparser
import os
from joblib import dump, load
import json
from tqdm import tqdm
from helpers.helper_functions import *
from helpers.helper_classes import *
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import statsmodels as sm
import numpy as np
import pmdarima as pm
from statsmodels.tsa.arima.model import ARIMA

pd.set_option('display.max_rows', 500)

# Read config.ini file
config = configparser.ConfigParser()
config.read('src/config.ini')
# os.chdir(config['PATH']['ROOT_DIR'])

# # Load data
df = pd.read_csv(config['PATH']['DATA_DIR'] + '/dataset_mood_smartphone.csv')
df.drop('Unnamed: 0', axis=1, inplace=True)

# time to datetime
df['time'] = pd.to_datetime(df['time'])


In [2]:
# TODO: 
# DONE: Forward fill valence and arousal
# DONE: Remove appCat.builtin negative values
# DONE: Remove appCat outliers
# Impute valence, arousal and mood
# Impute long term missing values with mean instead of ffill (for valence and arousal)
# Aggregate to daily mood
# Decide on aggregation method for each variable

In [3]:
# # Forward fill valence and arousal
# # Iterate over people
# for person in tqdm(df['id'].unique()):
#     # Forward fill valence and arousal
#     idx_arousal = np.logical_and(df['id'] == person, df['variable'] == 'circumplex.arousal')
#     df.loc[idx_arousal] = df.loc[idx_arousal].fillna(method='ffill')
df

Unnamed: 0,id,time,variable,value
0,AS14.01,2014-02-26 13:00:00.000,mood,6.000
1,AS14.01,2014-02-26 15:00:00.000,mood,6.000
2,AS14.01,2014-02-26 18:00:00.000,mood,6.000
3,AS14.01,2014-02-26 21:00:00.000,mood,7.000
4,AS14.01,2014-02-27 09:00:00.000,mood,6.000
...,...,...,...,...
376907,AS14.30,2014-04-11 07:51:16.948,appCat.weather,8.032
376908,AS14.30,2014-04-19 11:00:32.747,appCat.weather,3.008
376909,AS14.30,2014-04-26 10:19:07.434,appCat.weather,7.026
376910,AS14.30,2014-04-27 00:44:48.450,appCat.weather,23.033


## Extreme values appCat variables
There are many outliers in the appCat variables, this is not ideal for numerical stability. We will one hot encode these outliers per variable and remove the outlier from the original observation

In [4]:
#  We will one hot encode these outliers per variable and remove the outlier from the original observation
#  Moreover we will remove all negative values

all_vars = df['variable'].unique()
appVars = [var for var in all_vars if 'appCat' in var]
appVars

for var in appVars:
    df_var_cur = df[df['variable'] == var]
    # Iterate over observations

    # Get 95th percentile
    perc98 = np.percentile(df_var_cur['value'], 98)

    # Get all idx where value is smaller than 0
    idx = df_var_cur[df_var_cur['value'] < 0].index
    df.drop(idx, axis=0, inplace=True)

    # Get all idx where value is larger than 95th percentile
    idx_98 = df_var_cur[df_var_cur['value'] > perc98].index

    # Change variable name to var_outlier
    df.loc[idx_98, 'variable'] = var + '_outlier'
    df.loc[idx_98, 'value'] = 1    


In [5]:
def KF_params(arma_model):
    p, _, q = arma_model.order
    params = arma_model.params()
    
    # if p == 0 and q == 0:
    #     return None
    m = np.maximum(p, q+1)
    T = np.zeros((m, m))
    R = np.zeros(m)
    a_init = np.zeros(m)
    P_init = np.eye(m) * 10e7
    R[0] = 1
    Z = np.zeros(m)
    Z[0] = 1
    d = arma_model.params()['intercept'].astype(float)
    Q = np.array([params['sigma2']])

    for i in range(p):
        T[i, 0] = params["ar.L"+str(i+1)]
    for j in range(q):
        R[j+1] = params["ma.L"+str(j+1)]
    if m > 1:
        for k in range(m-1):
            T[k, k+1] = 1
    elif p == 0 and q == 0:
        T[0,0] = 1
    
    R = R.reshape(m,1)
    Z = Z.reshape(1,m)

    # Sum AR coefficients
    sum_AR = np.sum(arma_model.arparams())
    sum_AR2 = np.sum(arma_model.arparams()**2)
    sum_MA2 = np.sum(arma_model.maparams()**2)

    # Initial state and variance
    if p > 0:
        mu = params['intercept'] / (1 - sum_AR)
        a_init[0] = mu
        if p > 1:
            for i in range(p-1):
                a_init[i+1] = np.sum(arma_model.arparams()[i+1] * mu)
        P_init[0,0] = params['sigma2'] * (sum_MA2 + 1) / (1 - sum_AR2)

    return {'T': T, 'R': R, 'Z': Z, 'Q': Q, 'd': d, 'a_init': a_init, 'P_init': P_init}

KF_res_dict = {}

# Iterate over people
# for person in tqdm(df['id'].unique()):
for person in tqdm(['AS14.30']):
    # Dataframe for mood of current person
    idx_mood = np.logical_and(df['id'] == person , df['variable'] == 'mood')
    df_mood = df[idx_mood].copy()['value']
    model = pm.auto_arima(df_mood, suppress_warnings=True, seasonal=False, stepwise=True, d = 0, stationary = True)

    # # Extract the best (p, d, q) orders
    p, d, q = model.order
    params = model.params()
    # Get KF parameters
    KF_res_dict[person] = KF_params(model)




100%|██████████| 1/1 [00:00<00:00,  1.22it/s]


In [32]:

class KalmanFilter:
    """
    This class implements a Kalman filter. We have:

    1) The observation equation:
        y_t = Z_t * alpha_t + eps_t
        eps_t ~ N(0, H_t)

    2) The state update equation:
        alpha_{t+1} = T_t * alpha_t + R_t * eta_t
        eta_t ~ N (0, Q_t)

    3) Initialization
        alpha_1 ~ N(a1, P1)

        Stationary      -> a1 = mean(y_t),  P1 = var(y_t)
        Non-stationary  -> a1 = 0,          P1 = 10e7

    NOTE:
        - Z_t, H_t, R_t and Q_t are non-stochastic but can be time varying matrices.
    """

    def __init__(self, y, a_init, P_init, H, Q, R, d=0, c=0):
        self.y = np.array(y)
        self.n = len(y)
        self.a_init = np.array(a_init)
        self.P_init = np.array(P_init)

        # Make vector over time if not already
        self.H = self.as_vector_over_time(H)
        self.Q = self.as_vector_over_time(Q)
        self.R = self.as_vector_over_time(R)
        self.d = self.as_vector_over_time(d)
        self.c = self.as_vector_over_time(c)

        # Keep track of the procedures being ran
        self.smoother_ran = False
        self.filter_ran = False
        self.disturbance_smoother_ran = False

    def run_filter(self, Z, T):
        """
        Return a dictionary with:
            - v
            - F
            - K
            - a_pred (a_t)
            - a_filter (a_t|t)
            - P
        """
        result_dict = {key: "" for key in ["a_pred", "a_filter", "P", "v", "F", "K"]}

        # Initialization
        a_pred = np.array([np.zeros(shape=self.a_init.shape) for i in range(self.n + 1)])
        a_pred[0] = self.a_init

        a_filtered = [np.zeros(shape=self.a_init.shape) for i in range(self.n)]

        P = [np.zeros(shape=self.P_init.shape) for i in range(self.n + 1)]
        P[0] = self.P_init

        F = [None] * self.n
        K = [None] * self.n
        v = [np.array([0]).reshape(1, 1) for i in range(self.n)]

        Z = self.as_vector_over_time(Z)
        T = self.as_vector_over_time(T)

        # Loop as in slide 36 week 2
        for t in range(self.n):    
            # Prediction error
            v[t] = self.y[t] - Z[t] @ a_pred[t] - self.d[t]
            
            if np.isnan(self.y[t]):
                v[t] = np.zeros(shape=v[t - 1].shape)

            # Prediction variance
            F[t] = Z[t] @ P[t] @ Z[t].T + self.H[t]

            # print(f"F[{t}] = {F[t]}\n P[{t}] = {P[t]}\n Z[{t}] = {Z[t]}\n")
            PZF1 = P[t] @ Z[t].T @ np.linalg.inv(F[t])

            # Kalman Gain
            if np.isnan(self.y[t]):
                K[t] = np.zeros(shape=(T[t].shape[0], Z[t].shape[0]))
            else:
                K[t] = T[t] @ PZF1

            # Filtered and predicted a
            a_filtered[t] = a_pred[t] + PZF1 @ v[t]
            a_pred[t + 1] = T[t] @ a_filtered[t] + self.c[t] 

            # Update variance
            P[t + 1] = (
                T[t] @ P[t] @ T[t].T
                + self.R[t] @ self.Q[t] @ self.R[t].T
                - K[t] @ F[t] @ K[t].T
            )
        print(np.array(a_pred).shape)
        
        result_dict["a_pred"] = np.array(a_pred).reshape(self.n + 1, len(a_pred[0]))
        result_dict["a_filter"] = np.array(a_filtered).reshape(
            self.n, len(a_filtered[0])
        )
        result_dict["P"] = P
        result_dict["v"] = np.array(v).reshape(self.n, len(v[0]))
        result_dict["F"] = F
        result_dict["K"] = K

        # Save all matrices in class using self
        self.Z = Z
        self.T = T
        self.a_pred = result_dict["a_pred"]
        self.a_filter = result_dict["a_filter"]
        self.P = result_dict["P"]
        self.v = result_dict["v"]
        self.F = result_dict["F"]
        self.K = result_dict["K"]

        self.results_filter = result_dict
        self.filter_ran = True

        return result_dict
    
    

    def run_smoother(self, Z, T):
        """
        Return a dictionary with:
            - a_smooth
            - P_smooth (V in the book)
            - N
            - r
        """
        results_dict = {key: "" for key in ["a_smooth", "P_smooth", "N", "r"]}

        # Run filter if not already done
        if not self.filter_ran:
            self.run_filter(Z, T)
        res = self.results_filter

        Z = self.as_vector_over_time(Z)
        T = self.as_vector_over_time(T)

        # Read F from filter results
        # HARDCODED SIZE of v for now
        m = self.a_init.shape[0]
        F = res["F"].copy()
        v = res["v"].copy().reshape(self.n, 1, 1)  
        a_pred = res["a_pred"].reshape(self.n + 1, m, 1)

        # In the case of missing observations, we want F --> inf.
        # NOTE: we do not want this when using missing observations to `forecast`.
        # However: in that case we don't use the smoother/
        for t, y_t in enumerate(self.y):
            if np.isnan(y_t):
                F[t] = F[t] * 10e7

        # Initialize the smoothed values and the cumulants
        a_smooth = np.zeros(shape=(self.n, m, 1))
        P_smooth = np.zeros(shape=(self.n, m, m))
        r = np.zeros(shape=(self.n + 1, m, 1))
        N = np.zeros(shape=(self.n + 1, m, m))

        # Backwards recursive loop as in slide 45 week 2
        for t in range(self.n - 1, -1, -1):
            L = T[t] - res["K"][t] @ Z[t]
            r[t] = Z[t].T @ np.linalg.inv(F[t]) @ v[t] + L.T @ r[t + 1]
            N[t] = Z[t].T @ np.linalg.inv(F[t]) @ Z[t] + L.T @ N[t + 1] @ L

        # Forwards recursive loop as in slide 45 week 2
        for t in range(0, self.n):
            a_smooth[t] = a_pred[t] + res["P"][t] @ r[t]
            P_smooth[t] = res["P"][t] - res["P"][t].T @ N[t] @ res["P"][t]

        results_dict["a_smooth"] = np.array(a_smooth).reshape(self.n, len(a_smooth[0]))
        results_dict["P_smooth"] = P_smooth
        results_dict["N"] = N[1:]
        results_dict["r"] = r[1:]

        # Save all matrices in class using self
        self.a_smooth = results_dict["a_smooth"]
        self.P_smooth = results_dict["P_smooth"]
        self.N = results_dict["N"]
        self.r = results_dict["r"]

        self.results_smoother = results_dict
        self.smoother_ran = True

        return results_dict

    def run_disturbance_smoother(self):
        """This methods runs the disturbance smoothing recursions found on
        p. 96 of the DK book
        """
        if not self.smoother_ran:
            raise ValueError("Run the smoother first")
        if not self.filter_ran:
            raise ValueError("Run the filter first")

        results_dict = {
            key: ""
            for key in [
                "eps_smoothed",
                "eta_smoothed",
                "u",
                "D",
                "var_eps_smoothed",
                "var_eta_smoothed",
            ]
        }

        results_dict["N"] = self.N
        results_dict["r"] = self.r

        # Initialize empty arrays for the result arrays
        self.u = np.zeros(shape=(self.n, 1))
        self.eps_smoothed = np.zeros(shape=(self.n, 1))
        self.eta_smoothed = np.zeros(shape=(self.n, 1))
        self.D = [np.zeros(shape=self.F[0].shape) for _ in range(self.n)]
        self.var_eps_smoothed = np.zeros(shape=(self.n, 1))
        self.var_eta_smoothed = np.zeros(shape=(self.n, 1))

        for t in range(self.n):
            self.u[t] = np.linalg.inv(self.F[t]) @ self.v[t] - self.K[t].T @ self.r[t]
            self.D[t] = np.linalg.inv(self.F[t]) + self.K[t].T @ self.N[t] @ self.K[t]

            # Smoothed disturbances and variances
            self.eps_smoothed[t] = self.H[t] @ self.u[t]
            self.eta_smoothed[t] = self.Q[t] @ self.R[t].T @ self.r[t]
            self.var_eps_smoothed[t] = self.H[t] - self.H[t] @ self.D[t] @ self.H[t]
            self.var_eta_smoothed[t] = (
                self.Q[t] - self.Q[t] @ self.R[t].T @ self.N[t] @ self.R[t] @ self.Q[t]
            )

        # Save results
        results_dict["eps_smoothed"] = self.eps_smoothed
        results_dict["eta_smoothed"] = self.eta_smoothed
        results_dict["var_eps_smoothed"] = self.var_eps_smoothed
        results_dict["var_eta_smoothed"] = self.var_eta_smoothed
        results_dict["u"] = self.u
        results_dict["D"] = self.D

        self.results_disturbance_smoother = results_dict
        self.disturbance_smoother_ran = True

        return results_dict

    def run_simul(self, eps_hat):

        results_dict = {key: "" for key in ["eps_plus", "eta_plus", "y_plus", "A_plus"]}

        self.eps_plus = np.zeros(shape=(self.n, 1))
        self.eta_plus = np.zeros(shape=(self.n, 1))

        self.eps_hat_plus = np.zeros(shape=(self.n, 1))

        self.eps_tilde = np.zeros(shape=(self.n, 1))
        self.eta_tilde = np.zeros(shape=(self.n, 1))

        self.y_plus = np.zeros(shape=(self.n, 1))
        self.A_plus = np.zeros(shape=(self.n, 1))

        self.A_plus[0] = self.y[0]

        for t in range(self.n):
            self.eps_plus[t] = np.random.normal(0, np.sqrt(self.H[t]))
            self.eta_plus[t] = np.random.normal(0, np.sqrt(self.Q[t]))
            self.y_plus[t] = self.eps_plus[t] + self.A_plus[t]
            if t != self.n - 1:
                self.A_plus[t + 1] = self.A_plus[t] + self.eta_plus[t]

        results_dict["eps_plus"] = self.eps_plus
        results_dict["eta_plus"] = self.eta_plus
        results_dict["y_plus"] = self.y_plus
        results_dict["A_plus"] = self.A_plus

        return results_dict

    def run_simul_comp():
        results_dict = {key: "" for key in ["eps_plus", "eta_plus", "y_plus", "A_sim"]}

        return results_dict

    def as_vector_over_time(self, matrix):
        # If the 'matrix' is already a list of length n, this is not needed
        if type(matrix) == list and len(matrix) == self.n:
            return matrix

        # Return an error if the length is not good
        if type(matrix) == list and len(matrix) != self.n:
            raise ValueError("The vector of matrices needs to be of length n")

        if type(matrix) == np.ndarray and matrix.shape[0] == self.n:
            return matrix
        
        if type(matrix) == np.ndarray:
            return [matrix for i in range(self.n)]
        
        if (
            type(matrix) == int
            or type(matrix) == float
            or type(matrix) == np.float64
            or type(matrix) == np.int64
            or type(matrix) == np.int32
            or type(matrix) == np.float32
            or type(matrix) == np.int16
            or type(matrix) == np.float16
        ):
            return [np.array(matrix).reshape(1, 1) for i in range(self.n)]

        else:
            raise TypeError(
                f"matrix needs to be of type np.ndarray or list, type given: {type(matrix)}"
            )


In [33]:
# AS14.05
idx_mood = np.logical_and(df['id'] == "AS14.30" , df['variable'] == 'mood')
KF_1430 = KF_res_dict["AS14.30"]
df_mood = df[idx_mood].copy()
# sort by time
df_mood.sort_values('time', inplace=True)
# df_mood.values to floats
df_mood['value'] = df_mood['value'].astype(float)

KF = KalmanFilter(y=df_mood['value'].values,
                a_init=KF_1430['a_init'],
                P_init = KF_1430['P_init'],
                H = np.array([0]).reshape(1,1),
                Q = KF_1430['Q'],
                R = KF_1430['R'],
                d = KF_1430['d'])

res = KF.run_smoother(Z = KF_1430['Z'], T = KF_1430['T'])

res_final = (res['a_smooth'] + KF_1430['d']).flatten()
# DF with one column with the smoothed values, and one column with the true values
df_mood['smoothed'] = res_final
df_mood

(236, 1)


Unnamed: 0,id,time,variable,value,smoothed
376908,AS14.30,2014-03-20 09:00:00,mood,,8.0
376909,AS14.30,2014-03-20 12:00:00,mood,,8.0
4899,AS14.30,2014-03-20 15:00:00,mood,8.0,8.0
4900,AS14.30,2014-03-20 18:00:00,mood,7.0,7.0
4901,AS14.30,2014-03-20 21:00:00,mood,6.0,6.0
4902,AS14.30,2014-03-21 09:00:00,mood,7.0,7.0
4903,AS14.30,2014-03-21 12:00:00,mood,8.0,8.0
4904,AS14.30,2014-03-21 15:00:00,mood,7.0,7.0
4905,AS14.30,2014-03-21 18:00:00,mood,9.0,9.0
4906,AS14.30,2014-03-21 21:00:00,mood,8.0,8.0


In [20]:
Z = KF_1430['Z']
P = KF_1430['P_init']
H = np.array([0]).reshape(1,1)
Z @ P @ Z + H

array([[1.e+08]])

In [None]:
KF_res_dict['AS14.30']

{'T': array([[1.]]),
 'R': array([[1.]]),
 'Z': array([[1.]]),
 'Q': array([0.39793925]),
 'd': 7.790178569605351,
 'a_init': array([0.]),
 'P_init': array([[1.e+08, 0.e+00, 0.e+00],
        [0.e+00, 1.e+08, 0.e+00],
        [0.e+00, 0.e+00, 1.e+08]])}

In [19]:
df_mood['value'].values

array([8., 7., 6., 7., 8., 7., 9., 8., 8., 8., 7., 8., 7., 8., 8., 8., 7.,
       8., 8., 8., 7., 8., 8., 8., 8., 8., 8., 8., 8., 6., 8., 8., 4., 8.,
       8., 8., 7., 7., 7., 7., 9., 9., 8., 9., 8., 9., 8., 8., 8., 8., 7.,
       7., 7., 7., 8., 8., 8., 8., 8., 8., 8., 7., 8., 8., 6., 8., 8., 7.,
       8., 9., 8., 8., 8., 8., 8., 8., 7., 8., 8., 8., 8., 8., 8., 8., 8.,
       8., 8., 8., 7., 8., 7., 8., 7., 8., 8., 9., 7., 7., 8., 8., 8., 8.,
       9., 8., 8., 8., 9., 7., 7., 7., 9., 8., 8., 7., 7., 7., 8., 7., 7.,
       7., 9., 7., 8., 8., 7., 8., 7., 8., 8., 8., 8., 9., 9., 8., 8., 8.,
       7., 7., 7., 7., 7., 8., 8., 7., 8., 8., 8., 8., 7., 8., 8., 8., 8.,
       8., 7., 8., 8., 7., 8., 9., 8., 8., 7., 8., 8., 8., 7., 8., 8., 8.,
       8., 9., 8., 8., 8., 6., 8., 8., 8., 7., 8., 7., 8., 8., 8., 8., 8.,
       8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 7., 8., 8., 9.,
       8., 8., 8., 8., 8., 8., 8., 8., 7., 8., 8., 8., 7., 8., 8., 8., 8.,
       8., 8., 8.])

In [53]:
np.diag(10e7 * np.ones(m))

array([[1.e+08, 0.e+00, 0.e+00],
       [0.e+00, 1.e+08, 0.e+00],
       [0.e+00, 0.e+00, 1.e+08]])

In [48]:
# KF = KalmanFilter(y=df['Nile'], 
#                     a_init=np.zeros(np.maximum()).reshape(1,1), 
#                     P_init=np.array([10e7]).reshape(1,1),
#                     H=np.array([15099]).reshape(1,1), 
#                     Q = np.array([1469.1]).reshape(1,1), 
#                     R = np.array([1]).reshape(1,1))

# def KF_arma(p, q, df):
#     KF = KalmanFilter(y=df['Nile'],
#                     a_init=np.zeros(np.maximum(p, q+1)).reshape(np.maximum(p, q+1),1),
#                     P_init = np.diag(10e7 * np.ones(np.maximum(p, q+1))),
#                     H = np.array([0]).reshape(1,1),
#                     Q)
params['sigma2']

0.19812905785970927

In [49]:
# Get all variables
variables = df['variable'].unique()

# Iterate over variables
for variable in variables:
    # Get the data for this variable
    df_variable = df[df['variable'] == variable]

    # Get 95th percentile
    percentile_95 = np.percentile(df_variable['value'], 95)

    # Describe value column for variable
    print(f"Variable {variable}:")
    print(df_variable['value'].describe())

Variable mood:
count    5641.000000
mean        6.992555
std         1.032769
min         1.000000
25%         7.000000
50%         7.000000
75%         8.000000
max        10.000000
Name: value, dtype: float64
Variable circumplex.arousal:
count    5597.000000
mean       -0.098624
std         1.051868
min        -2.000000
25%        -1.000000
50%         0.000000
75%         1.000000
max         2.000000
Name: value, dtype: float64
Variable circumplex.valence:
count    5487.000000
mean        0.687808
std         0.671298
min        -2.000000
25%         0.000000
50%         1.000000
75%         1.000000
max         2.000000
Name: value, dtype: float64
Variable activity:
count    22965.000000
mean         0.115958
std          0.186946
min          0.000000
25%          0.000000
50%          0.021739
75%          0.158333
max          1.000000
Name: value, dtype: float64
Variable screen:
count    96578.000000
mean        75.335206
std        253.822497
min          0.035000
25%        

In [92]:
df.append(df_variable)
# print(df.loc[idx_arousal])

AttributeError: 'DataFrame' object has no attribute 'append'

In [30]:
idx_mood = np.logical_and(df['id'] == "AS14.30" , df['variable'] == 'mood')
df_mood = df[idx_mood].copy()
df_mood

# Time sections

# Get first date
first_date = df_mood['time'].min().date()

# Last date
last_date = df_mood['time'].max().date()

# Iterate over dates by day
for date in pd.date_range(start=first_date, end=last_date, freq='D'):
    # Get all rows for this date
    idx_date = df_mood['time'].dt.date == date.date()
    df_date = df_mood[idx_date].copy()

    if len(df_date) == 5:
        print(date, len(df_date), df_date)
        continue

    # check for observation between 9.00 and 12.00
    hour_sets = [[9,12], [12,15], [15,18], [18,21], [21,24]]

    for hour_set in hour_sets:
        idx_cur = np.logical_and(df_date['time'].dt.hour >= hour_set[0], df_date['time'].dt.hour < hour_set[1])
        if len(df_date[idx_cur]) == 0:
            print(f"Missing observation for {date} between {hour_set[0]} and {hour_set[1]}")
            # Create new row
            new_row = df_date.iloc[0].copy()
            new_row['time'] = date.replace(hour=hour_set[0])
            new_row['value'] = np.nan

            df = pd.concat([df, pd.DataFrame(new_row).T], ignore_index=True)
            df_date = pd.concat([df_date, pd.DataFrame(new_row).T], ignore_index=True)



Missing observation for 2014-03-20 00:00:00 between 9 and 12
Missing observation for 2014-03-20 00:00:00 between 12 and 15
2014-03-21 00:00:00 5            id                time variable  value
4902  AS14.30 2014-03-21 09:00:00     mood    7.0
4903  AS14.30 2014-03-21 12:00:00     mood    8.0
4904  AS14.30 2014-03-21 15:00:00     mood    7.0
4905  AS14.30 2014-03-21 18:00:00     mood    9.0
4906  AS14.30 2014-03-21 21:00:00     mood    8.0
2014-03-22 00:00:00 5            id                time variable  value
4907  AS14.30 2014-03-22 09:00:00     mood    8.0
4908  AS14.30 2014-03-22 12:00:00     mood    8.0
4909  AS14.30 2014-03-22 15:00:00     mood    7.0
4910  AS14.30 2014-03-22 18:00:00     mood    8.0
4911  AS14.30 2014-03-22 21:00:00     mood    7.0
2014-03-23 00:00:00 5            id                time variable  value
4912  AS14.30 2014-03-23 09:00:00     mood    8.0
4913  AS14.30 2014-03-23 12:00:00     mood    8.0
4914  AS14.30 2014-03-23 15:00:00     mood    8.0
4915  AS14.

In [82]:
df

Unnamed: 0,id,time,variable,value
0,AS14.01,2014-02-26 13:00:00,mood,6.0
1,AS14.01,2014-02-26 15:00:00,mood,6.0
2,AS14.01,2014-02-26 18:00:00,mood,6.0
3,AS14.01,2014-02-26 21:00:00,mood,7.0
4,AS14.01,2014-02-27 09:00:00,mood,6.0
...,...,...,...,...
376914,AS14.30,2014-04-14 15:00:00,mood,
376915,AS14.30,2014-04-20 12:00:00,mood,
376916,AS14.30,2014-04-25 15:00:00,mood,
376917,AS14.30,2014-05-05 18:00:00,mood,


22

In [114]:
np.diag(10e7 * np.ones(q+1)))

Unnamed: 0,id,time,variable,value
376903,AS14.30,2014-04-06 11:38:32.033,appCat.weather,4.117
376904,AS14.30,2014-04-07 18:13:31.111,appCat.weather,11.039
376905,AS14.30,2014-04-07 18:16:49.107,appCat.weather,60.829
376906,AS14.30,2014-04-07 18:21:04.197,appCat.weather,3.018
376907,AS14.30,2014-04-11 07:51:16.948,appCat.weather,8.032
376908,AS14.30,2014-04-19 11:00:32.747,appCat.weather,3.008
376909,AS14.30,2014-04-26 10:19:07.434,appCat.weather,7.026
376910,AS14.30,2014-04-27 00:44:48.450,appCat.weather,23.033
376911,AS14.32,2014-04-07 18:25:14.036,appCat.weather,22.431
376912,AS14.30,2014-03-20 09:00:00.000,mood,


Unnamed: 0,id,time,variable,value
0,AS14.01,2014-02-26 13:00:00,mood,6.0
1,AS14.01,2014-02-26 15:00:00,mood,6.0
2,AS14.01,2014-02-26 18:00:00,mood,6.0
3,AS14.01,2014-02-26 21:00:00,mood,7.0
4,AS14.01,2014-02-27 09:00:00,mood,6.0
...,...,...,...,...
376918,AS14.30,2014-04-14 15:00:00,mood,
376919,AS14.30,2014-04-20 12:00:00,mood,
376920,AS14.30,2014-04-25 15:00:00,mood,
376921,AS14.30,2014-05-05 18:00:00,mood,


In [29]:
model.params()

intercept    2.938124
ar.L1       -0.054876
ar.L2        0.630987
ma.L1        0.443589
ma.L2       -0.447554
sigma2       0.198129
dtype: float64