In [None]:
from tqdm import tqdm
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from pmdarima.arima.utils import ndiffs

In [None]:
# Load the cleaned daily dataset from disk.
# NOTE: unpickling can execute arbitrary code, so only load files you trust.
df_daily = pd.read_pickle(filepath_or_buffer="data_clean_daily.pkl")

In [None]:
def ADF_test(df, user=None):
    """Run an Augmented Dickey-Fuller test on ``mood_score`` and print the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``date`` and ``mood_score`` columns. An ``id`` column, when
        present, supplies the label for the printed header.
    user : str, optional
        Label printed in the header. When omitted, the distinct values of
        ``df['id']`` are used (or ``'unknown'`` if there is no ``id`` column).

    Returns
    -------
    None
        The statistic and p-value are printed, not returned. ``df`` itself is
        left unmodified.
    """
    if user is None:
        # Derive the label from the data instead of relying on a notebook
        # global that may not exist on a fresh kernel.
        ids = df['id'].unique() if 'id' in df else []
        user = ', '.join(str(i) for i in ids) if len(ids) else 'unknown'

    # set_index returns a new frame, so its result has to be used. Order the
    # observations by date and drop missing scores, which adfuller cannot handle.
    series = df.set_index('date')['mood_score'].sort_index().dropna()

    # Augmented Dickey Fuller test:
    # p-value < 0.05? --> time-series is stationary
    # p-value > 0.05? --> time-series is non-stationary
    result = adfuller(series)
    print('Augmented Dickey Fuller test for ', user)
    print('ADF Statistic: %f' % result[0])
    print('p-value: %f' % result[1])
    print()

In [None]:
# Collect the columns used for the ARIMA analysis into one flat frame:
# the user id, the date (taken from 'time') and the 'mean' sub-column of 'mood'.
# The mood values are inserted positionally, as a plain list.
df_arima = pd.DataFrame({
    'id': df_daily['id'].copy(),
    'date': df_daily['time'].copy(),
    'mood_score': df_daily['mood']['mean'].copy().tolist(),
})
len(df_arima)

In [None]:
# Preview the first rows only rather than rendering the whole frame.
df_arima.head()

In [None]:
# Keep only the rows that belong to user 'AS14.01'.
df_user1 = df_arima.loc[df_arima['id'].eq('AS14.01')]

In [None]:
def plot_user(df, title='Mood'):
    """Plot ``mood_score`` against ``date`` as a red line and show the figure.

    ``df`` must provide ``date`` and ``mood_score`` entries; ``title`` is used
    as the plot title.
    """
    fig, ax = plt.subplots(figsize=(16, 6), dpi=100)
    ax.plot(df['date'], df['mood_score'], color='tab:red')
    ax.set(title=title, xlabel='Date', ylabel='Mood score')
    plt.show()

In [None]:
# Mood over time for the selected user.
plot_user(df_user1, title='Mood for user 1')

In [None]:
# Drop the rows without a mood score.
df_arima = df_arima[df_arima['mood_score'].notna()]

# df_user1 was sliced before this filter, so rebuild it from the cleaned frame;
# otherwise the statistical tests below would still receive the NaN rows.
df_user1 = df_arima[df_arima['id'] == 'AS14.01']

In [None]:
# Stationarity check on the mood series of the selected user.
ADF_test(df=df_user1)

In [None]:
def plot_diff(df):
    """Plot the mood series, its 1st and 2nd differences, and the ACF of each.

    The figure is a 3x2 grid: the left column shows the series itself, the
    right column shows the autocorrelation plot of the same series.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``mood_score`` column. Missing scores are dropped before
        differencing.

    Raises
    ------
    ValueError
        If fewer than four non-missing scores are available, since the
        second-order difference would then be too short for an ACF plot.
    """
    series = df['mood_score'].dropna()
    if len(series) < 4:
        raise ValueError(
            'plot_diff needs at least 4 non-missing mood_score values, got %d'
            % len(series)
        )

    # Note: this changes the global matplotlib defaults for later figures too.
    plt.rcParams.update({'figure.figsize':(9,7), 'figure.dpi':120})

    # Share the x-axis per column only: the left column is indexed by
    # observation, the right column by lag, so the two must not be linked.
    fig, axes = plt.subplots(3, 2, sharex='col')

    panels = [
        ('Original Series', series),
        ('1st Order Differencing', series.diff().dropna()),
        ('2nd Order Differencing', series.diff().diff().dropna()),
    ]
    for row, (title, values) in enumerate(panels):
        axes[row, 0].plot(values)
        axes[row, 0].set_title(title)
        # Size the lag range from the series actually plotted, so it can never
        # exceed the number of available observations.
        plot_acf(values, ax=axes[row, 1], lags=len(values) - 1)

    plt.show()

In [None]:
# Series, differences and their ACF plots for the selected user.
plot_diff(df=df_user1)

In [None]:
def stat_tests(df):
    """Print the result of ``ndiffs`` on ``mood_score`` for three tests.

    One line is printed per test, in the order ADF, KPSS, PP, each showing the
    value returned by ``ndiffs`` for that test.
    """
    checks = (
        ("ADF: ", 'adf'),    # Augmented Dickey-Fuller
        ("KPSS: ", 'kpss'),  # KPSS
        ("PP: ", 'pp'),      # PP
    )
    for prefix, test_name in checks:
        print(prefix + str(ndiffs(df['mood_score'], test=test_name)))

In [None]:
# Differencing order suggested by each test for the selected user.
stat_tests(df=df_user1)

In [None]:
# PACF plot of 1st differenced series
plt.rcParams.update({'figure.figsize':(9,3), 'figure.dpi':120})

# Drop missing scores before differencing so the PACF input contains no NaNs.
mood_diff = df_user1['mood_score'].dropna().diff().dropna()

# The left panel is indexed by observation and the right panel by lag,
# so the two x-axes are deliberately not shared.
fig, axes = plt.subplots(1, 2)
axes[0].plot(mood_diff)
axes[0].set_title('1st Differencing')

# The lag count must be an integer strictly below half the sample size,
# and it is derived from the series that is actually passed to plot_pacf.
# The y-limits are left to plot_pacf: partial autocorrelations are often
# negative, so a positive-only window would hide them.
max_lag = len(mood_diff) // 2 - 1
plot_pacf(mood_diff, ax=axes[1], lags=max_lag)

plt.show()