In [19]:
import numpy as np
import pandas as pd
import cufflinks as cf
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot 
import plotly.express as px
from datetime import datetime,date

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Initialise plotly's offline notebook mode (called with connected=True).
init_notebook_mode(connected=True)
# Cufflinks configuration: private sharing, 'pearl' theme, offline mode.
cf.set_config_file(sharing='private',theme='pearl',offline=True)
# Uncomment to print the installed cufflinks version:
#print("Cufflinks Version : {}".format(cf.__version__))

# Load the BTC/USD history; the first CSV column is used as the index and parsed as dates.
df = pd.read_csv('./data/btc-usd-max.csv',index_col=0,parse_dates=True )
# Put the data on a regular daily grid, keeping the first observation of each day.
df = df.resample('D').first()
# Forward-fill days that have no price. The result is assigned back to the column
# rather than calling fillna(..., inplace=True) on the df['price'] selection: that
# chained in-place form is not guaranteed to update df (it is a no-op under pandas
# Copy-on-Write), and fillna(method=...) is deprecated in favour of ffill().
df['price'] = df['price'].ffill()
# to get the log price
# df['price'] = np.log(df['price'])
df.head()

Unnamed: 0_level_0,price,market_cap,total_volume
snapped_at,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-04-28 00:00:00+00:00,135.3,1500518000.0,0.0
2013-04-29 00:00:00+00:00,141.96,1575032000.0,0.0
2013-04-30 00:00:00+00:00,135.3,1501657000.0,0.0
2013-05-01 00:00:00+00:00,117.0,1298952000.0,0.0
2013-05-02 00:00:00+00:00,103.43,1148668000.0,0.0


In [5]:
# Ignore harmless warnings
import warnings
warnings.filterwarnings("ignore")

In [15]:
from statsmodels.tsa.stattools import adfuller

def adf_test(series,title=''):
    """
    Print an Augmented Dickey-Fuller report for a time series.

    The report lists the test statistic, the p-value, the number of lags
    used, the number of observations and each critical value, followed by
    a three-line verdict based on a 0.05 p-value threshold. Everything is
    written to stdout; nothing is returned.

    series : pandas Series to test (missing values are dropped first)
    title  : optional text appended to the report header
    """
    print(f'Augmented Dickey-Fuller Test: {title}')

    # NaNs (e.g. those introduced by differencing) are removed before testing
    adf_result = adfuller(series.dropna(),autolag='AIC')

    # The first four result entries, then one row per critical value
    row_names = ['ADF test statistic','p-value','# lags used','# observations']
    row_values = list(adf_result[:4])
    for level,threshold in adf_result[4].items():
        row_names.append(f'critical value ({level})')
        row_values.append(threshold)
    report = pd.Series(row_values,index=row_names)

    # to_string() leaves out the trailing "dtype: float64" line
    print(report.to_string())

    p_value = adf_result[1]
    if p_value <= 0.05:
        verdict = (
            "Strong evidence against the null hypothesis",
            "Reject the null hypothesis",
            "Data has no unit root and is stationary",
        )
    else:
        verdict = (
            "Weak evidence against the null hypothesis",
            "Fail to reject the null hypothesis",
            "Data has a unit root and is non-stationary",
        )
    for message in verdict:
        print(message)

In [22]:
# Print the ADF report for the price column of df
adf_test(df['price'])

Augmented Dickey-Fuller Test: 
ADF test statistic        -1.631209
p-value                    0.466858
# lags used               29.000000
# observations          3404.000000
critical value (1%)       -3.432273
critical value (5%)       -2.862389
critical value (10%)      -2.567222
Weak evidence against the null hypothesis
Fail to reject the null hypothesis
Data has a unit root and is non-stationary


In [23]:
# Manual version of the report that adf_test prints, built step by step.
# Reading the p-value: p <= 0.05 rejects the unit-root null hypothesis, i.e. the
# series looks stationary; p > 0.05 means a unit root cannot be ruled out and the
# series is treated as non-stationary (it would usually be differenced before
# fitting ARMA-type forecasting models).
dftest = adfuller(df['price'].dropna(),autolag='AIC')  # drop NaNs first, as adf_test does
dfout = pd.Series(dftest[0:4],index=['ADF test statistic','p-value','# lags used','# observations'])

# Append one row per critical value returned in the fifth element of the result
for key,val in dftest[4].items():
    dfout[f'critical value ({key})']=val
print(dfout)

ADF test statistic        -1.631209
p-value                    0.466858
# lags used               29.000000
# observations          3404.000000
critical value (1%)       -3.432273
critical value (5%)       -2.862389
critical value (10%)      -2.567222
dtype: float64
