In [1]:
# lib manipulasi data
import pandas as pd
import numpy as np

# pustaka uji stasioneritas arch
from arch.unitroot import *
from arch.unitroot import ADF
from arch.unitroot import PhillipsPerron
from arch.unitroot import KPSS

In [2]:
# read the BTC-USD price table; the "Date" column is parsed into datetimes
dataset = pd.read_csv(
    "dataset/BTC-USD-norm.csv",
    parse_dates=["Date"],
)

In [3]:
# show metadata
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3408 entries, 0 to 3407
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    3408 non-null   datetime64[ns]
 1   Open    3408 non-null   float64       
 2   High    3408 non-null   float64       
 3   Low     3408 non-null   float64       
 4   Close   3408 non-null   float64       
dtypes: datetime64[ns](1), float64(4)
memory usage: 133.2 KB
None


- Cek Stasioneritas Data

In [4]:
# Augmented Dickey-Fuller test on the Close column: 60 lags, with a constant
# and linear time trend ("ct"). Null hypothesis: the series has a unit root.
adf = ADF(
    y=dataset["Close"],
    lags=60,
    trend="ct",
)
print(adf.summary().as_text())

   Augmented Dickey-Fuller Results   
Test Statistic                 -2.783
P-value                         0.203
Lags                               60
-------------------------------------

Trend: Constant and Linear Time Trend
Critical Values: -3.96 (1%), -3.41 (5%), -3.13 (10%)
Null Hypothesis: The process contains a unit root.
Alternative Hypothesis: The process is weakly stationary.


In [5]:
# Phillips-Perron test on the Close column: 60 lags, with a constant and
# linear time trend ("ct"). Null hypothesis: the series has a unit root.
pp = PhillipsPerron(
    y=dataset["Close"],
    lags=60,
    trend="ct",
)
print(pp.summary().as_text())

     Phillips-Perron Test (Z-tau)    
Test Statistic                 -2.371
P-value                         0.395
Lags                               60
-------------------------------------

Trend: Constant and Linear Time Trend
Critical Values: -3.96 (1%), -3.41 (5%), -3.13 (10%)
Null Hypothesis: The process contains a unit root.
Alternative Hypothesis: The process is weakly stationary.


In [6]:
# KPSS test on the Close column: 60 lags, with a constant and linear time
# trend ("ct"). Unlike ADF / Phillips-Perron, the null hypothesis here is
# that the series is (weakly) stationary.
kpss = KPSS(
    y=dataset["Close"],
    lags=60,
    trend="ct",
)
print(kpss.summary().as_text())

    KPSS Stationarity Test Results   
Test Statistic                  0.241
P-value                         0.006
Lags                               60
-------------------------------------

Trend: Constant and Linear Time Trend
Critical Values: 0.22 (1%), 0.15 (5%), 0.12 (10%)
Null Hypothesis: The process is weakly stationary.
Alternative Hypothesis: The process contains a unit root.


- Cek Autokorelasi Data

In [7]:
# lib analysis statistic
import statsmodels.api as sm
import statsmodels.stats.stattools as tsa

# lib data preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [8]:
# ordered 80/20 split of the Close column; shuffle=False keeps the rows in
# their original order, so the test set is the final 20% of the series
train_data, test_data = train_test_split(
    dataset["Close"],
    train_size=0.80,
    test_size=0.20,
    shuffle=False,
)

In [9]:
# build (window, target) pairs for supervised learning
def create_dataset(look_back, dataset):
    """Slice a 2-D array into sliding-window samples.

    For every row index ``i`` from ``look_back`` to ``len(dataset) - 1`` the
    ``look_back`` values of column 0 that precede row ``i`` form one input
    sample, and the value of column 0 at row ``i`` is its target.

    Parameters
    ----------
    look_back : int
        Number of preceding rows that make up one input window.
    dataset : numpy.ndarray
        2-D array indexed as ``dataset[row, 0]``; only column 0 is read.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, Y)`` where ``X`` stacks the windows and ``Y`` holds the targets.
        If ``dataset`` has no more than ``look_back`` rows both arrays are
        empty.
    """
    # row positions that have a complete look_back history before them
    target_rows = range(look_back, len(dataset))

    windows = [dataset[row - look_back:row, 0] for row in target_rows]
    targets = [dataset[row, 0] for row in target_rows]

    return np.array(windows), np.array(targets)

In [10]:
# 4. supervised learning: reshape each split into a single column and turn it
# into 60-step windows (X) with the next value as target (Y)
x_train, y_train = create_dataset(
    look_back=60,
    dataset=np.array(train_data).reshape(-1, 1),
)
x_test, y_test = create_dataset(
    look_back=60,
    dataset=np.array(test_data).reshape(-1, 1),
)

In [11]:
# put the test-split windows and their targets side by side:
# one column per lag position plus a final "Y" target column
data = pd.concat(
    [pd.DataFrame(x_test), pd.Series(y_test, name="Y")],
    axis="columns",
)

In [12]:
# regressors: every column except the target; response: the "Y" column
x = data.drop(columns=["Y"]).to_numpy()
y = data["Y"].to_numpy(copy=True)

In [13]:
# ordinary least squares of the target on the lagged-value columns
# NOTE(review): x is passed as-is, so the model has no intercept term (the
# summary reports "uncentered" R-squared) - confirm this is intended.
res = sm.OLS(endog=y, exog=x).fit()
print(res.summary())

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.999
Model:                            OLS   Adj. R-squared (uncentered):              0.999
Method:                 Least Squares   F-statistic:                          1.461e+04
Date:                Wed, 21 Aug 2024   Prob (F-statistic):                        0.00
Time:                        12:02:35   Log-Likelihood:                          1861.0
No. Observations:                 622   AIC:                                     -3602.
Df Residuals:                     562   BIC:                                     -3336.
Df Model:                          60                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [14]:
# Durbin-Watson statistic of the OLS residuals; a value close to 2 indicates
# no first-order serial correlation
tsa.durbin_watson(resids=res.resid)

1.998572913134974