In [1]:
import pandas as pd
import os

In [34]:
# Load the merged dataset. The first CSV column is used as the index and
# pandas is asked to parse that index as dates.
df = pd.read_csv(
    "./complete-merged-df.csv",
    index_col=0,
    parse_dates=True,
)
# Leave the preview as the last expression so the notebook renders it.
df.head()

In [38]:
# Select the rows that have a missing value in at least one column and
# report how many there are.
rows_with_nan = df[df.isna().any(axis=1)]
print("NaN values: ", len(rows_with_nan))

NaN values:  0


# Feature selection

In [40]:
from scipy.stats import linregress
import numpy as np

def correlation_test(v1, v2, graph=False):
    """Print correlation and simple linear-regression statistics for two samples.

    Parameters
    ----------
    v1, v2 :
        The two samples to compare. Both are passed unchanged to
        ``np.corrcoef`` and ``scipy.stats.linregress`` (``v1`` as x, ``v2`` as y).
    graph : bool, default False
        When true, also draw a scatter plot of ``v1`` against ``v2``.

    Returns
    -------
    None
        Everything is reported through ``print`` (and the optional figure).
    """
    coef = np.corrcoef(v1, v2)  # full correlation matrix, printed as-is
    linreg = linregress(v1, v2)
    print("Coef : ", coef)
    print("Linear Regression results : ")
    print(f"\tp = {round(linreg.pvalue,5)}")
    print(f"\tslope = {round(linreg.slope,5)}")
    print(f"\tintercept = {round(linreg.intercept,5)}")
    print(f"\tstd. err = {round(linreg.stderr, 5)}")
    print("-----------------------------")
    if graph:
        # Imported lazily: no cell of the notebook imports pyplot, so relying on
        # a global `plt` raised NameError on a fresh kernel. Keeping the import
        # here also avoids loading matplotlib when no plot is requested.
        import matplotlib.pyplot as plt

        plt.scatter(v1, v2)
        plt.show()

# Column pairs to compare, in the order their reports should be printed.
column_pairs = [
    ('n-transactions', 'SVI'),
    ('close', 'cost-per-transaction'),
    ('close', 'hash-rate'),
    ('close', 'n-transactions'),
    ('close', 'Gold price'),
]
for first_column, second_column in column_pairs:
    correlation_test(df[first_column], df[second_column])

# Unit Root Testing

In [None]:
from statsmodels.tsa.stattools import adfuller

def make_stationary(data: pd.Series, alpha: float = 0.05, max_diff_order: int = 10) -> dict:
    """Difference a time series until the ADF test rejects a unit root.

    The series is first tested as-is. If it is not stationary at level
    ``alpha``, it is first-differenced repeatedly (order 1, 2, ...) and
    re-tested after every pass; the lowest order that passes is used.

    Parameters
    ----------
    data : pd.Series
        The time series to make stationary.
    alpha : float, default 0.05
        Significance level compared against the ADF p-value.
    max_diff_order : int, default 10
        Highest differencing order to try (inclusive).

    Returns
    -------
    dict
        ``'differencing_order'``: the order that was applied (0 if ``data``
        was already stationary).
        ``'time_series'``: ``np.ndarray`` with the same length as ``data``.
        For order ``d > 0`` the first ``d`` positions, which differencing
        leaves undefined, hold the mean of the differenced values.

    Raises
    ------
    ValueError
        If no order in ``1..max_diff_order`` yields an ADF p-value below
        ``alpha``.
    """
    # Test to see if the time series is already stationary
    if adfuller(data)[1] < alpha:
        return {
            'differencing_order': 0,
            'time_series': np.array(data)
        }

    differenced = data
    for diff_order in range(1, max_diff_order + 1):
        # Apply one more first difference per pass. `Series.diff(k)` would be a
        # lag-k difference (x_t - x_{t-k}), which is not order-k differencing.
        differenced = differenced.diff()

        # Run the test only on values that are actually defined. Filling the
        # leading gaps with the mean of the *level* series would inject a large
        # artificial outlier into the differenced data and bias the test.
        observed = differenced.dropna()
        if adfuller(observed)[1] < alpha:
            # Keep the original length for callers; pad the undefined leading
            # positions with a value neutral to the differenced series.
            stationary_series = differenced.fillna(observed.mean())
            return {
                'differencing_order': diff_order,
                'time_series': np.array(stationary_series)
            }

    raise ValueError(
        f"Series is not stationary at alpha={alpha} for any differencing "
        f"order up to {max_diff_order}"
    )