In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
import re
import math as ma

from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.stattools import adfuller

from pandas_profiling import ProfileReport
from numpy import log

In [2]:
def days_prior(df: pd.DataFrame, days: int, time_frame: int = 0) -> pd.DataFrame:
    '''
    Return a window of rows counted back from the end of df.

    The window starts floor(days) rows before the end of the frame. When
    time_frame is 0 the window runs to the last row; otherwise it covers
    floor(time_frame) rows beginning at that starting row.

    Rows are selected by position rather than by comparing index labels to a
    row count, so the frame may carry any index (e.g. date labels), not only
    the default 0..n-1 integer index. The returned rows keep their original
    index labels.

    df: frame to slice (presumably one row per day, oldest first -- confirm)
    days: how many rows back from the end the window starts
    time_frame: window length in rows; 0 means "up to the end of the frame"
    '''

    epoch = len(df.index) - ma.floor(days)

    # iloc treats negative bounds as "count from the end", so clamp at 0 to
    # keep the meaning "start at the first row" when days exceeds len(df).
    start = max(epoch, 0)

    if time_frame == 0:
        return df.iloc[start:]

    # A stop at or before the first row must give an empty window, not wrap around.
    stop = max(epoch + ma.floor(time_frame), 0)

    return df.iloc[start:stop]
        

def differencing(df: pd.DataFrame, columns: list, duration: int) -> pd.DataFrame:
    '''
    Difference between two columns over the most recent rows of df.

    df: source frame; must contain a 'Date' column and the columns named in columns
    columns: column names; the result is columns[0] minus columns[1]
    duration: in days; passed to days_prior to choose how far back the window starts

    Returns a frame with a 'Date' column and a 'Deviation' column, carrying the
    index labels of the selected rows.
    '''

    new_df = days_prior(df, duration)
    minuend, subtrahend = columns[0], columns[1]

    # One vectorized subtraction instead of a per-row label lookup: same values,
    # no Python-level loop, and no ambiguity when index labels repeat.
    deviation_data = new_df[minuend] - new_df[subtrahend]

    return pd.DataFrame({'Date': new_df['Date'], 'Deviation': deviation_data})

In [3]:
# Load the Ethereum CSV, drop the currency and open/high/low columns,
# shorten the remaining column names, and index the frame by date.
df_ether = (
    pd.read_csv('../data/Ethereum.csv')
    .drop(columns=['Currency', '24h Open (USD)', '24h High (USD)', '24h Low (USD)'])
    .rename(columns={'Date': 'date', 'Closing Price (USD)': 'price'})
    .set_index('date')
)

df_ether

Unnamed: 0_level_0,price
date,Unnamed: 1_level_1
2016-12-17,7.819352
2016-12-18,7.865732
2016-12-19,7.860455
2016-12-20,7.618576
2016-12-21,7.640694
...,...
2021-12-31,3710.231568
2022-01-01,3676.966786
2022-01-02,3766.822444
2022-01-03,3831.536416


In [4]:
# Augmented Dickey-Fuller (ADF) test on the price frame.
# result[0] is the test statistic and result[1] the p-value.
result = adfuller(df_ether)
print(f'ADF Statistic: {result[0]:f}')
print(f'p-value: {result[1]:f}')

ADF Statistic: 0.365150
p-value: 0.980144
