In [2]:
import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import adfuller

In [9]:
# Daily BTC price history: parse 'date' as timestamps and use it as the index.
file_path_price = r'C:\Users\Leo Hubmann\Desktop\thesis\data\price_data\bitcoin_2021_2024.csv'
df_price = (
    pd.read_csv(file_path_price, parse_dates=['date'], low_memory=False)
      .set_index('date')
)

# Keep only 2021-01-01 .. 2024-12-31 (label slice, both ends inclusive) as an
# independent copy so later edits to `btc` cannot touch `df_price`.
btc = df_price.loc[slice('2021-01-01', '2024-12-31')].copy()

# Daily log return = first difference of log(close); rows where the
# difference is undefined (at least the first one) are dropped.
daily_ret = (
    np.log(btc['close'])
      .diff(periods=1)
      .dropna()
)

In [10]:
# Daily sentiment scores (VADER and FinBERT), one CSV per model.
vader_path   = r'C:\Users\Leo Hubmann\Desktop\BachelorThesis_data\all_daily_vader_sentiment_v2.csv'
finbert_path = r'C:\Users\Leo Hubmann\Desktop\BachelorThesis_data\all_daily_finbert_sentiment_v4.csv'

# Both files are read the same way: parse 'date' and index by it.
# VADER is read first, then FinBERT.
df_v, df_f = (
    pd.read_csv(csv_path, parse_dates=['date']).set_index('date')
    for csv_path in (vader_path, finbert_path)
)

In [11]:
# Align BTC log returns with both sentiment scores on the date index, keep only
# dates where all three are present, then add standardised sentiment columns.
df = (
    pd.concat(
        {
            'btc_ret': daily_ret,
            'vader': df_v['daily_vader_sentiment'],
            'finbert': df_f['daily_finbert_score'],
        },
        axis=1,
    )
    .dropna()
    .assign(
        # z-scores use the population standard deviation (ddof=0)
        vader_z=lambda d: (d['vader'] - d['vader'].mean()) / d['vader'].std(ddof=0),
        finbert_z=lambda d: (d['finbert'] - d['finbert'].mean()) / d['finbert'].std(ddof=0),
        # composite = row-wise average of the two z-scores
        composite_z=lambda d: d[['vader_z', 'finbert_z']].mean(axis=1),
    )
)

In [None]:
# DataFrame.info is a method: call it so the column / dtype / non-null summary
# is printed (it writes to stdout itself and returns None, so no print()).
df.info()

In [24]:
def adf_test(series_dict):
    """Run the Augmented Dickey-Fuller test on several series and tabulate the results.

    Parameters
    ----------
    series_dict : dict
        Maps a display label to the series to test (a pandas Series or any
        1-D array-like). Missing values are dropped before testing, because
        ``adfuller`` does not accept NaNs.

    Returns
    -------
    pandas.DataFrame
        One row per entry, in input order, with columns
        ``['Series', 'ADF stat', 'p-value', '5% crit', 'Conclusion']``.
        Numeric values are rounded to 3 decimals; ``Conclusion`` is
        ``'Stationary'`` when the unrounded p-value is below 0.05, otherwise
        ``'Non-stationary'``.

    Notes
    -----
    The lag length is chosen by ``adfuller`` with ``autolag='AIC'``. Errors
    raised by ``adfuller`` itself propagate unchanged.
    """
    rows = []
    for name, ser in series_dict.items():
        # Resampled or merged inputs can contain NaN gaps; remove them so a
        # single gap does not abort the whole table.
        clean = pd.Series(ser).dropna()

        # Result layout: (statistic, p-value, used lags, n obs, critical values, icbest).
        result = adfuller(clean, autolag='AIC')
        adf_stat, p_value, crit = result[0], result[1], result[4]

        rows.append([
            name,
            round(adf_stat, 3),
            round(p_value, 3),
            round(crit['5%'], 3),
            'Stationary' if p_value < 0.05 else 'Non-stationary',
        ])
    return pd.DataFrame(
        rows,
        columns=['Series', 'ADF stat', 'p-value', '5% crit', 'Conclusion'],
    )

# ADF stationarity check on the daily series: report label -> column of `df`.
daily_res = adf_test({
    label: df[column]
    for label, column in (
        ('BTC_log_returns', 'btc_ret'),
        ('VADER_z', 'vader_z'),
        ('FinBERT_z', 'finbert_z'),
        ('Composite_z', 'composite_z'),
    )
})

print("\n DAILY FREQUENCY ")
display(daily_res)


 DAILY FREQUENCY 


Unnamed: 0,Series,ADF stat,p-value,5% crit,Conclusion
0,BTC_log_returns,-39.478,0.0,-2.864,Stationary
1,VADER_z,-4.447,0.0,-2.864,Stationary
2,FinBERT_z,-4.595,0.0,-2.864,Stationary
3,Composite_z,-4.252,0.001,-2.864,Stationary


In [22]:
# Aggregate every column of `df` to weekly bins anchored on Monday ('W-MON')
# by taking the within-bin mean.
# NOTE(review): `btc_ret` is averaged too, so the weekly value is the mean
# daily log return of the week, not the cumulative weekly log return (which
# would be a sum) - confirm this is intended.
weekly = df.resample(rule='W-MON').mean()

# Same ADF check as for the daily data, on the weekly aggregates.
weekly_res = adf_test({
    label: weekly[column]
    for label, column in (
        ('BTC_log_ret_W', 'btc_ret'),
        ('VADER_z_W', 'vader_z'),
        ('FinBERT_z_W', 'finbert_z'),
        ('Composite_z_W', 'composite_z'),
    )
})

print("\n WEEKLY FREQUENCY")
display(weekly_res)


 WEEKLY FREQUENCY


Unnamed: 0,Series,ADF stat,p-value,5% crit,Conclusion
0,BTC_log_ret_W,-14.741,0.0,-2.875,Stationary
1,VADER_z_W,-4.993,0.0,-2.876,Stationary
2,FinBERT_z_W,-3.767,0.003,-2.876,Stationary
3,Composite_z_W,-3.905,0.002,-2.876,Stationary
