In [None]:
! pip install -r ../requirements.txt

In [1]:
# Write PSY bubble prediction algorithm in python

import numpy as np
from statsmodels.tsa.stattools import adfuller
from tqdm import tqdm
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from lppls import lppls, data_loader, lppls_cmaes
import sys
# sys.path.append('../')
# import risk_measures as rm

def PSY(y, window=None, IC=0, adflag=0):
    '''
    Backward sup ADF (BSADF) sequence of the series y.

    For every end index r2 in window .. len(y)-1 the ADF statistic is computed
    on each subsample y[r1:r2+1] with r1 in 0 .. r2-window, and the largest of
    those statistics is stored at position r2. The shortest subsample therefore
    holds window + 1 observations.

    Worked example with t = 100 and window = 10:
        r2 runs over 10, 11, ..., 99
        for r2 = 10 the only subsample is y[0:11]
        for r2 = 99 the subsamples are y[0:100], y[1:100], ..., y[89:100]

    arg:
        y = 1-D sequence of observations (anything len() and slicing work on)
        window = minimum window size; when None it is derived from the series
                 length t as floor(t * (0.01 + 1.8 / sqrt(t)))
        IC = accepted for signature compatibility, not used by this function
        adflag = lag order handed to adfuller as maxlag (autolag is disabled,
                 so exactly this lag order is used)
    output:
        numpy array of length len(y); the first `window` entries are 0.0
        padding, the remaining entries are the sup ADF statistics. If the
        series is not longer than the window no statistic can be computed
        and the array is all zeros.
    '''
    t = len(y)

    # Empty input: the default-window formula would divide by sqrt(0).
    if t == 0:
        return np.zeros(0)

    if window is None:
        window = int(np.floor(t * (0.01 + 1.8 / np.sqrt(t))))

    # No end index has a full window behind it. Return padding of the series
    # length so the result can always be aligned with y.
    if window >= t:
        return np.zeros(t)

    # Positions before `window` stay at 0.0 (padding).
    bsadf = np.zeros(t)

    for r2 in tqdm(range(window, t)):
        # ADF statistic for every admissible start point r1 of the sample ending at r2
        rwadft = [
            adfuller(y[r1:r2 + 1], maxlag=adflag, autolag=None)[0]
            for r1 in range(r2 - window + 1)
        ]
        bsadf[r2] = np.max(rwadft)

    return bsadf

def forwardMDD(df, window=12):
    """
    Forward-looking drawdown of the close price over a fixed number of rows.

    For each row i that has a full window ahead of it, the `window` closes
    starting at row i (row i included) are inspected: the highest close is the
    peak (first occurrence on ties), the lowest close at or after that peak is
    the trough, and (peak - trough) / peak is recorded.
    Only the drop that follows the window's highest close is measured; a larger
    fall from an earlier, lower peak inside the same window is not captured.

    NaN closes are skipped when locating the peak and the trough; a window
    made up only of NaN raises ValueError.

    arg:
        df = dataframe with a "close" column, one row per bar
        window = number of consecutive rows to look at (default 12)
    output:
        list with one value per row of df; rows without a full window ahead
        of them (the last `window` rows) are filled with 0
    """
    # Work on a plain float array: positions instead of index labels, so the
    # result does not depend on what the dataframe index looks like.
    closes = df["close"].to_numpy(dtype=float)
    n_rows = len(closes)
    # Number of rows that have a complete window; never negative, so the
    # output length equals len(df) even when window > len(df).
    n_full = max(n_rows - window, 0)

    fmdd = []
    for i in range(n_full):
        ahead = closes[i:i + window]
        peak_pos = np.nanargmax(ahead)
        peak = ahead[peak_pos]
        trough = np.nanmin(ahead[peak_pos:])
        fmdd.append(float((peak - trough) / peak))

    # Tail rows have no complete window ahead of them.
    fmdd.extend([0] * (n_rows - n_full))

    return fmdd


In [19]:
price_data_df = pd.read_csv('data/30m_BTC_USD.csv')
# price_data_df.drop(columns=['Unnamed: 0'], inplace=True)
# Work on `length` rows starting at row `start`. The slice is copied so the
# column assignments below write to an independent frame, not to a slice of
# the frame that was just loaded.
start, length = 5000, 1000
price_data_df = price_data_df.iloc[start:start+length].copy()

basetime = 0.5 # 0.5 hours or 30 mins
timeperiod_list = ['3h', '6h', '12h', '1d', '3d', '7d']
for timeperiod in timeperiod_list:
    # Skip periods whose PSY column already exists (the PSY pass is the slow part).
    if f'PSY_{timeperiod}' not in price_data_df.columns:
        # Convert the period label into a number of bars of `basetime` hours.
        if timeperiod[-1] == 'h':
            window = int(int(timeperiod[:-1]) * 1/basetime)
        elif timeperiod[-1] == 'd':
            window = int(int(timeperiod[:-1]) * 1/basetime * 24)
        else:
            # Without this branch an unknown suffix would silently reuse the
            # window of the previous period.
            raise ValueError(f"Unsupported time period {timeperiod!r}: expected an 'h' or 'd' suffix")
        price_data_df[f'PSY_{timeperiod}'] = PSY(price_data_df['close'].values, window=window, IC=0, adflag=0)
        price_data_df[f'FMDD_{timeperiod}'] = forwardMDD(price_data_df, window=window)


# price_data_df.to_csv('data/30m_BTC_USD_PSY.csv', index=False)

100%|██████████| 994/994 [01:29<00:00, 11.09it/s]
100%|██████████| 988/988 [01:29<00:00, 11.04it/s]
100%|██████████| 976/976 [01:26<00:00, 11.22it/s]
100%|██████████| 952/952 [01:22<00:00, 11.60it/s]
100%|██████████| 856/856 [01:07<00:00, 12.60it/s]
100%|██████████| 664/664 [00:41<00:00, 15.82it/s]


In [20]:
# Empirical 95th percentile of each PSY column, in timeperiod_list order.
psy95_list = []
for timeperiod in timeperiod_list:
    psy_series = price_data_df[f'PSY_{timeperiod}']
    threshold = np.percentile(psy_series, 95)
    psy95_list.append(threshold)
    # Flag column: 1 where the statistic reaches the threshold, 0 otherwise.
    price_data_df[f'PSY_{timeperiod}_95'] = (psy_series >= threshold).astype('int64')
psy95_list

[0.7847196232025657,
 0.6462355396757701,
 0.40928247370944737,
 0.3103970897066077,
 0.2567457626865657,
 0.04063190036417481]

In [22]:
# One figure per time period: close price on the primary y-axis, the PSY
# percentile flag and the forward drawdown on the secondary y-axis.
timestamps = price_data_df['timestamp']
for timeperiod in timeperiod_list:
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    # (column, legend name, drawn on secondary axis?) in drawing order
    trace_specs = [
        ('close', 'BTC Price', False),
        (f'PSY_{timeperiod}_95', f'PSY {timeperiod} 95% confidence', True),
        (f'FMDD_{timeperiod}', 'FMDD', True),
    ]
    for column, label, on_secondary in trace_specs:
        fig.add_trace(
            go.Scatter(x=timestamps, y=price_data_df[column], name=label),
            secondary_y=on_secondary,
        )
    fig.show()