In [None]:
from enum import IntEnum
from glob import glob
import importlib
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import seaborn as sns
from timer import timer

from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt

from bokeh.io import output_notebook, export_png
from bokeh.layouts import row, column
from bokeh.models import ColumnDataSource, DatetimeTickFormatter, NumeralTickFormatter, HoverTool #, LinearAxis, Range1d
from bokeh.plotting import figure, show, output_file, save

# Let pandas display up to 100 rows per frame, and load bokeh's notebook output.
pd.set_option('display.max_rows', 100)
output_notebook()

In [None]:
# Import the local helper package, then reload the package and each submodule
# used below so that edits made to them on disk are picked up without
# restarting the kernel. The package is reloaded before its submodules are
# imported and reloaded.
import jupyter_helpers
importlib.reload(jupyter_helpers)
from jupyter_helpers import bokeh_helpers, preprocess_data
importlib.reload(bokeh_helpers)
importlib.reload(preprocess_data)

# Globals
# Project root. Defaults to the original author's checkout; set the
# POLKADOT_TRADING_DIR environment variable to run the notebook elsewhere.
d = os.environ.get('POLKADOT_TRADING_DIR', '/home/kapil/Desktop/polkadot-trading')

# Addresses of the pairs listed in the liquid-pairs CSV.
pair_addresses = set(pd.read_csv(f'{d}/data/stellaswap_liquid_pairs.csv').pair_address)

# Metadata snapshot, restricted to those pairs.
x = pd.read_csv(f'{d}/data/stellaswap_metadata_snapshot.csv')
x = x[x.pair_address.isin(pair_addresses)]

# pair address -> pair symbol, with two symbols overridden by hand.
pair_d = dict(zip(x.pair_address, x.symbol))
pair_d['0x555B74dAFC4Ef3A5A1640041e3244460Dc7610d1'] = 'USDC_multi/WGLMR'
pair_d['0x9bFcf685e641206115dadc0C9ab17181e1d4975c'] = 'USDC_mad/WGLMR'

# token address -> token symbol, merged from both sides of every pair
# (token1 entries win when an address appears on both sides).
token_d = {**dict(zip(x.token0_address, x.token0_symbol)), **dict(zip(x.token1_address, x.token1_symbol))}

# pair address -> (token0 address, token1 address)
pair_to_tokens = dict(zip(x.pair_address, tuple(zip(x.token0_address, x.token1_address))))

print('Token pairs:', list(pair_d.values()))
print('Tokens:', list(token_d.values()))
bh = bokeh_helpers.BokehHelper(pair_d, token_d)

def highlight(s):
    """Return one CSS background declaration per element of ``s``, keyed on its row type.

    Args:
        s: An object exposing a ``row_type`` attribute and supporting ``len()``.
            Presumably a DataFrame row passed by ``Styler.apply(..., axis=1)`` —
            confirm against the caller.

    Returns:
        A list of ``len(s)`` identical CSS strings. A ``row_type`` that has no
        entry in the colour map yields empty strings (no styling) instead of
        raising ``KeyError``.
    """
    m = {
        DataRowType.SWAP_TXN: 'background-color: yellow',
        DataRowType.ON_UPDATE_TOKEN_PAIR_SNAPSHOT: 'background-color: #90ee90',
        DataRowType.END_OF_BLOCK_TOKEN_PAIR_SNAPSHOT: 'background-color: lime',
        DataRowType.ON_UPDATE_TOKEN_SNAPSHOT: 'background-color: #89cff0',
        DataRowType.END_OF_BLOCK_TOKEN_SNAPSHOT: 'background-color: #00bfff',
    }
    # Look the style up once; every cell of the row gets the same value.
    style = m.get(s.row_type, '')
    return [style] * len(s)

class DataRowType(IntEnum):
    """Integer codes used to tell apart the kinds of rows held in the data frame.

    The notebook compares ``df.row_type`` against these members to select rows.
    """
    # NOTE(review): the per-member meanings are inferred from the names only —
    # confirm against the code that writes the feather files.
    END_OF_BLOCK_TOKEN_PAIR_SNAPSHOT = 1  # presumably pair state at the end of a block
    ON_UPDATE_TOKEN_PAIR_SNAPSHOT = 2  # presumably pair state after each update
    END_OF_BLOCK_TOKEN_SNAPSHOT = 3  # presumably token state at the end of a block
    ON_UPDATE_TOKEN_SNAPSHOT = 4  # presumably token state after each update
    SWAP_TXN = 5  # presumably a single swap transaction

In [None]:
# End-of-block token-pair history files whose names start with
# stellaswap_data_16, _17 or _18, in sorted order.
files = glob(f'{d}/data/stellaswap_txn_history/end_of_block_token_pair/stellaswap_data_1[6-8]*.feather')
files.sort()

# Preview: the first two and the last two paths.
[*files[:2], '...', *files[-2:]]

In [None]:
%%time

# Read every selected feather file into a single frame with a fresh 0..n-1 index.
df = pd.concat([pd.read_feather(path) for path in files], ignore_index=True)

# Derived columns: the exchange rate in both directions, and the block
# timestamp scaled by 1000.
df = df.assign(
    rate=df.reserve1 / df.reserve0,
    revrate=df.reserve0 / df.reserve1,
    block_timestamp=df['block_timestamp'] * 1000,  # bokeh interprets epoch time in milliseconds
)

# Token-level variant of the pipeline (currently disabled):
# df = preprocess_data.compute_deltas_token(df)
# df = df.groupby(['row_type', 'token_address'], dropna=False).apply(preprocess_data.add_exp_smooth_token)

# Token-pair pipeline: deltas first, then two transforms applied per
# (row_type, pair_address) group, in this order.
df = preprocess_data.compute_deltas_token_pair(df)
for per_pair_step in (preprocess_data.add_exp_smooth_token_pair, preprocess_data.add_detrended_rate):
    df = df.groupby(['row_type', 'pair_address'], dropna=False).apply(per_pair_step)

# df = preprocess_data.augment_swap_rows(df, pair_to_tokens)

mem_mb = df.memory_usage(index=True).sum() / 1e6
print("Memory usage:", mem_mb, 'MB')
df

In [None]:
# Overlay the detrended and the raw rate of one pair (mapped to
# 'USDC_multi/WGLMR' in pair_d) against block number.
x = df.loc[df['pair_address'] == '0x555B74dAFC4Ef3A5A1640041e3244460Dc7610d1']
plt.plot(x['block_number'], x['detrended_rate'])
plt.plot(x['block_number'], x['rate'])

In [None]:
# Display whatever `x` currently holds; under top-to-bottom execution that is
# the single-pair slice of `df` assigned in the preceding cell.
x

In [None]:
%%time
# Build the combined token-pair figure (normalized and detrended series on,
# smoothed series off) and write it to a standalone HTML file.
x = bh.plot_combined_token_pairs(df, plot_normalized=True, plot_smoothed=False, plot_detrended=True)
# NOTE(review): the block range in the title is hard-coded — confirm it matches
# the files actually loaded into `df`.
# NOTE(review): output_file() sets bokeh's global output target, so later
# show() calls may also write to this path — confirm.
output_file(filename="figures/token_pairs.html", title="Token pairs: blocks 1,600,000 - 1,900,000")
save(x)

In [None]:
%%time
# Plot the end-of-block snapshots of a single pair and save them to a test HTML file.
pa = '0x555B74dAFC4Ef3A5A1640041e3244460Dc7610d1'

# Rows for this pair only, re-indexed from 0, with all-NaN columns removed.
dd = (
    df.loc[(df.row_type == DataRowType.END_OF_BLOCK_TOKEN_PAIR_SNAPSHOT) & (df.pair_address == pa)]
    .reset_index(drop=True)
    .dropna(axis=1, how='all')
)

x = bh.plot_token_pair(dd)
output_file(filename="figures/test.html", title="Test")
save(x)

In [None]:
%%time
# Build the combined per-token figure and write it to a standalone HTML file.
# NOTE(review): `df` is loaded from the end_of_block_token_pair files and the
# token preprocessing steps are commented out in the loading cell — confirm the
# token columns this plot needs are actually present.
x = bh.plot_combined_tokens(df)
# show(x)
# NOTE(review): the block range in the title is hard-coded and differs from the
# token-pair figure's title — confirm which range is correct.
output_file(filename="figures/tokens.html", title="Token values: blocks 1,700,000 - 1,850,000")
save(x)

In [None]:
# Plot the end-of-block snapshots of one token and display the figure.
t = '0xAcc15dC74880C9944775448304B263D191c6077F'
x = bh.plot_token(
    df.loc[(df.row_type == DataRowType.END_OF_BLOCK_TOKEN_SNAPSHOT) & (df.token_address == t)]
    .reset_index(drop=True),
    jump_bps_thresh=100,
)
show(x)

In [None]:
%%time
# Export one PNG per token pair. A failure for one pair is reported and the
# loop moves on to the next pair.
for pa, name in list(pair_d.items()):
    try:
        # '/' in the symbol would otherwise be read as a directory separator.
        file_prefix = name.replace('/', '_')
        x = bh.plot_token_pair(df[(df.row_type == DataRowType.END_OF_BLOCK_TOKEN_PAIR_SNAPSHOT) & (df.pair_address == pa)].reset_index(drop=True).dropna(axis=1, how='all'))
        export_png(x, filename=f'figures/pair_{file_prefix}.png')
    except Exception as e:
        # Catch Exception rather than everything, so Ctrl-C / kernel interrupt
        # still stops the loop, and report the real cause: missing data is
        # only one of the things that can fail here.
        print(f'Skipping {pa} ({name}), likely no data ({type(e).__name__}: {e})')

In [None]:
%%time
# Export one PNG per token. A failure for one token is reported and the loop
# moves on to the next token.
for t, name in token_d.items():
    try:
        # Symbol plus the first five characters of the address for the file name.
        file_prefix = f'{name}_{t[:5]}'
        x = bh.plot_token(df[(df.row_type == DataRowType.END_OF_BLOCK_TOKEN_SNAPSHOT) & (df.token_address == t)].reset_index(drop=True), jump_bps_thresh=100)
        export_png(x, filename=f'figures/token_{file_prefix}.png')
    except Exception as e:
        # Catch Exception rather than everything, so Ctrl-C / kernel interrupt
        # still stops the loop, and report the real cause: missing data is
        # only one of the things that can fail here.
        print(f'Skipping {t} ({name}), likely no data ({type(e).__name__}: {e})')

In [None]:
# Correlation might be skewed because we don't fill in missing block_numbers, but I feel like it doesn't matter

def _pivot_pair_column(snapshots, value_col):
    """Pivot ``value_col`` into one row per block_number and one column per pair symbol.

    Args:
        snapshots: Frame with ``block_number``, ``pair_address`` and ``value_col`` columns.
        value_col: Name of the column whose values fill the table.

    Returns:
        A frame with a ``block_number`` column followed by one column per pair,
        renamed from address to symbol through ``pair_d``. Missing values are
        forward-filled from the closest earlier row.

    Raises:
        KeyError: If a pair address in ``snapshots`` has no entry in ``pair_d``.
    """
    wide = snapshots.pivot(index='block_number', columns='pair_address', values=value_col)
    wide = wide.rename(columns={addr: pair_d[addr] for addr in wide.columns}).reset_index()
    # .ffill() replaces the deprecated fillna(method='ffill'); same result.
    wide = wide.ffill()
    wide.columns.name = None
    return wide


# End-of-block pair snapshots only, with all-NaN columns removed.
data = df[df.row_type == DataRowType.END_OF_BLOCK_TOKEN_PAIR_SNAPSHOT].dropna(axis=1, how='all')
block_range_str = f'(blocks {data.block_number.min()} - {data.block_number.max()})'

token_pair_rates = _pivot_pair_column(data, 'rate')
token_pair_detrended_rates = _pivot_pair_column(data, 'detrended_rate')
token_pair_smoothed_rates = _pivot_pair_column(data, 'smoothed_rate')

# Every pair column, i.e. everything except the block_number key.
cols = [c for c in token_pair_rates.columns if c != 'block_number']

In [None]:
# Use a large (30 x 15) default figure size for the pair correlation heatmaps below.
sns.set(rc = {'figure.figsize':(30,15)})

In [None]:
# Pairwise correlation of the row-to-row relative changes of the raw pair rates.
rate_changes = token_pair_rates[cols].pct_change()
corr = rate_changes.corr()
mean_abs_corr = corr.abs().mean().mean()

plot = sns.heatmap(corr, annot=True)
plt.title(f'Correlation matrix of StellaSwap token pair rate deltas {block_range_str}')
plt.figtext(
    0.45,
    -0.03,
    f'Average absolute value correlation = {mean_abs_corr:0.3f}',
    wrap=True,
    horizontalalignment='center',
    fontsize=12,
)
# plot.get_figure().savefig("figures/stellaswap_token_pair_raw_rate_correlation_heatmap.png", bbox_inches='tight')

In [None]:
# Same heatmap as for the raw rates, computed on the smoothed rates and saved to disk.
smoothed_changes = token_pair_smoothed_rates[cols].pct_change()
corr = smoothed_changes.corr()
mean_abs_corr = corr.abs().mean().mean()

plot = sns.heatmap(corr, annot=True)
plt.title(f'Correlation matrix of StellaSwap token pair rate (exponentially smoothed, α=0.25) deltas {block_range_str}')
plt.figtext(
    0.45,
    -0.03,
    f'Average absolute value correlation = {mean_abs_corr:0.3f}',
    wrap=True,
    horizontalalignment='center',
    fontsize=12,
)
plot.get_figure().savefig(
    "figures/stellaswap_token_pair_smoothed_rate_correlation_heatmap.png",
    bbox_inches='tight',
)

In [None]:
# For a range of bin widths (in blocks): average the pair rates per bin,
# correlate the bin-to-bin relative changes, and save one heatmap per width.

# The covered block span does not depend on the bin width; compute it once.
num_blocks = int(token_pair_rates.block_number.max() - token_pair_rates.block_number.min() + 1)
# savefig does not create missing directories.
os.makedirs('figures/correlation_plots', exist_ok=True)

for blocks_per_bin in [2, 3, 4, 5, 10, 15, 20, 25, 50, 100, 150, 300, 600, 1500, 7200]:
    bin_minute_length = blocks_per_bin / 5 # approximate because a block is produced approx each 12 s
    num_bins = num_blocks // blocks_per_bin # e.g. 10-block bins are ~2 minutes each
    # observed=False keeps bins that contain no rows (as NaN), which is what the
    # implicit default did; stating it pins the result across pandas versions
    # and avoids the deprecation warning on categorical groupers.
    token_pair_binned_rates = token_pair_rates.groupby(
        pd.cut(token_pair_rates['block_number'], num_bins), observed=False).mean()

    corr = token_pair_binned_rates[cols].pct_change().corr()
    plot = sns.heatmap(corr, annot=True)
    plt.figtext(0.45, -0.03, f'Average absolute value correlation = {corr.abs().mean().mean():0.3f}', wrap=True, horizontalalignment='center', fontsize=12)
    plt.title(f'Correlation matrix of StellaSwap token pair rate ({blocks_per_bin} blocks, i.e. ~{bin_minute_length} min, per bin) deltas {block_range_str}')
    plot.get_figure().savefig(f'figures/correlation_plots/stellaswap_token_pair_{blocks_per_bin}_bin_rate_correlation_heatmap.png', bbox_inches='tight')
    # Clear the figure so the next heatmap is not drawn on top of this one.
    plt.clf()

In [None]:
# For a range of bin widths (in blocks): average the de-trended pair rates per
# bin, correlate the binned values directly, and save one heatmap per width.
# Widths whose output file already exists are skipped.

# The covered block span does not depend on the bin width; compute it once.
num_blocks = int(token_pair_detrended_rates.block_number.max() - token_pair_detrended_rates.block_number.min() + 1)
# savefig does not create missing directories.
os.makedirs('figures/correlation_plots/detrended', exist_ok=True)

for blocks_per_bin in [2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 25, 50, 100, 150, 300, 500,
                       600, 900, 1200, 1500, 3000, 6000, 7200]:
    filename = f'figures/correlation_plots/detrended/stellaswap_token_pair_{blocks_per_bin}_bin_rate_correlation_heatmap.png'
    if os.path.isfile(filename):
        print(f'{filename} exists. Skipping it')
        continue
    bin_minute_length = blocks_per_bin / 5 # approximate because a block is produced approx each 12 s
    num_bins = num_blocks // blocks_per_bin # e.g. 10-block bins are ~2 minutes each
    # observed=False keeps bins that contain no rows (as NaN), which is what the
    # implicit default did; stating it pins the result across pandas versions
    # and avoids the deprecation warning on categorical groupers.
    token_pair_binned_rates = token_pair_detrended_rates.groupby(
        pd.cut(token_pair_detrended_rates['block_number'], num_bins), observed=False).mean()

    corr = token_pair_binned_rates[cols].corr() #pct_change().corr()
    plot = sns.heatmap(corr, annot=True)
    plt.figtext(0.45, -0.03, f'Average absolute value correlation = {corr.abs().mean().mean():0.3f}', wrap=True, horizontalalignment='center', fontsize=12)
    plt.title(f'Correlation matrix of StellaSwap token pair de-trended rate ({blocks_per_bin} blocks, i.e. ~{bin_minute_length} min, per bin) {block_range_str}')
    plot.get_figure().savefig(filename, bbox_inches='tight')
    # Clear the figure so the next heatmap is not drawn on top of this one.
    plt.clf()

In [None]:
# Bin the de-trended rates into 10-block bins and inspect two pairs.
blocks_per_bin = 10
bin_minute_length = blocks_per_bin / 5 # approximate because a block is produced approx each 12 s
num_blocks = int(token_pair_detrended_rates.block_number.max() - token_pair_detrended_rates.block_number.min() + 1)
num_bins = num_blocks // blocks_per_bin # 10 block bins ~ 2 minutes bin
token_pair_binned_rates = (
    token_pair_detrended_rates
    # observed=False keeps bins that contain no rows, matching the implicit
    # default this cell relied on; those rows are then forward-filled.
    .groupby(pd.cut(token_pair_detrended_rates['block_number'], num_bins), observed=False)
    .mean()
    # .ffill() replaces the deprecated fillna(method='ffill'); same result.
    .ffill()
)

# corr = token_pair_binned_rates[cols].pct_change().corr()
token_pair_binned_rates[['USDC_multi/WGLMR', 'USDC/BNB']]

In [None]:
# De-trended rates for blocks 1896621 through 1896641 (both ends included).
token_pair_detrended_rates.loc[lambda rates: rates.block_number.between(1896621, 1896641)]

In [None]:
# Display the pair_address column of the full frame.
df.pair_address

In [None]:
# Rows of one pair for blocks 1896621 through 1896641, with all-NaN columns removed.
df.loc[
    (df.block_number.between(1896621, 1896641))
    & (df.pair_address == '0x555B74dAFC4Ef3A5A1640041e3244460Dc7610d1')
].dropna(axis=1, how='all')  # .sort_values(['pair_address', 'block_number'])

In [None]:
# Bin-to-bin relative change of one pair's binned rate, plotted against the bin position.
x = token_pair_binned_rates.loc[:, ['USDC_multi/WGLMR', 'USDC/BNB']].reset_index()
plt.plot(x.index, x['USDC_multi/WGLMR'].pct_change())
# plt.plot(x.index, x['USDC/BNB'].pct_change())

In [None]:
# Smallest value in the 'USDC/BNB' column of `x` (as assigned in the preceding
# cell under top-to-bottom execution).
x['USDC/BNB'].min()

In [None]:
# Display the wide table of de-trended rates.
token_pair_detrended_rates

In [None]:
# Plot the end-of-block snapshots of one pair (all-NaN columns removed first).
pa = '0xa927E1e1E044CA1D9fe1854585003477331fE2Af'
bh.plot_token_pair(
    df.loc[(df.row_type == DataRowType.END_OF_BLOCK_TOKEN_PAIR_SNAPSHOT) & (df.pair_address == pa)]
    .reset_index(drop=True)
    .dropna(axis=1, how='all')
)

In [None]:
# Plot the end-of-block snapshots of two tokens with the same jump threshold
# and smoothing level. Only the second call is the cell's last expression.
t = '0xAcc15dC74880C9944775448304B263D191c6077F'
bh.plot_token(
    df.loc[(df.row_type == DataRowType.END_OF_BLOCK_TOKEN_SNAPSHOT) & (df.token_address == t)],
    jump_bps_thresh=100,
    smoothing_level=0.005,
)

t2 = '0xFfFFfFff1FcaCBd218EDc0EbA20Fc2308C778080'
bh.plot_token(
    df.loc[(df.row_type == DataRowType.END_OF_BLOCK_TOKEN_SNAPSHOT) & (df.token_address == t2)],
    jump_bps_thresh=100,
    smoothing_level=0.005,
)

In [None]:
def _per_token_frame(column, as_bps_change=False):
    """Build a frame with one column per token symbol holding that token's ``column`` values.

    Only end-of-block token snapshot rows of ``df`` are used, and each token's
    values are re-indexed from 0. With ``as_bps_change`` the values are
    replaced by 10_000 times their row-to-row relative change. Columns that
    are entirely NaN are dropped.
    """
    per_token = {}
    for token_address, token_name in token_d.items():
        values = df[(df.row_type == DataRowType.END_OF_BLOCK_TOKEN_SNAPSHOT) & (df.token_address == token_address)][column]
        if as_bps_change:
            values = 10_000 * values.pct_change().reset_index(drop=True)
        else:
            values = values.reset_index(drop=True)
        per_token[f'{token_name}'] = values
    return pd.DataFrame(per_token).dropna(axis=1, how='all')


# Raw and smoothed values per token.
dai_data = _per_token_frame('dai-multi_equiv_no_fees')
smoothed_dai_data = _per_token_frame('smoothed_dai-multi_equiv_no_fees')

# The same two series as row-to-row changes scaled by 10_000.
dai_delta_data = _per_token_frame('dai-multi_equiv_no_fees', as_bps_change=True)
smoothed_dai_delta_data = _per_token_frame('smoothed_dai-multi_equiv_no_fees', as_bps_change=True)

In [None]:
# Smaller default figure size for the per-token heatmap.
sns.set(rc={'figure.figsize': (15, 8)})

# Alternative inputs that were tried:
#sns.heatmap(dai_data.corr(), annot=True)
#sns.heatmap(smoothed_dai_data.corr(), annot=True)
#sns.heatmap(smoothed_dai_delta_data.corr(), annot=True)

# Correlation of the per-token value changes, drawn and saved to disk.
token_delta_corr = dai_delta_data.corr()
plot = sns.heatmap(token_delta_corr, annot=True)
plt.title('Correlation matrix of StellaSwap token DAI-equivalent-value deltas')
plot.get_figure().savefig("figures/stellaswap_token_correlation_heatmap.png")

In [None]:
# Overlay the row-to-row relative value change of two tokens over time.
t1 = '0xAcc15dC74880C9944775448304B263D191c6077F'
t2 = '0xFfFFfFff1FcaCBd218EDc0EbA20Fc2308C778080'

is_token_snapshot = df.row_type == DataRowType.END_OF_BLOCK_TOKEN_SNAPSHOT
data1 = df[is_token_snapshot & (df.token_address == t1)]
data2 = df[is_token_snapshot & (df.token_address == t2)]

# block_timestamp is held in milliseconds, hence unit='ms'.
sns.lineplot(
    x=pd.to_datetime(data1.block_timestamp, unit='ms'),
    y=data1['dai-multi_equiv_no_fees'].pct_change(),
    alpha=0.75,
    label=f'{token_d[t1]}',
)
sns.lineplot(
    x=pd.to_datetime(data2.block_timestamp, unit='ms'),
    y=data2['dai-multi_equiv_no_fees'].pct_change(),
    alpha=0.75,
    label=f'{token_d[t2]}',
)

In [None]:
# Row-to-row relative change of one token's value across its end-of-block snapshots.
df.loc[
    (df.row_type == DataRowType.END_OF_BLOCK_TOKEN_SNAPSHOT)
    & (df.token_address == '0x818ec0A7Fe18Ff94269904fCED6AE3DaE6d6dC0b'),
    'dai-multi_equiv_no_fees',
].pct_change()

In [None]:
# Scratch cell: bokeh glyph and sizing experiments on synthetic data. It does
# not read the trading data, but it does rebind the notebook-level name `x`.
x = list(range(11))
y0 = x
y1 = [10 - i for i in x]
y2 = [abs(i - 5) for i in x]

# create three plots with one renderer each
s1 = figure(width=250, height=250, background_fill_color="#fafafa")
s1.circle(x, y0, size=12, color="#53777a", alpha=0.8)

s2 = figure(width=250, height=250, background_fill_color="#fafafa")
s2.triangle(x, y1, size=12, color="#c02942", alpha=0.8)

s3 = figure(width=250, height=250, background_fill_color="#fafafa")
s3.square(x, y2, size=12, color="#d95b43", alpha=0.8)


# A fourth, empty figure with explicit size and axis labels.
p = figure(
    title="Plot sizing example",
    width=350,
    height=250,
    x_axis_label="x",
    y_axis_label="y",
)
# Format month-level ticks on the first x axis as abbreviated month plus year.
p.xaxis[0].formatter = DatetimeTickFormatter(months="%b %Y")