In [None]:
from enum import IntEnum
from glob import glob
import importlib
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import seaborn as sns
from timer import timer

from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt

from bokeh.io import output_notebook, export_png
from bokeh.layouts import row, column
from bokeh.models import ColumnDataSource, DatetimeTickFormatter, NumeralTickFormatter, HoverTool #, LinearAxis, Range1d
from bokeh.plotting import figure, show, output_file, save

# Allow up to 100 rows when a DataFrame is rendered as cell output.
pd.options.display.max_rows = 100
# Send bokeh `show()` output to inline notebook cells.
output_notebook()

In [None]:
# Re-import the local helper package so edits to it are picked up without a kernel restart.
import jupyter_helpers
importlib.reload(jupyter_helpers)
from jupyter_helpers import bokeh_helpers, preprocess_data
importlib.reload(bokeh_helpers)
importlib.reload(preprocess_data)

# Globals
# Project root. The default is the original checkout location; set the
# POLKADOT_TRADING_DIR environment variable to run the notebook from another machine.
d = os.environ.get('POLKADOT_TRADING_DIR', '/home/kapil/Desktop/polkadot-trading')

# Pair metadata, restricted to the pairs listed in the liquid-pairs file.
pair_addresses = set(pd.read_csv(f'{d}/data/stellaswap_liquid_pairs.csv').pair_address)
x = pd.read_csv(f'{d}/data/stellaswap_metadata_snapshot.csv')
x = x[x.pair_address.isin(pair_addresses)]

# Address -> symbol lookups for pairs and for the tokens on either side of a pair.
pair_d = dict(zip(x.pair_address, x.symbol))
token_d = {**dict(zip(x.token0_address, x.token0_symbol)), **dict(zip(x.token1_address, x.token1_symbol))}
# Override the symbols of these two addresses so that they get distinct names.
token_d['0x818ec0A7Fe18Ff94269904fCED6AE3DaE6d6dC0b'] = 'USDC_multi'
token_d['0x8f552a71EFE5eeFc207Bf75485b356A0b3f01eC9'] = 'USDC_mad'
# Later cells use the symbol as a column name, so symbols must not collide.
assert len(set(token_d.values())) == len(token_d), 'token symbols in token_d must be unique'

# Address -> decimals, and pair address -> (token0 address, token1 address).
token_to_decimals = {**dict(zip(x.token0_address, x.token0_decimals)), **dict(zip(x.token1_address, x.token1_decimals))}
pair_to_tokens = dict(zip(x.pair_address, tuple(zip(x.token0_address, x.token1_address))))
print('Token pairs:', list(pair_d.values()))
print('Tokens:', list(token_d.values()))
bh = bokeh_helpers.BokehHelper(pair_d, token_d)

def highlight(s):
    """Return one CSS background rule per entry of ``s``, chosen by ``s.row_type``.

    Every entry of ``s`` receives the same rule, so the whole row is shaded in a
    single colour. Presumably meant for ``DataFrame.style.apply(highlight, axis=1)``
    -- TODO confirm, no caller is visible in this notebook.

    Raises ``KeyError`` (for non-empty ``s``) when ``s.row_type`` is not one of
    the ``DataRowType`` members listed below.
    """
    colour_by_row_type = (
        (DataRowType.SWAP_TXN, 'yellow'),
        (DataRowType.ON_UPDATE_TOKEN_PAIR_SNAPSHOT, '#90ee90'),
        (DataRowType.END_OF_BLOCK_TOKEN_PAIR_SNAPSHOT, 'lime'),
        (DataRowType.ON_UPDATE_TOKEN_SNAPSHOT, '#89cff0'),
        (DataRowType.END_OF_BLOCK_TOKEN_SNAPSHOT, '#00bfff'),
    )
    css_by_row_type = {
        row_type: f'background-color: {colour}'
        for row_type, colour in colour_by_row_type
    }
    return [css_by_row_type[s.row_type] for _ in range(len(s))]

# Integer codes compared against the `row_type` column of the data frame.
# Built with the functional Enum API; member order and values are explicit.
DataRowType = IntEnum(
    'DataRowType',
    [
        ('END_OF_BLOCK_TOKEN_PAIR_SNAPSHOT', 1),
        ('ON_UPDATE_TOKEN_PAIR_SNAPSHOT', 2),
        ('END_OF_BLOCK_TOKEN_SNAPSHOT', 3),
        ('ON_UPDATE_TOKEN_SNAPSHOT', 4),
        ('SWAP_TXN', 5),
    ],
)

In [None]:
# Display the pair metadata that was filtered down to the liquid pairs.
x

In [None]:
# Sorted feather files under end_of_block_token/ matching the pattern below
# (the `[8]` character class matches a literal 8).
feather_pattern = f'{d}/data/stellaswap_txn_history/end_of_block_token/stellaswap_data_1[8]*.feather'
files = sorted(glob(feather_pattern))
# Preview: the first two and the last two paths.
[*files[:2], '...', *files[-2:]]

In [None]:
%%time

df = pd.concat([pd.read_feather(f) for f in files]).reset_index(drop=True)
df['rate'] = df.reserve1 / df.reserve0
df['revrate'] = df.reserve0 / df.reserve1
df['block_timestamp'] = df['block_timestamp'] * 1000 # bokeh interprets epoch time in milliseconds

df = preprocess_data.compute_deltas_token(df)
df = df.groupby(['row_type', 'token_address'], dropna=False).apply(preprocess_data.add_exp_smooth_token)

# df = preprocess_data.compute_deltas_token_pair(df)
# df = df.groupby(['row_type', 'pair_address'], dropna=False).apply(preprocess_data.add_exp_smooth_token_pair)

# df = preprocess_data.augment_swap_rows(df, pair_to_tokens)

print("Memory usage:", df.memory_usage(index=True).sum() / 1e6, 'MB')
df

## Plotting

In [None]:
%%time
# Build the combined token-pair figure from the full frame.
x = bh.plot_combined_token_pairs(df)
# Point bokeh's file output at a standalone HTML page, then write the figure there.
# NOTE(review): the block range in the title is hard-coded -- confirm it matches the files loaded above.
output_file(filename="figures/token_pairs.html", title="Token pairs: blocks 1,600,000 - 1,850,000")
save(x)

In [None]:
%%time
pa = '0x555B74dAFC4Ef3A5A1640041e3244460Dc7610d1'
dd = df[(df.row_type == DataRowType.END_OF_BLOCK_TOKEN_PAIR_SNAPSHOT) & (df.pair_address == pa)].reset_index(drop=True).dropna(axis=1, how='all')

x = bh.plot_token_pair(dd)
output_file(filename="figures/test.html", title="Test")
save(x)

In [None]:
%%time
# Build the combined token figure from the full frame.
x = bh.plot_combined_tokens(df)
# show(x)
# Point bokeh's file output at a standalone HTML page, then write the figure there.
# NOTE(review): the block range in the title is hard-coded -- confirm it matches the files loaded above.
output_file(filename="figures/tokens.html", title="Token values: blocks 1,700,000 - 1,850,000")
save(x)

In [None]:
t = '0xAcc15dC74880C9944775448304B263D191c6077F'
# End-of-block snapshots of this one token, re-indexed from zero.
token_snapshot_mask = (df.row_type == DataRowType.END_OF_BLOCK_TOKEN_SNAPSHOT) & (df.token_address == t)
token_rows = df.loc[token_snapshot_mask].reset_index(drop=True)
x = bh.plot_token(token_rows, jump_bps_thresh=100)
show(x)

In [None]:
%%time
for pa, name in list(pair_d.items()):
    try:
        file_prefix = name.replace('/', '_')
        x = bh.plot_token_pair(df[(df.row_type == DataRowType.END_OF_BLOCK_TOKEN_PAIR_SNAPSHOT) & (df.pair_address == pa)].reset_index(drop=True).dropna(axis=1, how='all'))
        export_png(x, filename=f'figures/pair_{file_prefix}.png')
    except:
        print(f'Skipping {pa} ({name}), likely no data')

In [None]:
%%time
for t, name in token_d.items():
    try:
        file_prefix = f'{name}_{t[:5]}'
        x = bh.plot_token(df[(df.row_type == DataRowType.END_OF_BLOCK_TOKEN_SNAPSHOT) & (df.token_address == t)].reset_index(drop=True), jump_bps_thresh=100)
        export_png(x, filename=f'figures/token_{file_prefix}.png')
    except:
        print(f'Skipping {t} ({name}), likely no data')

## Correlations

In [None]:
usdc_addr = '0x818ec0A7Fe18Ff94269904fCED6AE3DaE6d6dC0b'
usdc_col = 'usdc-multi_equiv_with_fees'

# End-of-block token snapshots, keeping only the columns needed below.
# `.loc` + `.rename` yields an independent frame, so the column assignment
# further down does not write into a slice of `df`.
data = (
    df.loc[
        df.row_type == DataRowType.END_OF_BLOCK_TOKEN_SNAPSHOT,
        ['row_type', 'block_number', 'block_timestamp', 'token_address', usdc_col],  # , 'dai-multi_equiv_no_fees_bps_delta'
    ]
    .rename(columns={usdc_col: 'usd_value'})
)

# Per-token scale factor: 10 ** (token decimals - USDC decimals).
# The float base keeps negative exponents valid whatever integer type the decimals have.
mult = {
    addr: 10.0 ** (token_to_decimals[addr] - token_to_decimals[usdc_addr])
    for addr in data['token_address'].unique()
}

# Scale row by row. `map` keeps `data`'s own index, so each factor lines up with
# its row. (Building a fresh Series from zip() would carry a 0..n-1 index and be
# misaligned on assignment whenever `data` is a filtered subset of `df`.)
data['usd_value'] = data['token_address'].map(mult) * data['usd_value']
data

In [None]:
# Label reused in the plot titles further down.
block_range_str = f'(blocks {data.block_number.min()} - {data.block_number.max()})'

# One row per block, one column per token (symbol instead of address).
token_values = data.pivot(index='block_number', columns='token_address', values='usd_value')
token_values = (
    token_values
    .rename(columns={addr: token_d[addr] for addr in token_values.columns})
    .reset_index()
    .ffill()  # carry the last known value forward; replaces the deprecated fillna(method='ffill')
)
token_values.columns.name = None

# Token columns only (everything except the block number).
cols = [c for c in token_values.columns if c != 'block_number']

# Row-to-row percentage change (cell output).
token_values.pct_change()

## Avoiding spurious token correlations
Virtually every token (except STELLA and ETH and axlATOM, which have direct STELLA/USDC and ETH/USDC and axlATOM/USDC pools) must go through WGLMR -> USDC_multi to calculate the value. If WGLMR goes up in value (based on its WGLMR -> USDC pool), then every other token goes up the same amount (creating this spurious correlation). To make this explicit:

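Let $r_0$ be the initial WGLMR/USDC rate and $s_0$ the initial xcDOT/WGLMR rate, so that $t_0 = s_0 \cdot r_0$ is the derived xcDOT -> (WGLMR ->) USDC rate.

If $r_1 = r_0 \cdot c$ and $s_1 = s_0 \cdot d$, then $t_1 = s_1 \cdot r_1 = t_0 \cdot c \cdot d$.

The percentage change in $r$ is $\mathrm{pctdiff}_r = (r_1 - r_0) / r_0 = r_0 (c - 1) / r_0 = c - 1$, hence $c = 1 + \mathrm{pctdiff}_r$.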

There may be cases where STELLA, ETH, or axlATOM go through WGLMR, but we pretend this doesn't happen (by looking at the values with fees, to penalize extra jumps). For the other tokens we simply divide out the WGLMR factor c (the code below approximates this by subtracting WGLMR's percentage change). Then the remaining diffs are due to the token's rate to WGLMR changing, e.g. the value of xcDOT now only changes if there is a swap in the xcDOT/WGLMR pool (since xcDOT -> WGLMR -> USDC).

Honestly, I don't know how to deal with this. I'm going to try some sketchy adjustments to see whether there are interesting results.

**This is garbage. I think artificially constraining the analysis to the equiv columns I've calculated is a bad idea. Use CEX data to compute correlations if needed.**

In [None]:
p = token_values.pct_change()

# Tokens whose change is taken as-is, and tokens whose change has WGLMR's change subtracted.
unadjusted_tokens = ['WGLMR', 'STELLA', 'ETH', 'axlATOM', 'BNB']
wglmr_adjusted_tokens = ['xcDOT', 'xcINTR', 'xcaUSD', 'FTM', 'WELL', 'MATIC', 'AVAX']

adj_pctdiff = pd.concat(
    [
        p[unadjusted_tokens],
        p[wglmr_adjusted_tokens].sub(p['WGLMR'], axis=0),
    ],
    axis=1,
)
# adj_pctdiff['block_number'] = token_values['block_number']
adj_pctdiff

In [None]:
# Rows where xcINTR's adjusted change has magnitude above 1e-10.
adj_pctdiff[adj_pctdiff.xcINTR.abs() > 1e-10]

In [None]:
# Raw percentage changes at row positions 33979-33981, transposed so tokens are rows.
# NOTE(review): the positions are hard-coded and only meaningful for one particular data load.
token_values.pct_change().iloc[33979:33982].T

In [None]:
# Row-to-row percentage change of every column in token_values.
token_values.pct_change()

In [None]:
# Set the default matplotlib figure size to (20, 10) through seaborn's rc settings.
sns.set(rc = {'figure.figsize':(20,10)})

In [None]:
# Pairwise correlation of the adjusted percentage changes, drawn as an annotated heatmap.
corr = adj_pctdiff.corr()
mean_abs_corr = corr.abs().mean().mean()
plot = sns.heatmap(corr, annot=True)
plt.title(f'Correlation matrix of StellaSwap token value deltas {block_range_str}')
# Caption below the axes with the mean of the absolute correlations.
plt.figtext(
    0.45,
    -0.03,
    f'Average absolute value correlation = {mean_abs_corr:0.3f}',
    wrap=True,
    horizontalalignment='center',
    fontsize=12,
)
#plot.get_figure().savefig("figures/stellaswap_token_pair_raw_rate_correlation_heatmap.png", bbox_inches='tight')

In [None]:
# NOTE(review): this cell originally read `adj_p`, which is not defined anywhere in
# the notebook and raised NameError on a fresh kernel. `adj_pctdiff` is the only
# adjusted percentage-change frame available -- confirm that it is the intended input.
corr = adj_pctdiff.corr()
plot = sns.heatmap(corr, annot=True)
plt.title(f'Correlation matrix of StellaSwap token value deltas {block_range_str}')
# Caption below the axes with the mean of the absolute correlations.
plt.figtext(0.45, -0.03, f'Average absolute value correlation = {corr.abs().mean().mean():0.3f}', wrap=True, horizontalalignment='center', fontsize=12)
#plot.get_figure().savefig("figures/stellaswap_token_pair_raw_rate_correlation_heatmap.png", bbox_inches='tight')

In [None]:
# NOTE(review): this cell originally read `adj_p`, which is not defined anywhere in
# the notebook and raised NameError on a fresh kernel. `adj_pctdiff` is the only
# adjusted percentage-change frame available -- confirm that it is the intended input.
corr = adj_pctdiff.corr()
plot = sns.heatmap(corr, annot=True)
plt.title(f'Correlation matrix of StellaSwap token value deltas {block_range_str}')
# Caption below the axes with the mean of the absolute correlations.
plt.figtext(0.45, -0.03, f'Average absolute value correlation = {corr.abs().mean().mean():0.3f}', wrap=True, horizontalalignment='center', fontsize=12)
#plot.get_figure().savefig("figures/stellaswap_token_pair_raw_rate_correlation_heatmap.png", bbox_inches='tight')

In [None]:
# For each bin width (in blocks): average the rates within equal-width block-number
# bins, correlate the bin-to-bin percentage changes, and save the heatmap as a PNG.
# NOTE(review): `token_pair_rates` is not defined anywhere in this notebook, so this
# cell fails on a fresh kernel; `cols` is derived from `token_values` -- confirm which
# frame was intended here.
# Open question: how do you even bin this and avoid spurious correlations? Use WGLMR as the base token?
# Possible answer: subtract the WGLMR delta?
# blocks_per_bin = 5000
for blocks_per_bin in [2, 3, 4, 5, 10, 15, 20, 25, 50, 100, 150, 300, 600, 1500, 7200]:
    bin_minute_length = blocks_per_bin / 5 # approximate: assumes roughly one block every 12 s, i.e. 5 blocks per minute
    num_blocks = int(token_pair_rates.block_number.max() - token_pair_rates.block_number.min() + 1)
    num_bins = num_blocks // blocks_per_bin # e.g. 10-block bins are ~2-minute bins
    # Mean of every column within each block-number bin.
    token_pair_binned_rates = token_pair_rates.groupby(pd.cut(token_pair_rates['block_number'], num_bins)).mean()

    # Correlation of the bin-to-bin percentage changes of the `cols` columns.
    corr = token_pair_binned_rates[cols].pct_change().corr()
    plot = sns.heatmap(corr, annot=True)
    plt.figtext(0.45, -0.03, f'Average absolute value correlation = {corr.abs().mean().mean():0.3f}', wrap=True, horizontalalignment='center', fontsize=12)
    plt.title(f'Correlation matrix of StellaSwap token pair rate ({blocks_per_bin} blocks, i.e. ~{bin_minute_length} min, per bin) deltas {block_range_str}')
    plot.get_figure().savefig(f'figures/stellaswap_token_pair_{blocks_per_bin}_bin_rate_correlation_heatmap.png', bbox_inches='tight')
    # Clear the current figure so the next heatmap is not drawn on top of this one.
    plt.clf()

In [None]:
# Binned frame left over from the last iteration of the loop above (7200 blocks per bin).
token_pair_binned_rates

In [None]:
pa = '0xa927E1e1E044CA1D9fe1854585003477331fE2Af'
# End-of-block snapshots of this pair, re-indexed, without columns that are entirely NaN.
pair_snapshot_mask = (df.row_type == DataRowType.END_OF_BLOCK_TOKEN_PAIR_SNAPSHOT) & (df.pair_address == pa)
pair_rows = df.loc[pair_snapshot_mask].reset_index(drop=True).dropna(axis=1, how='all')
bh.plot_token_pair(pair_rows)

In [None]:
# End-of-block token snapshots; each token below is plotted from its slice of these rows.
# NOTE(review): only the last expression of a cell is rendered as output, so the first
# call's return value is discarded unless plot_token displays the figure itself -- confirm.
token_snapshot_rows = df.row_type == DataRowType.END_OF_BLOCK_TOKEN_SNAPSHOT

t = '0xAcc15dC74880C9944775448304B263D191c6077F'
bh.plot_token(df.loc[token_snapshot_rows & (df.token_address == t)], jump_bps_thresh=100, smoothing_level=5e-3)

t2 = '0xFfFFfFff1FcaCBd218EDc0EbA20Fc2308C778080'
bh.plot_token(df.loc[token_snapshot_rows & (df.token_address == t2)], jump_bps_thresh=100, smoothing_level=5e-3)

In [None]:
def _token_column_frame(snapshots, column, as_bps_change=False):
    """Build a frame with one column per token holding that token's ``column`` values.

    Args:
        snapshots: rows to draw from; must have ``token_address`` and ``column``.
        column: name of the value column to extract for every token in ``token_d``.
        as_bps_change: when true, each token's series is replaced by its
            row-to-row percentage change multiplied by 10,000.

    Returns:
        DataFrame keyed by token symbol, each series re-indexed from zero, with
        columns that are entirely NaN dropped.
    """
    per_token = {}
    for token_address, token_name in token_d.items():
        values = snapshots.loc[snapshots.token_address == token_address, column]
        if as_bps_change:
            values = 10_000 * values.pct_change()
        per_token[f'{token_name}'] = values.reset_index(drop=True)
    return pd.DataFrame(per_token).dropna(axis=1, how='all')


# Filter the end-of-block token snapshots once instead of once per token per frame.
_token_snapshots = df[df.row_type == DataRowType.END_OF_BLOCK_TOKEN_SNAPSHOT]

dai_data = _token_column_frame(_token_snapshots, 'dai-multi_equiv_no_fees')
smoothed_dai_data = _token_column_frame(_token_snapshots, 'smoothed_dai-multi_equiv_no_fees')
dai_delta_data = _token_column_frame(_token_snapshots, 'dai-multi_equiv_no_fees', as_bps_change=True)
smoothed_dai_delta_data = _token_column_frame(_token_snapshots, 'smoothed_dai-multi_equiv_no_fees', as_bps_change=True)

In [None]:
sns.set(rc={'figure.figsize': (15, 8)})
# Other frames that were tried as heatmap input:
#sns.heatmap(dai_data.corr(), annot=True)
#sns.heatmap(smoothed_dai_data.corr(), annot=True)
#sns.heatmap(smoothed_dai_delta_data.corr(), annot=True)

# Correlation of the per-token DAI-equivalent deltas, saved as a PNG.
dai_delta_corr = dai_delta_data.corr()
plot = sns.heatmap(dai_delta_corr, annot=True)
plt.title('Correlation matrix of StellaSwap token DAI-equivalent-value deltas')
heatmap_figure = plot.get_figure()
heatmap_figure.savefig("figures/stellaswap_token_correlation_heatmap.png")