In [None]:
import pandas as pd
import powerlaw
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
from matplotlib.dates import DateFormatter
from matplotlib.patches import Patch
from matplotlib.ticker import ScalarFormatter
from scipy.stats import spearmanr
import numpy as np
import seaborn as sns
import pickle

In [None]:
# Tokens analysed by this notebook. FIRST_BLOCKS is index-aligned with TOKENS:
# the address-count cells pair TOKENS[i] with FIRST_BLOCKS[i] and use the
# latter as the lower block bound when filtering block timestamps.
TOKENS = [
    'sUSD', 'FRAX', 'EURS', 'XCHF',
    'XAUt', 'FEI', 'GUSD', 'BUSD',
]
FIRST_BLOCKS = [
    5767935,    # sUSD
    11465581,   # FRAX
    5835474,    # EURS
    6622987,    # XCHF
    9339031,    # XAUt
    12168368,   # FEI
    6302486,    # GUSD
    8523552,    # BUSD
]
# Upper block bound used when filtering block timestamps.
LAST_BLOCK = 14497033
# Shared palette; the plotting cells pick entries by position.
colors = [
    '#E9B872', '#90A959', '#A63D40', '#6494AA',
    '#1E5085', '#433061', '#565656', '#151515',
]

In [None]:
# Estimates the best-fit power-law exponent (alpha) and lower cutoff (xmin) of
# the normalised micro-velocity distribution, for every token and every date.
# Writes '<TOKEN>_alpha.csv', '<TOKEN>_xmin.csv' and '<TOKEN>_distribution_test.pkl'.
for TOKEN in TOKENS:
    velocity = pd.read_csv('{}_microVelocities_last.csv'.format(TOKEN))
    velocity = velocity.set_index('timestamp').drop(['total'], axis=1)
    # After the transpose every column holds one date and every row one of the
    # remaining CSV columns (everything except 'timestamp' and 'total').
    velocity = velocity.T

    alpha = {}
    xmin = {}
    # Never filled in this cell; it is still pickled below so the
    # '<TOKEN>_distribution_test.pkl' file keeps being produced.
    test = {}
    for DATE in velocity.columns.values.tolist():
        # Keep strictly positive entries only, largest first, and normalise
        # them so they sum to 1. Working on a fresh Series (instead of assigning
        # into a filtered DataFrame slice) avoids pandas' SettingWithCopyWarning.
        shares = velocity[DATE]
        shares = shares[shares > 0].sort_values(ascending=False)
        d1 = (shares / shares.sum()).to_numpy()
        fit = powerlaw.Fit(d1)
        alpha[DATE] = fit.power_law.alpha
        xmin[DATE] = fit.power_law.xmin

    df = pd.DataFrame({'timestamp': list(alpha.keys()), 'alpha': list(alpha.values())})
    df.to_csv('{}_alpha.csv'.format(TOKEN), index=False)
    # The value column was previously labelled 'alpha' here as well (copy-paste
    # slip); it holds xmin and is now labelled accordingly.
    df = pd.DataFrame({'timestamp': list(xmin.keys()), 'xmin': list(xmin.values())})
    df.to_csv('{}_xmin.csv'.format(TOKEN), index=False)
    # Context manager guarantees the file is closed even if pickling fails.
    with open("{}_distribution_test.pkl".format(TOKEN), "wb") as f:
        pickle.dump(test, f)
    

In [None]:
# Log-likelihood ratio test between a power-law and an exponential fit of the
# normalised micro-velocity distribution, for every token and every date.
# Writes '<TOKEN>_loglikelihood_ratio.csv' with columns timestamp, R-value, p-value.
for TOKEN in TOKENS:
    velocity = pd.read_csv('{}_microVelocities_last.csv'.format(TOKEN))
    velocity = velocity.set_index('timestamp').drop(['total'], axis=1)
    # After the transpose every column holds one date.
    velocity = velocity.T

    rows = []
    for DATE in velocity.columns.values.tolist():
        # Strictly positive entries, largest first, normalised to sum to 1.
        # Built as a new Series so nothing is assigned into a filtered slice
        # (which triggered pandas' SettingWithCopyWarning).
        shares = velocity[DATE]
        shares = shares[shares > 0].sort_values(ascending=False)
        d1 = (shares / shares.sum()).to_numpy()
        fit = powerlaw.Fit(d1)
        # R is the log-likelihood ratio and p its significance; see the powerlaw
        # package documentation for the sign convention of R.
        R, p = fit.distribution_compare('power_law', 'exponential')
        rows.append({'timestamp': DATE, 'R-value': R, 'p-value': p})

    # One frame with both statistics replaces building two frames and merging
    # them on 'timestamp'; the resulting CSV has the same columns and rows.
    df1 = pd.DataFrame(rows, columns=['timestamp', 'R-value', 'p-value'])
    df1.to_csv('{}_loglikelihood_ratio.csv'.format(TOKEN), index=False)

In [None]:
# Summary statistics of the fitted exponents, one column per token.
# The fit cell writes '<TOKEN>_alpha.csv'; the former '{}.csv' pattern pointed
# at a file no cell of this notebook produces and depended on whichever TOKEN
# happened to be left over from an earlier loop.
alpha_summary = pd.DataFrame({
    token: pd.read_csv('{}_alpha.csv'.format(token)).alpha.describe()
    for token in TOKENS
})
alpha_summary

In [None]:
# Collects, for every token, the normalised micro-velocity sample on each of
# the snapshot DATES and stores them as {token: {date: ndarray}} in
# 'TOKENS_ccdf.pkl' (consumed by the CCDF figure).
DATES = ['2019-01-01','2020-01-01','2021-01-01','2022-01-01']
dct1 = {}
for TOKEN in TOKENS:
    velocity = pd.read_csv('{}_microVelocities_last.csv'.format(TOKEN))
    velocity = velocity.set_index('timestamp').drop(['total'], axis=1)
    # After the transpose every column holds one date.
    velocity = velocity.T
    print('done')

    dct2 = {}
    for DATE in DATES:
        # Snapshot dates absent from this token's file are simply left out.
        if DATE not in velocity.columns:
            continue
        # Strictly positive entries, largest first, normalised to sum to 1.
        # A new Series is built rather than assigning into a filtered slice,
        # which avoids pandas' SettingWithCopyWarning.
        shares = velocity[DATE]
        shares = shares[shares > 0].sort_values(ascending=False)
        dct2[DATE] = (shares / shares.sum()).to_numpy()
    dct1[TOKEN] = dct2

# Context manager guarantees the file is closed even if pickling fails.
with open("TOKENS_ccdf.pkl", "wb") as f:
    pickle.dump(dct1, f)

In [None]:
# CCDF of the normalised micro-velocities, one panel per snapshot date.
dct = pd.read_pickle('TOKENS_ccdf.pkl')
date = ['2019-01-01','2020-01-01','2021-01-01','2022-01-01']
fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(12,3.3))

# (token, index into `colors`) in drawing order. The legend of the last panel
# is built from the tokens actually drawn there, so labels cannot drift out of
# sync with the curves.
ccdf_series = [
    ('BUSD', 0), ('GUSD', 1), ('EURS', 2), ('XCHF', 3),
    ('XAUt', 4), ('sUSD', 5), ('FRAX', 6), ('FEI', 7),
]

for position, (panel, snapshot) in enumerate(zip(axes, date)):
    drawn = []
    for token, color_idx in ccdf_series:
        # A token is drawn only where the pickle holds a non-empty sample for
        # this date; this replaces commenting individual plot calls in and out
        # per panel and avoids a KeyError for tokens without that snapshot.
        sample = dct.get(token, {}).get(snapshot)
        if sample is None or len(sample) == 0:
            continue
        powerlaw.plot_ccdf(data=sample, color=colors[color_idx], ax=panel)
        drawn.append(token)

    # NOTE(review): `ran` is not defined in any cell visible here, so the
    # unconditional call raised NameError on a fresh kernel. The reference
    # curve is drawn on the first panel only when `ran` exists; define it in
    # an earlier cell if the curve is wanted.
    if position == 0 and 'ran' in globals():
        powerlaw.plot_ccdf(data=ran, color='darkorange', ax=panel)

    panel.set_title(snapshot, weight='bold')
    panel.tick_params(axis='both', which='minor', bottom=False, left=False)
    if position == len(date) - 1:
        panel.legend(drawn, loc='lower left')
    panel.set_xlabel(r'$V_i \,\,/\, V$', size=16)
    panel.set_ylabel('CCDF',weight='bold')

fig.tight_layout()
plt.savefig('image_ccdf.png')
plt.show()

In [None]:
# Calculates, per token and per date, the Spearman rank correlation between the
# balance and the micro-velocity of the entries that are strictly positive in
# both files. Writes '<TOKEN>_spearman.csv' with columns timestamp, <TOKEN>.
for TOKEN in TOKENS:
    balances = pd.read_csv('{}_balances_daily.csv'.format(TOKEN))
    velocity = pd.read_csv('{}_microVelocities_last.csv'.format(TOKEN))
    # After the transpose every column holds one date.
    balances = balances.set_index('timestamp').T
    velocity = velocity.set_index('timestamp').drop(['total'], axis=1).T

    cor = {}
    for date in balances.columns.values.tolist():
        # Dates present in the balance file but not in the velocity file used
        # to raise KeyError; they are skipped instead.
        if date not in velocity.columns:
            continue
        b = balances[date]
        b = b[b > 0]
        v = velocity[date]
        v = v[v > 0]
        # Inner join on the row labels keeps exactly the entries positive in
        # both tables. No sorting is needed: a rank correlation does not depend
        # on row order.
        paired = pd.concat([b, v], axis=1, join='inner')
        cor[date] = spearmanr(paired).correlation

    df = pd.DataFrame({'timestamp': list(cor.keys()), TOKEN: list(cor.values())})
    df.to_csv('{}_spearman.csv'.format(TOKEN), index=False)
    

In [None]:
# Combines the per-token Spearman series into one table keyed by timestamp,
# starting from sUSD and left-joining every other token.
# The list of remaining tokens gets its own name: rebinding the global TOKENS
# here dropped sUSD and broke the TOKENS[i] / FIRST_BLOCKS[i] pairing that
# later cells rely on.
OTHER_TOKENS = ['FRAX','BUSD', 'GUSD', 'EURS', 'XCHF', 'XAUt','FEI']
df1 = pd.read_csv('sUSD_spearman.csv')
for TOKEN in OTHER_TOKENS:
    df2 = pd.read_csv('{}_spearman.csv'.format(TOKEN))
    df1 = df1.merge(df2,on=['timestamp'], how='left')
# The plotting cell that follows reads 'spearman.csv'; persist the merged table
# so the notebook runs top to bottom on a fresh kernel.
df1.to_csv('spearman.csv', index=False)
df1

In [None]:
# Spearman correlation of every token over time, saved as
# 'image_spearman_over_time.png'.
spear = pd.read_csv('spearman.csv', parse_dates=['timestamp'])

# Series in drawing order, each paired with its position in the shared palette.
series_order = ['sUSD','EURS','GUSD','XCHF','BUSD','XAUt','FRAX','FEI']
palette_slots = [5, 2, 1, 3, 0, 4, 6, 7]

ax = spear.plot(
    x='timestamp',
    y=list(series_order),
    kind='line',
    figsize=(13.4,3),
    color=[colors[slot] for slot in palette_slots],
    rot=0,
    label=list(series_order),
    lw=1.5,
    style=['-'] * len(series_order),
    x_compat=True,
)

ax.set_xlabel('')
ax.set_ylabel(r'$\rho(M_i,V_i)$', size=16)
# One major tick every six months (January and July).
ax.xaxis.set_major_locator(mdates.MonthLocator(bymonth=range(1,13,6)))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax.yaxis.set_major_locator(plt.MaxNLocator(5))
# Legend is anchored to the right of the axes.
ax.legend(list(series_order), loc='center right',bbox_to_anchor=(1.13, 0.5))

plt.setp(ax.xaxis.get_majorticklabels(), horizontalalignment="center")
# Major ticks: shown on the bottom and left edges, hidden on the top edge.
ax.tick_params(axis='both', which='major', bottom=True, top=False, left=True)
plt.savefig('image_spearman_over_time.png')
plt.show()

In [None]:
# Display the Spearman correlation table that was read from 'spearman.csv'.
spear

In [None]:
# Scatter-plot data for the Spearman figure: for every token and snapshot date,
# the normalised balances (M) and normalised micro-velocities (V) of the
# entries that are strictly positive in both files.
# Writes '<TOKEN>_spearman_snapshots.pkl' holding {date: [M, V]}.
DATES = ['2019-01-01','2020-01-01','2021-01-01','2022-01-01']

for TOKEN in TOKENS:
    # The input files do not depend on DATE, so they are read once per token
    # instead of once per (token, date) pair.
    balances = pd.read_csv('{}_balances_daily.csv'.format(TOKEN))
    velocity = pd.read_csv('{}_microVelocities_last.csv'.format(TOKEN))
    # After the transpose every column holds one date.
    balances = balances.set_index('timestamp').T
    velocity = velocity.set_index('timestamp').drop(['total'], axis=1).T

    dct = {}
    for DATE in DATES:
        # A date missing from either file yields an empty snapshot (a date
        # missing only from the velocity file used to raise KeyError).
        if DATE in balances.columns and DATE in velocity.columns:
            # Positive entries, largest first, each normalised to sum to 1.
            # New Series are built instead of assigning into filtered slices,
            # which avoids pandas' SettingWithCopyWarning.
            b = balances[DATE]
            b = b[b > 0].sort_values(ascending=False)
            b = b / b.sum()
            v = velocity[DATE]
            v = v[v > 0].sort_values(ascending=False)
            v = v / v.sum()
            # Entries present in both, in descending-balance order.
            shared = b.index[b.index.isin(v.index)]
            M = b.loc[shared].to_list()
            V = v.loc[shared].to_list()
            dct[DATE] = [M,V]
        else:
            dct[DATE] = [[],[]]

    # Context manager guarantees the file is closed even if pickling fails.
    with open("{}_spearman_snapshots.pkl".format(TOKEN), "wb") as f:
        pickle.dump(dct, f)
    print("{}_spearman_snapshots.pkl: success".format(TOKEN))

In [None]:
# Scatter of normalised balance vs. normalised micro-velocity, one log-log
# panel per snapshot date, saved as 'image_spearman_scatter.png'.
plt.rcParams['mathtext.fontset'] = 'stix'
fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(13.4,3))

# (token, index into `colors`, marker) in drawing order; the legend labels
# below follow the same order.
scatter_series = [
    ('sUSD', 5, 'o'), ('EURS', 2, 's'), ('GUSD', 1, '^'), ('XCHF', 3, '1'),
    ('BUSD', 0, '+'), ('XAUt', 4, 'x'), ('FRAX', 6, 'D'), ('FEI', 7, '*'),
]
# Matplotlib ignores `edgecolor` for unfilled markers and colours them from the
# face colour, so these must not be drawn with facecolor='None'. The last panel
# already did this; it is now applied to every panel.
unfilled_markers = {'1', '+', 'x'}

snapshots = {
    token: pd.read_pickle('{}_spearman_snapshots.pkl'.format(token))
    for token, _, _ in scatter_series
}

# (snapshot date, exponent of the lower axis limit) per panel.
panels = [('2019-01-01', -21), ('2020-01-01', -17), ('2021-01-01', -23), ('2022-01-01', -23)]

for ax, (snapshot, low_exp) in zip(axes, panels):
    for token, slot, marker in scatter_series:
        M, V = snapshots[token][snapshot]
        if marker in unfilled_markers:
            marker_colors = dict(facecolor=colors[slot])
        else:
            marker_colors = dict(edgecolor=colors[slot], facecolor='None')
        ax.scatter(M, V, s=40, marker=marker, label=token, **marker_colors)

    # Reference line V_i/V = M_i/M.
    ax.axline([0, 0], [1, 1], c='darkorange', lw=2.5, ls='--')
    ax.set_yscale('log')
    ax.set_xscale('log')
    ax.set_ylim(1 * 10**(low_exp),100)
    ax.set_xlim(1 * 10**(low_exp),100)
    ax.set_title(snapshot, weight='bold')
    # Tick labels and major tick marks are hidden on both axes.
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    ax.set_xlabel(r'$M_i\,\,/\,M$', size=16)
    ax.set_ylabel(r'$V_i\,\,/\,V$', size=16)
    ax.tick_params(axis='both', which='major', bottom=False, left=False)

# The reference line comes first in the legend, then the tokens in drawing order.
axes[3].legend(
    [r'$V_i\,\,/\,V = M_i\,\,/\,M$'] + [token for token, _, _ in scatter_series],
    loc='center right', bbox_to_anchor=(1.8, 0.5))
fig.tight_layout()
# Spacing is adjusted before saving (it used to run after savefig and so never
# reached the file). bbox_inches='tight' keeps the legend, which is anchored
# outside the last axes, inside the saved image.
plt.subplots_adjust(wspace=0.35)
plt.savefig('image_spearman_scatter.png', bbox_inches='tight')

In [None]:
# Share of dates (FEI) on which the log-likelihood ratio R is positive, and
# share on which the test's p-value is below 0.05.
# The file name matches what the log-likelihood ratio cell of this notebook
# writes ('<TOKEN>_loglikelihood_ratio.csv', columns R-value / p-value); the
# previous name 'FEI_dist_comp_exp.csv' is not produced by any cell here.
df = pd.read_csv('FEI_loglikelihood_ratio.csv')
# The last row is left out of the statistics.
# NOTE(review): the reason is not visible in this notebook — confirm.
df = df[:-1]
R = df.loc[df['R-value'] > 0]
p = df.loc[df['p-value'] < 0.05]
print(len(R)/(len(df)))
print(len(p)/(len(df)))

In [None]:
# calculates number of addresses with nonzero balance

def getBalance(block, address):
    """Return the balance of `address` as of `block`, as an int.

    Looks `address` up in the module-level `balances` mapping, whose values
    map a block number to the balance recorded at that block. The entry with
    the largest block number not exceeding `block` is used; when no such
    entry exists the result is 0.

    Raises:
        KeyError: if `address` is not a key of `balances`.
    """
    history = balances[address]
    eligible = [recorded for recorded in history if recorded <= block]
    if not eligible:
        return 0
    # Every eligible key is <= block, so the one closest to `block` is simply
    # the largest (and is `block` itself when that key exists).
    return int(history[max(eligible)])

# Counts, for every token, the addresses whose balance is nonzero at the last
# block of each week, and writes one column per token to
# 'nonzero_addresses.csv'.

# The block/timestamp table does not depend on the token: read it once.
block_ts = pd.read_csv('block_timestamps_complete.csv', parse_dates=['timestamp'])

# Accumulated result. The first token processed defines the rows; later tokens
# are left-joined on its weekly timestamps. Initialising on "is None" instead
# of on TOKEN == 'sUSD' keeps the cell working whatever the token order.
df0 = None
for TOKEN, FIRST_BLOCK in zip(TOKENS, FIRST_BLOCKS):
    ts = block_ts.loc[(block_ts.block_number >= FIRST_BLOCK) & (block_ts.block_number <= LAST_BLOCK)]
    # Last block of every calendar week within the token's block range.
    last = ts[['timestamp','block_number']].groupby(pd.Grouper(key='timestamp', freq='W')).last().reset_index()
    blocks = last['block_number'].to_list()
    timestamps = last['timestamp'].to_list()
    # getBalance() reads this module-level mapping, so the name must stay `balances`.
    balances = pd.read_pickle('{}_balances.pkl'.format(TOKEN))
    numberOfAddresses = [
        sum(1 for address in balances if getBalance(block, address) != 0)
        for block in blocks
    ]
    df1 = pd.DataFrame({'timestamp':timestamps, TOKEN:numberOfAddresses})
    df0 = df1 if df0 is None else df0.merge(df1, on=['timestamp'], how='left')
    # Rewritten after every token, so the file always holds the tokens
    # finished so far.
    df0.to_csv('nonzero_addresses.csv',index=False)

In [None]:
# Calculates weekly address activity per token: the number of distinct
# addresses that appear as sender or receiver of a transfer on each day, summed
# over the days of every week. Writes one '<TOKEN>_transfer' column per token
# to 'active_addresses.csv'.
# NOTE(review): an address active on several days of one week is counted once
# per day, so this is not the number of distinct addresses per week — confirm
# that this is the intended metric.

# The block/timestamp table does not depend on the token: read it once.
block_ts = pd.read_csv('block_timestamps_complete.csv', parse_dates=['timestamp'])

df0 = None
for TOKEN, FIRST_BLOCK in zip(TOKENS, FIRST_BLOCKS):
    ts = block_ts.loc[(block_ts.block_number >= FIRST_BLOCK) & (block_ts.block_number <= LAST_BLOCK)]
    # One row per calendar day within the token's block range.
    first = ts[['timestamp','block_number']].groupby(pd.Grouper(key='timestamp', freq='D')).first().reset_index()
    timestamps = first['timestamp'].to_list()

    transfers = pd.read_csv('{}_token_transfers.csv'.format(TOKEN), parse_dates=['timestamp'])
    # Truncate transfer timestamps to their calendar day.
    transfers['timestamp'] = pd.to_datetime((transfers['timestamp']).dt.date)
    # Distinct addresses per day, computed in one pass over the transfers
    # instead of rescanning the whole table for every day.
    daily_unique = {
        day: len(set(group.from_address.to_list() + group.to_address.to_list()))
        for day, group in transfers.groupby('timestamp')
    }
    column = '{}_transfer'.format(TOKEN)
    daily = pd.DataFrame({
        'timestamp': timestamps,
        column: [daily_unique.get(t, 0) for t in timestamps],
    })

    # Aggregate THIS token to weekly totals before joining. Previously the
    # accumulated table was resampled to weekly inside the loop, so every later
    # token's daily rows were joined onto week-end timestamps only and its
    # "weekly" value was just the count of that single day.
    weekly = daily.groupby(pd.Grouper(key='timestamp', freq='W')).sum().reset_index()
    df0 = weekly if df0 is None else df0.merge(weekly, on=['timestamp'], how='left')

if df0 is not None:
    # Weeks outside a token's range count as 0, as they did in the previous output.
    df0 = df0.fillna(0)
    df0.to_csv('active_addresses.csv',index=False)

In [None]:
# Address counts over time, one log-scale panel per token, saved as
# 'image_activeness.png'.
uniques = pd.read_csv('unique_addresses.csv', parse_dates=['timestamp'])
nonzero = pd.read_csv('nonzero_addresses.csv', parse_dates=['timestamp'])
active = pd.read_csv('active_addresses.csv', parse_dates=['timestamp'])
merge = uniques.merge(active, on=['timestamp'])
# The third table is `nonzero` (the previous name `transfer` is not defined
# anywhere, while `nonzero` was loaded and never used). Its plain token columns
# presumably collide with those of `uniques`, which is what yields the
# '<TOKEN>_x' / '<TOKEN>_y' columns plotted below — verify against
# 'unique_addresses.csv'.
# .ffill() replaces the deprecated fillna(method='ffill').
merge = merge.merge(nonzero, on=['timestamp'], how='left').ffill()

fig, axes = plt.subplots(nrows=4, ncols=2)

# Token shown in each grid cell, row by row.
panel_tokens = [
    ['BUSD', 'GUSD'],
    ['EURS', 'XCHF'],
    ['XAUt', 'sUSD'],
    ['FRAX', 'FEI'],
]
series_labels = ['Unique addresses','Addresses with nonzero balances','Weekly active addresses']
# One colour per series, identical in every panel so the single legend (drawn
# on the FEI panel) applies to all of them. The sUSD panel previously used
# colors[5] for its first series, which did not match that legend.
series_colors = [colors[4],colors[2],colors[1]]
bottom_row = len(panel_tokens) - 1

for row, row_tokens in enumerate(panel_tokens):
    for col, token in enumerate(row_tokens):
        # Only the FEI panel carries the legend; `label` renames the three
        # series there, every other panel suppresses its legend.
        if token == 'FEI':
            legend_kwargs = dict(label=list(series_labels))
        else:
            legend_kwargs = dict(legend=False)
        # pandas applies `figsize` to the shared figure on every call; (10,10)
        # is the size the last of the original calls left the figure with.
        ax = merge.plot(
            x='timestamp',
            y=['{}_x'.format(token), '{}_y'.format(token), '{}_transfer'.format(token)],
            ax=axes[row, col],
            kind='line',
            figsize=(10,10),
            color=list(series_colors),
            rot=0,
            lw=1,
            style=['--','-'],
            x_compat=True,
            sharex=True,
            **legend_kwargs)

        ax.set_title(token)
        ax.set_xlabel('')
        # One major tick per year (January).
        ax.xaxis.set_major_locator(mdates.MonthLocator(bymonth=range(1,13,12)))
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
        ax.set_yscale('log')
        ax.yaxis.set_major_formatter(ScalarFormatter())
        ax.tick_params(axis='y', which='minor', left=False)
        # The bottom row keeps its major x tick marks; all rows above hide
        # both major and minor ones.
        ax.tick_params(axis='x', which='minor' if row == bottom_row else 'both', bottom=False)
        for tick in ax.xaxis.get_majorticklabels():
            tick.set_horizontalalignment("center")

fig.tight_layout()
fig.text(-0.02, 0.5, 'Number of Addresses', va='center', rotation='vertical', size=12, weight='bold')
fig.text(0.5, 0, 'Date', va='center', size=12, weight='bold')
plt.savefig('image_activeness.png', bbox_inches='tight')
plt.show()