In [1]:
import pandas as pd
import math
import numpy as np
import matplotlib.pyplot as plt
from decimal import Decimal

In [2]:
# Load the raw dataset from the parquet file; it is kept unmodified in `df_ori`.
df_ori = pd.read_parquet(path='ekubo_market_depth_dataset.parquet')

In [3]:
df = df_ori.copy()

# Compute timestamp (whole seconds since the Unix epoch).
# Subtracting the epoch and floor-dividing by one second gives the same result
# whatever the datetime resolution of the column is (ns/us/ms/s), whereas
# astype('int64') // 10**9 is only correct for nanosecond resolution.
# Assumes 'datetime' is timezone-naive, as the daily aggregation function also does.
df['datetime'] = pd.to_datetime(df_ori['BLOCK_TIMESTAMP'])
df['timestamp'] = (df['datetime'] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s')
# Stable sort: events sharing the same second keep their original relative order.
df = df.sort_values(by='timestamp', kind='stable')
df = df.reset_index(drop=True)

# Obtain names of Token0 and Token1 from their contract addresses.
TOKEN_ADDRESSES = {
    'ETH':  '0x049d36570d4e46f48e99674bd3fcc84644ddd6b96f7c741b1562b82f9e004dc7',
    'USDC': '0x053c91253bc9682c04929ca02ed00b3e423f6710d2ee7e0d5ebb06f3ecf368a8',
    'STRK': '0x04718f5a0fc34cc1af16a1cdee98ffb20c31f5cd61d6ab07201858f4287c938d',
    'USDT': '0x068f5c6a61780768455de69077e07e89787839bf8166decfbf92b645209c0fb8',
}
# Tokens recognised on each side of a pair; any other address is named ''.
TOKEN0_NAMES = ('ETH', 'USDC', 'STRK')
TOKEN1_NAMES = ('ETH', 'USDC', 'USDT')

token0_by_address = {TOKEN_ADDRESSES[name]: name for name in TOKEN0_NAMES}
token1_by_address = {TOKEN_ADDRESSES[name]: name for name in TOKEN1_NAMES}
df['Token0_name'] = df['TOKEN0_ADDRESS'].map(token0_by_address).fillna('')
df['Token1_name'] = df['TOKEN1_ADDRESS'].map(token1_by_address).fillna('')

# Convert each column to either int or float.
# astype replaces the element-wise DataFrame.applymap, which is deprecated since pandas 2.1.
columns_int = ['BLOCK_NUMBER', 'TOKEN0_DECIMALS', 'TOKEN1_DECIMALS', 'TICK_SPACING']
df[columns_int] = df[columns_int].astype('int64')

columns_float = ['TOKEN0_RAW_AMOUNT', 'TOKEN0_REAL_AMOUNT', 'TOKEN1_RAW_AMOUNT', 'TOKEN1_REAL_AMOUNT',
                 'FEE_TIER', 'LIQUIDITY_AMOUNT', 'LOWER_TICK', 'UPPER_TICK', 'SWAP_TICK']
df[columns_float] = df[columns_float].astype('float64')

# Create a 'tag' column for 'token0_token1'
df['tag'] = df['Token0_name']+'_'+df['Token1_name']

In [4]:
# Distinct pair tags, in ascending order
list_targets = np.sort(df['tag'].unique())
print(list_targets)

['ETH_USDC' 'STRK_ETH' 'STRK_USDC' 'STRK_USDT' 'USDC_USDT']


In [5]:
def compute_aggregation_daily(df_day, columns_agg, tag):
    ''' Aggregate one day of events of a single token pair into a one-row DataFrame.

    Parameters
    ----------
    df_day : pd.DataFrame
        Events of one pair on one day (at least one row). Columns read:
        'EVENT_NAME', 'tag', 'datetime', 'Token0_name', 'Token1_name',
        'TOKEN0_REAL_AMOUNT', 'TOKEN1_REAL_AMOUNT', 'LIQUIDITY_AMOUNT'.
    columns_agg : list of str
        Column names used to initialise the output frame (they fix its column order).
    tag : str
        Unused; kept so existing callers keep working. The tag is read from df_day.

    Returns
    -------
    pd.DataFrame
        A single row (index 0) holding the event counts, the Mint/Burn price
        median, token0 amount and liquidity sums, and the Swap price statistics
        and token0 buy/sell/net amounts.

    Notes
    -----
    The price of an event is TOKEN1_REAL_AMOUNT / TOKEN0_REAL_AMOUNT. Events whose
    price or token0 amount is 0, +/-inf or NaN are ignored for prices and for the
    Mint/Burn token0 amounts.
    Swap prices are converted to absolute values *before* median/min/max/std are
    taken, so that min <= max also holds when the signed ratios are negative.
    Mint/Burn liquidity is summed over all events of that type, including days on
    which no event has a usable price.
    '''
    def _valid_price_rows(df_events):
        ''' Return per-event price and token0 amount, keeping only rows where both are usable. '''
        pairs = pd.DataFrame({
            'ratio_token1_token0': df_events['TOKEN1_REAL_AMOUNT'] / df_events['TOKEN0_REAL_AMOUNT'],
            'TOKEN0_REAL_AMOUNT': df_events['TOKEN0_REAL_AMOUNT'],
        })
        usable = (pairs != 0) & (pairs != np.inf) & (pairs != -np.inf) & pairs.notna()
        return pairs[usable.all(axis=1)]

    def _mint_burn_stats(df_events):
        ''' Return (median price, token0 amount, liquidity sum) for Mint or Burn events. '''
        if len(df_events) == 0:
            return np.nan, 0, 0
        # Liquidity does not depend on the price filter, so it is always reported.
        liquidity = df_events['LIQUIDITY_AMOUNT'].sum()
        pairs = _valid_price_rows(df_events)
        if len(pairs) == 0:
            return np.nan, 0, liquidity
        return pairs['ratio_token1_token0'].median(), pairs['TOKEN0_REAL_AMOUNT'].sum(), liquidity

    def _swap_stats(df_events):
        ''' Return the Swap output columns as a dict; all NaN when no swap has a usable price. '''
        stats = {
            'token0_daily_price_median_swap': np.nan,
            'token0_daily_price_min_swap': np.nan,
            'token0_daily_price_max_swap': np.nan,
            'token0_daily_price_std_swap': np.nan,
            'token0_daily_amount_buy_swap': np.nan,
            'token0_daily_amount_sell_swap': np.nan,
            'token0_daily_amount_net_swap': np.nan,
        }
        if len(df_events) == 0:
            return stats
        pairs = _valid_price_rows(df_events)
        if len(pairs) == 0:
            return stats

        # Take absolute values first: abs(min()) / abs(max()) of negative ratios
        # would report the largest price as the minimum and vice versa.
        prices = pairs['ratio_token1_token0'].abs()
        stats['token0_daily_price_median_swap'] = prices.median()
        stats['token0_daily_price_min_swap'] = prices.min()
        stats['token0_daily_price_max_swap'] = prices.max()
        stats['token0_daily_price_std_swap'] = prices.std()

        # Buy / sell amounts are summed over all swap events of the day (signed values).
        amounts = df_events['TOKEN0_REAL_AMOUNT']
        buy = amounts[amounts > 0].sum()
        sell = amounts[amounts < 0].sum()
        stats['token0_daily_amount_buy_swap'] = buy
        stats['token0_daily_amount_sell_swap'] = sell
        stats['token0_daily_amount_net_swap'] = buy + sell
        return stats

    # Separate input df into 'Mint', 'Burn', and 'Swap'
    df_day_mint = df_day[df_day['EVENT_NAME'] == 'Mint']
    df_day_burn = df_day[df_day['EVENT_NAME'] == 'Burn']
    df_day_swap = df_day[df_day['EVENT_NAME'] == 'Swap']

    # General daily information
    df_new = pd.DataFrame(columns=columns_agg, index=[0])

    df_new['tag'] = df_day['tag'].iloc[0]
    df_new['date'] = pd.Timestamp(df_day['datetime'].dt.date.iloc[0])
    # Epoch seconds of the first event in df_day
    df_new['timestamp'] = (df_day['datetime'].iloc[0] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s')

    df_new['token0'] = df_day['Token0_name'].iloc[0]
    df_new['token1'] = df_day['Token1_name'].iloc[0]

    df_new['total_events_daily_mint'] = len(df_day_mint)
    df_new['total_events_daily_burn'] = len(df_day_burn)
    df_new['total_events_daily_swap'] = len(df_day_swap)
    df_new['total_events_daily'] = len(df_day_mint) + len(df_day_burn) + len(df_day_swap)

    # 'Mint' and 'Burn'
    for side, df_events in (('mint', df_day_mint), ('burn', df_day_burn)):
        price_median, amount, liquidity = _mint_burn_stats(df_events)
        df_new['token0_daily_price_median_' + side] = price_median
        df_new['token0_daily_amount_' + side] = amount
        df_new['liquidity_daily_sum_' + side] = liquidity

    df_new['token0_daily_amount_net_mint_burn'] = df_new['token0_daily_amount_mint'] + df_new['token0_daily_amount_burn']
    df_new['liquidity_daily_net_mint_burn'] = df_new['liquidity_daily_sum_mint'] + df_new['liquidity_daily_sum_burn']

    # 'Swap'
    for column, value in _swap_stats(df_day_swap).items():
        df_new[column] = value

    return df_new

In [7]:
# Column layout of the daily aggregation table
columns_agg = ['tag', 'token0', 'token1',
               'date', 'timestamp',

               'token0_daily_price_median_mint', 'token0_daily_amount_mint',
               'token0_daily_price_median_burn', 'token0_daily_amount_burn',
               'token0_daily_amount_net_mint_burn',

               'liquidity_daily_sum_mint', 'liquidity_daily_sum_burn', 'liquidity_daily_net_mint_burn',

               'token0_daily_price_median_swap', 'token0_daily_price_min_swap',
               'token0_daily_price_max_swap', 'token0_daily_price_std_swap',

               'token0_daily_amount_buy_swap', 'token0_daily_amount_sell_swap', 'token0_daily_amount_net_swap',

               'total_events_daily_mint', 'total_events_daily_burn', 'total_events_daily_swap', 'total_events_daily' ]

# Collect the one-row daily frames in a list and concatenate once at the end:
# growing a DataFrame with pd.concat inside the loop copies all previous rows on
# every iteration and starts from an empty frame, which newer pandas warns about.
daily_frames = []

for tag in list_targets:
    print('Computing',tag)
    # Select the events of one pair, e.g. STRK_USDC, ETH_USDC, STRK_ETH, ...
    df_tag = df[df['tag']==tag]

    # One group per calendar day that has events; groupby yields the days in ascending order
    for _, df_day in df_tag.groupby(df_tag['datetime'].dt.date):
        daily_frames.append(compute_aggregation_daily(df_day, columns_agg, tag))

if daily_frames:
    df_all = pd.concat(daily_frames, ignore_index=True)
    df_all = df_all.sort_values(by='timestamp')
    df_all = df_all.reset_index(drop=True)
else:
    # Nothing to aggregate: keep the expected column layout
    df_all = pd.DataFrame(columns=columns_agg)

df_all.to_parquet('df_aggregation_daily.parquet')
print('End of Computing')

Computing ETH_USDC
Computing STRK_ETH
Computing STRK_USDC
Computing STRK_USDT
Computing USDC_USDT
End of Computing
