In [1]:
import os
from tqdm.notebook import tqdm
import polars as pl
import json
import plotly.graph_objects as go


In [2]:

# Existing dataset dir
data_dir = './data/'

# Existing plots dir
plots_dir = data_dir+'/plots/'
os.makedirs(data_dir, exist_ok=True)
os.makedirs(plots_dir, exist_ok=True)



In [3]:
from plot_utils_2 import get_plotly_layout
from plot_utils_2 import colors
width, height = 1000, 450

In [4]:
plot_settings = {
    'arbitrum': {'color': colors['blue'],   'label': 'Arbitrum', 'style': 'solid', 'width': 4, 'marker_symbol': 'circle'},
    'arbitrum2': {'color': colors['blue'],   'label': 'Arbitrum', 'style': 'solid', 'width': 4, 'marker_symbol': 'circle'},
    'base': {'color': colors['red'],        'label': 'Base', 'style': 'dash', 'width': 3.5, 'marker_symbol': 'diamond'},
    'ethereum': {'color': colors['green'],  'label': 'Ethereum', 'style': 'dot', 'width': 3, 'marker_symbol': 'square'},
    'optimism': {'color': colors['grey'],   'label': 'Optimism', 'style': 'dashdot', 'width': 2.5, 'marker_symbol': 'triangle-up'},
    'zksync': {'color': colors['pink'],     'label': 'zkSync', 'style': 'solid', 'width': 2, 'marker_symbol': 'cross'},
    'arbitrum2': {'color': colors['brown'],    'label': 'Fantom', 'style': 'dash', 'width': 1.5, 'marker_symbol': 'star'},
}

file_settings = {
    'ethereum': {  'nr': '1', 'file_name': './data/swaps-ethereum-Uniswap-v3-WETH-USDC.paraquet' , 'output_file': './data/NaN-ethereum-Uniswap-v3-WETH-USDC.paraquet' },
    'arbitrum': {  'nr': '2', 'file_name': './data/swaps-arbitrum-Uniswap-v3-WETH-USDC.paraquet' , 'output_file': './data/NaN-arbitrum-Uniswap-v3-WETH-USDC.paraquet'},
    'arbitrum2': { 'nr': '3', 'file_name': './data/swaps-arbitrum-Uniswap-v3-WETH-USDCe.paraquet', 'output_file': './data/NaN-arbitrum-Uniswap-v3-WETH-USDCe.paraquet'},
    'base': {      'nr': '4', 'file_name': './data/swaps-base-Uniswap-v3-WETH-USDC.paraquet'     , 'output_file': './data/NaN-base-Uniswap-v3-WETH-USDC.paraquet'},
    'optimism': {  'nr': '5', 'file_name': './data/swaps-optimism-Uniswap-v3-WETH-USDC.paraquet' , 'output_file': './data/NaN-optimism-Uniswap-v3-WETH-USDC.paraquet'},
    'zksync': {    'nr': '6', 'file_name': './data/swaps-zksync-Uniswap-v3-WETH-USDC.paraquet'   , 'output_file': './data/NaN-zksync-Uniswap-v3-WETH-USDC.paraquet' } ,
}



In [5]:
chains = [
    'ethereum',
    'arbitrum',
    'arbitrum2',
    'base',
    'optimism',
    'zksync'
]


my_date = pl.datetime(2024,4,30)


In [6]:
# Constants
TICK_SPACING = 10
BASE = 1.0001
MULTI = 1e12

MUL_TOKEN0 = 1e18  # WETH
MUL_TOKEN1 = 1e6   # USDC

LP_FEE = 0.0005    # 20bps at ZKsync
DAYS = 365
alpha = 2_400
ZKSYNC = 8
ETHEREUM = 0.25
R_S = 0.0347
r_W = 100_000_000
W = r_W / alpha


In [7]:
# Initialize the dictionary to store the daily results per chain
daily_results = {}

for chain in chains:
    # Load the file for the current chain
    file_name = file_settings[chain]['file_name']
    swaps_df = pl.scan_parquet(file_name).collect(streaming=True)

    # Step 1: Calculate tick-related and sqrt-related columns
    swaps_df = swaps_df.with_columns([
        ((abs(pl.col('tick')) // TICK_SPACING) * TICK_SPACING).alias('tick_l'),
        (((abs(pl.col('tick')) // TICK_SPACING) * TICK_SPACING) + TICK_SPACING).alias('tick_u'),
        (pl.col('sqrtPriceX96').mul(1 / 2**96)).alias('sqrtPriceX')
    ])

    swaps_df = swaps_df.with_columns(
        pl.when(pl.col('sqrtPriceX') < 1)
        .then(1 / pl.col('sqrtPriceX'))
        .otherwise(pl.col('sqrtPriceX'))
        .alias('sqrtPriceX')  # Rename the result back to 'sqrtPriceX'
    )

    swaps_df = swaps_df.with_columns([
        (BASE ** pl.col('tick_l')).pow(0.5).alias('sqrtRatioL'),
        (BASE ** pl.col('tick_u')).pow(0.5).alias('sqrtRatioU')
    ])

    # Step 2: Group by date and aggregate
    grouped_df = (
        swaps_df
        .with_columns(pl.col('block_timestamp').dt.date().alias('date'))
        .groupby('date')
        .agg([
            pl.sum('volume_USD').alias('daily_volume_USD'),
            pl.last('reserve_USD').alias('reserve_USD'),
            pl.last('reserve_ETH').alias('reserve_ETH'),
            pl.last('spot_price').alias('spot_price'),
            (pl.last('reserve_USD') + pl.last('reserve_ETH') * pl.last('spot_price')).alias('virtual_TVL'),
            pl.last('liquidity').alias('liquidity'),
            pl.last('tick').alias('tick'),
            pl.last('tick_l').alias('tick_l'),
            pl.last('tick_u').alias('tick_u'),
            pl.last('sqrtPriceX').alias('sqrtPriceX'),
            pl.last('sqrtRatioL').alias('sqrtRatioL'),
            pl.last('sqrtRatioU').alias('sqrtRatioU')
        ])
    )

    # Step 3: Perform further calculations in multiple steps
    #grouped_df = grouped_df.with_columns([
    #((abs(pl.col('tick')) // TICK_SPACING) * TICK_SPACING).alias('tick_l'),
    #(((abs(pl.col('tick')) // TICK_SPACING) * TICK_SPACING) + TICK_SPACING).alias('tick_u')
    #])


    grouped_df = grouped_df.with_columns([
        (1 / BASE ** pl.col('tick_l') * MULTI).alias('price_l'),
        (1 / BASE ** pl.col('tick_u') * MULTI).alias('price_u')
    ])

    # Calculate token0 and token1 amounts
    grouped_df = grouped_df.with_columns([
        (pl.col('liquidity') * (pl.col('sqrtRatioU') - pl.col('sqrtPriceX')) / (pl.col('sqrtPriceX') * pl.col('sqrtRatioU')) / MUL_TOKEN1).alias('token0'),
        (pl.col('liquidity') * (pl.col('sqrtPriceX') - pl.col('sqrtRatioL')) / MUL_TOKEN0).alias('token1')
    ])

    # Calculate real TVL
    grouped_df = grouped_df.with_columns([
        (pl.col('token0') + pl.col('token1') * pl.col('spot_price')).alias('real_TVL')
    ])

    # Step 4: Calculate returns
    grouped_df = grouped_df.with_columns([
        (pl.col('daily_volume_USD') * LP_FEE / pl.col('virtual_TVL')).alias('v_daily_return'),
        (pl.col('daily_volume_USD') * LP_FEE / pl.col('real_TVL') / alpha).alias('r_daily_return')
    ])

    # Step 5: Adjust new returns for specific chains based on the loop variable
    if chain == 'zksync':
        grouped_df = grouped_df.with_columns([
            (ZKSYNC * pl.col('v_daily_return')).alias('v_daily_return'),
            (ZKSYNC * pl.col('r_daily_return')).alias('r_daily_return')
        ])
    elif chain == 'ethereum':
        grouped_df = grouped_df.with_columns([
            (ETHEREUM * pl.col('v_daily_return')).alias('v_daily_return'),
            (ETHEREUM * pl.col('r_daily_return')).alias('r_daily_return')
        ])

    # Step 5: Adjust new returns for specific chains
    if chain == 'zksync':
        grouped_df = grouped_df.with_columns([
            (ZKSYNC * pl.col('v_daily_return')).alias('v_daily_return'),
            (ZKSYNC * pl.col('r_daily_return')).alias('r_daily_return')
        ])
    elif chain == 'ethereum':
        grouped_df = grouped_df.with_columns([
            (ETHEREUM * pl.col('v_daily_return')).alias('v_daily_return'),
            (ETHEREUM * pl.col('r_daily_return')).alias('r_daily_return')
        ])

    grouped_df = grouped_df.with_columns([
        ((1 + pl.col('v_daily_return')).pow(DAYS) - 1).alias('v_annual_return'),
        ((1 + pl.col('r_daily_return')).pow(DAYS) - 1).alias('r_annual_return')
    ])

    # Step 6: Calculate optimal allocation
    #grouped_df = grouped_df.with_columns([
    #    (pl.col('real_TVL') * ((pl.col('r_annual_return') / R_S).pow(0.5) - 1)).alias('allocation'),
    #    ((pl.col('daily_volume_USD') / alpha) * LP_FEE / (pl.col('real_TVL') + pl.col('allocation'))).alias('new_return')
    #])

    #grouped_df = grouped_df.with_columns([
    #    (pl.col('real_TVL') * ((pl.col('r_annual_return').fill_null(0) / R_S).clip_min(1e-10).pow(0.5) - 1)).alias('allocation'),
    #    ((pl.col('daily_volume_USD') / alpha) * LP_FEE / (pl.col('real_TVL') + pl.col('allocation').clip_min(1e-10))).alias('new_return')
    #])


    # Step 7: Adjust new returns for specific chains
    #grouped_df = grouped_df.with_columns([
    #    pl.when(pl.col('chain') == 'zksync')
    #    .then(ZKSYNC * pl.col('new_return'))
    #    .otherwise(pl.col('new_return'))
    #    .alias('new_return'),
    #    pl.when(pl.col('chain') == 'ethereum')
    #    .then(ETHEREUM * pl.col('new_return'))
    #    .otherwise(pl.col('new_return'))
    #    .alias('new_return'),
    #    ((1 + pl.col('new_return')).pow(DAYS) - 1).alias('new_return_ap'),
    #    (pl.col('real_TVL') * ((pl.col('r_annual_return') / R_S).pow(0.5) - 1)).alias('LP1_returns')
    #])

    # Step 8: Store the grouped dataframe in the daily results dictionary
    daily_results[chain] = grouped_df

# Now `daily_results` holds a dictionary with chain as key and aggregated DataFrame as value


  swaps_df


In [8]:
#result = daily_results['arbitrum'].filter(pl.col('date') == my_date)

# Create a list to store filtered DataFrames
result = []

# Iterate over each chain, filter by date, and add a 'chain' column
for chain, df in daily_results.items():
    filtered_df = df.filter(pl.col('date') == my_date).with_columns(
        pl.lit(chain).alias('chain')
    )
    result.append(filtered_df)

# Concatenate the list into a single Polars DataFrame
final_result = pl.concat(result)

# Display the final result
final_result


date,daily_volume_USD,reserve_USD,reserve_ETH,spot_price,virtual_TVL,liquidity,tick,tick_l,tick_u,sqrtPriceX,sqrtRatioL,sqrtRatioU,price_l,price_u,token0,token1,real_TVL,v_daily_return,r_daily_return,v_annual_return,r_annual_return,chain
date,f64,f64,f64,f64,f64,f64,i64,i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str
2024-04-30,344880000.0,438820000.0,145720.574427,3011.413988,877650000.0,7.9966e+18,196218,196210,196220,18222.785718,18215.160261,18224.269663,3013.935867,3010.923588,35732.12756,60.977835,219361.632048,1.2e-05,2e-05,0.004492,0.0075,"""ethereum"""
2024-04-30,145130000.0,175130000.0,58161.245054,3011.028601,350250000.0,3.1915e+18,-196220,196220,196230,18223.951864,18224.269663,18233.38362,3010.923588,3007.91432,90588.671325,-1.014247,87534.743785,0.000207,0.000345,0.078544,0.134341,"""arbitrum"""
2024-04-30,58602000.0,76669000.0,25463.529586,3010.919542,153340000.0,1.3972e+18,-196221,196220,196230,18224.281907,18224.269663,18233.38362,3010.923588,3007.91432,38271.337112,0.017108,38322.847521,0.000191,0.000319,0.07223,0.12329,"""arbitrum2"""
2024-04-30,142940000.0,137280000.0,45573.548802,3012.250529,274560000.0,2.5013e+18,-196216,196210,196220,18220.25519,18215.160261,18224.269663,3013.935867,3010.923588,30240.038931,12.743729,68627.342227,0.00026,0.000434,0.099659,0.171573,"""base"""
2024-04-30,16157000.0,12299000.0,4084.540387,3011.158781,24598000.0,2.2414e+17,-196220,196220,196230,18223.557925,18224.269663,18233.38362,3010.923588,3007.91432,6627.853214,-0.159526,6147.49652,0.000328,0.000548,0.127326,0.221151,"""optimism"""
2024-04-30,26430.687273,207951.983496,68.957763,3015.642837,415903.966992,3786800000000000.0,196204,196200,196210,18210.00429,18206.055412,18215.160261,3016.951159,3013.935867,58.862749,0.014954,103.957571,0.002034,0.00339,1.099103,2.439179,"""zksync"""


In [9]:
# Specify the columns to keep
columns_to_keep = [
    'date',
    'daily_volume_USD',
    'virtual_TVL',
    'real_TVL',
    'v_daily_return',
    'r_daily_return',
    'v_annual_return',
    'r_annual_return'
]

# Filter columns for each chain in daily_results
for chain in daily_results:
    daily_results[chain] = daily_results[chain].select(columns_to_keep)


In [10]:

daily_results['optimism'].columns


['date',
 'daily_volume_USD',
 'virtual_TVL',
 'real_TVL',
 'v_daily_return',
 'r_daily_return',
 'v_annual_return',
 'r_annual_return']

In [11]:
import numpy as np
from sklearn.linear_model import LinearRegression

# Initialize a dictionary to store results for each chain
elasticity_results = {}

# Iterate over each chain in daily_results
for chain, df in daily_results.items():
    # Ensure no missing or zero values before applying log
    df = df.filter((pl.col('virtual_TVL') > 0) & (pl.col('daily_volume_USD') > 0))

    # Log-transform the TVL and Volume columns
    df = df.with_columns([
        (np.log(df["virtual_TVL"])).alias("log_TVL"),
        (np.log(df["daily_volume_USD"])).alias("log_Volume")
    ])

    # Convert the data to a format suitable for sklearn
    X = df.select("log_TVL").to_numpy()
    y = df.select("log_Volume").to_numpy()

    # Fit the linear regression model
    model = LinearRegression().fit(X, y)

    # Extract the scaling constant and elasticity
    epsilon_v = model.coef_[0][0]
    log_k = model.intercept_[0]
    k = np.exp(log_k)

    # Store the results
    elasticity_results[chain] = {
        "Elasticity (epsilon_v)": epsilon_v,
        "Scaling constant (k)": k
    }

# Print the results for each chain
for chain, results in elasticity_results.items():
    print(f"\nChain: {chain}")
    print(f"Elasticity (epsilon_v): {results['Elasticity (epsilon_v)']}")
    print(f"Scaling constant (k): {results['Scaling constant (k)']}")



Chain: ethereum
Elasticity (epsilon_v): -0.12079669406881786
Scaling constant (k): 3053256390.8597474

Chain: arbitrum
Elasticity (epsilon_v): -0.17739561829090997
Scaling constant (k): 3945020961.934215

Chain: arbitrum2
Elasticity (epsilon_v): -0.14315816719879604
Scaling constant (k): 919336413.9939187

Chain: base
Elasticity (epsilon_v): 1.0450695599277045
Scaling constant (k): 0.11842139113810288

Chain: optimism
Elasticity (epsilon_v): -0.178371047060111
Scaling constant (k): 236925148.26732588

Chain: zksync
Elasticity (epsilon_v): 0.6542095099033871
Scaling constant (k): 4.7007550166815815


In [12]:
daily_results

{'ethereum': shape: (121, 8)
 ┌────────────┬────────────┬────────────┬───────────┬───────────┬───────────┬───────────┬───────────┐
 │ date       ┆ daily_volu ┆ virtual_TV ┆ real_TVL  ┆ v_daily_r ┆ r_daily_r ┆ v_annual_ ┆ r_annual_ │
 │ ---        ┆ me_USD     ┆ L          ┆ ---       ┆ eturn     ┆ eturn     ┆ return    ┆ return    │
 │ date       ┆ ---        ┆ ---        ┆ f64       ┆ ---       ┆ ---       ┆ ---       ┆ ---       │
 │            ┆ f64        ┆ f64        ┆           ┆ f64       ┆ f64       ┆ f64       ┆ f64       │
 ╞════════════╪════════════╪════════════╪═══════════╪═══════════╪═══════════╪═══════════╪═══════════╡
 │ 2024-04-01 ┆ 2.8443e8   ┆ 1.2276e9   ┆ 306822.18 ┆ 0.000007  ┆ 0.000012  ┆ 0.002646  ┆ 0.004415  │
 │            ┆            ┆            ┆ 9264      ┆           ┆           ┆           ┆           │
 │ 2024-04-19 ┆ 4.4040e8   ┆ 1.1884e9   ┆ 297035.96 ┆ 0.000012  ┆ 0.000019  ┆ 0.004236  ┆ 0.007071  │
 │            ┆            ┆            ┆ 4139      ┆

In [13]:

# Create an empty list to store DataFrames with the chain column added
all_data = []

# Iterate over each chain and add the chain name as a new column
for chain, df in daily_results.items():
    df = df.with_columns(pl.lit(chain).alias("chain"))
    all_data.append(df)

# Concatenate all DataFrames into one
combined_df = pl.concat(all_data)

# Export the combined DataFrame to a single CSV file
combined_df.write_csv("all_chains_daily_results.csv")

print("Exported to all_chains_daily_results.csv")


Exported to all_chains_daily_results.csv
