In [30]:
import os
from tqdm.notebook import tqdm
import polars as pl
import json
import plotly.graph_objects as go



In [31]:
# Root directory for the dataset files (created below if it does not exist)
data_dir = './data/'

# Directory for generated plots, nested under the dataset directory.
# data_dir already ends with '/', so strip it before appending; plain
# concatenation would produce './data//plots/' with a doubled separator.
plots_dir = data_dir.rstrip('/') + '/plots/'

# exist_ok=True keeps the cell safe to re-run
os.makedirs(data_dir, exist_ok=True)
os.makedirs(plots_dir, exist_ok=True)


In [32]:
# Read the whole swap dataset into memory.
# NOTE(review): the '.paraquet' extension looks like a typo for '.parquet';
# confirm against the file on disk before renaming anything.
file_name = "./data/swaps-arbitrum-Uniswap-v3-WETH-USDC.paraquet"
swaps_df = pl.scan_parquet(file_name).collect(streaming=True)

# Headline counts: distinct transaction hashes, swap rows, distinct blocks
unique = swaps_df['transaction_hash'].n_unique()
swaps = swaps_df.height
block_numbers = swaps_df['block_number'].n_unique()
print(unique, swaps, block_numbers, sep='\n')

# Span of the dataset in block numbers and in wall-clock time
minBlock, maxBlock = swaps_df['block_number'].min(), swaps_df['block_number'].max()
total_block = maxBlock - minBlock
minTimestamp, maxTimestamp = (
    swaps_df['block_timestamp'].min(),
    swaps_df['block_timestamp'].max(),
)

# Derived ratios
avgInBlock = swaps / block_numbers    # swaps per block that appears in the data
avgInBlock2 = swaps / total_block     # swaps per block number in the covered span
swapPerTransaction = swaps / unique   # swap rows per distinct transaction hash
avgBlockTime = (maxTimestamp - minTimestamp).total_seconds() / total_block

# One LaTeX table row (uses the \num{} macro) summarising the ratios
my_row = f"{avgBlockTime:.2f}s & \\num{{{avgInBlock2:.2f}}} & \\num{{{swapPerTransaction:.2f}}}  & \\num{{{avgInBlock:.2f}}} "
my_row

2367361
2400000
1709619


'0.25s & \\num{0.13} & \\num{1.01}  & \\num{1.40} '

In [43]:
# Tag every swap with a direction flag: +1 when amount_ETH is positive,
# -1 for every other row.
swaps_df = swaps_df.with_columns(
    sign=pl.when(pl.col('amount_ETH') > 0).then(1).otherwise(-1)
)

# Per block: how many swap rows it holds and the net of their direction flags.
# |sum_sign| == transaction_count means all swaps in the block share one sign.
transactions = swaps_df.group_by('block_number').agg(
    transaction_count=pl.col('transaction_hash').count(),
    sum_sign=pl.col('sign').sum(),
)
transactions

block_number,transaction_count,sum_sign
i64,u32,i32
203025804,6,6
199095196,1,-1
192401643,1,1
197658569,2,-2
206026748,1,1
…,…,…
198822914,1,-1
199252468,1,-1
197080856,1,1
199249839,2,-2


In [44]:
# Blocks that contain more than one swap row.
transactions2 = transactions.filter(pl.col('transaction_count').gt(1))

# Display them ordered by swap count; the sorted frame is only shown,
# transactions2 itself keeps its original row order.
transactions2.sort('transaction_count')

block_number,transaction_count,sum_sign
i64,u32,i32
197658569,2,-2
187706590,2,-2
203488105,2,-2
203937848,2,-2
192100282,2,2
…,…,…
202755108,38,-36
202778355,38,38
201837856,39,-39
201901374,41,39


In [46]:
# Multi-swap blocks whose direction flags do not all agree: the absolute
# net sign differs from the number of swaps in the block.
transactions2s = transactions2.filter(
    pl.col('sum_sign').abs() != pl.col('transaction_count')
)
transactions2s

block_number,transaction_count,sum_sign
i64,u32,i32
203291720,2,0
202339980,6,-4
202493227,2,0
204821206,2,0
192024382,3,1
…,…,…
192034674,3,-1
204137561,2,0
200840380,2,0
190569355,2,0


In [35]:
# Peek at one row of the multi-swap blocks frame by positional index.
# NOTE(review): transactions2 comes from a group_by without maintain_order,
# so the row found at this position may differ between runs — confirm.
# NOTE(review): the recorded output below lacks the sum_sign column, so it
# presumably predates the current definition of transactions — re-run to refresh.
transactions2[200_090]

block_number,transaction_count
i64,u32
195671979,5


In [49]:
# Pull every swap recorded in one specific block for manual inspection.
filtered_swaps = swaps_df.filter(
    pl.col('block_number').eq(204_137_561)
)
filtered_swaps


transaction_hash,transaction_index,log_index,block_number,block_timestamp,gas_price,amount_ETH,amount_USD,sqrtPriceX96,tick,liquidity,effective_price,spot_price,reserve_ETH,reserve_USD,CEX_price,gas_fee_gwei,gas_fee_USD,sign
str,i64,i64,i64,datetime[μs],i64,f64,f64,f64,i64,f64,f64,f64,f64,f64,f64,f64,f64,i32
"""0xebe44a6357da…",4,88,204137561,2024-04-23 22:26:36,10000000,-0.002117,6.811127,4.4928e+24,-195562,5.2961e+18,3217.325072,3215.716276,93393.558455,300330000.0,3215.51,18146.24,0.058349,-1
"""0xa9cf37f38a51…",2,10,204137561,2024-04-23 22:26:36,10000000,0.071608,-230.154555,4.4928e+24,-195562,5.286e+18,3214.110731,3215.71613,93216.028092,299760000.0,3215.51,6535.15,0.021014,1


In [37]:
# Number of swap rows per transaction hash.
# NOTE(review): this rebinds `swaps`, which the metrics cell uses as an
# integer row count; re-run that cell before reusing its ratios.
swaps = swaps_df.group_by('transaction_hash').agg(
    transaction_count=pl.col('transaction_hash').count()
)
swaps

transaction_hash,transaction_count
str,u32
"""0xa73592aa7608…",1
"""0x6e16ff4dd62b…",1
"""0x4798d5dd8c20…",1
"""0x74e9e315b294…",1
"""0x3b9da0371ebb…",1
…,…
"""0x3a19560c675d…",1
"""0xb62a4b5564c4…",1
"""0x7b8340a628ec…",1
"""0x0661d1bdbd85…",1


In [38]:
# Transactions that contain more than one swap row.
swaps2 = swaps.filter(pl.col('transaction_count').gt(1))

# Display them ordered by swap count; swaps2 itself is left unsorted.
swaps2.sort('transaction_count')

transaction_hash,transaction_count
str,u32
"""0xcf98e038dca5…",2
"""0x7e916484f40c…",2
"""0x75efcfc989b0…",2
"""0x5414386b52b3…",2
"""0x7419c00b5ee5…",2
…,…
"""0x3b51912a622d…",23
"""0xe923ef38dd9f…",23
"""0xa2271a9c2441…",23
"""0x63e3fee45651…",23


In [39]:
# Positional index into swaps2 of the multi-swap transaction to inspect
nr = 7018

In [40]:
# Hash of the transaction sitting at position nr in swaps2.
# NOTE(review): swaps2 derives from a group_by without maintain_order, so
# the hash found at this position may change between runs — confirm.
swaps2['transaction_hash'][nr]

'0xbd001bb4703a2c38a2eee639d6c7d0a5ade6570204178ab6749297ceb3843f27'

In [41]:
# All swap rows that belong to the transaction selected via nr.
filtered_swaps = swaps_df.filter(
    pl.col('transaction_hash').eq(swaps2[nr]['transaction_hash'][0])
)
filtered_swaps

transaction_hash,transaction_index,log_index,block_number,block_timestamp,gas_price,amount_ETH,amount_USD,sqrtPriceX96,tick,liquidity,effective_price,spot_price,reserve_ETH,reserve_USD,CEX_price,gas_fee_gwei,gas_fee_USD
str,i64,i64,i64,datetime[μs],i64,f64,f64,f64,i64,f64,f64,f64,f64,f64,f64,f64,f64
"""0xbd001bb4703a…",3,310,205388667,2024-04-27 15:00:29,11000000,0.003485,-10.963758,4.4453e+24,-195775,5.382e+18,3146.430877,3148.00478,95923.758757,301970000.0,3147.4,134117.379,0.422121
"""0xbd001bb4703a…",3,293,205388667,2024-04-27 15:00:29,11000000,-0.003738,11.773493,4.4453e+24,-195775,5.382e+18,3149.579744,3148.005009,95923.755274,301970000.0,3147.4,134117.379,0.422121
"""0xbd001bb4703a…",3,403,205388667,2024-04-27 15:00:29,11000000,0.002026,-6.37598,4.4453e+24,-195775,5.382e+18,3146.430773,3148.004752,95923.759187,301970000.0,3147.4,134117.379,0.422121
"""0xbd001bb4703a…",3,172,205388667,2024-04-27 15:00:29,11000000,-5.2e-05,0.164739,4.4453e+24,-195775,5.382e+18,3149.591618,3148.00477,95923.758915,301970000.0,3147.4,134117.379,0.422121
"""0xbd001bb4703a…",3,219,205388667,2024-04-27 15:00:29,11000000,9.7e-05,-0.305929,4.4453e+24,-195775,5.382e+18,3146.421217,3148.004763,95923.759012,301970000.0,3147.4,134117.379,0.422121
"""0xbd001bb4703a…",3,327,205388667,2024-04-27 15:00:29,11000000,0.001637,-5.149801,4.4453e+24,-195775,5.382e+18,3146.430278,3148.004673,95923.760393,301970000.0,3147.4,134117.379,0.422121
"""0xbd001bb4703a…",3,386,205388667,2024-04-27 15:00:29,11000000,-0.002184,6.879144,4.4453e+24,-195775,5.382e+18,3149.579799,3148.004885,95923.757161,301970000.0,3147.4,134117.379,0.422121
"""0xbd001bb4703a…",3,83,205388667,2024-04-27 15:00:29,11000000,9.4e-05,-0.29535,4.4453e+24,-195775,5.382e+18,3146.42821,3148.004766,95923.758967,301970000.0,3147.4,134117.379,0.422121
"""0xbd001bb4703a…",3,28,205388667,2024-04-27 15:00:29,11000000,-9.8e-05,0.309303,4.4453e+24,-195775,5.382e+18,3149.58311,3148.004773,95923.758873,301970000.0,3147.4,134117.379,0.422121
"""0xbd001bb4703a…",3,362,205388667,2024-04-27 15:00:29,11000000,-0.001047,3.297956,4.4453e+24,-195775,5.382e+18,3149.579518,3148.004742,95923.759346,301970000.0,3147.4,134117.379,0.422121
