In [1]:
import os
from tqdm.notebook import tqdm
import polars as pl
import json
import plotly.graph_objects as go



In [2]:
# Root directory holding the datasets (created below if it does not exist yet)
data_dir = './data/'

# Directory for generated plots. data_dir already ends with '/', so no extra
# separator is prepended (the previous expression produced './data//plots/').
plots_dir = data_dir + 'plots/'
os.makedirs(data_dir, exist_ok=True)
os.makedirs(plots_dir, exist_ok=True)



In [3]:
def process_swaps(file_name):
    """Return the date range covered by a swaps parquet file.

    The earliest and latest ``block_timestamp`` values are formatted as
    ``YYYYMMDD`` and joined with ``" - "``.

    Parameters
    ----------
    file_name : str
        Path to a parquet file containing a ``block_timestamp`` column whose
        values support ``strftime`` (assumed to be datetimes -- TODO confirm).

    Returns
    -------
    str
        ``"<first day> - <last day>"``, e.g. ``"20240101 - 20240430"``.

    Raises
    ------
    ValueError
        If the file contains no ``block_timestamp`` values.
    """
    # Only the timestamp column is needed for the returned string, so project
    # before collecting instead of materialising the whole file. The swap and
    # block statistics previously computed here were overwritten before the
    # return and made the function fail with ZeroDivisionError on files whose
    # rows all share a single block.
    timestamps = (
        pl.scan_parquet(file_name)
        .select('block_timestamp')
        .collect()['block_timestamp']
    )

    minTimestamp = timestamps.min()
    maxTimestamp = timestamps.max()
    if minTimestamp is None or maxTimestamp is None:
        # min()/max() yield None when there is nothing to aggregate
        raise ValueError(f"no block_timestamp values found in {file_name!r}")

    return minTimestamp.strftime('%Y%m%d')+" - "+maxTimestamp.strftime('%Y%m%d')

def process_swaps_1(file_name):
    """Build the LaTeX cells with per-block and per-transaction swap ratios.

    Reads the parquet file at ``file_name`` and returns one string holding
    four ``&``-separated cells, in this order:

    1. elapsed seconds between the earliest and latest ``block_timestamp``
       divided by the block-number span (``max - min``), suffixed with ``s``;
    2. row count divided by the block-number span;
    3. row count divided by the number of distinct ``transaction_hash`` values;
    4. row count divided by the number of distinct ``block_number`` values.

    Every ratio is rendered with two decimals; cells 2-4 are wrapped in
    ``\\num{...}``.
    """
    frame = pl.scan_parquet(file_name).collect(streaming=True)

    # Transaction-level counts
    tx_hashes = frame['transaction_hash']
    tx_count = tx_hashes.n_unique()
    swap_count = tx_hashes.shape[0]

    # Block-level counts
    block_col = frame['block_number']
    swaps_per_seen_block = swap_count / block_col.n_unique()
    first_block, last_block = block_col.min(), block_col.max()
    block_span = last_block - first_block
    swaps_per_span_block = swap_count / block_span

    # Timing
    stamps = frame['block_timestamp']
    first_seen, last_seen = stamps.min(), stamps.max()
    seconds_per_block = (last_seen - first_seen).total_seconds() / block_span
    swaps_per_tx = swap_count / tx_count

    # The trailing blanks on the last two cells reproduce the exact spacing of
    # the row format (double space before the final '&', space at the end).
    cells = (
        f"{seconds_per_block:.2f}s",
        f"\\num{{{swaps_per_span_block:.2f}}}",
        f"\\num{{{swaps_per_tx:.2f}}} ",
        f"\\num{{{swaps_per_seen_block:.2f}}} ",
    )
    return " & ".join(cells)


In [4]:
# Build one LaTeX table row of swap statistics for the arbitrum Uniswap-v3
# WETH-USDC file.
# NOTE(review): ".paraquet" looks like a misspelling of ".parquet"; the path is
# left as-is because it has to match the file name on disk -- confirm.
file_name = "./data/raw-data-arbitrum-Uniswap-v3-WETH-USDC.paraquet"
# Scan the file lazily, then materialise all of it as a DataFrame
swaps_df = pl.scan_parquet(file_name).collect(streaming=True)

# Calculate required metrics
# unique: distinct transaction hashes; swaps: total number of rows
unique = swaps_df['transaction_hash'].n_unique()
swaps = swaps_df['transaction_hash'].shape[0]
# block_numbers: distinct block numbers present in the data
block_numbers = swaps_df['block_number'].n_unique()
# Rows per distinct block
avgInBlock = swaps / block_numbers
minBlock = swaps_df['block_number'].min()
maxBlock = swaps_df['block_number'].max()
# Block-number span; not read again within this cell
total_block =  maxBlock - minBlock
minTimestamp = swaps_df['block_timestamp'].min()
maxTimestamp = swaps_df['block_timestamp'].max()
# Elapsed seconds divided by the block-number span; raises ZeroDivisionError
# when every row belongs to the same block
avgBlockTime = (maxTimestamp - minTimestamp).total_seconds() / (maxBlock - minBlock)

# Create formatted string
# Cell order: rows, distinct tx hashes, distinct blocks, first--last block,
# rows per distinct block, seconds per block.
# (\num is presumably the siunitx number macro -- verify against the LaTeX preamble)
my_row = f"\\num{{{swaps}}} & \\num{{{unique}}} & \\num{{{block_numbers}}} & \\num{{{minBlock}}}--\\num{{{maxBlock}}} & \\num{{{avgInBlock:.2f}}} & {avgBlockTime:.2f}s"


In [5]:
# Display the LaTeX row string assembled in the previous cell
my_row

'\\num{2400000} & \\num{2367361} & \\num{1709619} & \\num{187373628}--\\num{206540031} & \\num{1.40} & 0.25s'

In [6]:
# First and last block_timestamp day (YYYYMMDD) in the ethereum Uniswap-v3 WETH-USDC file
file_name = "./data/raw-data-ethereum-Uniswap-v3-WETH-USDC.paraquet"
result = process_swaps(file_name)
print(result)

20240101 - 20240430


In [7]:
# First and last block_timestamp day (YYYYMMDD) in the arbitrum Uniswap-v3 WETH-USDC file
file_name = "./data/raw-data-arbitrum-Uniswap-v3-WETH-USDC.paraquet"
result = process_swaps(file_name)
print(result)

20240305 - 20240430


In [38]:
# First and last block_timestamp day (YYYYMMDD) in the arbitrum Uniswap-v3 WETH-USDCe file
file_name = "./data/raw-data-arbitrum-Uniswap-v3-WETH-USDCe.paraquet"
result = process_swaps(file_name)
print(result)

20240101 - 20240430


In [39]:
# First and last block_timestamp day (YYYYMMDD) in the base Uniswap-v3 WETH-USDC file
file_name = "./data/raw-data-base-Uniswap-v3-WETH-USDC.paraquet"
result = process_swaps(file_name)
print(result)

20240101 - 20240430


In [40]:
# First and last block_timestamp day (YYYYMMDD) in the optimism Uniswap-v3 WETH-USDC file
file_name = "./data/raw-data-optimism-Uniswap-v3-WETH-USDC.paraquet"
result = process_swaps(file_name)
print(result)

20240101 - 20240430


In [41]:
# First and last block_timestamp day (YYYYMMDD) in the zkSync Uniswap-v3 WETH-USDC file
file_name = "./data/raw-data-zkSync-Uniswap-v3-WETH-USDC.paraquet"
result = process_swaps(file_name)
print(result)

20240101 - 20240430


In [42]:
# First and last block_timestamp day (YYYYMMDD) in the zkSync SyncSwap WETH-USDCe file
file_name = "./data/raw-data-zkSync-SyncSwap-WETH-USDCe.paraquet"
result = process_swaps(file_name)
print(result)

20240429 - 20240430
