In [1]:
import os
from tqdm.notebook import tqdm
import polars as pl
import plotly.graph_objects as go
import json


In [2]:
from plot_utils_2 import get_plotly_layout
from plot_utils_2 import colors
width, height = 1000, 450

In [3]:
# Existing dataset dir
data_dir = './data/'

# Existing plots dir
plots_dir = data_dir+'/plots/'
os.makedirs(data_dir, exist_ok=True)
os.makedirs(plots_dir, exist_ok=True)


In [4]:
plot_settings = {
    'arbitrum': {'color': colors['blue'],   'label': 'Arbitrum', 'style': 'solid', 'width': 4, 'marker_symbol': 'circle'},
    'arbitrum2': {'color': colors['blue'],   'label': 'Arbitrum', 'style': 'solid', 'width': 4, 'marker_symbol': 'circle'},
    'base': {'color': colors['red'],        'label': 'Base', 'style': 'dash', 'width': 3.5, 'marker_symbol': 'diamond'},
    'ethereum': {'color': colors['green'],  'label': 'Ethereum', 'style': 'dot', 'width': 3, 'marker_symbol': 'square'},
    'optimism': {'color': colors['grey'],   'label': 'Optimism', 'style': 'dashdot', 'width': 2.5, 'marker_symbol': 'triangle-up'},
    'zksync': {'color': colors['pink'],     'label': 'zkSync', 'style': 'solid', 'width': 2, 'marker_symbol': 'cross'},
    'arbitrum2': {'color': colors['brown'],    'label': 'Fantom', 'style': 'dash', 'width': 1.5, 'marker_symbol': 'star'},
}

file_settings = {
    'ethereum': {  'nr': '1', 'file_name': './data/mav-ethereum-Uniswap-v3-WETH-USDC.paraquet' , 'output_file': './data/mav2-ethereum-Uniswap-v3-WETH-USDC.paraquet' },
    'arbitrum': {  'nr': '2', 'file_name': './data/mav-arbitrum-Uniswap-v3-WETH-USDC.paraquet' , 'output_file': './data/mav2-arbitrum-Uniswap-v3-WETH-USDC.paraquet'},
    'arbitrum2': { 'nr': '3', 'file_name': './data/mav-arbitrum-Uniswap-v3-WETH-USDCe.paraquet', 'output_file': './data/mav2-arbitrum-Uniswap-v3-WETH-USDCe.paraquet'},
    'base': {      'nr': '4', 'file_name': './data/mav-base-Uniswap-v3-WETH-USDC.paraquet'     , 'output_file': './data/mav2-base-Uniswap-v3-WETH-USDC.paraquet'},
    'optimism': {  'nr': '5', 'file_name': './data/mav-optimism-Uniswap-v3-WETH-USDC.paraquet' , 'output_file': './data/mav2-optimism-Uniswap-v3-WETH-USDC.paraquet'},
    'zksync': {    'nr': '6', 'file_name': './data/mav-zksync-Uniswap-v3-WETH-USDC.paraquet'   , 'output_file': './data/mav2-zksync-Uniswap-v3-WETH-USDC.paraquet' } ,
}



In [5]:
chains = [
    #'ethereum',
    #'arbitrum',
    #'arbitrum2',
    'base',
    'optimism',
    'zksync'
]


In [7]:
def bin_amount_usd(amount):
    return int(amount)

In [8]:
swap_amounts = {}  # Dictionary to store tx count data for each chain

for chain in chains:

    file_name = file_settings[chain]['file_name']
    swaps_df = pl.scan_parquet(file_name).collect(streaming=True)

    swaps_df = swaps_df.select('amount_USD')
    #swap_amounts[chain] = swaps_df['amount_USD']
    #swap_amounts[chain] = swaps_df['amount_USD']

    # Calculate Q1, Q3, and IQR
    Q1 = swaps_df['amount_USD'].quantile(0.25)
    Q3 = swaps_df['amount_USD'].quantile(0.75)
    IQR = Q3 - Q1

    # Define bounds for outlier removal
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    # Filter out outliers
    swaps_df = swaps_df.filter(
        (pl.col('amount_USD') >= lower_bound) & (pl.col('amount_USD') <= upper_bound)
    )

    # Apply the binning function
    binned_df = swaps_df.with_columns(
        swaps_df['amount_USD'].apply(bin_amount_usd).alias('bins')
    )

    # Aggregate the data by bins
    binned_data = binned_df.groupby('bins').agg([
        pl.col('amount_USD').count().alias('count')
    ])

    # Convert to list and store in dictionary
    swap_amounts[chain] = binned_data.to_dict(as_series=False)
    del swaps_df

print(swap_amounts['zksync'])


  swaps_df['amount_USD'].apply(bin_amount_usd).alias('bins')
Series.map_elements is significantly slower than the native series API.
Only use if you absolutely CANNOT implement your logic otherwise.
Replace this expression...
  - s.map_elements(bin_amount_usd)
with this one instead:
  + s.cast(pl.Int64)

  swaps_df['amount_USD'].apply(bin_amount_usd).alias('bins')
  binned_data = binned_df.groupby('bins').agg([


{'bins': [39, 21, 0, 33, 18, 15, 36, 6, 45, 12, 27, -83, 24, 42, 3, 9, 30, 48, 57, -47, -38, 72, -56, -68, -59, -62, 63, 54, -50, 75, 78, 69, -35, 66, -77, -53, -74, -65, 81, -80, 51, -71, -44, -41, 60, -20, 13, -29, -2, 16, 4, -14, -8, -23, 10, -11, -5, 1, -32, -17, -26, 7, -73, 28, 46, 22, -76, 43, 55, 52, -79, -67, 19, -70, -82, 49, 31, 25, 34, 58, 37, 61, 40, 64, -49, 70, -28, -34, -40, 67, -61, -37, -25, -22, 79, -55, -43, 76, -52, -46, -58, 82, -64, 73, -31, -19, 8, 26, -16, 32, -13, 17, 23, -1, 11, -10, -4, -7, 5, 29, 20, 2, 14, 71, 35, -72, 44, 74, -54, -69, 80, 50, 53, -78, 68, -63, 59, 77, -81, -66, -51, 65, 62, 38, 47, -75, -60, -84, -57, 56, 41, -21, -27, -30, -24, -36, -39, -12, -33, -6, -9, -48, -18, -3, -15, -42, -45], 'count': [65, 137, 7558, 103, 172, 240, 75, 317, 90, 194, 101, 48, 123, 55, 565, 178, 144, 73, 58, 84, 82, 39, 56, 60, 71, 46, 49, 58, 95, 41, 44, 43, 110, 51, 46, 63, 59, 48, 42, 34, 75, 56, 80, 62, 62, 206, 189, 140, 768, 212, 300, 235, 187, 175, 389, 37

In [None]:

sfd ds

In [None]:


# Plot the violin plot
fig = go.Figure(layout=get_plotly_layout(width=width, height=height))

# Add traces for both DataFrames
for chain in chains:
    binned_data = swap_amounts[chain]
    fig.add_trace(go.Violin(x==binned_data['bins'], y=binned_data['count'], box_visible=True, line_color=plot_settings[chain]['color'], name=plot_settings[chain]['label']))

fig.update_layout(
    title="Distribution of Swap Volume (USD)",
    showlegend=False,
    yaxis_title="Volume (USD)",
    yaxis_tickformat='.23'#'.2f'
)

# Save the plot as a PDF file
fig.write_image(plots_dir + "amount_usd_distribution_violin_aa.pdf")

# Show the plot
fig.show()


In [None]:
del swap_amounts

In [None]:
import polars as pl
import plotly.graph_objects as go

# Assuming chains, file_settings, and plot_settings are already defined

# Dictionary to store pre-processed and aggregated amount_usd data
processed_amounts = {}

for chain in chains:
    file_name = file_settings[chain]['file_name']
    
    # Read and collect data using Polars
    swaps_df = pl.scan_parquet(file_name).collect()
    
    # Calculate Q1, Q3, and IQR
    Q1 = swaps_df['amount_USD'].quantile(0.25)
    Q3 = swaps_df['amount_USD'].quantile(0.75)
    IQR = Q3 - Q1

    # Define bounds for outlier removal
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    # Filter out outliers
    filtered_swaps_df = swaps_df.filter(
        (pl.col('amount_USD') >= lower_bound) & (pl.col('amount_USD') <= upper_bound)
    )
    
    # Bin the amount_USD values by 1.0
    bins = pl.cut(filtered_swaps_df['amount_USD'], bins=range(int(filtered_swaps_df['amount_USD'].min()), int(filtered_swaps_df['amount_USD'].max()) + 2), labels=False)
    
    # Aggregate the data by bins
    binned_data = filtered_swaps_df.with_column(bins.alias('bins')).groupby('bins').agg([
        pl.col('amount_USD').count().alias('count')
    ])
    
    # Convert to list and store in dictionary
    processed_amounts[chain] = binned_data.to_dict(as_series=False)

# Define the layout dimensions
width = 800
height = 600
plots_dir = './'  # Specify your plots directory

# Create the figure with optimized layout
fig = go.Figure(layout=go.Layout(width=width, height=height))

# Add traces for each chain
for chain in chains:
    binned_data = processed_amounts[chain]  # Access pre-processed and aggregated data
    settings = plot_settings[chain]        # Access plot settings
    fig.add_trace(go.Bar(
        x=binned_data['bins'],
        y=binned_data['count'],
        marker_color=settings['color'],
        name=settings['label']
    ))

# Update layout to hide legend and format y-axis to scientific notation
fig.update_layout(
    title="Distribution of Swap Volume (USD)",
    showlegend=False,
    xaxis_title="Volume (USD) Bins",
    yaxis_title="Count",
    yaxis_tickformat='.2e'  # Scientific notation
)

# Save the plot as a PDF file
fig.write_image(f"{plots_dir}amount_usd_distribution_bar.pdf")

# Show the plot
fig.show()


In [None]:
# Assuming swaps_df1 and swaps_df2 are already defined and are Polars DataFrames

def remove_outliers(df, column):
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    return df.filter((df[column] >= lower_bound) & (df[column] <= upper_bound))

# Remove outliers from both DataFrames
swaps_df1 = remove_outliers(swaps_df1, 'amount_USD')
swaps_df2 = remove_outliers(swaps_df2, 'amount_USD')

In [None]:
sfdd fwedfa

In [None]:
file_name = "./data/swaps-base-Uniswap-v3-WETH-USDC.paraquet"
swaps_df = pl.scan_parquet(file_name).collect(streaming=True)
swaps_df1 = swaps_df

In [None]:
swaps_df['amount_USD']

In [None]:
# Calculate the first and third quartiles (Q1 and Q3) for 'amount_USD'
Q1 = swaps_df['amount_USD'].quantile(0.25)
Q3 = swaps_df['amount_USD'].quantile(0.75)

# Calculate the interquartile range (IQR)
IQR = Q3 - Q1

# Define the lower and upper bounds for outliers
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Filter out the outliers
cleaned_swaps_df = swaps_df.filter((swaps_df['amount_USD'] >= lower_bound) & (swaps_df['amount_USD'] <= upper_bound))

# Convert the cleaned 'amount_USD' column to a list
amount_usd_list_cleaned = cleaned_swaps_df['amount_USD'].to_list()

# Define the layout dimensions
width = 800
height = 600
plots_dir = './'  # Specify your plots directory
chain = 'chain_name'  # Specify your chain name

# Plot the histogram
fig = go.Figure(layout=go.Layout(width=width, height=height))

fig.add_trace(go.Histogram(x=amount_usd_list_cleaned, marker_color='blue', name='Amount USD'))

fig.update_layout(
    title="Distribution of Amount USD (Without Outliers)",
    xaxis_title="Amount USD",
    yaxis_title="Count",
    bargap=0.2,
)

# Save the plot as a PDF file
fig.write_image(plots_dir + "amount_usd_distribution_" + chain + ".pdf")

# Show the plot
fig.show()


In [None]:
file_name = "./data/swaps-ethereum-Uniswap-v3-WETH-USDC.paraquet"
swaps_df = pl.scan_parquet(file_name).collect(streaming=True)
swaps_df2 = swaps_df

In [None]:

# Calculate the first and third quartiles (Q1 and Q3) for 'amount_USD'
Q1 = swaps_df['amount_USD'].quantile(0.25)
Q3 = swaps_df['amount_USD'].quantile(0.75)

# Calculate the interquartile range (IQR)
IQR = Q3 - Q1

# Define the lower and upper bounds for outliers
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Filter out the outliers
swaps_df = swaps_df.filter((swaps_df['amount_USD'] >= lower_bound) & (swaps_df['amount_USD'] <= upper_bound))

# Convert the cleaned 'amount_USD' column to a list
#amount_usd_list_cleaned = cleaned_swaps_df['amount_USD'].to_list()

# Define the layout dimensions
width = 800
height = 600
plots_dir = './'  # Specify your plots directory
chain = 'chain_name'  # Specify your chain name

# Plot the histogram
fig = go.Figure(layout=go.Layout(width=width, height=height))

fig.add_trace(go.Histogram(x=swaps_df['amount_USD'], marker_color='blue', name='Amount USD'))

fig.update_layout(
    title="Distribution of Amount USD (Without Outliers)",
    xaxis_title="Amount USD",
    yaxis_title="Count",
    bargap=0.2,
)

# Save the plot as a PDF file
fig.write_image(plots_dir + "amount_usd_distribution_" + chain + ".pdf")

# Show the plot
fig.show()


In [None]:
# Define the layout dimensions
width = 800
height = 600
plots_dir = './'  # Specify your plots directory
chain = 'chain_name'  # Specify your chain name

# Plot the violin plot
fig = go.Figure(layout=go.Layout(width=width, height=height))

fig.add_trace(go.Violin(y=swaps_df['amount_USD'], box_visible=True, line_color='blue', name='Amount USD'))

fig.update_layout(
    title="Distribution of Amount USD (Without Outliers)",
    xaxis_title="Amount USD",
    yaxis_title="Density",
    yaxis_tickformat='.2f'
)

# Save the plot as a PDF file
fig.write_image(plots_dir + "amount_usd_distribution_violin_" + chain + ".pdf")

# Show the plot
fig.show()


In [None]:


# Assuming swaps_df1 and swaps_df2 are already defined and are Polars DataFrames

def remove_outliers(df, column):
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    return df.filter((df[column] >= lower_bound) & (df[column] <= upper_bound))

# Remove outliers from both DataFrames
swaps_df1 = remove_outliers(swaps_df1, 'amount_USD')
swaps_df2 = remove_outliers(swaps_df2, 'amount_USD')

# Define the layout dimensions
width = 800
height = 600
plots_dir = './'  # Specify your plots directory
chain = 'chain_name'  # Specify your chain name

# Plot the violin plot
fig = go.Figure(layout=go.Layout(width=width, height=height))

# Add traces for both DataFrames
fig.add_trace(go.Violin(y=swaps_df1['amount_USD'], box_visible=True, line_color='blue', name='Amount USD - DataFrame 1'))
fig.add_trace(go.Violin(y=swaps_df2['amount_USD'], box_visible=True, line_color='green', name='Amount USD - DataFrame 2'))

fig.update_layout(
    title="Distribution of Amount USD (Without Outliers)",
    xaxis_title="DataFrames",
    yaxis_title="Amount USD",
    yaxis_tickformat='.2f'
)

# Save the plot as a PDF file
fig.write_image(plots_dir + "amount_usd_distribution_violin_" + chain + ".pdf")

# Show the plot
fig.show()


In [None]:
del swaps_df1
del swaps_df2

In [None]:
#Top Operations
chain = chains[0]
data = inscriptions_groups_dict[chain]
value_counts = get_top_operations(data)

# Plot the bar chart
fig = go.Figure(layout=get_plotly_layout(width=width, height=height))

fig.add_trace(go.Bar(x=value_counts['op'], y=value_counts['count'],
              marker_color=colors['blue'], textposition='auto', text=value_counts['count'].round(2), name='Operation'))
fig.update_layout(yaxis_title="Percentage",
                  xaxis_title="Operation", yaxis_ticksuffix="%")

fig.update_traces(
    texttemplate='<b>%{text:,.4}</b>', textfont_size=18)
fig.update_yaxes(range=[0, 100])

fig.write_image(plots_dir+"top-15-operation-"+chain+".pdf")

fig.show()
