In [1]:
import polars as pl
import altair as alt


In [2]:
df = pl.read_parquet('solana_arbs.parquet')

In [3]:
df.head()

sig,slot,date_cet,in_bundle,jito_tip_sol,revenue_sol,signer,program
str,u64,date,bool,f64,f64,str,str
"""W3sXWjdkB4zeNUQXC2n3PPM95N3pkz…",287041445,2024-09-01,True,79.19886,395.994322,"""benRLpbWCL8P8t51ufYt522419hGF5…","""Evo1ve6p41CUZSdh7WCofrStMdhzUK…"
"""eqtqJ1ZabjaMf1uggVMdJYCpxLpEtH…",292044925,2024-09-26,False,,97.352975,"""7dGrdJRYtsNR8UYxZ3TnifXGjGc9eR…","""JUP6LkbZbjS1jKKwapdHNy74zcZ3tL…"
"""48vDxwKqzKN47vZ5Lys6P9K6EmQiFy…",292173428,2024-09-26,True,10.0,90.209841,"""GuiU6MpLahPHSHYcsfSRjwLUm1AtZ9…","""JUP6LkbZbjS1jKKwapdHNy74zcZ3tL…"
"""3MTDD9Y2VHd1EZ4w8zcUPVqoo7ZPUa…",292882418,2024-09-30,False,,75.548641,"""HJLqkCFiNMUsXvqA9btLXFwKpWgCAX…","""E8uB9xQ8ffx3YyCbXDVKNMV2VpaHhg…"
"""Mt1srubEyC8mfzdyFa2WysZqY1oS2G…",287651958,2024-09-04,True,10.0,70.258993,"""GuiU6MpLahPHSHYcsfSRjwLUm1AtZ9…","""JUP6LkbZbjS1jKKwapdHNy74zcZ3tL…"


In [4]:
df.describe()

statistic,sig,slot,date_cet,in_bundle,jito_tip_sol,revenue_sol,signer,program
str,str,f64,str,f64,f64,f64,str,str
"""count""","""1059953""",1059953.0,"""1059953""",1059953.0,356867.0,1059953.0,"""1059953""","""1059953"""
"""null_count""","""0""",0.0,"""0""",0.0,703086.0,0.0,"""0""","""0"""
"""mean""",,290520000.0,"""2024-09-18 00:39:16.460000""",0.336682,0.014406,0.025464,,
"""std""",,1812300.0,,,0.21902,0.496252,,
"""min""","""112Fv1uZcpHHvaJD3h3S5ReDHmQq6C…",287026776.0,"""2024-09-01""",0.0,0.0,0.001,"""22222pDJQXX5kdD4poV7veS6Rtej81…","""2CSekKsHWCwcQp3eMdn77UDyTV3VLm…"
"""25%""",,288974589.0,"""2024-09-10""",,0.00075,0.001702,,
"""50%""",,290826317.0,"""2024-09-20""",,0.00161,0.00339,,
"""75%""",,292179756.0,"""2024-09-26""",,0.00439,0.009722,,
"""max""","""zzzp8WEAGPrkUfy441aXSFjRd6F1D1…",292961509.0,"""2024-09-30""",1.0,79.19886,395.994322,"""zLukaDvbxpL5Hvr3EJhot2NdYH7mfg…","""pigiKqKi9dy5kh1kXpUj5cWLFmFspu…"


In [27]:
df.select(
    pl.sum('jito_tip_sol').alias('total_tips'),
    pl.sum('revenue_sol').alias('total_revenue'),
    pl.sum('in_bundle').alias('num_in_bundle'),
    pl.n_unique('sig').alias('total_txns'),
    pl.col('jito_tip_sol').eq(0.0).count().alias('num_zero_tip'),
    pl.when('in_bundle').then('revenue_sol').sum().alias('rev_from_bundles'),
    pl.when('in_bundle').then('jito_tip_sol').sum().alias('tips_for_bundles')
).select(
    pl.all(),
    (pl.col('total_tips')/pl.col('total_revenue')).alias('total_pct_to_jito_proposer'),
    (pl.col('tips_for_bundles')/pl.col('rev_from_bundles')).alias('pct_to_proposer_when_jito'),
    (pl.col('num_in_bundle')/pl.col('total_txns')).alias('pct_in_bundle'),
    (pl.col('rev_from_bundles')/pl.col('total_revenue')).alias('pct_rev_from_bundled_txns')
)

total_tips,total_revenue,num_in_bundle,total_txns,num_zero_tip,rev_from_bundles,tips_for_bundles,total_pct_to_jito_proposer,pct_to_proposer_when_jito,pct_in_bundle,pct_rev_from_bundled_txns
f64,f64,u32,u32,u32,f64,f64,f64,f64,f64,f64
5141.15618,26990.811003,356867,1059953,356867,10800.503268,5141.15618,0.190478,0.476011,0.336682,0.400155


In [6]:
chart_data1 = (
    df.group_by('program').agg(
            pl.count('sig').alias('no_txns'),
            pl.sum('revenue_sol').alias('rev_sum'),
            pl.sum('jito_tip_sol').alias('tips_sum')
        )
)

In [7]:
(
    alt.Chart(chart_data1).mark_bar().encode(
            alt.X('program:N', sort='-y'),
            y="no_txns:Q"
        )
)

In [8]:
(
    alt.Chart(chart_data1).mark_circle().encode(
            x='no_txns:Q',
            y="rev_sum:Q"
        )
)

In [9]:
(
    alt.Chart(chart_data1).mark_circle().encode(
            x='no_txns:Q',
            y="tips_sum:Q"
        )
)

In [10]:
(
    alt.Chart(chart_data1).mark_circle().encode(
            x="tips_sum:Q",
            y='rev_sum:Q'
        )
)

In [11]:
# Three programs

(
    df.group_by('program')
    .agg(
        pl.sum('revenue_sol').alias('total_revenue'),
        pl.sum('jito_tip_sol').alias('total_tips'),
        (pl.sum('jito_tip_sol')/pl.sum('revenue_sol')).alias('pct_paid_to_proposer'),
        pl.sum('in_bundle').alias('jito_use_count'),
        pl.count('sig').alias('no_transactions'),
        (pl.sum('in_bundle')/pl.count('sig')).alias('pct_using_jito'),
        pl.n_unique('signer').alias('unique_senders')
    )
    .filter(
        (pl.col('total_tips') < 250) & (pl.col('total_revenue') > 1200)
    )
    .sort('total_revenue', descending=True)
)


program,total_revenue,total_tips,pct_paid_to_proposer,jito_use_count,no_transactions,pct_using_jito,unique_senders
str,f64,f64,f64,u32,u32,f64,u32
"""3JmzqBoDLvNTPapBGCN7x23kTE5o7z…",2493.079182,153.06605,0.061396,4072,28090,0.144963,3
"""7PWnthtTsGnSpR4JLENYVoCJ5y5Xwg…",2241.467595,0.0,0.0,0,38406,0.0,1
"""bank7GaK8LkjyrLpSZjGuXL8z7yae6…",1339.141304,25.42232,0.018984,7848,95013,0.082599,1


In [12]:
# Totals

(
    df.select(
        pl.sum('revenue_sol').alias('total_revenue'),
        pl.sum('jito_tip_sol').alias('total_tips'),
        (pl.sum('jito_tip_sol')/pl.sum('revenue_sol')).alias('pct_paid_to_proposer'),
        pl.sum('in_bundle').alias('jito_use_count'),
        pl.count('sig').alias('total_transactions'),
        (pl.sum('in_bundle')/pl.count('sig')).alias('pct_using_jito'),
        pl.n_unique('signer').alias('unique_senders')
    )
)

total_revenue,total_tips,pct_paid_to_proposer,jito_use_count,total_transactions,pct_using_jito,unique_senders
f64,f64,f64,u32,u32,f64,u32
26990.811003,5141.15618,0.190478,356867,1059953,0.336682,414


In [13]:
(
    alt.Chart(
        df
        .filter(pl.col('program') == '3JmzqBoDLvNTPapBGCN7x23kTE5o7zkQ2fQhuyU3j9x6')
        .group_by(['program', 'date_cet'])
        .agg(
            pl.n_unique('signer').alias('no_signers')
        )
    ).mark_line().encode(
            x='date_cet:T',
            y=alt.Y('no_signers:Q', scale=alt.Scale(domain=[0, 5]))
        )
)

In [14]:
chart_data_3jmz_signer = (
    df
        .filter(pl.col('program') == '3JmzqBoDLvNTPapBGCN7x23kTE5o7zkQ2fQhuyU3j9x6')
        .group_by(['signer', 'date_cet'])
        .agg(
            pl.sum('revenue_sol').alias('total_rev'),
            pl.sum('jito_tip_sol').alias('total_tips'),
            pl.sum('in_bundle').alias('count_in_bundles'),
            pl.count('sig').alias('no_txns')
        )
)

In [15]:
(
    alt.Chart(
        chart_data_3jmz_signer
    ).mark_bar().encode(
        x="date_cet",
        y="no_txns",
        color=alt.Color("signer:N").scale(scheme='dark2')
    )
)

In [16]:
(
    alt.Chart(
        chart_data_3jmz_signer
    ).mark_bar().encode(
        x="date_cet",
        y="total_rev",
        color=alt.Color("signer:N").scale(scheme='dark2')
    )
)

In [17]:
(
    alt.Chart(
        chart_data_3jmz_signer
    ).mark_bar().encode(
        x="date_cet",
        y="total_tips",
        color=alt.Color("signer:N").scale(scheme='dark2')
    )
)

In [19]:
(
    alt.Chart(
        chart_data_3jmz_signer
    ).mark_bar().encode(
        x="date_cet:T",
        y="count_in_bundles:Q",
        color=alt.Color("signer:N").scale(scheme='dark2')
    )
)