In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import datetime

In [2]:
# Load the per-user statistics export and parse its two timestamp columns.
raw_data = pd.read_csv("gen/userStats.csv")
for date_column in ("First Tx Date", "Last Tx Date"):
    # Convert the column as read from the CSV into pandas datetimes.
    raw_data[date_column] = pd.to_datetime(raw_data[date_column])


In [3]:
MIN_TXS = 2
START_DATE = datetime.datetime(2024, 9, 26)
# -------------------------------------------------------------------------------------------------
# Build both conditions against the full frame and combine them into a single
# mask. Chaining two boolean selections (frame[m1][m2]) applies a full-length
# mask to an already-filtered frame, which makes pandas reindex the mask and
# emit "Boolean Series key will be reindexed to match DataFrame index".
has_enough_txs = raw_data["Total # Txs"] > MIN_TXS  # strictly more than MIN_TXS
active_since_start = raw_data["Last Tx Date"] >= START_DATE  # START_DATE inclusive
data = raw_data.loc[has_enough_txs & active_since_start]
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1053 entries, 1 to 3820
Data columns (total 17 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   Address                    1053 non-null   object        
 1   Username                   1050 non-null   object        
 2   First Tx Date              1053 non-null   datetime64[ns]
 3   Last Tx Date               1053 non-null   datetime64[ns]
 4   Total # Txs                1053 non-null   int64         
 5   Total Fees (wei)           1053 non-null   int64         
 6   Total Fees (Eth)           1053 non-null   float64       
 7   Avg Total Fees (wei)       1053 non-null   int64         
 8   Avg Total Fees (Eth)       1053 non-null   float64       
 9   Total L1 Fees (wei)        1053 non-null   int64         
 10  Total L1 Fees (Eth)        1053 non-null   float64       
 11  Total L2 Fees (wei)        1053 non-null   int64         
 12  Total L2 Fe

  data = raw_data[raw_data["Total # Txs"] > MIN_TXS][raw_data["Last Tx Date"] >= START_DATE]


In [4]:
def format_short_scale(num, decimals=2):
    """Render a number as a short-scale label in billions (B) or trillions (T).

    Args:
        num: Value to format; anything accepted by ``float()``.
        decimals: Number of digits after the decimal point in the B/T forms.

    Returns:
        ``"NaN"`` when the value is not a number, ``"< 1B"`` below 1e9,
        ``"<x>B"`` for values in [1e9, 1e12), ``"<x>T"`` for values in
        [1e12, 1e14), and ``"> 100T"`` for anything at or above 1e14.
    """
    n = float(num)

    # NaN fails every "<" comparison below, so without this guard it would fall
    # through to the final branch and be reported as "> 100T".
    if np.isnan(n):
        return "NaN"

    if n < 1e9:
        return "< 1B"
    elif n < 1e12:
        return f"{n/1e9:.{decimals}f}B"
    elif n < 1e14:
        return f"{n/1e12:.{decimals}f}T"
    else:
        return "> 100T"

In [5]:
mean_val = data["Avg Total Fees (wei)"].mean()
formatted_mean = format_short_scale(mean_val)
mean_val_eth = mean_val / 1e18
formatted_mean_eth = f"{mean_val_eth:.8f} ETH"
median_val = data["Avg Total Fees (wei)"].median()
formatted_median = format_short_scale(median_val)
median_val_eth = median_val / 1e18
formatted_median_eth = f"{median_val_eth:.8f} ETH"
mode_val = data["Avg Total Fees (wei)"].mode()[0]
formatted_mode = format_short_scale(mode_val)
mode_val_eth = mode_val / 1e18
formatted_mode_eth = f"{mode_val_eth:.8f} ETH"
fig = px.histogram(data, x="Avg Total Fees (wei)", nbins=10000,text_auto=True,
    # range_x=[0, 1e12]
)
fig.add_vline(x=mean_val,line_color="red",line_width=2,line_dash="dash",annotation_text="Mean",annotation_position="top right")
fig.add_vline(x=median_val,line_color="purple",line_width=2,line_dash="dot",annotation_text="Median",annotation_position="top right")
fig.add_vline(x=mode_val,line_color="green",line_width=2,line_dash="dashdot",annotation_text="Mode",annotation_position="top right")

fig.add_annotation(
    x=1,
    y=1,
    xref="paper",
    yref="paper",
    showarrow=False,
    align="left",
    bordercolor="black",
    borderwidth=1,
    borderpad=4,
    text=(
        f"<b>Stats</b><br>"
        f"Mean:   {formatted_mean} ({formatted_mean_eth})<br>"
        f"Median: {formatted_median} ({formatted_median_eth})<br>"
        f"Mode:   {formatted_mode} ({formatted_mode_eth})"
    )
)
fig.update_traces(
    marker_line_width=1,
    marker_line_color="black"
)

fig.show()

In [6]:
mean_val = data["Total Fees (wei)"].mean()
formatted_mean = format_short_scale(mean_val)
mean_val_eth = mean_val / 1e18
formatted_mean_eth = f"{mean_val_eth:.8f} ETH"
median_val = data["Total Fees (wei)"].median()
formatted_median = format_short_scale(median_val)
median_val_eth = median_val / 1e18
formatted_median_eth = f"{median_val_eth:.8f} ETH"
mode_val = data["Total Fees (wei)"].mode()[0]
formatted_mode = format_short_scale(mode_val)
mode_val_eth = mode_val / 1e18
formatted_mode_eth = f"{mode_val_eth:.8f} ETH"
fig = px.histogram(data, x="Total Fees (wei)", nbins=10000,text_auto=True,
    range_x=[0.00030 * 1e18, data["Total Fees (wei)"].max()]
)
fig.add_vline(x=mean_val,line_color="red",line_width=2,line_dash="dash",annotation_text="Mean",annotation_position="top left")
fig.add_vline(x=median_val,line_color="purple",line_width=2,line_dash="dot",annotation_text="Median",annotation_position="top right")
fig.add_vline(x=mode_val,line_color="green",line_width=2,line_dash="dashdot",annotation_text="Mode",annotation_position="top left")

fig.add_annotation(
    x=1,
    y=1,
    xref="paper",
    yref="paper",
    showarrow=False,
    align="left",
    bordercolor="black",
    borderwidth=1,
    borderpad=4,
    text=(
        f"<b>Stats</b><br>"
        f"Mean:   {formatted_mean} ({formatted_mean_eth})<br>"
        f"Median: {formatted_median} ({formatted_median_eth})<br>"
        f"Mode:   {formatted_mode} ({formatted_mode_eth})"
    )
)
fig.update_traces(
    marker_line_width=1,
    marker_line_color="black"
)

fig.show()

In [7]:
# Based on Jan 25, 1 USD = 0.00030 eth
one_usd_dollar_equivalent_wei = 0.00030 * 1e18
ten_usd_dollar_equivalent_wei = 10 * one_usd_dollar_equivalent_wei

total_fees_wei = data["Total Fees (wei)"]
# NOTE: both comparisons are inclusive (>= and <=) even though the printed
# labels read "> 1 USD" and "< 10 USD".
at_least_one_usd = total_fees_wei >= one_usd_dollar_equivalent_wei
at_most_ten_usd = total_fees_wei <= ten_usd_dollar_equivalent_wei

# Print total number of users, number of rows.
# len() counts rows directly; DataFrame.count()[0] instead counted non-null
# values of the first column and relied on deprecated positional indexing of a
# label-indexed Series.
print("Total number of users: ", len(data))

# print # of users who have fees > 1 USD
print("Number of users who have total fees > 1 USD: ", int(at_least_one_usd.sum()))

# print # of users who have fees > 1 USD < 10 USD
# The two masks are combined with & rather than chaining two boolean
# selections, which avoids the "Boolean Series key will be reindexed" warning.
print("Number of users who have total fees > 1 USD and < 10 USD: ", int((at_least_one_usd & at_most_ten_usd).sum()))




Total number of users:  1053
Number of users who have total fees > 1 USD:  419
Number of users who have total fees > 1 USD and < 10 USD:  250



Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`


Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`


Boolean Series key will be reindexed to match DataFrame index.


Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`

