In [1]:
import pandas as pd
import numpy as np
import plotly.express as px

In [5]:
# Read the per-address statistics CSV (path is relative to the working directory)
# and print the column/dtype summary.
USER_STATS_CSV = "gen/userStats.csv"
raw_data = pd.read_csv(USER_STATS_CSV)
raw_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3821 entries, 0 to 3820
Data columns (total 17 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Address                    3821 non-null   object 
 1   Username                   1992 non-null   object 
 2   First Tx Date              3821 non-null   object 
 3   Last Tx Date               3821 non-null   object 
 4   Total # Txs                3821 non-null   int64  
 5   Total Fees (wei)           3821 non-null   int64  
 6   Total Fees (Eth)           3821 non-null   float64
 7   Avg Total Fees (wei)       3821 non-null   int64  
 8   Avg Total Fees (Eth)       3821 non-null   float64
 9   Total L1 Fees (wei)        3821 non-null   int64  
 10  Total L1 Fees (Eth)        3821 non-null   float64
 11  Total L2 Fees (wei)        3821 non-null   int64  
 12  Total L2 Fees (Eth)        3821 non-null   float64
 13  Total Base Fees (wei)      3821 non-null   int64

In [6]:
# Keep only the rows whose "Total # Txs" is at least MIN_TXS.
# The original index labels are retained (no reset_index).
MIN_TXS = 2
has_min_txs = raw_data["Total # Txs"].ge(MIN_TXS)
data = raw_data[has_min_txs]
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2178 entries, 0 to 3820
Data columns (total 17 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Address                    2178 non-null   object 
 1   Username                   1992 non-null   object 
 2   First Tx Date              2178 non-null   object 
 3   Last Tx Date               2178 non-null   object 
 4   Total # Txs                2178 non-null   int64  
 5   Total Fees (wei)           2178 non-null   int64  
 6   Total Fees (Eth)           2178 non-null   float64
 7   Avg Total Fees (wei)       2178 non-null   int64  
 8   Avg Total Fees (Eth)       2178 non-null   float64
 9   Total L1 Fees (wei)        2178 non-null   int64  
 10  Total L1 Fees (Eth)        2178 non-null   float64
 11  Total L2 Fees (wei)        2178 non-null   int64  
 12  Total L2 Fees (Eth)        2178 non-null   float64
 13  Total Base Fees (wei)      2178 non-null   int64  
 1

In [3]:
def format_short_scale(num, decimals=2):
    """Format a number as a short-scale label in billions (B) or trillions (T).

    Args:
        num: Any value accepted by ``float()``.
        decimals: Digits shown after the decimal point in the B/T forms.

    Returns:
        ``"n/a"`` if the value is NaN, ``"< 1B"`` if it is below 1e9,
        ``"<x>B"`` if it is below 1e12 and renders below 1000 billions,
        ``"<x>T"`` if it is below 1e14, and ``"> 100T"`` otherwise.
    """
    import math  # local import so the function does not depend on another cell

    n = float(num)

    # NaN fails every "<" comparison and would otherwise be reported as "> 100T".
    if math.isnan(n):
        return "n/a"

    if n < 1e9:
        return "< 1B"

    if n < 1e12:
        billions = f"{n / 1e9:.{decimals}f}"
        # Values just under 1e12 can round up to "1000.00"; fall through and
        # render those in trillions instead of emitting "1000.00B".
        if float(billions) < 1000:
            return f"{billions}B"

    if n < 1e14:
        return f"{n / 1e12:.{decimals}f}T"

    return "> 100T"

In [4]:
# Central-tendency statistics of the "Avg Total Fees (wei)" column, plus the
# short-scale strings shown in the stats box.
fee_column = "Avg Total Fees (wei)"
avg_fees = data[fee_column]

mean_val, median_val, mode_val = avg_fees.mean(), avg_fees.median(), avg_fees.mode()[0]
formatted_mean, formatted_median, formatted_mode = map(
    format_short_scale, (mean_val, median_val, mode_val)
)

fig = px.histogram(
    data,
    x=fee_column,
    nbins=10000,
    text_auto=True,
    # range_x=[0, 1e12]
)

# One vertical marker line per statistic; only position, colour, dash pattern
# and label differ between them.
stat_markers = (
    dict(x=mean_val, line_color="red", line_dash="dash",
         annotation_text="Mean", annotation_position="top left"),
    dict(x=median_val, line_color="purple", line_dash="dot",
         annotation_text="Median", annotation_position="top right"),
    dict(x=mode_val, line_color="green", line_dash="dashdot",
         annotation_text="Mode", annotation_position="top left"),
)
for marker_kwargs in stat_markers:
    fig.add_vline(line_width=2, **marker_kwargs)

# Boxed summary placed at paper coordinates (1, 1).
stats_lines = [
    "<b>Stats</b>",
    f"Mean:   {formatted_mean}",
    f"Median: {formatted_median}",
    f"Mode:   {formatted_mode}",
]
fig.add_annotation(
    x=1,
    y=1,
    xref="paper",
    yref="paper",
    showarrow=False,
    align="left",
    bordercolor="black",
    borderwidth=1,
    borderpad=4,
    text="<br>".join(stats_lines),
)

# Outline each bar in black.
fig.update_traces(marker_line_width=1, marker_line_color="black")

fig.show()