In [1]:
import sys, os
import numpy as np
import pandas as pd

# Visualization libraries
import plotly.graph_objects as go
from plotly.subplots import make_subplots

print("Python version: {}". format(sys.version))
print("NumPy version: {}". format(np.__version__))
print("pandas version: {}". format(pd.__version__))

#ignore warnings
import warnings
warnings.filterwarnings('ignore')
print('-'*25)

Python version: 3.12.10 (main, Apr 17 2025, 03:50:21) [Clang 17.0.0 (clang-1700.0.13.3)]
NumPy version: 2.2.5
pandas version: 2.2.3
-------------------------


In [2]:
# Root folder for the notebook's input data, given as a relative path.
DATA_FOLDER = "../../data"

In [3]:
# Build the CSV location under DATA_FOLDER, load it into a DataFrame, and
# preview the first five rows as the cell's output.
DATA_FILE_PATH = os.path.join(
    DATA_FOLDER,
    "distributions/real_world_distributions.csv",
)
df = pd.read_csv(filepath_or_buffer=DATA_FILE_PATH)
df.head(n=5)

Unnamed: 0,norm_height_inch,norm_weight_lbs,uniform_draw_date,uniform_winning_number,uniform_draw_position,exp_earthquake_number,exp_time_since_last_earthquake_seconds,exp_magnitude,poisson_home_team,poisson_away_team,...,poisson_goals_away,ln_employee_name,ln_job_title,ln_annual_compensation,gamma_time_delta,gamma_data_length,beta_state,beta_year,beta_gender,beta_completion_100_rate
0,65.78331,112.9925,09/24/2020,2,1,2,113004.85,4.6,Arenas De Getxo,Atletico Madrid,...,3,Scott Pitzer,Facilities Systems Specialist,90805.0,0.0,90,Arizona,2011,B,0.094
1,71.51521,136.4873,09/24/2020,5,2,3,59221.2,4.3,Espanyol Barcelona,Real Union,...,2,Pauline Oboite,NURSE,76510.0,1e-06,8,Arizona,2011,B,0.17
2,69.39874,153.0269,09/24/2020,10,3,4,98579.45,4.4,Real Madrid,Ce Europa,...,0,Dawn Fong,Special Projects Coordinator,90805.0,1e-06,85,Arizona,2011,B,0.072
3,68.2166,142.3354,09/24/2020,15,4,5,40158.81,5.2,Real Sociedad,Athletic Bilbao,...,1,Sonya Kingsland,Behavioral Health Technician,70600.0,0.000448,29,Arizona,2011,B,0.111
4,67.78781,144.2971,09/24/2020,18,5,6,75909.94,4.3,Racing Santander,FC Barcelona,...,2,Octavia Fletcher,Behavioral Health Technician,70600.0,6.1e-05,1460,Arizona,2011,B,0.131


In [None]:
# Earthquake inter-arrival times, keeping only values strictly below 50000
# (the column name indicates seconds) before plotting.
quake_col = "exp_time_since_last_earthquake_seconds"
time_since_last_earthquake = df.loc[df[quake_col] < 50000, quake_col]

# One (subplot title, data) pair per panel, listed in row-major grid order so
# that each title stays next to the series it describes.
GRID_ROWS, GRID_COLS = 2, 3
histogram_panels = [
    ("Normal Distribution", df["norm_weight_lbs"]),
    ("Uniform Distribution", df["uniform_winning_number"]),
    ("Exponential Distribution", time_since_last_earthquake),
    ("Poisson Distribution", df["poisson_goals_home"]),
    ("Log Normal Distribution", df["ln_annual_compensation"]),
    ("Beta Distribution", df["beta_completion_100_rate"]),
]

fig = make_subplots(
    rows=GRID_ROWS,
    cols=GRID_COLS,
    subplot_titles=tuple(panel_title for panel_title, panel_values in histogram_panels),
)

# Add one histogram per panel; the list position maps to a 1-based (row, col).
for panel_index, (panel_title, panel_values) in enumerate(histogram_panels):
    row_offset, col_offset = divmod(panel_index, GRID_COLS)
    fig.add_trace(
        go.Histogram(x=panel_values),
        row=row_offset + 1,
        col=col_offset + 1,
    )

# Note: x-axis tick labels can be rotated with fig.update_xaxes(tickangle=-45).

# Figure template: "Inter" font family and a thin grey x-axis line.
custom_template = {
    "layout": go.Layout(
        font={"family": "Inter"},
        xaxis={"linecolor": "grey", "linewidth": 1},
    )
}

# Apply the template, set the margins, and hide the legend.
fig.update_layout(
    template=custom_template,
    showlegend=False,
    margin={"l": 60, "r": 50, "t": 50, "b": 50},
)

fig.show(renderer="iframe")