# Eye-Balling Distributions

In [None]:
using Colors
using ColorSchemes
using ColorSchemeTools
using DataFramesMeta
using Distributions
using HypothesisTests
using Random
using StatsBase
using VegaLite

In [None]:
# Seed Julia's global random number generator so the random draws made in the
# following cells are repeatable from run to run.
Random.seed!(123)

In [None]:
# Font family name handed to the font settings of the chart configs in this notebook.
FONT = "Fira Code"

# Six-colour palette used for bar colours and as the colour-scale range of the charts.
# The commented-out line below holds the same six colours in a different order.
# colorscheme = ["#368BC1", "#8A9A67", "#DDA90D", "#CC771F", "#BB4430", "#8B5260"];
colorscheme = ["#368BC1", "#DDA90D", "#BB4430", "#8A9A67", "#CC771F", "#8B5260"];

## I. Visual Inspection of Distributions

In [None]:
# Number of draws taken from each distribution; later cells also interpolate it
# into the output file names.
# NOTE(review): this global shadows `Base.size` in the notebook namespace; the name
# is kept because other cells refer to `size`.
size = 1000

# One column per distribution, `size` draws each, taken from the global RNG in the
# order the columns are listed.
df = DataFrame(
    beta=rand(Beta(100, 100), size),
    binomial=rand(Binomial(20, 0.25), size),
    gamma=rand(Gamma(9, 0.5), size),
    gumbel=rand(Gumbel(1, 2), size),
    normal=rand(Normal(0, 1), size),
    poisson=rand(Poisson(10), size),
);

# Mean-centre every column so the samples cannot be told apart by location alone.
# Each of the six columns is centred exactly once: the third step now targets
# :gamma (it used to centre :poisson a second time, leaving :gamma uncentred).
df = @chain df begin
    @transform :beta = :beta .- mean(:beta)
    @transform :binomial = :binomial .- mean(:binomial)
    @transform :gamma = :gamma .- mean(:gamma)
    @transform :gumbel = :gumbel .- mean(:gumbel)
    @transform :normal = :normal .- mean(:normal)
    @transform :poisson = :poisson .- mean(:poisson)
end;

### Strip Plot

In [None]:
# Long-format view of `df`: one two-column frame (`data`, `distribution`) per
# distribution, built in the order listed below.
data = [
    DataFrame(data=df[!, label], distribution=label)
    for label in ("beta", "binomial", "normal", "gamma", "gumbel", "poisson")
]

# Stack the per-distribution frames row-wise into a single table for plotting.
combined_df = reduce(vcat, data)

# Strip plot of the stacked samples: one row per distribution, with every point
# pushed to a random vertical offset inside its row so overlapping draws spread out.
jitter_plot = combined_df |> @vlplot(
    # Opacity is set on the mark: Vega-Lite's colour channel definition has no
    # `opacity` property, so a value placed inside `color={...}` is presumably ignored.
    mark={:circle, opacity=0.125},
    x="data:Q",
    y="distribution:N",
    yOffset={field="jitter", type="quantitative"},
    color={
        "distribution:N",
        # Use the shared notebook palette rather than a second hard-coded colour
        # list, so this chart agrees with the charts that use `range=colorscheme`.
        scale={range=colorscheme}
    },
    config={
        background="#202124",
        view={stroke=:transparent},
        axis={
            labelFont=FONT,
            titleFont=FONT,
            titleColor="white",
            labelColor="white",
            tickColor="white",
            gridColor="grey",
            domainColor="white",
        },
        legend={
            labelFont=FONT,
            titleFont=FONT,
            labelColor="white",
            titleColor="white",
        },
        title={
            font=FONT,
            subtitleFont=FONT,
            color="white",
        },
        mark={
            font=FONT,
        },
        header={
            labelFont=FONT,
            titleFont=FONT,
            labelColor="white",
            titleColor="white",
        },
    },
    height=300,
    width=400,
    # Adds a `jitter` field computed from `random()`; yOffset above reads it.
    transform=[{calculate="random()", as="jitter"}]
)

### Hidden Plots

In [None]:
# Unlabelled histograms for the "hidden" round: one bar-chart spec per column of
# `df`, each with its x-axis, y-axis and chart titles suppressed (`title=nothing`).
# The specs carry no data themselves; the data is piped in when they are combined
# into `plot_combined` below. All six share the same binning (`maxbins=20`),
# size (200x150) and rounded top bar corners, and differ only in field and colour.

# Histogram of the `binomial` column, drawn in palette colour 1.
binomial_plot = @vlplot(
        mark={:bar, color=colorscheme[1], cornerRadiusTopLeft=3, cornerRadiusTopRight=3},
        x={
            :binomial, 
            bin={binned=false, maxbins=20}, # step=0.25
            title=nothing
        }, 
        y={"count()", title=nothing},
        title=nothing,
        height=150,
        width=200,
    ) 

# Histogram of the `beta` column, drawn in palette colour 2.
beta_plot = @vlplot(
        mark={:bar, color=colorscheme[2], cornerRadiusTopLeft=3, cornerRadiusTopRight=3},
        x={
            :beta, 
            bin={binned=false, maxbins=20}, # step=0.25
            title=nothing
        }, 
        y={"count()", title=nothing},
        title=nothing,
        height=150,
        width=200,
    ) 

# Histogram of the `gamma` column, drawn in palette colour 3.
gamma_plot = @vlplot(
        mark={:bar, color=colorscheme[3], cornerRadiusTopLeft=3, cornerRadiusTopRight=3},
        x={
            :gamma, 
            bin={binned=false, maxbins=20}, # step=0.25
            title=nothing
        }, 
        y={"count()", title=nothing},
        title=nothing,
        height=150,
        width=200,
    ) 

# Histogram of the `gumbel` column, drawn in palette colour 4.
gumbel_plot = @vlplot(
        mark={:bar, color=colorscheme[4], cornerRadiusTopLeft=3, cornerRadiusTopRight=3},
        x={
            :gumbel, 
            bin={binned=false, maxbins=20}, # step=0.25
            title=nothing
        }, 
        y={"count()", title=nothing},
        title=nothing,
        height=150,
        width=200,
    ) 

# Histogram of the `normal` column, drawn in palette colour 5.
normal_plot = @vlplot(
        mark={:bar, color=colorscheme[5], cornerRadiusTopLeft=3, cornerRadiusTopRight=3},
        x={
            :normal, 
            bin={binned=false, maxbins=20}, # step=0.25
            title=nothing,
        }, 
        y={"count()", title=nothing},
        title=nothing,
        height=150,
        width=200,
    ) 

# Histogram of the `poisson` column, drawn in palette colour 6.
poisson_plot = @vlplot(
        mark={:bar, color=colorscheme[6], cornerRadiusTopLeft=3, cornerRadiusTopRight=3},
        x={
            :poisson, 
            bin={binned=false, maxbins=20}, # step=0.25
            title=nothing
        }, 
        y={"count()", title=nothing},
        title=nothing,
        height=150,
        width=200,
    ) 

# Unlabelled 2x3 grid: `df` is piped into a top-level spec (shared title, dark
# theme config) to which the six histogram specs are added as a matrix, one
# matrix row per grid row.
plot_combined = df |>
    @vlplot(
        spacing=10,
        title={text="Eye-Balling Samples from Distributions", anchor="middle"},
        # `bounds` is the Vega-Lite layout property for concatenated views; the key
        # was previously misspelled `founds`, which is not a Vega-Lite property.
        bounds=:flush,
        config={
            background="#202124",
            view={stroke=:transparent},
            axis={
                labelFont=FONT,
                titleFont=FONT,
                titleColor="white",
                labelColor="white",
                tickColor="white",
                gridColor="grey",
                domainColor="white",
            },
            legend={
                labelFont=FONT,
                titleFont=FONT,
                labelColor="white",
                titleColor="white",
            },
            title={
                font=FONT,
                subtitleFont=FONT,
                color="white",
            },
            mark={
                font=FONT,
            },
            header={
                labelFont=FONT,
                titleFont=FONT,
                labelColor="white",
                titleColor="white",
            },
        },
        width=600,
        height=1200,
    ) +
        [
            [binomial_plot beta_plot gamma_plot];
            [gumbel_plot normal_plot poisson_plot];
        ]

In [None]:
# Write the unlabelled grid to an SVG whose name includes the sample size.
# `mkpath` creates the target directory first and does nothing if it already
# exists, so the save no longer fails on a fresh checkout without `outputs/`.
mkpath("outputs")
plot_combined |> save("outputs/eye-balling-distributions-$size.svg")

### Distributions Revealed

In [None]:
# Labelled histograms for the "revealed" round: the same six bar-chart specs as in
# the unlabelled cell, but each x-axis title now names the sampling distribution
# and its parameters. These assignments rebind the `*_plot` names.
# NOTE(review): beta uses palette colour 1 and binomial colour 2 here, the reverse
# of the unlabelled cell above - confirm the swap is intended.

# Histogram of the `beta` column, titled with the Beta parameters.
beta_plot = @vlplot(
        mark={:bar, color=colorscheme[1], cornerRadiusTopLeft=3, cornerRadiusTopRight=3},
        x={
            :beta, 
            bin={binned=false, maxbins=20}, # step=0.25
            title="Beta(α=100, β=100)",
        }, 
        y={"count()", title=nothing},
        height=150,
        width=200,
    ) 

# Histogram of the `binomial` column, titled with the Binomial parameters.
binomial_plot = @vlplot(
        mark={:bar, color=colorscheme[2], cornerRadiusTopLeft=3, cornerRadiusTopRight=3},
        x={
            :binomial, 
            bin={binned=false, maxbins=20}, # step=0.25
            title="Binomial(n=20, π=0.25)",
        }, 
        y={"count()", title=nothing},
        height=150,
        width=200,
    ) 

# Histogram of the `gamma` column, titled with the Gamma parameters.
gamma_plot = @vlplot(
        mark={:bar, color=colorscheme[3], cornerRadiusTopLeft=3, cornerRadiusTopRight=3},
        x={
            :gamma, 
            bin={binned=false, maxbins=20}, # step=0.25
            title="Gamma(k=9 , θ=0.5)",
        }, 
        y={"count()", title=nothing},
        height=150,
        width=200,
    ) 

# Histogram of the `gumbel` column, titled with the Gumbel parameters.
gumbel_plot = @vlplot(
        mark={:bar, color=colorscheme[4], cornerRadiusTopLeft=3, cornerRadiusTopRight=3},
        x={
            :gumbel, 
            bin={binned=false, maxbins=20}, # step=0.25
            title="Gumbel(μ=1 , β=2)",
        }, 
        y={"count()", title=nothing},
        height=150,
        width=200,
    ) 

# Histogram of the `normal` column, titled with the Normal parameters.
normal_plot = @vlplot(
        mark={:bar, color=colorscheme[5], cornerRadiusTopLeft=3, cornerRadiusTopRight=3},
        x={
            :normal, 
            bin={binned=false, maxbins=20}, # step=0.25
            title="Normal(μ=0, σ=1)",
        }, 
        y={"count()", title=nothing},
        height=150,
        width=200,
    ) 

# Histogram of the `poisson` column, titled with the Poisson parameter.
poisson_plot = @vlplot(
        mark={:bar, color=colorscheme[6], cornerRadiusTopLeft=3, cornerRadiusTopRight=3},
        x={
            :poisson, 
            bin={binned=false, maxbins=20}, # step=0.25
            title="Poisson(λ=10)",
        },
        y={"count()", title=nothing},
        height=150,
        width=200,
    ) 

# Labelled 2x3 grid: same top-level spec as the unlabelled grid (shared title,
# dark theme config), combined with the six titled histogram specs.
# NOTE(review): the panel order here ([binomial beta normal; gamma gumbel poisson])
# differs from the unlabelled grid ([binomial beta gamma; gumbel normal poisson]) -
# confirm that is intended if the two figures are meant to be compared side by side.
plot_combined = df |>
    @vlplot(
        spacing=10,
        title={text="Eye-Balling Samples from Distributions", anchor="middle"},
        # `bounds` is the Vega-Lite layout property for concatenated views; the key
        # was previously misspelled `founds`, which is not a Vega-Lite property.
        bounds=:flush,
        config={
            background="#202124",
            view={stroke=:transparent},
            axis={
                labelFont=FONT,
                titleFont=FONT,
                titleColor="white",
                labelColor="white",
                tickColor="white",
                gridColor="grey",
                domainColor="white",
            },
            legend={
                labelFont=FONT,
                titleFont=FONT,
                labelColor="white",
                titleColor="white",
            },
            title={
                font=FONT,
                subtitleFont=FONT,
                color="white",
            },
            mark={
                font=FONT,
            },
            header={
                labelFont=FONT,
                titleFont=FONT,
                labelColor="white",
                titleColor="white",
            },
        },
        width=600,
        height=1200,
    ) +
        [
            [binomial_plot beta_plot normal_plot];
            [gamma_plot gumbel_plot poisson_plot];
        ]

In [None]:
# Write the labelled grid to an SVG whose name includes the sample size.
# `mkpath` creates the target directory first and does nothing if it already
# exists, so the save no longer fails when `outputs/` is missing.
mkpath("outputs")
plot_combined |> save("outputs/distributions-revealed-$size.svg")

## II. Visual Inspection of Normalized Distributions

In [None]:
# Standardised copy of `df`: every column is replaced by its z-scores, obtained by
# fitting a StatsBase `ZScoreTransform` to the column and applying it to that same
# column. Each column depends only on itself, so all six are computed in one call.
normalized_df = @transform(
    df,
    :beta = StatsBase.transform(fit(ZScoreTransform, :beta, dims=1), :beta),
    :binomial = StatsBase.transform(fit(ZScoreTransform, :binomial, dims=1), :binomial),
    :normal = StatsBase.transform(fit(ZScoreTransform, :normal, dims=1), :normal),
    :gamma = StatsBase.transform(fit(ZScoreTransform, :gamma, dims=1), :gamma),
    :gumbel = StatsBase.transform(fit(ZScoreTransform, :gumbel, dims=1), :gumbel),
    :poisson = StatsBase.transform(fit(ZScoreTransform, :poisson, dims=1), :poisson),
);

# plot_combined = normalized_df |>
#     @vlplot(spacing=15, founds=:flush, config={view={stroke=:transparent}}) + 
#         [
#             [binomial_plot beta_plot normal_plot]; 
#             [gamma_plot gumbel_plot poisson_plot]; 
#         ]


# Long-format view of `normalized_df`: one two-column frame (`data`,
# `distribution`) per distribution, built in the order listed below.
data = [
    DataFrame(data=normalized_df[!, label], distribution=label)
    for label in ("beta", "binomial", "normal", "gamma", "gumbel", "poisson")
]

# Stack the per-distribution frames row-wise into a single table for plotting.
normalized_combined_df = reduce(vcat, data)

In [None]:
# Overlaid density curves of the z-scored samples, one line per distribution.
# The `density` transform is computed per `distribution` group from the `data`
# field (bandwidth 0.25, 50 steps, counts=true); the chart plots the resulting
# `value` and `density` fields on x and y.
# The legend-bound point selection drives the opacity encoding: series matching
# the selection are drawn at opacity 1, all others at 0.25.
normalized_combined_plot = normalized_combined_df |>
    @vlplot(
        :line,
        transform=[
            {density="data", bandwidth=0.25, groupby=["distribution"], counts=true, steps=50}
        ],
        title="Overlayed Densities",
        x={"value:q", title="Distribution"},
        y={"density:q", title="Density"},
        params=[{
            name= "moust-event",
            select= {type= "point", fields= ["distribution"]},
            bind= "legend"
        }],
        color={
            "distribution:n",
            legend={orient="top-right"},
            scale={
                range=colorscheme
            }
        },
        opacity={
          condition={param="moust-event", value= 1},
          value=0.25
        },
        padding=20,
        tooltip={:distribution},
        config={
            background="#202124",
            view={stroke=:transparent},
            axis={
                labelFont=FONT,
                titleFont=FONT,
                titleColor="white",
                labelColor="white",
                tickColor="white",
                gridColor="grey",
                domainColor="white",
            },
            legend={
                labelFont=FONT,
                titleFont=FONT,
                labelColor="white",
                titleColor="white",
            },
            title={
                font=FONT,
                subtitleFont=FONT,
                color="white",
            },
            mark={
                font=FONT,
            },
            header={
                labelFont=FONT,
                titleFont=FONT,
                labelColor="white",
                titleColor="white",
            },
        },
        height=300,
        width=600,
    )

In [None]:
# Write the overlaid-density chart to an HTML file whose name includes the sample size.
# `mkpath` creates the target directory first and does nothing if it already
# exists, so the save no longer fails when `outputs/` is missing.
mkpath("outputs")
normalized_combined_plot |> save("outputs/normalized-combined-$size.html")

In [None]:
# Strip plot of the z-scored samples: one row per distribution, with every point
# pushed to a random vertical offset inside its row. `jitter_plot` is defined once
# here; an earlier unthemed definition in this cell was overwritten immediately
# and has been dropped as dead code.
jitter_plot = normalized_combined_df |> @vlplot(
    :circle,
    # Adds a `jitter` field computed from `random()`; yOffset below reads it.
    transform=[{calculate="random()", as="jitter"}],
    x="data:Q",
    y="distribution:N",
    yOffset={field="jitter", type="quantitative"},
    color={
        "distribution:N",
        # NOTE(review): Vega-Lite's colour channel definition has no `opacity` or
        # `fillOpacity` property, so these two values are presumably ignored. To
        # make points translucent, set `opacity` on the mark instead - but pick the
        # value deliberately: 0.0125 applied to the mark would likely leave the
        # points almost invisible on the dark background.
        fillOpacity=0.0125,
        opacity=0.0125,
        scale={range=colorscheme}
    },
    height=300,
    width=600,
    padding=20,
    config={
        background="#202124",
        view={stroke=:transparent},
        axis={
            labelFont=FONT,
            titleFont=FONT,
            titleColor="white",
            labelColor="white",
            tickColor="white",
            gridColor="grey",
            domainColor="white",
        },
        legend={
            labelFont=FONT,
            titleFont=FONT,
            labelColor="white",
            titleColor="white",
        },
        title={
            font=FONT,
            subtitleFont=FONT,
            color="white",
        },
        mark={
            font=FONT,
        },
        header={
            labelFont=FONT,
            titleFont=FONT,
            labelColor="white",
            titleColor="white",
        },
    },
)

In [None]:
# Write the strip plot of the z-scored samples to the "jitter" SVG. This used to
# pipe `normalized_combined_plot` (the density chart) into the jitter file name,
# so the strip plot was never saved.
# `mkpath` creates the target directory first and does nothing if it already exists.
mkpath("outputs")
jitter_plot |> save("outputs/normalized-combined-jitter$size.svg")

## III. Testing if Normal

In [None]:
# Exact one-sample Kolmogorov-Smirnov test of the z-scored Gumbel sample against
# `Normal()` (Distributions' default-parameter normal); the cell displays the result.
ExactOneSampleKSTest(normalized_df.gumbel, Normal())

In [None]:
# Same test on the Gumbel column of `df`, which is mean-centred but not rescaled,
# so a difference in spread from `Normal()` also counts against the null here.
ExactOneSampleKSTest(df.gumbel, Normal())

In [None]:
# Run the exact one-sample KS test on the beta column of `df` against `Normal()`
# and keep the test object in `d` ...
d = ExactOneSampleKSTest(df.beta, Normal())
# ... then extract its two-sided p-value.
pvalue(d, tail=:both)

In [None]:
# One `[p-value, name]` entry per column of `df`: the p-value of the exact
# one-sample KS test of that column against `Normal()`.
tests = [
    Any[pvalue(ExactOneSampleKSTest(df[!, label], Normal())), label]
    for label in ("beta", "binomial", "gamma", "gumbel", "normal", "poisson")
]

# Transpose the list of pairs into a tuple of p-values and a tuple of names.
pvalues, distributions = collect(zip(tests...))

# Two-column summary table; displayed as the result of the cell.
ks_test_df = DataFrame(
    distribution=collect(distributions),
    pvalues=collect(pvalues),
)

## IV. Fitting Distributions & Finding Parameters with MLE

In [None]:
# Fit a `Normal` distribution to each column of `df` (the mean-centred samples).
# Per the section heading these are intended as maximum-likelihood fits; the
# fitted distribution objects are sampled from in the next cell.
normal_beta = fit(Normal, df.beta)
normal_binomial = fit(Normal, df.binomial)
normal_gamma = fit(Normal, df.gamma)
normal_normal = fit(Normal, df.normal)
normal_gumbel = fit(Normal, df.gumbel)
normal_poisson = fit(Normal, df.poisson)

In [None]:
# Number of draws taken from each fitted normal.
size = 1000

# One column per original distribution, filled with `size` draws from the normal
# fitted to it. Columns are generated in the order listed, which is also the
# order in which the global RNG is consumed.
fitted_df = DataFrame([
    column => rand(model, size)
    for (column, model) in (
        :beta => normal_beta,
        :binomial => normal_binomial,
        :gamma => normal_gamma,
        :gumbel => normal_gumbel,
        :normal => normal_normal,
        :poisson => normal_poisson,
    )
]);

# Long-format view of `fitted_df`: one two-column frame (`data`, `distribution`)
# per distribution, built in the order listed below.
data = [
    DataFrame(data=fitted_df[!, label], distribution=label)
    for label in ("beta", "binomial", "normal", "gamma", "gumbel", "poisson")
]

# Stack the per-distribution frames row-wise into a single table for plotting.
fitted_combined_df = reduce(vcat, data)

In [None]:
# Overlaid density curves of the samples drawn from the fitted normals, one line
# per source distribution. The `density` transform is computed per `distribution`
# group from the `data` field (bandwidth 0.25, 50 steps, counts=true); the chart
# plots the resulting `value` and `density` fields on x and y.
# The legend-bound point selection drives the opacity encoding: series matching
# the selection are drawn at opacity 1, all others at 0.25.
fitted_combined_plot = fitted_combined_df |>
    @vlplot(
        :line,
        transform=[
            {density="data", bandwidth=0.25, groupby=["distribution"], counts=true, steps=50}
        ],
        title="Fitted Normal Distributions Densities from Other Distributions",
        x={
            "value:q", 
            title="Distribution",
        },
        y={"density:q", title="Density"},
        params=[{
            name= "moust-event",
            select= {type= "point", fields= ["distribution"]},
            bind= "legend"
        }],
        color={
            "distribution:n",
            legend={orient="top-right"},
            scale={
                range=colorscheme
            }
            
        },
        opacity={
          condition={param="moust-event", value= 1},
          value=0.25
        },
        padding=20,
        tooltip={:distribution},
        height=300,
        width=600,
        config={
            background="#202124",
            view={stroke=:transparent},
            axis={
                labelFont=FONT,
                titleFont=FONT,
                titleColor="white",
                labelColor="white",
                tickColor="white",
                gridColor="grey",
                domainColor="white",
            },
            legend={
                labelFont=FONT,
                titleFont=FONT,
                labelColor="white",
                titleColor="white",
            },
            title={
                font=FONT,
                subtitleFont=FONT,
                color="white",
            },
            mark={
                font=FONT,
            },
            header={
                labelFont=FONT,
                titleFont=FONT,
                labelColor="white",
                titleColor="white",
            },
        },
    )

In [None]:
# Standardised copy of `fitted_df`: every column is replaced by its z-scores,
# obtained by fitting a StatsBase `ZScoreTransform` to the column and applying it
# to that same column. Each column depends only on itself, so all six are
# computed in one call.
fitted_normalized_df = @transform(
    fitted_df,
    :beta = StatsBase.transform(fit(ZScoreTransform, :beta, dims=1), :beta),
    :binomial = StatsBase.transform(fit(ZScoreTransform, :binomial, dims=1), :binomial),
    :normal = StatsBase.transform(fit(ZScoreTransform, :normal, dims=1), :normal),
    :gamma = StatsBase.transform(fit(ZScoreTransform, :gamma, dims=1), :gamma),
    :gumbel = StatsBase.transform(fit(ZScoreTransform, :gumbel, dims=1), :gumbel),
    :poisson = StatsBase.transform(fit(ZScoreTransform, :poisson, dims=1), :poisson),
);

# Long-format view of `fitted_normalized_df`: one two-column frame (`data`,
# `distribution`) per distribution, built in the order listed below.
data = [
    DataFrame(data=fitted_normalized_df[!, label], distribution=label)
    for label in ("beta", "binomial", "normal", "gamma", "gumbel", "poisson")
]

# Stack the per-distribution frames row-wise into a single table for plotting.
fitted_normalized_combined_df = reduce(vcat, data)

In [None]:
# Overlaid density curves of the z-scored fitted-normal samples, one line per
# source distribution. The `density` transform is computed per `distribution`
# group from the `data` field (bandwidth 0.25, 50 steps, counts=true); the chart
# plots the resulting `value` and `density` fields on x and y.
# The legend-bound point selection drives the opacity encoding: series matching
# the selection are drawn at opacity 1, all others at 0.25.
# NOTE(review): the chart title is identical to the one used for the
# non-standardised `fitted_combined_plot` - confirm whether it should differ.
fitted_normalized_combined = fitted_normalized_combined_df |>
    @vlplot(
        :line,
        transform=[
            {density="data", bandwidth=0.25, groupby=["distribution"], counts=true, steps=50}
        ],
        title="Fitted Normal Distributions Densities from Other Distributions",
        x={
            "value:q", 
            title="Distribution",
        },
        y={"density:q", title="Density"},
        params=[{
            name= "moust-event",
            select= {type= "point", fields= ["distribution"]},
            bind= "legend"
        }],
        color={
            "distribution:n",
            legend={orient="top-right"},
            scale={range=colorscheme}
        },
        opacity={
          condition={param="moust-event", value= 1},
          value=0.25
        },
        padding=20,
        tooltip={:distribution},
        height=300,
        width=600,
        config={ 
            background="#202124",
            view={stroke=:transparent},
            axis={
                labelFont=FONT,
                titleFont=FONT,
                titleColor="white",
                labelColor="white",
                tickColor="white",
                gridColor="grey",
                domainColor="white",
            },
            legend={
                labelFont=FONT,
                titleFont=FONT,
                labelColor="white",
                titleColor="white",
            },
            title={
                font=FONT,
                subtitleFont=FONT,
                color="white",
            },
            mark={
                font=FONT,
            },
            header={
                labelFont=FONT,
                titleFont=FONT,
                labelColor="white",
                titleColor="white",
            },
        },
    )

In [None]:
# Dark-theme chart settings as a plain nested Dict with string keys: background,
# view stroke, and font/colour settings for axis, legend, title, mark and header.
config = let white = "white"
    # Font plus white label/title colours; called once each for the legend and the
    # header so that the two entries hold separate Dict objects.
    label_and_title() = Dict(
        "labelFont" => FONT,
        "titleFont" => FONT,
        "labelColor" => white,
        "titleColor" => white,
    )

    axis = Dict(
        "labelFont" => FONT,
        "titleFont" => FONT,
        "titleColor" => white,
        "labelColor" => white,
        "tickColor" => white,
        "gridColor" => "grey",
        "domainColor" => white,
    )

    title = Dict(
        "font" => FONT,
        "subtitleFont" => FONT,
        "color" => white,
    )

    Dict(
        "background" => "#202124",
        "view" => Dict("stroke" => :transparent),
        "axis" => axis,
        "legend" => label_and_title(),
        "title" => title,
        "mark" => Dict("font" => FONT),
        "header" => label_and_title(),
    )
end