In [165]:
import random
import pandas as pd
import numpy as np
import panel as pn
import plotly.express as px
import plotly.graph_objects as go
from panel.interact import interact, fixed
from NormalPopulation import NormalPopulation
# Enable Panel's "plotly" extension; the dashboards below place Plotly figures
# inside Panel layouts.
pn.extension("plotly")

In [166]:
def make_pop(mean, stdev, n=10):
    """Build the population view: histogram, box plot and parameter table.

    Args:
        mean: Mean handed to ``NormalPopulation``.
        stdev: Standard deviation handed to ``NormalPopulation``.
        n: Population size handed to ``NormalPopulation``.

    Returns:
        A ``pn.Row`` with three columns: the population histogram, the
        population box plot and a one-row table holding the population mean,
        standard deviation, variance (both with ``ddof=0``) and size.
    """
    population = NormalPopulation(mean, stdev, n)
    pop = pd.DataFrame(population.pop, columns=["Population"])

    # Summarise the single column directly instead of calling np.mean/np.var/
    # np.std on the whole DataFrame: the shape those calls return depends on
    # the installed pandas version, which can leave the table empty or NaN.
    values = pop["Population"]
    pop_parameters = pd.DataFrame(
        {
            "Mean": [values.mean()],
            "Standard Deviation": [values.std(ddof=0)],
            "Variance": [values.var(ddof=0)],
            "N": [len(pop.index)],
        },
        index=["Population"],
    )

    box = px.box(pop, range_y=(0, 100), labels=None, title=None, template="simple_white")
    hist = px.histogram(
        pop, x="Population", range_x=(0, 100), nbins=40, template="simple_white"
    ).update_layout(bargap=0.4)

    pop_dist_column = pn.Column("## Population Distribution", hist)
    pop_box_column = pn.Column("## Population Box Plot", box)
    pop_param_column = pn.Column("## Population Parameters", pop_parameters, width=450)

    return pn.Row(pop_dist_column, pop_box_column, pop_param_column, width=2000)

In [167]:
def make_samp(mean, stdev, n, sample_size):
    """Build the single-sample view: population vs. one sample with its 95% interval.

    Args:
        mean: Mean handed to ``NormalPopulation``.
        stdev: Standard deviation handed to ``NormalPopulation``.
        n: Population size handed to ``NormalPopulation``.
        sample_size: Size passed to ``population.get_sample``.

    Returns:
        A ``pn.Row`` with the population histogram, the sample histogram
        (shaded between ``lower_95`` and ``upper_95``, with the population
        mean drawn as a vertical line) and the two statistics tables.
    """
    population = NormalPopulation(mean, stdev, n)
    pop = pd.DataFrame(population.pop, columns=["Population"])

    sample_info = population.get_sample(sample_size)
    sample = pd.DataFrame(sample_info["sample"], columns=["Sample"])

    # Interval bounds are mean -/+ 1.96 * stdev / sqrt(n), using the values
    # reported by get_sample().
    margin = 1.96 * (sample_info["stdev"] / (np.sqrt(sample_info["n"])))
    lower_95 = sample_info["mean"] - margin
    upper_95 = sample_info["mean"] + margin
    sample_parameters = pd.DataFrame(
        {
            "Mean": [sample_info["mean"]],
            "Standard Deviation": [sample_info["stdev"]],
            "Variance": [sample_info["unbi-var"]],
            "n": [sample_info["n"]],
            "lower_95": [lower_95],
            "upper_95": [upper_95],
        }
    )

    # Summarise the single population column directly; reducing the whole
    # DataFrame with np.mean/np.var/np.std returns a pandas-version-dependent
    # shape that can leave the table (and the mean line) empty or NaN.
    values = pop["Population"]
    pop_mean = values.mean()
    pop_parameters = pd.DataFrame(
        {
            "Mean": [pop_mean],
            "Standard Deviation": [values.std(ddof=0)],
            "Variance": [values.var(ddof=0)],
            "N": [len(pop.index)],
        },
        index=["Population"],
    )

    hist_pop = px.histogram(
        pop, x="Population", range_x=(0, 100), nbins=40, template="simple_white"
    ).update_layout(bargap=0.4)
    hist_pop.add_vline(pop_mean, line_width=3, line_color="purple", opacity=1)

    hist_sample = px.histogram(
        sample, x="Sample", range_x=(0, 100), nbins=40, template="simple_white"
    ).update_layout(bargap=0.4)
    # Scalars are passed straight through rather than re-read from the tables
    # with positional ``[0]`` lookups.
    hist_sample.add_vrect(x0=lower_95, x1=upper_95, line_width=0, fillcolor="purple", opacity=0.2)
    hist_sample.add_vline(pop_mean, line_width=3, line_color="purple", opacity=1)

    pop_dist_column = pn.Column("## Population Distribution", hist_pop)
    sample_dist_column = pn.Column("## Sample Distribution", hist_sample)
    param_column = pn.Column(
        "## Population Parameters", pop_parameters,
        "## Sample Statistics", sample_parameters,
        width=450,
    )

    return pn.Row(pop_dist_column, sample_dist_column, param_column, width=2000)

In [168]:
def make_samp_dist(mean, stdev, n=10, samples=1000):
    """Build the sampling-distribution view: population vs. distribution of sample means.

    Args:
        mean: Mean handed to ``NormalPopulation``.
        stdev: Standard deviation handed to ``NormalPopulation``.
        n: Population size handed to ``NormalPopulation``.
        samples: Number of samples requested from
            ``population.get_sampling_distribution``.

    Returns:
        A ``pn.Row`` with the population histogram, the histogram of sample
        means and the two statistics tables. Every sample is drawn with a
        fixed size of 15.
    """
    sample_size = 15
    population = NormalPopulation(mean, stdev, n)
    pop = pd.DataFrame(population.pop, columns=["Population"])
    samp_dist = pd.DataFrame(
        population.get_sampling_distribution(samples, sample_size),
        columns=["Sample Means"],
    )

    # Both tables are built from their single column with explicit ddof=0.
    # Reducing the whole DataFrame with np.mean/np.var/np.std returns a
    # pandas-version-dependent shape that can leave the tables empty or NaN.
    pop_values = pop["Population"]
    pop_mean = pop_values.mean()
    pop_parameters = pd.DataFrame(
        {
            "Mean": [pop_mean],
            "Standard Deviation": [pop_values.std(ddof=0)],
            "Variance": [pop_values.var(ddof=0)],
            "N": [len(pop.index)],
        },
        index=["Population"],
    )

    mean_values = samp_dist["Sample Means"]
    samp_dist_parameters = pd.DataFrame(
        {
            "Mean": [mean_values.mean()],
            "Standard Deviation": [mean_values.std(ddof=0)],
            "Variance": [mean_values.var(ddof=0)],
            "Number of Samples": [len(samp_dist.index)],
            "Sample Size": [sample_size],
        },
        index=["Sample Means"],
    )

    hist_pop = px.histogram(
        pop, x="Population", range_x=(0, 100), nbins=20, template="simple_white"
    ).update_layout(bargap=0.4)
    hist_pop.add_vline(pop_mean, line_width=3, line_color="purple", opacity=1)
    hist_samp_dist = px.histogram(
        samp_dist, x="Sample Means", range_x=(0, 100), nbins=20, template="simple_white"
    ).update_layout(bargap=0.4)

    pop_dist_column = pn.Column("## Population Distribution", hist_pop)
    samp_dist_column = pn.Column("## Sampling Distribution", hist_samp_dist)
    param_column = pn.Column(
        "## Population Parameters", pop_parameters,
        "## Sampling Distribution Statistics", samp_dist_parameters,
        width=450,
    )

    return pn.Row(pop_dist_column, samp_dist_column, param_column, width=2000)

In [169]:
def make_samps(mean, stdev, n=10, sample_size=30):
    """Build the many-samples view: 100 samples and their 95% confidence intervals.

    Args:
        mean: Mean handed to ``NormalPopulation``.
        stdev: Standard deviation handed to ``NormalPopulation``.
        n: Population size handed to ``NormalPopulation``.
        sample_size: Size passed to ``population.get_sample`` for every sample.

    Returns:
        A ``pn.Row`` with the population histogram, a figure that stacks one
        horizontal line per sample from ``lower_95`` to ``upper_95`` over a
        faint population histogram, and the two statistics tables.
    """
    n_samples = 100

    # Honour the caller's arguments; the population used to be hard-coded to
    # NormalPopulation(50, 10, 1000) regardless of what was passed in.
    population = NormalPopulation(mean, stdev, n)
    pop = pd.DataFrame(population.pop, columns=["Population"])

    # Draw every sample first and build the frame once, instead of inserting
    # the columns one at a time.
    samples = pd.DataFrame(
        {
            f"Sample {i}": population.get_sample(sample_size)["sample"]
            for i in range(n_samples)
        }
    )

    # Column-wise reductions give one statistic per sample. np.mean(samples)
    # is avoided because its result shape depends on the pandas version and
    # can collapse to a single scalar, which would turn every interval into NaN.
    sample_parameters = pd.DataFrame(
        {
            "Mean": samples.mean(),
            "Standard Deviation": samples.std(ddof=1),
            "Variance": samples.var(ddof=1),
            "n": len(samples.index),
        }
    )
    margin = (sample_parameters["Standard Deviation"] / (np.sqrt(sample_parameters["n"]))) * 1.96
    sample_parameters["lower_95"] = sample_parameters["Mean"] - margin
    sample_parameters["upper_95"] = sample_parameters["Mean"] + margin

    values = pop["Population"]
    pop_mean = values.mean()
    pop_parameters = pd.DataFrame(
        {
            "Mean": [pop_mean],
            "Standard Deviation": [values.std(ddof=0)],
            "Variance": [values.var(ddof=0)],
            "N": [len(pop.index)],
        },
        index=["Population"],
    )

    fig = go.Figure()
    fig.add_trace(go.Histogram(x=pop["Population"], opacity=0.10))
    fig.update_xaxes(range=(0, 100))
    fig.update_yaxes(range=(0, 100))
    fig.update_layout(template="simple_white")
    fig.add_vline(pop_mean, line_width=3, line_color="purple", opacity=1)

    hist_pop = px.histogram(
        pop, x="Population", range_x=(0, 100), nbins=40, template="simple_white"
    ).update_layout(bargap=0.4)
    hist_pop.add_vline(pop_mean, line_width=3, line_color="purple", opacity=1)

    # Spread the interval lines evenly over the lower 90 units of the y axis.
    y_stepper = 90 / len(sample_parameters.index)
    y_level = y_stepper
    for lower, upper in zip(sample_parameters["lower_95"], sample_parameters["upper_95"]):
        fig.add_shape(
            type="line", x0=lower, y0=y_level, x1=upper, y1=y_level,
            opacity=1, line=dict(color="RoyalBlue", width=3),
        )
        y_level += y_stepper

    pop_dist_column = pn.Column("## Population Distribution", hist_pop)
    conf_column = pn.Column('## Confidence Intervals', fig)
    param_column = pn.Column(
        "## Population Parameters", pop_parameters,
        "## Sample Statistics", sample_parameters,
        width=450,
    )

    return pn.Row(pop_dist_column, conf_column, param_column, width=2000)

In [170]:
# Tab 1: population only; the slider controls the population size.
dash = pn.interact(
    make_pop,
    mean=fixed(50),
    stdev=fixed(10),
    n=pn.widgets.IntSlider(name="Population Size", value=100, start=100, end=20000, step=100),
    throttled=True,
)

# Tab 2: a single sample from a fixed population; the slider controls the sample size.
dash2 = pn.interact(
    make_samp,
    mean=fixed(50),
    stdev=fixed(10),
    n=fixed(1000),
    sample_size=pn.widgets.IntSlider(name="Sample Size", value=30, start=1, end=100),
    throttled=False,
)

# Tab 3: sampling distribution; sliders for population spread, population size
# and the number of samples drawn.
dash3 = pn.interact(
    make_samp_dist,
    mean=fixed(50),
    stdev=pn.widgets.IntSlider(name="Population Standard Deviation", value=5, start=1, end=10),
    n=pn.widgets.IntSlider(name="Population Size", value=100, start=100, end=20000, step=100),
    samples=pn.widgets.IntSlider(name="Number of Samples", value=100, start=100, end=20000, step=100),
    throttled=False,
)

# Tab 4: confidence intervals of many samples; the slider controls the sample size.
dash4 = pn.interact(
    make_samps,
    mean=fixed(50),
    stdev=fixed(10),
    n=fixed(1000),
    sample_size=pn.widgets.IntSlider(name="Sample Size", value=30, start=1, end=100),
    throttled=True,
)

In [171]:
# Collect the four interactive dashboards into one tabbed layout.
tabs = pn.Tabs(
    ("Population", dash),
    ("Sample", dash2),
    ("Sample Distribution", dash3),
    ("Samples", dash4),
)

In [172]:
# Show the tabbed dashboard as this cell's output.
tabs

In [128]:
# Serve the tabbed dashboard from a local server (see the "Launching server"
# message in the output below), using the given page title.
tabs.show(title="Welcome to Inferential Statistics")

Launching server at http://localhost:63339


<bokeh.server.server.Server at 0x25ed22a45c8>

In [62]:
# Exploratory cell: the same computation as make_samps(), run at module level
# with 5 samples of size 30 so the intermediate frames can be inspected.
population = NormalPopulation(50, 10, 1000)

# add population into dataframe
pop = pd.DataFrame(population.pop, columns = ["Population"])

# get samples (one column per sample)
samples = pd.DataFrame()
for i in range(5):
    sample_info = population.get_sample(30)
    sample = sample_info['sample']
    samples[f"Sample {i}"] = sample

# Column-wise reductions give one row of statistics per sample. np.mean(samples)
# is avoided because its result shape depends on the pandas version and can
# collapse to a single scalar, leaving the table and the intervals as NaN.
sample_parameters = pd.DataFrame(
    {
        "Mean": samples.mean(),
        "Standard Deviation": samples.std(ddof=1),
        "Variance": samples.var(ddof=1),
        "n": len(samples.index),
    }
)
sample_parameters["lower_95"] = sample_parameters["Mean"]-((sample_parameters["Standard Deviation"]/(np.sqrt(sample_parameters["n"])))*1.96)
sample_parameters["upper_95"] = sample_parameters["Mean"]+((sample_parameters["Standard Deviation"]/(np.sqrt(sample_parameters["n"])))*1.96)


# Population parameters use ddof=0 and are taken from the single column.
pop_parameters = pd.DataFrame(
    {
        "Mean": [pop["Population"].mean()],
        "Standard Deviation": [pop["Population"].std(ddof=0)],
        "Variance": [pop["Population"].var(ddof=0)],
        "N": [len(pop.index)],
    },
    index=["Population"],
)

In [63]:
# Inspect the drawn samples (one column per sample).
samples

Unnamed: 0,Sample 0,Sample 1,Sample 2,Sample 3,Sample 4
0,41.094668,71.393281,48.522544,72.832082,46.847137
1,47.632578,16.289031,53.846803,48.801979,41.252052
2,48.800046,49.665329,33.896882,51.096219,38.045605
3,39.916663,43.329671,38.195053,62.254401,43.484733
4,52.172808,43.219766,51.641025,29.254272,46.545055
5,54.591486,51.0617,33.01483,42.794694,55.728461
6,58.319006,37.395034,31.926254,41.094668,47.765152
7,43.223406,50.659731,47.87109,28.038217,48.135817
8,57.240387,36.186755,46.009578,49.130938,37.757166
9,62.158378,45.280663,46.409841,54.328999,52.826178
