In [None]:
%matplotlib inline
import numpy as np
import math
import matplotlib.pyplot as plt

### Sampling distribution

The sampling distribution represents the distribution of a point estimate based on multiple samples of the same size taken from the same population.

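By the central limit theorem, the sampling distribution of the sample mean is approximately normal when the sample size is sufficiently large: it is centered on the population mean, and its standard deviation equals the standard error of the sample mean, $\sigma / \sqrt{m}$, where $\sigma$ is the population standard deviation and $m$ is the number of observations in each sample.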

In this example we plot the sampling distribution of the sample mean for three different populations (normal, exponential and uniform) as we increase the number of samples drawn, keeping the size of each sample fixed. We will then compare the theoretical standard error to the observed standard deviation of the sample means.

In [None]:
# Generating the original populations (they are plotted in the next cell)
SIZE_POPULATION = 10000

# Seed NumPy's global generator so that "Restart & Run All" reproduces the
# same populations and the same samples drawn later with np.random.choice.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# The three draws consume the global random stream in this order; keep the
# order stable so the seeded results do not change.
normal_population = np.random.normal(loc=3.21, scale=2.44, size=SIZE_POPULATION)
uniform_population = np.random.uniform(low=-2.35, high=-1.12, size=SIZE_POPULATION)
exponential_population = np.random.exponential(scale=1.23, size=SIZE_POPULATION)

In [None]:
# One row of three side-by-side axes, one per population plotted below.
fig, axarray = plt.subplots(nrows=1, ncols=3)

def plot_distribution(population, ax, label, bins=50):
    """Draw a relative-frequency histogram of `population` on `ax`.

    Every observation is given the weight 1 / len(population), so the bar
    heights are fractions of the data rather than raw counts.

    Parameters
    ----------
    population : sequence of numbers
        Values to histogram.
    ax : object exposing `hist` and `set_xlabel`
        Axes to draw on.
    label : str
        Text placed at the start of the x-axis label; the mean and standard
        deviation are appended to it.
    bins : int, optional
        Forwarded to `ax.hist`. Defaults to 50.

    Returns
    -------
    tuple
        `(mean, std)` of `population`, computed with `np.mean` and `np.std`.
    """
    center = np.mean(population)
    spread = np.std(population)

    # Uniform weights turn the histogram counts into proportions.
    n_values = float(len(population))
    relative_weights = np.ones_like(population) / n_values
    ax.hist(population, bins=bins, weights=relative_weights)

    caption = ''.join([label, ' (mean = ', str(center), ', \nstd = ', str(spread), ')'])
    ax.set_xlabel(caption)

    return center, spread
    
# Plot each population on its own axes and keep the mean and standard
# deviation that plot_distribution returns for each of them.
mean_normal, std_normal = plot_distribution(normal_population, axarray[0], 'normal')
mean_uniform, std_uniform = plot_distribution(uniform_population, axarray[1], 'uniform')
mean_exponential, std_exponential = plot_distribution(exponential_population, axarray[2], 'exponential')

# Widen the figure so the three panels fit side by side.
fig.set_figwidth(16)

In [None]:
# Number of samples drawn to build each sampling distribution (one figure per value).
N = [100, 600, 1100, 1600]
# Number of observations in every individual sample.
SIZE_SAMPLE = 50


def sampling_distribution(population, ax, n, label):
    """Plot the distribution of the means of `n` samples drawn from `population`.

    Each sample contains SIZE_SAMPLE observations drawn without replacement
    using NumPy's global random generator.

    Parameters
    ----------
    population : array-like
        Values to sample from.
    ax : matplotlib axes
        Axes the histogram of sample means is drawn on.
    n : int
        Number of samples to draw (i.e. number of sample means plotted).
    label : str
        Prefix for the x-axis label; ' - n = <n>' is appended.
    """
    # Generating samples without replacement
    means = [
        np.mean(np.random.choice(population, size=SIZE_SAMPLE, replace=False))
        for _ in range(n)
    ]
    plot_distribution(means, ax, label + ' - n = ' + str(n), 20)


def compute_std_error(std, n):
    """Return the standard error of the sample mean, std / sqrt(n).

    Parameters
    ----------
    std : float
        Standard deviation of the population.
    n : int
        Number of observations in ONE sample (the sample size), not the
        number of samples drawn.
    """
    return std / math.sqrt(n)


for n in N:
    fig, axarray = plt.subplots(1, 3)

    sampling_distribution(normal_population, axarray[0], n, 'normal')
    sampling_distribution(uniform_population, axarray[1], n, 'uniform')
    sampling_distribution(exponential_population, axarray[2], n, 'exponential')

    # The theoretical standard error depends on the size of each sample
    # (SIZE_SAMPLE), not on how many samples were drawn (n), so it is the same
    # in every figure. Drawing more samples only makes the histogram smoother.
    # Sampling is without replacement from a finite population, so the exact
    # value is marginally smaller (finite population correction); with 50
    # observations out of 10000 the difference is negligible.
    fig.suptitle('n = ' + str(n) +
                 '\nstandard_error_normal = ' + str(compute_std_error(std_normal, SIZE_SAMPLE)) +
                 '\nstandard_error_uniform = ' + str(compute_std_error(std_uniform, SIZE_SAMPLE)) +
                 '\nstandard_error_exponential = ' + str(compute_std_error(std_exponential, SIZE_SAMPLE))
                 )

    fig.set_figwidth(16)