# Using sampling to construct the $t$-distribution

In this notebook, we'll explore the one sample $t$-test as an example for reconstructing the $t$-distribution.

*Import the modules for this notebook:*

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import t

Next, set up the parameters for this example:

In [None]:
# number of values drawn in each sample (the sample size)
N = 5

# reference mean that is subtracted from the sample mean in the t statistic
# (the null-hypothesis mean of the one-sample t-test)
mu0 = 22

# mean and standard deviation of the normal distribution the data are drawn from
mu = 22
sigma = 4

### Make a function to calculate the sample

Here, we define some functions to draw $N$ random values from normally distributed data.

In [None]:
def compute_sample(mu0, mu, sigma, N):
    """Draw one normal sample and compute its one-sample t statistic.

    Parameters
    ----------
    mu0 : float
        Reference mean subtracted from the sample mean.
    mu, sigma : float
        Mean and standard deviation passed to ``np.random.normal``.
    N : int
        Number of values to draw.

    Returns
    -------
    tuple
        ``(values, t_stat)``: the array of drawn values and the t statistic
        computed from it.
    """
    values = np.random.normal(mu, sigma, N)
    sample_mean = np.mean(values)
    # ddof=1 divides by N - 1, i.e. the sample standard deviation.
    standard_error = np.std(values, ddof=1) / np.sqrt(N)
    t_stat = (sample_mean - mu0) / standard_error
    return values, t_stat

In [None]:
def gaussian(x, mu, sigma):
    """Evaluate the normal probability density at ``x``.

    Parameters
    ----------
    x : float or array-like supporting numpy arithmetic
        Point(s) at which to evaluate the density.
    mu : float
        Mean of the distribution.
    sigma : float
        Standard deviation of the distribution.

    Returns
    -------
    Density value(s), with the same shape as ``x``.
    """
    variance = sigma**2
    normalization = 1 / np.sqrt(2 * np.pi * variance)
    exponent = -1 * ((x - mu)**2) / (2 * variance)
    return normalization * np.exp(exponent)

In [None]:
def plot_samples(mu0, mu, sigma, values,t_val):
    """Plot the normal density together with the sampled values.

    The density curve spans ``mu`` plus/minus three ``sigma`` on a grid of
    100 points; each entry of ``values`` is drawn as a dot at its density
    value. ``t_val`` is shown in the figure title.

    ``mu0`` is accepted for a signature parallel to ``compute_sample`` but
    is not used by the plot.
    """
    grid = np.linspace(mu - 3 * sigma, mu + 3 * sigma, 100)

    # Density curve first, then the sampled points placed on that curve.
    plt.plot(grid, gaussian(grid, mu, sigma), 'k-')
    plt.plot(values, gaussian(values, mu, sigma), 'k.', markersize=8)
    plt.title('$t=$' + str(t_val))
    plt.show()

# Draw a single sample with the parameters defined earlier and plot it
# on top of the normal density it was drawn from.
values, t_val = compute_sample(mu0, mu, sigma, N)
plot_samples(mu0, mu, sigma, values, t_val)

### Repeating the process

What happens if we repeat this process many times? What is the distribution of $t$ values we would get?

In [None]:
# Repeat the sampling experiment and keep only the t statistic
# (second element of the returned tuple) from each repetition.
trials = 10000
t_values = np.array(
    [compute_sample(mu0, mu, sigma, N)[1] for _ in range(trials)]
)

In [None]:
# Degrees of freedom passed to the t-distribution PDF: sample size minus one.
df = N-1
# Grid on which the reference PDF is evaluated.
x_t = np.linspace(-4, 4, 100)
# Reference curve: t-distribution density from scipy.stats.
plt.plot(x_t, t.pdf(x_t, df),'k-', label='PDF from scipy.stats')
# density=True normalizes the histogram so it can be compared with the PDF.
# The bin edges cover [-4, 4]; t values outside that range are not binned.
plt.hist(t_values,density=True,bins=np.linspace(-4,4,100), label='sampled values')
plt.xlabel('t')
plt.ylabel('density')
plt.legend()
plt.show()