In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Draw n i.i.d. samples from Uniform(0, 1) and plot their normalised histogram.
n = 10000
U = np.random.uniform(0, 1, n)
plt.hist(U, bins=30, density=True)
# Show the figure explicitly, as the later cells do; when run as a script,
# later plots would otherwise be drawn on top of this histogram.
plt.show()

In [ ]:
# get an empirical distribution function
def ecdf(x):
    """Return the points of the empirical distribution function of a sample.

    Parameters
    ----------
    x : array-like
        Sample values.

    Returns
    -------
    tuple
        ``(ordered, heights)``: the sample sorted with ``np.sort`` and the
        values ``1/n, 2/n, ..., 1`` where ``n = len(x)``.
    """
    ordered = np.sort(x)
    count = len(ordered)
    # The k-th smallest observation gets cumulative proportion k / count.
    heights = np.arange(1, count + 1) / count
    return ordered, heights

# Evaluate the ECDF of the uniform sample U and draw it as unconnected dots.
x, y = ecdf(U)
plt.plot(x, y, linestyle='none', marker='.')
plt.show()

In [ ]:
# Repeat the ECDF plot with a sample drawn through scipy.stats.uniform
# (the ECDF itself still comes from the ecdf() function defined in this file).
from scipy.stats import uniform

x = uniform.rvs(size=n)
# ecdf() returns the sorted sample, so x is sorted from here on.
x, y = ecdf(x)
plt.plot(x, y, marker='.', linestyle='none')
# Show the figure explicitly, as the previous ECDF cell does.
plt.show()

In [ ]:
# Kolmogorov-Smirnov test of the sample x against the distribution that
# kstest knows as 'uniform' (no extra distribution parameters are passed).
from scipy.stats import kstest

kstest_result = kstest(x, 'uniform')
print(kstest_result)

# Reject the null hypothesis when the p-value falls below 0.05.
print(
    'The null hypothesis that the sample comes from a uniform distribution is rejected'
    if kstest_result.pvalue < 0.05
    else 'The null hypothesis that the sample comes from a uniform distribution is not rejected'
)

In [ ]:
# Simulate the KS statistic: draw `rows` independent uniform samples of
# size n (one per row of x) and test each of them.
n = 100
rows = 10000
x = uniform.rvs(size=(rows, n))

# One KS statistic per row of x.
kstest_results = np.array([kstest(sample, 'uniform').statistic for sample in x])

# Histogram of the simulated KS statistics.
plt.hist(kstest_results, bins=30, density=True)
plt.show()

In [ ]:
# Generate a standard Cauchy sample from uniform draws using the tangent:
# X = tan(pi * (U - 1/2)) with U ~ Uniform(0, 1).
n = 1000
U = np.random.uniform(0, 1, n)
X = np.tan(np.pi * (U - 0.5))

# Restrict the histogram to [-10, 10] so extreme values do not flatten it.
# The bars are normalised by the full sample size instead of density=True:
# density=True renormalises over the samples inside the window only, which
# inflates the bars relative to the true density (only about 94% of the
# Cauchy mass lies in [-10, 10]).
bin_edges = np.linspace(-10, 10, 31)  # same 30 equal-width bins as before
bin_width = bin_edges[1] - bin_edges[0]
plt.hist(X, bins=bin_edges, weights=np.full(X.shape, 1 / (X.size * bin_width)))

# Plot the theoretical density over the same window, -10 to 10.
x = np.linspace(-10, 10, 1000)
y = 1 / (np.pi * (1 + x**2))
plt.plot(x, y, color='red')
plt.show()

# The Cauchy distribution has very heavy tails.

In [ ]:
# Distribution of the sample mean of n standard Cauchy variables.
# NOTE: X holds num_of_repeats * n = 10**8 values, i.e. roughly 800 MB
# as 8-byte floats.
num_of_repeats = 1000000
n = 100
X = np.random.standard_cauchy((num_of_repeats, n))
# Mean of each sample (one sample per row).
X_mean = np.mean(X, axis=1)
# Histogram of the sample means, cropped to [-5, 5].
plt.hist(X_mean, bins=30, density=True, range=(-5, 5))
# Show the figure explicitly, as the other cells do; when run as a script,
# later plots would otherwise be drawn on top of this histogram.
plt.show()

# The means are again Cauchy distributed, which is surprising (!!).
# The central limit theorem does not apply here: Cauchy is a stable distribution.

In [ ]:
# Sample means of n standard normal variables, repeated num_of_repeats times.
n = 100
num_of_repeats = 10000
X = np.random.normal(0, 1, (num_of_repeats, n))
X_mean = np.mean(X, axis=1)
plt.hist(X_mean, bins=30, density=True)

# Overlay draws from the theoretical distribution of the mean, N(0, 1/n).
# np.random.normal takes the standard deviation as its second argument, and
# the mean of n N(0, 1) variables has variance 1/n, i.e. standard deviation
# 1/sqrt(n) (not 1/n).
y = np.random.normal(0, 1 / np.sqrt(n), num_of_repeats)
plt.hist(y, bins=30, density=True, alpha=0.5)
plt.show()

In [ ]:
# Repeat the sample-mean experiment with Uniform(0, 1) samples of size n.
n = 12
num_of_repeats = 10000
X = np.random.uniform(0, 1, (num_of_repeats, n))
X_mean = np.mean(X, axis=1)
plt.hist(X_mean, bins=30, density=True)
# Show the figure explicitly, as the other cells do; when run as a script,
# later plots would otherwise be drawn on top of this histogram.
plt.show()

In [ ]:
# Random walks built from n Bernoulli(p) draws per walk; each draw is mapped
# from {0, 1} to a step of -1 or +1.
n = 100
p = 0.5

# Simulate num_of_repeats walks and plot all trajectories together.
num_of_repeats = 100
Y_cumsums = np.cumsum(np.random.binomial(1, p, (num_of_repeats, n)) * 2 - 1, axis=1)
# Transpose so that each walk (a row of Y_cumsums) becomes one line.
plt.plot(Y_cumsums.T, color='black', alpha=0.1)
# Show the figure explicitly, as the other cells do; when run as a script,
# later plots would otherwise be drawn on top of the trajectories.
plt.show()

In [ ]:
# Fraction of steps during which each walk is strictly positive,
# i.e. for how long the first player is winning.
Y_means = (Y_cumsums > 0).mean(axis=1)

# Histogram of those fractions.
plt.hist(Y_means, bins=30, density=True)
plt.show()

# The result should follow a beta distribution:
# Z ~ Beta(1/2, 1/2)
# The theorem holds for step distributions with expected value 0 and finite variance.

In [ ]:
# Illustrate the theorem with exponential steps shifted by -1.
n = 1000
num_of_repeats = 10000
X = np.random.exponential(scale=1, size=(num_of_repeats, n)) - 1
# Fraction of steps at which each row's running sum is strictly positive.
X_mean = (X.cumsum(axis=1) > 0).mean(axis=1)
plt.hist(X_mean, bins=30, density=True)
plt.show()

In [ ]:
# Uniform points in the unit disc by rejection: sample n points in the
# square [-1, 1] x [-1, 1] and keep those whose squared norm is below 1.
n = 1000
U = np.random.uniform(low=-1, high=1, size=(n, 2))
U = U[(U ** 2).sum(axis=1) < 1]
# U.T unpacks into the x and y coordinate columns.
plt.scatter(*U.T)
plt.show()

In [ ]:
# Histogram of the Euclidean norm of each row of U
# (the distance of each point from the origin).
U_dis = np.linalg.norm(U, ord=2, axis=1)
plt.hist(U_dis, bins=30, density=True)
plt.show()