# COURSE: Master Python for scientific programming by solving projects
## PROJECT: The Law of Large Numbers
#### TEACHER: Mike X Cohen, sincxpress.com
##### COURSE URL: udemy.com/course/maspy_x/?couponCode=202201

In [None]:
# load modules
import numpy as np
import matplotlib.pyplot as plt

# Generate a population of random numbers

In [None]:
# Parameters controlling the simulation.

# Number of values drawn per sample, and number of samples to draw.
sample_size, number_of_samples = 50, 500

# Number of values in the population. Written as a float literal; it is
# converted with int() at the point where the population is generated.
population_size = 2.3e5
print(population_size)

In [None]:
# Build the population: reciprocals of log-spaced values between .001 and 10

population = 1 / np.logspace(
    start=np.log10(.001),
    stop=np.log10(10),
    num=int(population_size),
)

# mean of the full population; later cells compare sample means against it
trueMean = population.mean()

# plot only every plotskip-th value of the population
plotskip = int(1e3)
plt.plot(population[::plotskip], 'o')
plt.ylabel('Data value')
plt.xlabel('Sample')
plt.show()

In [None]:
# Randomly reorder the population in place, then redraw the subsampled plot

np.random.shuffle(population)

plt.plot(population[::plotskip], 'o')
plt.ylabel('Data value')
plt.xlabel('Sample')
plt.show()

# Monte Carlo sampling

In [None]:
# Draw a single random sample of sample_size values and report its mean

randsample = np.random.choice(a=population, size=sample_size)
print(randsample.mean())

In [None]:
# Repeat the random draw number_of_samples times, keeping each sample's mean

samplemeans = np.zeros(number_of_samples)

for expi in range(number_of_samples):
    # one fresh draw of sample_size values from the population
    randsample = np.random.choice(a=population, size=sample_size)
    samplemeans[expi] = randsample.mean()


In [None]:
# Each sample mean as a black dot, with the population mean as a red line

plt.plot(samplemeans, 'ko', markerfacecolor='k', label='Sample means')
plt.plot(
    [0, number_of_samples],
    [trueMean, trueMean],
    'r',
    linewidth=5,
    label='True mean',
)
plt.xlabel('Sample number')
plt.ylabel('Mean value')
plt.legend()
plt.show()

# Cumulative averaging

In [None]:
# Cumulative averaging

# Loop version: cumave[i] is the mean of the first i+1 sample means.
# The slice must end at i+1 so that element i is included. With [:i] the
# first entry is the mean of an empty slice (nan, with a RuntimeWarning)
# and every later entry lags one sample behind the loopless version below.
cumave = np.zeros(number_of_samples)
for i in range(number_of_samples):
    cumave[i] = np.mean(samplemeans[:i+1])

# alternative (loopless!): running sum divided by the running count
cumave2 = np.cumsum(samplemeans) / np.arange(1, number_of_samples+1)


# both versions should now coincide point for point
plt.plot(cumave, 'ko', label='Cumulative averages')
plt.plot(cumave2, 'b+', label='Cumulative averages alt.')
plt.plot([0, number_of_samples], [trueMean, trueMean], 'r', linewidth=5, label='True mean')
plt.xlabel('Sample number')
plt.ylabel('Mean value')

plt.legend()
plt.show()

In [None]:

# Repeat the whole Monte Carlo experiment number_of_meta_samples times

number_of_meta_samples = 100

# one row per repetition, one column per sample
allsamplemeans = np.zeros((number_of_meta_samples, number_of_samples))
cumaves = np.zeros((number_of_meta_samples, number_of_samples))

for metai in range(number_of_meta_samples):
    # refill the existing samplemeans array with a fresh set of sample means
    for expi in range(number_of_samples):
        randsample = np.random.choice(a=population, size=sample_size)
        samplemeans[expi] = randsample.mean()

    # keep this repetition's sample means (added on later for CLT)
    allsamplemeans[metai, :] = samplemeans

    # cumulative average, stored as squared distance from the true mean
    tmp = np.cumsum(samplemeans) / np.arange(1, number_of_samples + 1)
    cumaves[metai, :] = (tmp - trueMean) ** 2

# now plot: one line per repetition
plt.plot(cumaves.T)
plt.ylim([-10, 500])
plt.xlabel('Sample number')
plt.ylabel('Squared divergence from true mean')
plt.show()


In [None]:
# Bonus: The Central Limit Theorem
# Histogram of the values currently held in samplemeans; the number of
# bins is chosen by numpy's 'fd' rule.
plt.hist(samplemeans, bins='fd')
plt.ylabel('Count')
plt.xlabel('Sample mean')
plt.show()


In [None]:
# Overlay one histogram of sample means per meta-sample (light gray lines)
nbins = 50

# x and y are produced fresh by every iteration, so nothing is preallocated
for i in range(number_of_meta_samples):
    # counts per bin and the nbins+1 bin edges for this repetition
    y, edges = np.histogram(allsamplemeans[i, :], bins=nbins)
    # plot against bin centers: midpoints of adjacent edges
    x = (edges[1:] + edges[:-1]) / 2
    plt.plot(x, y, color=[.8, .8, .8])

plt.xlabel('Sample averages')
plt.ylabel('Counts')
plt.show()