<a href="https://colab.research.google.com/github/mcnica89/Markov-Chains-RL-W24/blob/main/MonteCarlo_improvements.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
import numpy as np

In [18]:
def my_dice_roll():
  """Simulate one roll of a six-sided die.

  Returns:
    An integer drawn by ``np.random.randint`` from 1 to 6 inclusive
    (the upper bound passed to ``randint`` is exclusive, hence 7).
  """
  lowest_face, highest_face = 1, 6
  return np.random.randint(low=lowest_face, high=highest_face + 1)

# Simplest Method

In [19]:
#%%time
# Plain Monte Carlo: average N independent dice rolls, one Python call per roll.
N = 10**6
total = sum(my_dice_roll() for _ in range(N))

est_mean = total / N
print(est_mean)

3.498569


# Major Error Analysis Improvement: Estimate the variance and use Central Limit Theorem


In [20]:
# Single pass over N rolls, accumulating the sum and the sum of squares so
# that both the mean and the variance can be estimated afterwards.
N = 10**6
total, total_sq = 0, 0
for roll_idx in range(N):
  roll = my_dice_roll()
  total += roll
  total_sq += roll**2

est_mean = total / N
# Variance as E[X^2] - (E[X])^2.  (true answer: mean 3.5, variance 35/12 ~= 2.9)
est_var = total_sq / N - est_mean**2

print(f"{est_mean=}")
print(f"{est_var=}")

# Two standard errors of the sample mean on either side of the estimate.
half_width_2SD = 2 * np.sqrt(est_var / N)
upper_est_2SD = est_mean + half_width_2SD
lower_est_2SD = est_mean - half_width_2SD

print(f"{upper_est_2SD=}")
print(f"{lower_est_2SD=}")


est_mean=3.500308
est_var=2.9195359051359997
upper_est_2SD=3.5037253298963584
lower_est_2SD=3.4968906701036415


# Major Speed Improvement: Use vectors!

In [21]:
def my_vector_dice_roll(N_sample):
  """Simulate ``N_sample`` rolls of a six-sided die in one vectorized call.

  Args:
    N_sample: forwarded to ``np.random.randint`` as its ``size`` argument.

  Returns:
    Array of integers from 1 to 6 inclusive with shape given by ``N_sample``.
  """
  lowest_face, highest_face = 1, 6
  return np.random.randint(low=lowest_face, high=highest_face + 1, size=N_sample)

In [22]:
#%%time
# Draw all N rolls at once and average them with numpy instead of a Python loop.
N = 10**6
rolls = my_vector_dice_roll(N)
est_mean = rolls.mean()
print(est_mean)

3.501951


# Vector Speed AND Errors


In [23]:
# Vectorized version of the mean / variance / 2-SD interval computation.
N = 10**6
samples = my_vector_dice_roll(N)
est_mean = samples.mean()
est_var = samples.var()  # numpy default ddof=0, i.e. divides by N

print(f"{est_mean=}")
print(f"{est_var=}")

# Two standard errors of the sample mean on either side of the estimate.
half_width_2SD = 2 * np.sqrt(est_var / N)
upper_est_2SD = est_mean + half_width_2SD
lower_est_2SD = est_mean - half_width_2SD

print(f"{upper_est_2SD=}")
print(f"{lower_est_2SD=}")


est_mean=3.500412
est_var=2.916051830256
upper_est_2SD=3.503827290225006
lower_est_2SD=3.4969967097749937


# Another improvement: An online algorithm for the mean

In [24]:
# Online mean: after the n-th sample, nudge the running estimate toward the
# new value by a step of size 1/n, so no running total has to be stored.
N = 10**6
est_mean = 0
for n in range(1, N + 1):
  roll = my_dice_roll()
  step_size = 1 / n
  est_mean += step_size * (roll - est_mean)

print(est_mean)

3.5026629999998957


# Online Algorithm for Mean and Variance: Welford's online algorithm

From wiki page https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm

In [25]:
def update(existing_aggregate, new_value):
    """Fold one new observation into a Welford running aggregate.

    Args:
        existing_aggregate: ``(count, mean, M2)`` where ``count`` is the number
            of samples seen so far, ``mean`` is their running mean, and ``M2``
            is the running sum of squared distances from the mean.
        new_value: the observation to incorporate.

    Returns:
        The updated ``(count, mean, M2)`` tuple.
    """
    seen, running_mean, sq_dist_sum = existing_aggregate
    seen += 1
    # Deviation measured against the mean *before* it absorbs new_value ...
    dev_from_old_mean = new_value - running_mean
    running_mean += dev_from_old_mean / seen
    # ... and against the mean *after*; their product is the M2 increment.
    dev_from_new_mean = new_value - running_mean
    sq_dist_sum += dev_from_old_mean * dev_from_new_mean
    return (seen, running_mean, sq_dist_sum)

def finalize(existing_aggregate):
    """Retrieve the mean, variance and sample variance from an aggregate.

    Args:
        existing_aggregate: ``(count, mean, M2)`` as produced by ``update``.

    Returns:
        A 3-tuple ``(mean, variance, sample_variance)`` where ``variance`` is
        ``M2 / count`` and ``sample_variance`` is ``M2 / (count - 1)``.
        When fewer than two samples have been seen, all three entries are NaN.
    """
    (count, mean, M2) = existing_aggregate
    if count < 2:
        # Always return a 3-tuple so callers that unpack three values
        # (``m, v, sv = finalize(...)``) get NaNs instead of a TypeError
        # from trying to unpack a bare float.
        nan = float("nan")
        return (nan, nan, nan)
    return (mean, M2 / count, M2 / (count - 1))


# Start from an empty aggregate (count, mean, M2) and feed it one roll at a time.
aggregate = (0, 0, 0)
N = 10**6
for roll_idx in range(N):
  aggregate = update(aggregate, my_dice_roll())

# finalize yields (mean, variance, sample_variance); only the first two are used.
est_mean, est_var = finalize(aggregate)[:2]


print(f"{est_mean=}")
print(f"{est_var=}")

# Two standard errors of the sample mean on either side of the estimate.
half_width_2SD = 2 * np.sqrt(est_var / N)
upper_est_2SD = est_mean + half_width_2SD
lower_est_2SD = est_mean - half_width_2SD

print(f"{upper_est_2SD=}")
print(f"{lower_est_2SD=}")

est_mean=3.4975289999999144
est_var=2.9189918941590633
upper_est_2SD=3.5009460114977933
lower_est_2SD=3.4941119885020355


# Constant alpha-averages: A lazy way to keep track of things




In [26]:
# Constant step-size average: every new roll pulls the estimate toward itself
# by a fixed fraction alpha, instead of the shrinking 1/n step.
alpha = 0.001
N = 10**6
est_mean = 0
for step in range(N):
  roll = my_dice_roll()
  est_mean += alpha * (roll - est_mean)

print(est_mean)

3.4763437855051538
