In [1]:
import numpy as np
import pandas as pd
from scipy.stats import norm
import matplotlib.pyplot as plt

In [2]:
# Simulation settings
n = 25_000_000         # number of Monte Carlo draws of (theta, X)
n_exp = 104_900_000    # trials per simulated experiment; scales the mean and variance of X

# Seed NumPy's global RNG so a re-run reproduces the same draws
np.random.seed(42)

In [3]:
# Simulate theta from a two-part mixture: with probability 0.5 it is exactly 0.5,
# otherwise it is drawn uniformly from [0, 1).
# The order of the two rand() calls is kept so seeded runs reproduce the same draws.
theta = np.where(
    np.random.rand(n) < 0.5, 0.5, np.random.rand(n))

# Normal approximation to Binomial(n_exp, theta):
# mean n_exp * theta, standard deviation sqrt(n_exp * theta * (1 - theta))
means = theta * n_exp
std_errors = (n_exp * theta * (1 - theta))**0.5

# Draw one outcome X per theta from Normal(means, std_errors)
X = np.random.normal(means, std_errors)

# Standardise each draw against its own mean and standard deviation
z_score = (X - means) / std_errors

# Upper-tail probability P(Z > z_score) under a standard normal.
# norm.sf is used instead of 1 - norm.cdf, which loses precision for large z.
p_value = norm.sf(z_score)

In [4]:
# Collect each simulated parameter and its outcome side by side, one row per draw
data = pd.DataFrame(
    data={
        'theta': theta,
        'X': X,
    },
)

In [25]:
# Observation
# NOTE(review): with n_exp = 104_900_000 the theta = 0.5 draws centre on
# n_exp / 2 = 52_450_000 with standard deviation of about 5_121, so X_obs sits
# roughly 36 standard deviations below them. Confirm n_exp and X_obs are the
# intended values.
X_obs = 52_263_470

# NOTE(review): z_score holds the standardised *simulated* draws, so this is the
# same quantity as p_value from the simulation cell, not a p-value for X_obs.
# It is not referenced by the visible cells; kept in case later cells use it.
# norm.sf is used instead of 1 - norm.cdf to keep precision in the upper tail.
p_val = norm.sf(z_score)

# Half-width of the acceptance window around X_obs: 5% of X_obs
little_interval = X_obs * .05

# Mask of simulated X values within 5% of X_obs, computed once and reused below
within_range = np.abs(X - X_obs) <= little_interval

# Count how many simulated X values fall inside the window
count_within_range = np.sum(within_range)

# Count how many of those come from the point mass theta = 0.5
count_theta_zero = np.sum(within_range & (theta == 0.5))

# Estimated probability of theta = 0.5 given that X is within 5% of X_obs
prob_theta_zero_given_condition = count_theta_zero / count_within_range

# Odds of theta != 0.5 against theta = 0.5 given that X is within 5% of X_obs
odds_theta_nonzero_given_condition = (1 - prob_theta_zero_given_condition) / prob_theta_zero_given_condition

In [26]:
# Show the absolute half-width of the window around X_obs
print(f'{little_interval}')

2613173.5


In [27]:
# Relative half-width of the window, in percent, recovered from the values the
# analysis actually used so the labels below cannot disagree with the computation.
window_pct = 100 * little_interval / X_obs

print(data)
print(f'Number of trials with X close to X_obs and theta = 0.5: {count_theta_zero}')
print(f'Number of trials with X close to X_obs and theta ≠ 0.5: {count_within_range - count_theta_zero}')

# The alternative hypothesis is theta ≠ 1/2 (not "nonzero"), and the simulated
# quantity is X; the labels are worded to match.
print(f'Probability of theta = 1/2 given X is within {window_pct:g}% of X_obs: {prob_theta_zero_given_condition}')
print(f'Odds of theta ≠ 1/2 given X is within {window_pct:g}% of X_obs: {odds_theta_nonzero_given_condition}')


             theta             X
0         0.500000  5.244542e+07
1         0.163023  1.710289e+07
2         0.111495  1.170072e+07
3         0.400559  4.202623e+07
4         0.500000  5.245951e+07
...            ...           ...
24999995  0.066307  6.953886e+06
24999996  0.500000  5.244325e+07
24999997  0.500000  5.244817e+07
24999998  0.491206  5.153269e+07
24999999  0.500000  5.246573e+07

[25000000 rows x 2 columns]
Number of trials with X close to X_obs and theta = 0.5: 12495798
Number of trials with X close to X_obs and theta ≠ 0.5: 622430
Probability of theta = 1/2 given x_hat are within .05% of X_obs: 0.9525522806891297
Odds of theta nonzero given that x_hat are within .05% of X_obs: 0.049811144514339926
