In [1]:
import logging
import sys
import zlib

from normstats import *
# Print floats in fixed-point notation instead of scientific notation
np.set_printoptions(suppress=True)

# Send log records to stdout so they appear in the cell output.
logging.basicConfig(
    stream=sys.stdout,
    format='%(asctime)s - %(name)s - %(funcName)s - %(levelname)s - %(message)s',
    # Switch to logging.DEBUG to get a lot more verbosity
    level=logging.WARNING,
    # Drop any handlers already attached to the root logger so this configuration
    # takes effect even when the cell is re-run
    force=True,
)
logger = logging.getLogger(__name__)

# For reproducibility: seed NumPy's global RNG with a fixed 32-bit value.
# The built-in hash() of a str is salted per interpreter process (see PYTHONHASHSEED),
# so a seed derived from it differs after every kernel restart. CRC-32 of the same
# phrase is stable across runs and already lies in the [0, 2**32 - 1] range that
# np.random.seed accepts.
np.random.seed(zlib.crc32(b"giraffes are cool"))

In [2]:
# Parameters for normative synthetic data generation

# Number of samples
n = 100
# Number of independent variables
k = 3
# Number of dependent variables
m = 2

# B (model params) will be chosen randomly from this range
B_range = (10, 20)
# X (ind vars) will be chosen randomly from this range
X_range = (1, 20)
# X (ind vars) for the single subject to evaluate will be chosen randomly from this range
X_range_syth = (5, 15)
# Should X (ind vars) be integers (True) or floats (False)?
X_are_ints = True

# The actual standard deviation of the model residual, used to generate synthetic data.
# Fixed at 1.0 for every dependent variable. The array is sized from m (one row per
# dependent variable) so that changing m above keeps its shape consistent with B_actual.
# To randomize the noise level instead, draw it from a range such as (10.0, 20.0).
S_YdotX_actual = np.ones((m, 1))

# The actual model parameters, used to generate synthetic data: m rows (one per dependent
# variable) by k+1 columns, drawn uniformly from B_range.
# NOTE(review): the extra column is presumably the intercept term; confirm against normstats.
B_actual = np.random.uniform(B_range[0], B_range[1], size=(m, k+1))

In [3]:
# Draw a synthetic normative dataset of n samples from the actual model
X, Y, epsilon_actual = gen_synth_norm_data(
    B_actual,
    n,
    S_YdotX_actual,
    X_range,
    X_are_ints=X_are_ints,
)

# Estimate the model parameters back from that synthetic dataset.
# R, X_mean and X_std are reused when generating and evaluating single subjects below.
B_estimate, S_YdotX_estimate, R, X_mean, X_std = estimate_model_params(X, Y)

In [4]:
# Show actual vs. estimated parameters: label, value, then a blank separator line
# (no trailing blank line after the last pair).
print('B_actual:', B_actual, '', sep='\n')
print('B_estimate', B_estimate, '', sep='\n')
print('S_YdotX_actual:', S_YdotX_actual, '', sep='\n')
print('S_YdotX_estimate:', S_YdotX_estimate, sep='\n')

B_actual:
[[17.96792581 18.3345392  11.73340325 14.72934485]
 [10.63800895 13.61066806 10.65474291 10.852796  ]]

B_estimate
[[18.03403474 18.38124974 11.70906168 14.70657026]
 [10.87303757 13.59708354 10.6587232  10.87420713]]

S_YdotX_actual:
[[1.]
 [1.]]

S_YdotX_estimate:
[[1.04153877]
 [0.97866029]]


In [5]:
# Generate a single median subject, in two different ways.
# NOTE(review): the 0.0 argument is presumably the noise standard deviation, i.e. the subject
# sits exactly on the model's prediction; confirm against gen_synth_sub_data.

# 1) A *perfectly* median subject, according to the estimated model parameters, is generated
#    from the estimated parameters of the model.
x_obs_median_estimate, y_obs_median_estimate = gen_synth_sub_data(
    B_estimate,
    1,
    0.0,
    X_range_syth,
    X_mean,
    X_std,
    X_are_ints=X_are_ints,
)

# 2) In real life, however, an actual median subject comes from the actual (not estimated)
#    model parameters.
x_obs_median_real, y_obs_median_real = gen_synth_sub_data(
    B_actual,
    1,
    0.0,
    X_range_syth,
    X_mean,
    X_std,
    X_are_ints=X_are_ints,
)

In [6]:
# Score both subjects against the *estimated* model parameters.
# Each call returns the percentile estimates and their confidence intervals.
p_perfect, p_ci_perfect = single_subject_eval(
    x_obs_median_estimate, y_obs_median_estimate,
    B_estimate, R, S_YdotX_estimate, n, X_mean, X_std,
)
p_real, p_ci_real = single_subject_eval(
    x_obs_median_real, y_obs_median_real,
    B_estimate, R, S_YdotX_estimate, n, X_mean, X_std,
)

In [7]:
# This subject is perfectly median according to the model estimates, so its percentile
# estimates should be *exactly* 0.5.
print(
    'percentile estiamtes of *perfectly* median subject',
    p_perfect[:,0],
    sep='\n',
)
# Column 0 of the CI array is the lower bound, column 1 the upper bound.
print(
    'lower confidence interval of percentile estiamtes of *perfectly* median subject',
    p_ci_perfect[:,0],
    sep='\n',
)
print(
    'upper confidence interval of percentile estiamtes of *perfectly* median subject',
    p_ci_perfect[:,1],
    sep='\n',
)

percentile estiamtes of *perfectly* median subject
[0.5 0.5]
lower confidence interval of percentile estiamtes of *perfectly* median subject
[0.42204455 0.42204455]
upper confidence interval of percentile estiamtes of *perfectly* median subject
[0.57795545 0.57795545]


In [8]:
# Percentile estimates should be *approximately* 0.5 since this subject was generated from the
# actual model parameters, not the estimated ones. The difference between these estimates and
# 0.5 depends on how closely the model parameter estimates match the actual values.
# This will get closer to 0.5 as n increases.
print('percentile estimates of real-world median subject')
print(p_real[:,0])
# These intervals belong to the real-world subject (p_ci_real), so label them as such.
# Column 0 of the CI array is the lower bound, column 1 the upper bound.
print('lower confidence interval of percentile estimates of real-world median subject')
print(p_ci_real[:,0])
print('upper confidence interval of percentile estimates of real-world median subject')
print(p_ci_real[:,1])

percentile estimates of real-world median subject
[0.52580429 0.54859746]
lower confidence interval of percentile estiamtes of *perfectly* median subject
[0.44757958 0.47024886]
upper confidence interval of percentile estiamtes of *perfectly* median subject
[0.60329482 0.62556196]


In [9]:
# Alternatively, evaluate both subjects against the *actual* model parameters. The roles swap:
# x_obs_median_real (generated from B_actual) is now the perfectly median subject, while
# x_obs_median_estimate (generated from B_estimate) is only approximately median.
# NOTE: this rebinds p_perfect / p_real and their confidence intervals from the earlier
# evaluation, so anything printed after this cell reports these new values.
p_real, p_ci_real = single_subject_eval(
    x_obs_median_estimate, y_obs_median_estimate,
    B_actual, R, S_YdotX_actual, n, X_mean, X_std,
)
p_perfect, p_ci_perfect = single_subject_eval(
    x_obs_median_real, y_obs_median_real,
    B_actual, R, S_YdotX_actual, n, X_mean, X_std,
)

In [10]:
# The subject and the evaluation now use the same (actual) parameters, so the percentile
# estimates should be *exactly* 0.5.
print(
    'percentile estimates of *perfectly* median subject',
    p_perfect[:,0],
    sep='\n',
)
# Column 0 of the CI array is the lower bound, column 1 the upper bound.
print(
    'lower confidence interval of percentile estiamtes of *perfectly* median subject',
    p_ci_perfect[:,0],
    sep='\n',
)
print(
    'upper confidence interval of percentile estiamtes of *perfectly* median subject',
    p_ci_perfect[:,1],
    sep='\n',
)

percentile estimates of *perfectly* median subject
[0.5 0.5]
lower confidence interval of percentile estiamtes of *perfectly* median subject
[0.42206401 0.42206401]
upper confidence interval of percentile estiamtes of *perfectly* median subject
[0.57793599 0.57793599]


In [11]:
# Should be approximately 0.5, and get closer to 0.5 as n increases
print('percentile estimates of real-world median subject')
print(p_real[:,0])
# These intervals belong to the real-world subject (p_ci_real), so label them as such.
# Column 0 of the CI array is the lower bound, column 1 the upper bound.
print('lower confidence interval of percentile estimates of real-world median subject')
print(p_ci_real[:,0])
print('upper confidence interval of percentile estimates of real-world median subject')
print(p_ci_real[:,1])

percentile estimates of real-world median subject
[0.51283524 0.56693283]
lower confidence interval of percentile estiamtes of *perfectly* median subject
[0.43471653 0.48855656]
upper confidence interval of percentile estiamtes of *perfectly* median subject
[0.59058868 0.64339944]
