In [3]:
import numpy as np
from scipy.stats import t
from numpy.linalg import inv
import random

In [31]:
# very basic test data

# number of samples
n = 10
# number of ind vars
k = 5
# number of dep vars
m = 1
# std_dev of noise
S_YdotX_actual = 1.0
# actual model params
B_actual_min_range = 0
B_actual_max_range = 21
B_actual = np.random.randint(B_actual_min_range, B_actual_max_range , size=(m, k+1))

# Z-normalize ind vars (X) and append a ones column for linear offests (Z_1)
X_min_range = 1
X_max_range = 6
X = np.random.randint(X_min_range, X_max_range, size=(k, n))
X_mean = np.mean(X)
X_std = np.std(X)
Z = (X - X_mean)/X_std
Z_1 = np.vstack((Z, np.ones((1,n))))

# Generate dep vars
epsilon_actual = np.random.normal(loc=0, scale=S_YdotX_actual, size=(1,n))
Y = B_actual @ Z_1 + epsilon_actual

In [32]:
#---- 
# Estimate model parameters
B_estimate = Y @ Z_1.T @ inv(Z_1 @ Z_1.T)
R = inv(Z @ Z.T)
Y_estimate = B_estimate @ Z_1
S_YdotX_estimate = np.std(Y - Y_estimate)

print('B_actual:')
print(f'{B_actual}')
print()
print('B_estimate:')
print(f'{B_estimate}')
print()
print(f'S_YdotX_actual:   {S_YdotX_actual}')
print(f'S_YdotX_estimate: {S_YdotX_estimate}')

B_actual:
[[ 8  7  5  7  2 13]]

B_estimate:
[[ 7.3894599   7.20620064  5.49532874  6.89002528  1.50375164 13.23893752]]

S_YdotX_actual:   1.0
S_YdotX_estimate: 0.8070111055366468


In [33]:
# ----
# Generate a perfectly median subject:
x_obs = np.random.randint(X_min_range, X_max_range, size=(k, 1))
z_obs = (x_obs - X_mean)/X_std
z_obs1 = np.vstack((z_obs, 1))

# Compute the subjects 'actual' (observed) dep vars and estimated dep vars
y_obs = B_actual @ z_obs1
y_estimate = B_estimate @ z_obs1

print()
print(f'y_obs.T:          {y_obs.T}')
print(f'y_estimate.T:     {y_estimate.T}')

r_A = np.sum(np.diag(R) * (z_obs.T ** 2))

# Computing r_B:
# ------------------
# the off diagonal indices into R
# https://stackoverflow.com/a/35746928
off_d_R_idx = np.where(~np.eye(R.shape[0],dtype=bool))

# We divide by 2, because we only iterate over half (i.e. upper) of the off-diagonal elements
off_d_R_idx_len = int(off_d_R_idx[0].shape[0]/2)

# This is the r_{i,j} term of the equation for r_B
r_i_j = R[off_d_R_idx[0][0:off_d_R_idx_len],off_d_R_idx[1][0:off_d_R_idx_len]]
# This is the z_{obs,i} term of the equation for r_B
z_obs_i = z_obs[off_d_R_idx[0][0:off_d_R_idx_len]].T
# This is the z_{obs,j} term for the equation for r_B
z_obs_j = z_obs[off_d_R_idx[1][0:off_d_R_idx_len]].T

r_B = np.sum(r_i_j * z_obs_i * z_obs_j)

# Compute S_{N+1}
S_Nplus1 = S_YdotX_estimate * np.sqrt(1 + 1/n + 1/(n-1)*r_A + 2/(n-1)*r_B)

t_diff = (y_obs - y_estimate)/S_Nplus1
p = t.cdf(x=t_diff, df=n-k)
print(f'p: {p}')


y_obs.T:          [[17.53149094]]
y_estimate.T:     [[17.58906588]]
p: [[0.47441784]]


In [34]:
print(S_Nplus1)

0.8535624168829047


In [35]:
print(t_diff)

[[-0.06745253]]


In [36]:
print(y_obs)

[[17.53149094]]


In [37]:
print(y_estimate)

[[17.58906588]]
