In [2]:
import numpy as np
from numpy import ma
import pandas as pd
from scipy.stats import norm

# Show arrays with 5 decimal places and without scientific notation
np.set_printoptions(suppress=True, precision=5)

# Seed NumPy's global random generator with the student number
STUDENT_NUMBER = 46387334
np.random.seed(STUDENT_NUMBER)

In [3]:
# Helper functions
def sig_fig(X, sigfigs):
    """Round ``X`` element-wise to ``sigfigs`` significant figures.

    Parameters
    ----------
    X : scalar or array-like
        Value(s) to round. Scalars, lists and NumPy arrays are accepted.
    sigfigs : int
        Number of significant figures to keep.

    Returns
    -------
    NumPy scalar or array
        ``X`` rounded to ``sigfigs`` significant figures, with the same
        shape as ``X``. Entries whose base-10 logarithm is masked by
        ``ma.log10`` (e.g. zeros) are treated as having exponent 0.
    """
    # asanyarray lets lists and plain Python numbers through while leaving
    # existing arrays (including masked arrays) untouched.
    X = np.asanyarray(X)

    # Decimal exponent of each entry. The module-level ma.filled is used
    # instead of the .filled method because ma.log10 hands back a plain
    # NumPy scalar for 0-d input, and that scalar has no .filled attribute.
    exp = np.floor(ma.filled(ma.log10(abs(X)), 0))

    # Scale the mantissa into [1, 10), round it, then scale back.
    return np.round(X*10**-exp, sigfigs-1) * 10**exp

def get_diff(a1, a2):
    """Print the shapes of two 2-D arrays and an entry-by-entry comparison.

    For every index ``[i, j]`` of ``a1`` (row-major order) one line is
    printed with both entries rounded to 3 decimals and the difference
    ``a1 - a2`` expressed as a percentage of the ``a1`` entry.

    Parameters
    ----------
    a1 : 2-D array
        Reference values; its shape determines which indices are visited.
    a2 : 2-D array
        Values compared against ``a1``; indexed with the same ``[i, j]``.

    Returns
    -------
    None
        The comparison is only printed.
    """
    print(f"{a1.shape=}")
    print(f"{a2.shape=}")

    # Unpacking into exactly two names keeps the 2-D-only requirement
    n_rows, n_cols = a1.shape
    for r, c in np.ndindex(n_rows, n_cols):
        ref = a1[r, c]
        other = a2[r, c]
        # Signed difference relative to the reference entry, in percent
        pct_error = (ref - other) * (100 / ref)
        print(f"[{r},{c}]: {round(ref, 3) :>5} vs {round(other, 3) :>7}  |  Error: {pct_error:.2f}%")

## 1.b)

In [11]:
# Names of the variables in the multivariate normal distribution.
# A variable's position in this list is its row in mu_h and its
# row/column in sigma_h.
vars_h = ['logcp', 'ejection_fraction', 'sqrtplat', 'recipsc', 'serum_sodium']
n_vars = len(vars_h)

# mu_h hat - mean vector, stored as an (n_vars, 1) column
mu_h = np.array([5.66, 38.1, 505, 0.891, 137]).reshape(-1, 1)

# Sigma_h hat - covariance matrix (rows and columns ordered as in vars_h)
sigma_h = np.array([
    [ 1.29,   -0.928,     1.01,  0.0235,  0.0953],  # logcp
    [-0.928, 140,        77.8,   0.514,   9.19  ],  # ejection_fraction
    [ 1.01,   77.8,    8757,     1.82,   23.0   ],  # sqrtplat
    [ 0.0235,  0.514,     1.82,  0.100,   0.354 ],  # recipsc
    [ 0.0953,  9.19,     23.0,   0.354,  19.5   ],  # serum_sodium
])

### Part 1. Determine the conditional distribution

In [14]:
# Variable sets: indices into vars_h / mu_h / sigma_h
A = [1, 2]     # Unknown variables: ejection_fraction, sqrtplat
B = [0, 3, 4]  # Known variables, in index order: logcp, recipsc, serum_sodium

# Observed values of the known variables. The rows are paired with mu_h[B]
# below, so they must follow the order of B: logcp, recipsc, serum_sodium.
x_B = np.array([[8], [0.4], [142]])

# Partition the mean vector
mu_A = mu_h[A]
mu_B = mu_h[B]

# Guard against silent broadcasting if x_B is ever entered as a flat vector
assert x_B.shape == mu_B.shape, "x_B must be a column vector matching mu_B"

# Partition the covariance matrix
sigma_AA = sigma_h[np.ix_(A, A)]
sigma_AB = sigma_h[np.ix_(A, B)]
sigma_BB = sigma_h[np.ix_(B, B)]

# Regression coefficients Sigma_AB @ Sigma_BB^{-1}, computed once with a
# linear solve rather than forming the explicit inverse twice.
# Solving Sigma_BB @ Z = Sigma_AB^T gives Z = Sigma_BB^{-1} @ Sigma_AB^T,
# and Z^T equals Sigma_AB @ Sigma_BB^{-1} because Sigma_BB is symmetric.
coef_AB = np.linalg.solve(sigma_BB, sigma_AB.T).T

# Conditional mean: mu_A + Sigma_AB Sigma_BB^{-1} (x_B - mu_B)
mu_cond = mu_A + coef_AB @ (x_B - mu_B)

# Conditional covariance: Sigma_AA - Sigma_AB Sigma_BB^{-1} Sigma_BA
sigma_cond = sigma_AA - coef_AB @ sigma_AB.T

print(f"Conditional Mean:\n{mu_cond}")
print(f"\nConditional Covariance:\n{sigma_cond}")

Conditional Mean:
[[ 36.28745]
 [503.26812]]

Conditional Covariance:
[[ 133.51654   62.22592]
 [  62.22592 8708.59787]]


### Part 2. Determine the partial correlation matrix

In [22]:
# Conditional standard deviations: square roots of the diagonal
# (variance) entries of the conditional covariance matrix
std_devs = np.sqrt(np.diagonal(sigma_cond))

# Divide each covariance entry (i, j) by sd_i * sd_j to obtain the
# partial correlation matrix
sd_products = std_devs[:, np.newaxis] * std_devs[np.newaxis, :]
partial_corr_matrix = sigma_cond / sd_products

print(f"Partial Correlation Matrix:\n{partial_corr_matrix}")

Partial Correlation Matrix:
[[1.      0.05771]
 [0.05771 1.     ]]


### Part 3. Use this distribution to predict the ejection fraction and platelet count values for this patient, and describe the unusualness or otherwise of this pair of values in the overall patient distribution

In [21]:
# Hand check of the off-diagonal partial correlation, using values typed in
# from the conditional covariance output (variances rounded)
cond_cov_ef_plat = 62.22592  # off-diagonal conditional covariance
cond_var_ef = 133.52         # conditional variance, first unknown variable
cond_var_plat = 8708.6       # conditional variance, second unknown variable
hand_check = cond_cov_ef_plat / np.sqrt(cond_var_ef * cond_var_plat)
print(format(hand_check, ".10f"))

0.0577064015
