In [2]:
import collections
import numpy as np
import pandas as pd
from scipy import stats

# Print floats in fixed-point notation rather than scientific notation, so small
# entries of printed arrays stay readable next to large ones.
np.set_printoptions(suppress=True)

In [3]:
# Measurement table: one row per observation, labelled 1..8 by the index `$i$`
# (the index name is LaTeX so that it renders as math in the exported table).
cadralazine_data = pd.DataFrame(
    collections.OrderedDict([
        ('time', [2, 4, 6, 8, 10, 24, 28, 32]),
        ('drug concentration', [1.63, 1.01, 0.73, 0.55, 0.41, 0.01, 0.06, 0.02]),
    ]),
    index=pd.Series(np.arange(1, 9), name='$i$'),
)
# Render the table as LaTeX with the observation index emitted as an ordinary
# column; escape=False keeps the `$i$` header as raw LaTeX.
latex_source = cadralazine_data.reset_index().to_latex(escape=False, index=False)
with open('p3_data.tex', 'w') as f:
    f.write(latex_source)
cadralazine_data

Unnamed: 0_level_0,time,drug concentration
$i$,Unnamed: 1_level_1,Unnamed: 2_level_1
1,2,1.63
2,4,1.01
3,6,0.73
4,8,0.55
5,10,0.41
6,24,0.01
7,28,0.06
8,32,0.02


In [8]:
D = 30 # Specified in homework

def compute_mle_estimates(x, y, dose=None):
    """Closed-form maximum-likelihood fit of a log-linear decay model.

    The model fitted is ``log(y_i) = log(dose) - log(V) - k_e*x_i + eps_i``
    with i.i.d. zero-mean normal errors of variance ``sigma_2``. This is a
    least-squares line through ``(x, log(y))``.

    Parameters
    ----------
    x : sequence of numbers
        Values of the regressor (the notebook passes the ``time`` column).
    y : sequence of positive numbers
        Observed responses; their logarithm is taken, so they must be > 0.
    dose : float, optional
        Value used for ``D`` in the model. Defaults to the module-level
        constant ``D``, read at call time.

    Returns
    -------
    tuple
        ``(V, k_e, sigma_2)``, where ``sigma_2`` is the mean squared residual
        on the log scale (divides by ``n``, not ``n - 2``).

    Raises
    ------
    AssertionError
        If ``x`` and ``y`` have different lengths.
    """
    assert len(x) == len(y), 'Inputs must have the same length.'
    if dose is None:
        dose = D
    # Work on plain float arrays: lists would fail on `x*x`, and pandas Series
    # with different indices would be silently aligned by label, not position.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    n = len(x)
    log_D = np.log(dose)
    log_y = np.log(y)
    x_sum = np.sum(x)
    log_y_sum = np.sum(log_y)

    # k_e is minus the least-squares slope of log(y) on x.
    k_e = -(n*np.sum(x*log_y) - x_sum*log_y_sum)/(n*np.sum(x*x) - x_sum*x_sum)
    # The fitted intercept equals log(dose) - log(V); solve for log(V).
    log_V = -(log_y_sum + k_e*x_sum)/n + log_D
    # Mean squared residual of the fitted line.
    sigma_2 = np.mean(np.square(log_y - log_D + log_V + k_e*x))
    return np.exp(log_V), k_e, sigma_2
# Fit the model to the observed data; `estimates` is the tuple (V, k_e, sigma_2).
estimates = compute_mle_estimates(
    x=cadralazine_data['time'],
    y=cadralazine_data['drug concentration'],
)

In [9]:
def compute_asymptotic_covariances(x, V, k_e, sigma_2):
    """Build the 3x3 covariance matrix for the estimates (V, k_e, sigma_2).

    Rows and columns are ordered V, k_e, sigma_2. Only the V/k_e block and the
    sigma_2 variance are filled in; the covariances between sigma_2 and the
    other two parameters are zero. ``k_e`` is accepted so the fitted tuple can
    be splatted into the call, but none of the entries depend on it.

    Parameters
    ----------
    x : sequence of numbers
        Regressor values used in the fit.
    V, k_e, sigma_2 : float
        Parameter values at which the covariances are evaluated.

    Returns
    -------
    numpy.ndarray
        Symmetric array of shape (3, 3).
    """
    sample_size = len(x)
    x_variance = np.var(x, ddof=0)  # population variance (divides by n)

    var_V = np.mean(np.square(x))*sigma_2*V*V/sample_size/x_variance
    var_k_e = sigma_2/x_variance/sample_size
    var_sigma_2 = 2*sigma_2*sigma_2/sample_size
    cov_V_k_e = -sigma_2*V*np.mean(x)/sample_size/x_variance

    return np.array([
        [var_V, cov_V_k_e, 0.0],
        [cov_V_k_e, var_k_e, 0.0],
        [0.0, 0.0, var_sigma_2],
    ])

# Evaluate the covariance formulas at the fitted values; `estimates` unpacks
# positionally into (V, k_e, sigma_2).
asymptotic_covariances = compute_asymptotic_covariances(cadralazine_data['time'], *estimates)

In [23]:
# Standard errors are the square roots of the diagonal of the covariance matrix.
standard_errors = np.sqrt(np.diag(asymptotic_covariances))
# Upper-tail normal critical value for a two-sided 95% interval (about 1.96);
# the lower bound uses its negative, which equals stats.norm.ppf((1 - 0.95)/2).
z_critical = stats.norm.isf((1 - 0.95)/2)

# Raw strings keep the LaTeX backslashes verbatim; the previous non-raw
# literals relied on invalid escape sequences (`\%`, `\h`, `\s`), which newer
# Python versions warn about. The resulting labels are unchanged.
estimates_summary = pd.DataFrame(collections.OrderedDict([
    ('Estimate', estimates),
    ('Standard error', standard_errors),
    (r'95\% CI lower bound', estimates - z_critical*standard_errors),
    (r'95\% CI upper bound', estimates + z_critical*standard_errors),
]), index=[r'$\hat{V}$', r'$\hat{k}_e$', r'$\hat{\sigma}^2$'])

# escape=False so the LaTeX in the row and column labels is written as-is.
with open('p3_summary.tex', 'w') as f:
    f.write(estimates_summary.to_latex(escape=False))

estimates_summary

Unnamed: 0,Estimate,Standard error,95\% CI lower bound,95\% CI upper bound
$\hat{V}$,16.663309,6.165432,4.579285,28.747333
$\hat{k}_e$,0.152106,0.020508,0.111911,0.192302
$\hat{\sigma}^2$,0.411963,0.205981,0.008247,0.815679


In [18]:
# Sanity check of the upper-tail critical value used for the 95% interval bounds.
stats.norm.isf((1 - 0.95)/2)

1.959963984540054

In [218]:
# Monte Carlo check of the asymptotic covariances: simulate responses from the
# model with multiplicative log-normal noise, refit each simulated data set,
# and compute the empirical covariance of the refitted (V, k_e, sigma_2).
# NOTE(review): V, k_e and sigma_2 were not defined by any cell, so this cell
# failed on a fresh kernel. They are taken to be the fitted estimates here --
# confirm that matches the original intent.
V, k_e, sigma_2 = estimates
x = cadralazine_data['time'].values  # loop-invariant, so read it once
rng = np.random.RandomState(0)  # fixed seed so re-running reproduces the output
params = [[], [], []]
for i in range(4096):
    y = (D/V*np.exp(-k_e*x))*np.exp(
        stats.norm.rvs(size=x.shape, scale=np.sqrt(sigma_2), random_state=rng))
    V_hat, k_e_hat, sigma_2_hat = compute_mle_estimates(x, y)
    params[0].append(V_hat)
    params[1].append(k_e_hat)
    params[2].append(sigma_2_hat)
# Each row of `params` is one variable, each column one simulated replicate.
# NOTE(review): ddof=2 divides by N - 2; confirm this is intended rather than ddof=1.
np.cov(params, ddof=2)

array([[48.69041953, -0.10942584,  0.01603871],
       [-0.10942584,  0.00041874,  0.00000027],
       [ 0.01603871,  0.00000027,  0.03217829]])

In [158]:
from sklearn import linear_model

# Cross-check of the closed-form fit: regress log(concentration) - log(D) on
# time with ordinary least squares. Under the model the intercept is -log(V)
# and the slope is -k_e. Uses the constant D instead of repeating the literal 30.
tmp = linear_model.LinearRegression().fit(
    cadralazine_data['time'].values[:, np.newaxis],  # sklearn expects a 2-D feature matrix
    np.log(cadralazine_data['drug concentration']) - np.log(D))
tmp.intercept_, tmp.coef_

(-2.813209259207856, array([-0.15210635]))

In [15]:
# Regression target used in the cross-check: log(concentration) - log(D).
# Uses the constant D instead of repeating the literal 30.
np.log(cadralazine_data['drug concentration']) - np.log(D)

$i$
1   -2.912617
2   -3.391247
3   -3.715908
4   -3.999034
5   -4.292796
6   -8.006368
7   -6.214608
8   -7.313220
Name: drug concentration, dtype: float64

In [3]:
# Display the time column. The dangling `*` left the expression incomplete
# (a SyntaxError); the recorded output is the unmodified column.
cadralazine_data['time']

$i$
1     2
2     4
3     6
4     8
5    10
6    24
7    28
8    32
Name: time, dtype: int64