In [111]:
# Packages
import pandas as pd
import pymc3 as pm
import sklearn as sk
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from mpl_toolkits import mplot3d
from mpl_toolkits.mplot3d import Axes3D
import statistics
import arviz as az
import bambi as bmb
import numpy as np
import sklearn as sk
from skopt import gp_minimize
from skopt.plots import plot_convergence
from skopt.plots import plot_objective
from skopt.plots import plot_gaussian_process
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
from sklearn.gaussian_process.kernels import WhiteKernel
from sklearn.gaussian_process.kernels import Matern

# Set the seed for this analysis
np.random.seed(seed = 20090701)

# Load the data
analysisData = pd.read_csv("../2_pipeline/10_analysisData.csv")
# analysisData.head()
# analysisData.columns

Index(['id', 'C0', 'C1', 'laterality', 'initial_size', 'num_wounds', 'revasc',
       'age', 'CVD', 'COPD', 'CHF', 'CAD', 'diabetes', 'HLD', 'HTN', 'obesity',
       'PAD', 'smoker', 'healedAliveWithLimb6mo', 'first_visit', 'foot_wound',
       'toe_wound', 'heel_wound', 'ankle_wound', 'leg_wound', 'white', 'black',
       'race_other', 'female', 'hispanic', 'ps', 'ps_quint'],
      dtype='object')

# ITR evaluation: stabilized IPW

In [112]:
# Stabilized IPW estimate (negated)
def computeStabilizedIPW(beta, analysisData):
    analysisData['A_d'] = np.logical_or(analysisData['C0'] < beta[0], analysisData['initial_size'] > beta[1])*1.0 # Treatment assigned by d
    analysisData['pi_d'] = np.where(analysisData['A_d'] == 1.0, analysisData['ps'], 1-analysisData['ps']) # PS for treatment assigned by d
    analysisData['C_d'] = np.where(analysisData['revasc'] == analysisData['A_d'], 1, 0) # Consistent with regime indicator
    analysisData['summand'] = analysisData['C_d']*analysisData['healedAliveWithLimb6mo']*analysisData['pi_d']
    # Estimate the value
    vhat_stabilizedIPW = sum(analysisData['summand'])/(sum(analysisData['C_d']*analysisData['pi_d']))
    
    return -1*vhat_stabilizedIPW
    

# Optimal DTR learning

In [113]:
# Expectation improvement
# Set the noise
noise = np.std(analysisData.healedAliveWithLimb6mo)

# Create the internal sIPW function
def computeStabilizedIPW_internal(beta, analysisData = analysisData):
    return(computeStabilizedIPW(beta, analysisData = analysisData))

# Do expectation improvement
ei_result = gp_minimize(computeStabilizedIPW_internal,
           [(0.0, 1.0), (0.0, 100.0)],
           acq_func = "EI",
           n_calls = 100,
           n_random_starts = 100,
           noise = noise)
print(ei_result)

# Extract the relevant information
thetas_explored = pd.DataFrame(ei_result['x_iters'], columns = ['theta1', 'theta2'])
values_explored = pd.DataFrame(ei_result['func_vals'], columns = ['values'])
policies_explored = pd.concat([thetas_explored, values_explored], axis=1)
optimal_theta = pd.DataFrame(ei_result['x'], columns = ['theta'])
policies_explored.to_csv('../2_pipeline/10_policiesExplored.csv')
optimal_theta.to_csv("../2_pipeline/10_optimalTheta.csv")

          fun: -0.48351757031796455
    func_vals: array([-0.46977691, -0.36652026, -0.41106565, -0.44133837, -0.40856903,
       -0.43146739, -0.4155901 , -0.4605672 , -0.37417514, -0.4172348 ,
       -0.36498788, -0.4336245 , -0.41795171, -0.45642753, -0.36652026,
       -0.44133837, -0.467344  , -0.37787018, -0.37417514, -0.27200099,
       -0.4199698 , -0.38601176, -0.4336245 , -0.36652026, -0.46505046,
       -0.44133837, -0.36652026, -0.3933122 , -0.46767267, -0.47874397,
       -0.4336245 , -0.45642753, -0.28788736, -0.4199698 , -0.36019584,
       -0.36652026, -0.45707302, -0.44133837, -0.46118243, -0.46118243,
       -0.4336245 , -0.4336245 , -0.36652026, -0.4336245 , -0.4336245 ,
       -0.45707302, -0.44880447, -0.41106565, -0.36652026, -0.40501927,
       -0.4336245 , -0.3933122 , -0.44880447, -0.4336245 , -0.41795171,
       -0.4336245 , -0.41795171, -0.41106565, -0.41795171, -0.41106565,
       -0.44133837, -0.27682838, -0.45707302, -0.4472785 , -0.30257108,
       -0.457

# Fit GPR (characterization)

In [114]:
# Fit the GPR
kernel = 1.0 * Matern(length_scale = [1.0, 1.0], nu = 1.5) \
    + WhiteKernel(noise_level = 10, noise_level_bounds = (1e-5, 1e2))
gpr = GaussianProcessRegressor(kernel = kernel, alpha = 0.0)
gpr.fit(policies_explored.loc[:,['theta1', 'theta2']], policies_explored.loc[:,['values']])
print(gpr.kernel_)

# Get the posterior predictive mean over the parameter space
theta1_gp = []
theta2_gp = []
mean_gp = []
sd_gp = []
for i in np.linspace(0.0, 1.0, num = 101):
    for j in np.linspace(0.0, 100.0, num = 101):
        theta1_gp.append(i)
        theta2_gp.append(j)
        eval_gp_mean, eval_gp_sd = gpr.predict(pd.DataFrame({'theta1':[i], 'theta2':[j]}), return_std = True)
        mean_gp.append(eval_gp_mean)
        sd_gp.append(eval_gp_sd)

# wrangle        
mean_gp_list = [mean_gp[i][0][0] for i in range(len(mean_gp))]
sd_gp_list = [sd_gp[i][0] for i in range(len(sd_gp))]

# Save the posterior predictive mean estimates
pd.DataFrame({'theta1':theta1_gp, 'theta2':theta2_gp, 'gp_mean': mean_gp, 'gp_sd': sd_gp}).to_csv("../2_pipeline/10_gpValueSurrogateOverTheta.csv")

0.297**2 * Matern(length_scale=[1.82, 55], nu=1.5) + WhiteKernel(noise_level=8.15e-05)
