In [168]:
import sys
import os
import numpy as np
import multiprocessing
import dill
import matplotlib.pyplot as plt
import pandas as pd
import sklearn 
from sklearn.preprocessing import PolynomialFeatures

# Make the project's simulation sources importable from this notebook.
# Each path is checked individually: the previous guard tested `module_path`
# itself (which is never one of the appended strings), so every re-run of the
# cell appended duplicate entries to sys.path.
module_path = os.path.abspath(os.path.join('..'))
for extra_path in (module_path + "/../src/simulations_v2", module_path + "/.."):
    if extra_path not in sys.path:
        sys.path.append(extra_path)
from load_params import load_params

from multi_group_simulation import MultiGroupSimulation

from util_functions import *
from uncertainty_analysis import *
from sim_helper_functions import *
from plot_utils import *
# Apply plotting configuration to pyplot. `configure_plot` is not defined in
# this notebook; presumably it comes from the `plot_utils` star import above
# -- TODO confirm.
configure_plot(plt)

# Location and number of the per-scenario simulation outputs.
# NOTE: SCENARIO_DIR is machine-specific -- replace it with your local path.
SCENARIO_DIR = '/home/yz685/group-testing/notebooks/apr_29_scenarios'
NUM_SCENARIOS = 2000

# One dill file per scenario point: point_0.dill ... point_{NUM_SCENARIOS-1}.dill
lhs_output_sim_files = [
    '{}/point_{}.dill'.format(SCENARIO_DIR, i) for i in range(NUM_SCENARIOS)
]

In [169]:
def residential_regression_non_linear(scenario_data):
    """Build a quadratic (degree-2 polynomial) OLS model of residential infections.

    The first 12 columns of ``scenario_data`` are used as predictors and the
    ``'res_cornell_inf_50'`` column as the response. The predictors are expanded
    into all degree-1 and degree-2 terms (squares and pairwise products, no
    bias column) and a constant column is then added.

    Parameters
    ----------
    scenario_data : pandas.DataFrame
        Table whose first 12 columns are the predictors and which contains a
        ``'res_cornell_inf_50'`` column.

    Returns
    -------
    The OLS model object (not the fit results); callers are expected to call
    ``.fit()`` on it themselves.

    Notes
    -----
    ``add_constant`` and ``OLS`` are not imported explicitly in this notebook;
    presumably they arrive through one of the star imports -- TODO confirm.
    """
    residential_columns = scenario_data.columns[0:12]
    residential_target = 'res_cornell_inf_50'
    X_res = scenario_data[residential_columns]
    Y_res_outcomes = np.array(scenario_data[[residential_target]])

    quadratic = PolynomialFeatures(degree = 2,interaction_only=False,include_bias = False)
    X_expanded = quadratic.fit_transform(X_res)

    # `get_feature_names` was deprecated in scikit-learn 1.0 and removed in 1.2;
    # prefer the replacement and fall back only on older installations.
    if hasattr(quadratic, 'get_feature_names_out'):
        feature_names = quadratic.get_feature_names_out(X_res.columns)
    else:
        feature_names = quadratic.get_feature_names(X_res.columns)

    X_quadratic = pd.DataFrame(X_expanded, columns = feature_names)
    X = add_constant(X_quadratic)

    # The model is returned unfitted on purpose: the previous in-function
    # `model.fit()` result was discarded, so it only wasted a full fit.
    return OLS(Y_res_outcomes, X)

In [170]:
# Load the simulation outputs listed in lhs_output_sim_files
# (load_sim_output is a project helper; presumably it returns a DataFrame
# with one row per scenario -- TODO confirm).
scenario_data = load_sim_output(lhs_output_sim_files)
# residential_regression_non_linear returns the model object, so fit it here.
reg_model = residential_regression_non_linear(scenario_data)
reg_results = reg_model.fit()
# Last expression of the cell: displays the regression summary.
reg_results.summary()

  x = pd.concat(x[::order], 1)


0,1,2,3
Dep. Variable:,y,R-squared:,0.671
Model:,OLS,Adj. R-squared:,0.655
Method:,Least Squares,F-statistic:,43.23
Date:,"Mon, 27 Sep 2021",Prob (F-statistic):,0.0
Time:,17:22:25,Log-Likelihood:,-15362.0
No. Observations:,2000,AIC:,30910.0
Df Residuals:,1909,BIC:,31420.0
Df Model:,90,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,4539.6821,7172.662,0.633,0.527,-9527.396,1.86e+04
asymp_prob_mult,293.5968,1141.565,0.257,0.797,-1945.250,2532.443
inital_prev_mult,-1703.0148,1115.542,-1.527,0.127,-3890.824,484.794
R0,1681.3058,363.082,4.631,0.000,969.227,2393.384
outside_inf_mult,805.2527,1087.826,0.740,0.459,-1328.200,2938.705
daily_self_report_prob,-8025.5347,4020.767,-1.996,0.046,-1.59e+04,-139.977
ct_mult,-4568.1453,1149.485,-3.974,0.000,-6822.523,-2313.768
ct_testing_ratio,-3458.8525,1105.289,-3.129,0.002,-5626.553,-1291.152
test_sensitivity,-2438.0464,2884.661,-0.845,0.398,-8095.464,3219.371

0,1,2,3
Omnibus:,1294.948,Durbin-Watson:,1.984
Prob(Omnibus):,0.0,Jarque-Bera (JB):,34320.873
Skew:,2.613,Prob(JB):,0.0
Kurtosis:,22.61,Cond. No.,225000.0


In [171]:
# Display the fitted coefficients (constant plus the polynomial feature terms).
reg_results.params

const               4539.682082
asymp_prob_mult      293.596751
inital_prev_mult   -1703.014768
R0                  1681.305826
outside_inf_mult     805.252669
                       ...     
E_time ID_time       -38.637360
E_time Sy_time        55.321066
ID_time^2             18.595522
ID_time Sy_time        1.494890
Sy_time^2            -23.999865
Length: 91, dtype: float64

In [172]:
# Centre and spread of each input parameter, derived from its PARAM_BOUNDS range.
# Entries 1..12 of the coefficient index are used as the parameter names
# (entry 0 is the constant term).
param_names = reg_results.params.keys()[1:13]

mean_dict = {}
sd_dict = {}

for param in param_names:
    bound_lo = PARAM_BOUNDS[param][0]
    bound_hi = PARAM_BOUNDS[param][1]
    # midpoint of the range
    mean_dict[param] = (bound_hi + bound_lo)/2
    # half-width divided by 1.96 (presumably treating the bounds as a 95%
    # normal interval -- TODO confirm)
    sd_dict[param] = (bound_hi - bound_lo)/(2*1.96)


# Same values as arrays, in param_names order, for the optimisation code.
mean_vals = np.array([*mean_dict.values()])
sd_vals = np.array([*sd_dict.values()])

In [173]:
# Display the per-parameter midpoints computed from PARAM_BOUNDS.
mean_vals

array([ 1.  ,  1.  ,  2.5 ,  1.  ,  0.36,  1.5 ,  1.  ,  0.6 ,  0.1 ,
        2.  ,  3.  , 12.  ])

In [174]:
from scipy.optimize import minimize
from scipy.optimize import NonlinearConstraint


In [175]:
# Fitted coefficients per regression model, keyed by id(model). The model is
# stored next to its coefficients so it stays alive and its id cannot be reused.
_fitted_params_cache = {}


def _fitted_params(reg_model):
    """Return the fitted coefficients of `reg_model`, fitting at most once per model."""
    key = id(reg_model)
    if key not in _fitted_params_cache:
        _fitted_params_cache[key] = (reg_model, np.asarray(reg_model.fit().params))
    return _fitted_params_cache[key][1]


# negative number of infections
def nonlinear_objective(x, reg_model = reg_model):
    """Negated regression prediction at `x`, for use as a minimisation objective.

    Minimising this value maximises the predicted outcome of the quadratic
    regression.

    Parameters
    ----------
    x : array-like of length 12
        Parameter vector, in the same order as the regression's predictors.
    reg_model : regression model, optional
        Model exposing ``fit()`` and ``predict(params, exog)``. The default is
        the `reg_model` that existed when this function was defined.

    Returns
    -------
    float
        Minus the model prediction at `x`.

    Raises
    ------
    AssertionError
        If `x` does not have length 12.
    """
    x = np.asarray(x, dtype=float)
    assert len(x) == 12, 'x should be 12-dimensional'

    # Build the feature row [1, degree-1 terms, degree-2 terms] for this point.
    quadratic = PolynomialFeatures(degree = 2,interaction_only=False,include_bias = False)
    features = np.concatenate((np.array([1]),quadratic.fit_transform(x.reshape(1,-1))[0]))

    # Reuse cached coefficients: refitting the regression on every objective
    # evaluation made each optimiser run pay for thousands of identical fits.
    params = _fitted_params(reg_model)

    # Return a plain scalar, which is what scipy.optimize.minimize expects.
    return -float(np.squeeze(reg_model.predict(params, features)))

# ellipsoidal constraints
def ellipsoid_constraint_func(x, size=10, mean_vals = mean_vals, sd_vals = sd_vals): # what size to use? not 1.96^2
    """Signed ellipsoid constraint value at `x`.

    Computes sum_i (x[i] - mean_vals[i])**2 / sd_vals[i]**2 over the 12
    coordinates and subtracts `size`. The value is <= 0 exactly when `x`
    lies inside (or on) the ellipsoid of squared radius `size`.

    Raises AssertionError if `x` does not have length 12.
    """
    assert len(x) == 12, 'x should be 12-dimensional'

    # Builtin sum keeps the same left-to-right accumulation as a manual loop.
    scaled_sq_distance = sum(
        (x[k] - mean_vals[k])**2 / (sd_vals[k]**2) for k in range(12)
    )

    return scaled_sq_distance - size

# Feasible set for the optimiser: -inf <= ellipsoid_constraint_func(x) <= 0.
ellip_cons = NonlinearConstraint(ellipsoid_constraint_func, -np.inf, 0)

# test the method on a simple objective function
def test_objective(x, mean_vals = mean_vals):
    """Squared Euclidean distance between `x` and `mean_vals`.

    A simple objective whose minimiser is `mean_vals` itself, used to check
    the optimisation setup.
    """
    squared_offsets = (x - mean_vals)**2

    total = 0
    for term in squared_offsets:
        total = total + term
    return total

In [176]:
# test on simple objective function to verify it works

# Seeded local generator: the random starting points (and therefore the
# success count and the printed result) are reproducible across kernel
# restarts, without touching the global numpy random state.
rng = np.random.default_rng(0)

count_success = 0

for _ in range(100):

    # random start in [0, 1)^12
    initial_point = rng.random(12)

    result = minimize(test_objective, initial_point,
             constraints = ellip_cons,
             method = 'SLSQP')


    if result.success:
        count_success += 1


# number of successful runs out of 100, then the result of the last run
print(count_success)
print(result)

100
     fun: 9.555742456291885e-13
     jac: array([ 4.53095956e-08, -1.11534847e-06, -2.16698165e-07, -8.23705012e-07,
        1.12210353e-07,  5.08923983e-08,  3.21873946e-07,  1.56482299e-07,
       -1.11054277e-06, -5.14719798e-07, -2.64064159e-07,  2.07316088e-07])
 message: 'Optimization terminated successfully.'
    nfev: 29
     nit: 2
    njev: 2
  status: 0
 success: True
       x: array([ 1.00000002,  0.99999943,  2.49999988,  0.99999958,  0.36000005,
        1.50000002,  1.00000015,  0.60000007,  0.09999944,  1.99999974,
        2.99999986, 12.0000001 ])


In [None]:
# not working yet for the quadratic regression objective

# Seeded local generator: the random starting points (and therefore the
# success count) are reproducible across kernel restarts, without touching
# the global numpy random state.
rng = np.random.default_rng(0)

count_success = 0

for _ in range(100):

    # random start with each coordinate drawn from [0, mean_vals[i])
    initial_point = rng.random(12) * mean_vals

    result = minimize(nonlinear_objective, initial_point,
             constraints = ellip_cons,
             method = 'SLSQP')


    if result.success:
        count_success += 1


# number of successful runs out of 100
print(count_success)