In [83]:
%matplotlib inline
import pandas as pd
import matplotlib
# matplotlib.use('Agg')
import matplotlib.pyplot as plt
import os.path
import sys
import numpy as np


In [84]:
# Define set of all parameters and meta-parameters
    parameters = {    
    "delta_schooling":[0.5],
        "xi":[0, 30 * 37199.03, 140 * 37199.03],
        "icus":[3000],
        "tests":[0],
        "frequencies":[(7,14)],
        "eta":[0.1, 0]
    }
    
    meta_parameters = {
        "trust_region_radius":[0,0.05,0.1,0.2,0.4,0.6],
        "max_inner_iterations_mult":[1, 1.5, 2],
        "initial_uhat":["dynamic_gradient", "full_lockdown", "full_open"]
    }
    
    n_days = 90
    groups = "all"


In [85]:
# Add all data into one big pandas dataframe

ls = []

for delta in parameters["delta_schooling"]:
    for xi in parameters["xi"]:
        for icus in parameters["icus"]:
            for tests in parameters["tests"]:
                for freq in parameters["frequencies"]:
                    for eta in parameters["eta"]:
                        for initial_uhat in meta_parameters["initial_uhat"]:
                            with open(f"testing_outputs_ndays={n_days}_eta={eta}_tests={tests}_xi={xi*10}_freq={freq[1]}_groups={groups}_initial_uhat={initial_uhat}.csv", "r") as file:
                                df = pd.read_csv(file, header=None, names=["radius", "factor", "num_iterations", "reward", "time"])
                            if len(df) == 0:
                                print(file.name)
                            df["delta"] = delta
                            df["xi"] = xi
                            df["icus"] = icus
                            df["tests"] = tests
                            df["freq"] = freq[1]
                            df["eta"] = eta
                            df["initial_uhat"] = initial_uhat
                            ls.append(df)

all_data = pd.concat(ls, ignore_index=True)


In [55]:
# First, look for each combination of parameters what combination of meta-param achieves the best rewards

for delta in parameters["delta_schooling"]:
    for xi in parameters["xi"]:
        for icus in parameters["icus"]:
            for tests in parameters["tests"]:
                for freq in parameters["frequencies"]:
                    for eta in parameters["eta"]:
                        maximum_reward = all_data[(all_data.delta == delta) 
                                                 & (all_data.xi == xi) 
                                                 & (all_data.icus == icus)
                                                 & (all_data.tests == tests) 
                                                 & (all_data.freq == freq[1])
                                                 & (all_data.eta == eta)].reward.max()
                        print(f"For xi={xi} and eta={eta}, the maximum reward is: {maximum_reward}")
                        print(f"And these are the meta-parameters that achieve it:")
                        print("Observe that radius == 0 implies that the solution follows the initial uhat")
                        print(all_data[all_data.reward == maximum_reward])
                        print("")

For xi=0 and eta=0.1, the maximum reward is: 54293211.26175425
And these are the meta-parameters that achieve it:
Observe that radius == 0 implies that the solution follows the initial uhat
    radius  factor  num_iterations        reward         time  delta   xi  \
3      0.2     1.0             5.0  5.429321e+07  3706.690063    0.5  0.0   
10     0.2     1.5             7.5  5.429321e+07  4080.882481    0.5  0.0   
11     0.2     2.0            10.0  5.429321e+07  4156.937640    0.5  0.0   

    icus  tests  freq  eta      initial_uhat  
3   3000      0    14  0.1  dynamic_gradient  
10  3000      0    14  0.1  dynamic_gradient  
11  3000      0    14  0.1  dynamic_gradient  

For xi=0 and eta=0, the maximum reward is: 56136871.74392151
And these are the meta-parameters that achieve it:
Observe that radius == 0 implies that the solution follows the initial uhat
     radius  factor  num_iterations        reward          time  delta   xi  \
49     0.00     1.0        0.000000  5.613687

Notice above that for any combination of parameters we achieve the maximum reward for some combination of meta_param

In [86]:
# Now we'll define the ratio of the rewards wrt to the maximum reward 

for delta in parameters["delta_schooling"]:
    for xi in parameters["xi"]:
        for icus in parameters["icus"]:
            for tests in parameters["tests"]:
                for freq in parameters["frequencies"]:
                    for eta in parameters["eta"]:
                        maximum_reward = all_data[(all_data.delta == delta) 
                                                 & (all_data.xi == xi) 
                                                 & (all_data.icus == icus)
                                                 & (all_data.tests == tests) 
                                                 & (all_data.freq == freq[1])
                                                 & (all_data.eta == eta)].reward.max()
                        
                        all_data.loc[(all_data.delta == delta)
                                                 & (all_data.xi == xi) 
                                                 & (all_data.icus == icus)
                                                 & (all_data.tests == tests) 
                                                 & (all_data.freq == freq[1])
                                                 & (all_data.eta == eta), "FractionalReward"] = all_data[(all_data.delta == delta) 
                                                 & (all_data.xi == xi) 
                                                 & (all_data.icus == icus)
                                                 & (all_data.tests == tests) 
                                                 & (all_data.freq == freq[1])
                                                 & (all_data.eta == eta)].reward / maximum_reward


In [87]:
# Now for each combination of meta-parameters we can plot a histogram of the fractional rewards.
# This should give an idea of how good the meta-parameters are for all the instances.

meta_parameters = {
        "trust_region_radius":[0,0.05,0.1,0.2,0.4,0.6],
        "max_inner_iterations_mult":[1, 1.5, 2],
        "initial_uhat":["dynamic_gradient", "full_lockdown", "full_open"]
    }
    

for radius in meta_parameters["trust_region_radius"]:
    for factor in meta_parameters["max_inner_iterations_mult"]:
        for initia_uhat in meta_parameters["initial_uhat"]:
            print(f"Meta Param: radius = {radius}, factor = {factor}, num iterations = {factor/radius if radius>0 else 0}, initia uhat = {initial_uhat}")
            frac_rewards_list = list(all_data.loc[(all_data.radius==radius) & (all_data.factor==factor) & (all_data.initial_uhat==initial_uhat), "FractionalReward"])
            print(frac_rewards_list)
            num_bins = 4
            plt.hist(frac_rewards_list, num_bins, facecolor='blue', alpha=0.5)
            plt.show()

Meta Param: radius = 0, factor = 1, num iterations = 0, initia uhat = full_open
[0.9494812689463952, 0.9182981794771456, -1.3195006322791651, -1.2079438755758476, -35.49309823801783, -27.925936107938675]


<IPython.core.display.Javascript object>

Meta Param: radius = 0, factor = 1, num iterations = 0, initia uhat = full_open
[0.9494812689463952, 0.9182981794771456, -1.3195006322791651, -1.2079438755758476, -35.49309823801783, -27.925936107938675]
Meta Param: radius = 0, factor = 1, num iterations = 0, initia uhat = full_open
[0.9494812689463952, 0.9182981794771456, -1.3195006322791651, -1.2079438755758476, -35.49309823801783, -27.925936107938675]
Meta Param: radius = 0, factor = 1.5, num iterations = 0, initia uhat = full_open
[0.9494812689463952, 0.9182981794771456, -1.3195006322791651, -1.2079438755758476, -35.49309823801783, -27.925936107938675]
Meta Param: radius = 0, factor = 1.5, num iterations = 0, initia uhat = full_open
[0.9494812689463952, 0.9182981794771456, -1.3195006322791651, -1.2079438755758476, -35.49309823801783, -27.925936107938675]
Meta Param: radius = 0, factor = 1.5, num iterations = 0, initia uhat = full_open
[0.9494812689463952, 0.9182981794771456, -1.3195006322791651, -1.2079438755758476, -35.49309823801