In [1]:
%load_ext autoreload
%autoreload 2

import sys
# Extend the module search path so the project-local imports below can resolve.
# NOTE(review): these paths are relative, so they depend on the kernel's working
# directory -- presumably this notebook's own folder, with "../.." being the
# repository root; confirm before running from elsewhere.
sys.path.append("../../src/")
sys.path.append("../../data/")
sys.path.append("../..")

from src.experimental.experiments import create_plankton_dataset
from src.experimental.experiments import optimal_sequence_of_interventions
from src.utils.sem_utils.real_sems import PredatorPreySEM as PPSEM
from src.utils.sem_utils.sem_estimate import build_sem_hat
from src.utils.sequential_intervention_functions import get_interventional_grids

import pygraphviz
from networkx.drawing import nx_agraph
from src.utils.dag_utils.graph_functions import make_graphical_model
from src.utils.utilities import powerset

from src.experimental.experiments import run_methods_replicates
from src.experimental.analyse_results import get_relevant_results, elaborate
from src.utils.plotting import plot_expected_opt_curve_paper

from src.methods.bo import BO
from src.methods.dcbo import DCBO
from src.methods.pibo import PIBO
from src.methods.dcpibo import DCPIBO

from matplotlib.pyplot import rc
import numpy as np
from scipy import stats
import random

# Seed every RNG the notebook draws from so that Restart & Run All is reproducible.
# `random.seed` only covers Python's built-in generator; scipy.stats (used by
# compute_statistics below) draws from NumPy's global RandomState unless an
# explicit random_state is supplied, so NumPy has to be seeded as well.
random.seed(42)
np.random.seed(42)

## Utility Functions

In [2]:
def power_list(input_list, exponent):
    '''Sign-preserving element-wise power.

    Non-negative elements are raised to `exponent` directly. Negative elements
    are handled as -(|x| ** exponent), so the sign of the input is kept.

    Returns a new list with one result per element of `input_list`.
    '''
    return [
        value**exponent if value >= 0 else -(abs(value)**exponent)
        for value in input_list
    ]

In [3]:
def compute_statistics(y_function, standard_deviation, n_points=10, random_state=None):
    '''Computes, for every mean in y_function, the lists associated to a Normal Distribution.

    For each mean the function produces:
      * `n_points` random variates drawn from Normal(mean, standard_deviation),
      * an evenly spaced grid of `n_points` values spanning
        mean +/- 5 * standard_deviation,
      * the Normal PDF evaluated on that grid.

    Parameters
    ----------
    y_function : iterable of float
        The means, one per entry.
    standard_deviation : float
        Standard deviation shared by all the distributions.
    n_points : int, optional
        Number of variates drawn and number of grid points (default 10).
        A single value is used for both so the variates and the PDFs of one
        entry always have matching lengths.
    random_state : None, int or numpy RandomState/Generator, optional
        Source of randomness for the variates. None (default) uses NumPy's
        global generator, exactly as before.

    Returns
    -------
    (variates, amps, pdfs) : tuple of three lists of numpy arrays,
        each list having one array per mean.
    '''
    # Turn an integer seed into ONE generator shared by all the draws. Handing the
    # raw integer to every rvs() call would re-seed each time and make the draws
    # for different means shifted copies of each other.
    if isinstance(random_state, (int, np.integer)):
        random_state = np.random.RandomState(random_state)

    return_variates = []
    return_amps = []
    return_pdfs = []

    for mean in y_function:
        return_variates.append(
            stats.norm.rvs(mean, standard_deviation, size=n_points, random_state=random_state)
        )

        amp = np.linspace(mean-5*standard_deviation, mean+5*standard_deviation, n_points)
        return_amps.append(amp)

        return_pdfs.append(stats.norm.pdf(amp, mean, standard_deviation))

    return return_variates, return_amps, return_pdfs

In [4]:
def normalise_pdfs(pdfs_list):
    '''Normalises each list of PDF values between 0 and 1 (min-max scaling).

    Every inner list is rescaled independently as (value - min) / (max - min)
    and each result is rounded to 2 decimals.

    Degenerate inputs are handled explicitly instead of failing:
      * an empty inner list yields an empty list,
      * a flat inner list (max == min, which includes a single element) yields
        all zeros, since the ratio would otherwise be 0/0.

    Returns a list of lists with the same layout as `pdfs_list`.
    '''
    return_normalised_pdfs_list = []

    for pdf_list in pdfs_list:
        # len() rather than truthiness: the inner container may be a numpy array.
        if len(pdf_list) == 0:
            return_normalised_pdfs_list.append([])
            continue

        pdf_min = min(pdf_list)
        pdf_range = max(pdf_list) - pdf_min

        if pdf_range == 0:
            # Flat curve: there is no spread to scale by, so map everything to 0.
            return_normalised_pdfs_list.append([0.0 for _ in pdf_list])
            continue

        return_normalised_pdfs_list.append(
            [round((pdf_value - pdf_min) / pdf_range, 2) for pdf_value in pdf_list]
        )

    return return_normalised_pdfs_list

In [5]:
def regret_priors(start_prior):
    '''Builds one regret-adjusted copy of the prior per iteration.

    For iteration i = 1..N every list in `start_prior` is passed through
    power_list with exponent gamma = beta / i.

    Note: `N` and `beta` are not parameters; they are read from the notebook's
    global namespace and must be defined before this function is called.

    Returns a list of N entries, each a list with one raised list per entry
    of `start_prior`.
    '''
    priors_per_iteration = []

    for iteration_number in range(1, N + 1):
        gamma = beta / iteration_number
        priors_per_iteration.append(
            [power_list(prior_values, gamma) for prior_values in start_prior]
        )

    return priors_per_iteration

In [6]:
def predict_optima(time_steps, variates, normalised_pdfs):
    '''Predicts the optimum for each time-step.

    For every time-step the variates are weighted element-wise by the
    normalised PDF values. If min(variates) + max(variates) is negative the
    smallest weighted value is taken as the optimum, otherwise the largest.

    Parameters
    ----------
    time_steps : int
        Number of leading entries of `variates` / `normalised_pdfs` to process.
    variates : sequence of array-like
        One collection of variates per time-step.
    normalised_pdfs : sequence of array-like
        One collection of weights per time-step, same length as the variates.

    Returns
    -------
    list with one predicted optimum per time-step.
    '''
    return_prediction = []

    for time_step in range(time_steps):
        step_variates = variates[time_step]
        # np.multiply (as in predict_optima_regret) instead of `*`, so that plain
        # Python lists are multiplied element-wise rather than raising TypeError.
        weighted = np.multiply(step_variates, normalised_pdfs[time_step])

        if (min(step_variates) + max(step_variates)) < 0:
            optimum = min(weighted)
        else:
            optimum = max(weighted)
        return_prediction.append(optimum)

    return return_prediction

In [7]:
def predict_optima_regret(iterations, time_steps, regret_priors, normalised_pdfs):
    '''Predicts the optimum for every (iteration, time-step) pair from the regret-adjusted priors.

    `regret_priors[iteration][time_step]` is weighted element-wise by
    `normalised_pdfs[time_step]`. When min + max of the prior values is
    negative the smallest weighted value is selected, otherwise the largest.

    Returns a list with one entry per iteration, each a list with one optimum
    per time-step.
    '''
    def _select_optimum(prior_values, pdf_weights):
        # Decide the direction first, then reduce the weighted values with it.
        reducer = min if (min(prior_values) + max(prior_values)) < 0 else max
        return reducer(np.multiply(prior_values, pdf_weights))

    return [
        [
            _select_optimum(regret_priors[iteration][time_step], normalised_pdfs[time_step])
            for time_step in range(time_steps)
        ]
        for iteration in range(iterations)
    ]

In [8]:
def setup_plankton_SEM(T):
    '''Builds the predator-prey (plankton) SEM, its causal graph and the optimal interventions.

    Steps performed:
      1. Instantiate PredatorPreySEM and obtain its static and dynamic parts.
      2. Create a graphical model over time indices 0..T-1 for the nodes
         M, N, P, J, A, E, D, convert it to a networkx graph and then
         add/remove edges to obtain the graph used here.
      3. Define the exploration sets (powerset of M, J, A), the intervention
         domain of each of those variables and a grid of 100 points per set.
      4. Call optimal_sequence_of_interventions with "D" as target variable.

    Parameters
    ----------
    T : int
        Number of time-slices (used for the graph range and forwarded as `T`).

    Returns
    -------
    tuple of
        (static SEM, dynamic SEM, dag_view, G, exploration_sets,
         intervention_domain, true_objective_values, optimal_interventions,
         all_causal_effects)
    '''
    
    # NOTE(review): p_dict is never used below. The call is left in place in case
    # create_plankton_dataset has side effects -- confirm whether it can be removed.
    p_dict = create_plankton_dataset(1, T)
    
    P_SEM = PPSEM()
    p_stat_sem = PPSEM.static(P_SEM)
    p_dyn_sem = PPSEM.dynamic(P_SEM)
    
    slice_node_set = ["M", "N", "P", "J", "A", "E", "D"]
    dag_view = make_graphical_model(0, T-1, topology="dependent", nodes=slice_node_set, verbose=True)
    # Parse the graph's source text with pygraphviz and convert it to a networkx graph.
    G = nx_agraph.from_agraph(pygraphviz.AGraph(dag_view.source))
    
    # Edges between consecutive time-slices: add P->N and A->J, drop M->M.
    for t in range(T-1):
        G.add_edge("P_{}".format(t), "N_{}".format(t + 1))
        G.add_edge("A_{}".format(t), "J_{}".format(t + 1))
        G.remove_edge("M_{}".format(t), "M_{}".format(t+1))
        
    # Edges inside each time-slice: drop J->A, add P->A, P->E, J->D and A->D.
    for t in range(T):
        G.remove_edge("J_{}".format(t), "A_{}".format(t))
        G.add_edge("P_{}".format(t), "A_{}".format(t))
        G.add_edge("P_{}".format(t), "E_{}".format(t))
        G.add_edge("J_{}".format(t), "D_{}".format(t))
        G.add_edge("A_{}".format(t), "D_{}".format(t))
        
    # Specify all the exploration sets based on the manipulative variables in the DAG
    exploration_sets = list(powerset(["M", "J", "A"]))
    # Specify the intervention domain for each variable
    intervention_domain = {"M": [40.0, 160.0], "J": [0.0, 20.0], "A":[0.0, 100.0]}
    # Specify a grid over each exploration set and use the grid to find the best intervention value for that ES
    interventional_grids = get_interventional_grids(exploration_sets, intervention_domain, size_intervention_grid=100)
    
    # Only three of the six returned values are kept; the others are discarded.
    _, optimal_interventions, true_objective_values, _, _, all_causal_effects = optimal_sequence_of_interventions(
        exploration_sets=exploration_sets,
        interventional_grids=interventional_grids,
        initial_structural_equation_model=p_stat_sem,
        structural_equation_model=p_dyn_sem,
        G=G,
        T=T,
        model_variables=slice_node_set,
        target_variable="D",
    )
    
    return (
        p_stat_sem,
        p_dyn_sem,
        dag_view,
        G,
        exploration_sets,
        intervention_domain,
        true_objective_values,
        optimal_interventions,
        all_causal_effects,
    )

## Constants

In [9]:
# Number of time-slices; passed to setup_plankton_SEM(T=T) below.
T = 4
# Number of iterations; read as a global by regret_priors (range(1, N+1)).
N = 10

## Setup

Build the predator–prey SEMs, the causal graph, the exploration sets and the ground-truth optimal interventions:

In [10]:
# Unpack everything setup_plankton_SEM returns, one name per line for readability.
(
    init_sem,
    sem,
    dag_view,
    G,
    exploration_sets,
    intervention_domain,
    true_objective_values,
    optimal_interventions,
    all_causal_effects,
) = setup_plankton_SEM(T=T)

