What we need to develop:
1. Parameters dictionary -> turn it into a class of its own

```python
'key':{'values','prior','prior_sample_weight',0:{'V_lambda','V','freq'}, 1:...}
    
value_priors_dict = {'param_1':(values_1, priors_1), ..., 'param_m':(values_m, priors_m)}

```

2. Methods: init, sample_params, sample_key, update_params, update_key, evaluate_params

3. The evaluation instance is an object that exposes an `evaluate_params(HPs, lambdas)` method; it is passed, already instantiated, to the `BayesHPTuning` class

In [23]:
class BayesHPTuning():
    """Sequential hyper-parameter tuner driven by per-value sampling statistics.

    Each hyper-parameter is wrapped in a ``BayesParam`` that decides which of
    its candidate values to try next. One call to :meth:`step` samples a full
    configuration, evaluates it through ``evaluation_instance`` and feeds the
    resulting scores back to every parameter.
    """

    def __init__(self, value_priors_dict, evaluation_instance, N=2, lambdas=None):
        """Build one ``BayesParam`` per entry of ``value_priors_dict``.

        Args:
            value_priors_dict: mapping ``name -> (values, priors)``.
            evaluation_instance: already-instantiated object exposing
                ``evaluate_params(HPs, lambdas)``.
            N: prior sample weight forwarded to every ``BayesParam``.
            lambdas: values forwarded to ``evaluate_params``; defaults to
                ``[5e-3, 1e-3, 5e-4]``.
        """
        # A fresh list per instance: a list literal in the signature would be
        # shared (and mutable) across every tuner ever created.
        if lambdas is None:
            lambdas = [5e-3, 1e-3, 5e-4]

        self.params = {
            name: BayesParam(*value_and_priors, prior_sample_weight=N)
            for name, value_and_priors in value_priors_dict.items()
        }

        self.eval = evaluation_instance
        self.lambdas = lambdas
        self.history = []

    def step(self):
        """Run one sample -> evaluate -> update iteration."""
        HPs = self.sample_params()
        V, V_lambda = self.evaluate_params(HPs)
        self.update_params(V, V_lambda)
        # here can be implemented conditions for logging, saving and stopping the whole thing
        return

    def sample_params(self):
        """Return ``{name: sampled value}`` for every tracked parameter."""
        return {name: param.sample() for name, param in self.params.items()}

    def evaluate_params(self, HPs):
        """Evaluate ``HPs``, log the raw results and return summary scores.

        Returns:
            ``(V, V_lambda)``: the mean of the validation scores over all
            entries, and their mean along axis 1 (one score per lambda).
        """
        # both of shape (len(lambdas), n_epochs)
        train_V_lambda, val_V_lambda = self.eval.evaluate_params(HPs, self.lambdas)

        self.history.append(
            dict(
                HPs=HPs,
                lambdas=self.lambdas,
                train_V_lambda=train_V_lambda,
                val_V_lambda=val_V_lambda,
            )
        )

        V = val_V_lambda.mean()
        V_lambda = val_V_lambda.mean(axis=1)

        return V, V_lambda

    def update_params(self, V, V_lambda):
        """Record the latest scores on every parameter's last sampled value."""
        # Iterate over our own parameters; the constructor argument
        # ``value_priors_dict`` is not in scope here.
        for param in self.params.values():
            param.update_stat(V, V_lambda)
        return

In [24]:
class BayesParam():
    """One discrete hyper-parameter with adaptive sampling probabilities.

    For every candidate value the class stores the scores observed when that
    value was sampled. Sampling probabilities are the softmax of a
    "biased advantage": a weighted average between the value's prior and its
    empirical advantage (mean score of the value minus mean score overall).
    """

    def __init__(self, values, priors, prior_sample_weight):
        """Store the candidates and create empty statistics.

        Args:
            values: candidate values for this hyper-parameter.
            priors: one prior score per value; used additively inside the
                softmax, so they behave like prior advantages.
            prior_sample_weight: pseudo-count ``N`` weighting the prior
                against the observed advantage.

        Raises:
            ValueError: if ``values`` is empty or its length differs from
                that of ``priors``.
        """
        if len(values) == 0:
            raise ValueError("values must contain at least one candidate")
        if len(values) != len(priors):
            raise ValueError("values and priors must have the same length")

        self.values = values
        self.priors = priors
        self.N = prior_sample_weight
        self.global_V = []  # every V recorded for this parameter, any value
        self.last_sampled = None  # index chosen by the latest sample() call
        self.stat = {
            idx: {'V_lambda': [], 'V': [], 'freq': 0}
            for idx in range(len(values))
        }

    def sample(self):
        """Draw a value according to the current probabilities and remember it."""
        probs = self.get_updated_sampling_probs()
        idx = np.random.choice(np.arange(len(self.values)), p=probs)
        self.last_sampled = idx
        return self.values[idx]

    def get_updated_sampling_probs(self):
        """Return the sampling probability of every value as a numpy array."""
        # np.mean([]) is NaN (and warns); with no observation yet the global
        # mean is irrelevant because every advantage is 0 anyway.
        expected_global_V = np.mean(self.global_V) if len(self.global_V) else 0.0

        advantages = []
        for idx, stat in self.stat.items():
            freq = stat['freq']
            if freq != 0:
                adv_j = np.mean(stat['V']) - expected_global_V
            else:
                adv_j = 0.0  # every value would do
            weight = self.N + freq
            if weight == 0:
                # no prior weight and no observation: nothing to bias with
                biased_adv_j = 0.0
            else:
                biased_adv_j = (self.N * self.priors[idx] + freq * adv_j) / weight
            advantages.append(biased_adv_j)

        # sampling probs are the softmax of the biased advantages; shifting by
        # the max leaves the result unchanged but prevents exp() overflow
        advantages = np.array(advantages, dtype=float)
        exp_adv = np.exp(advantages - advantages.max())
        return exp_adv / exp_adv.sum()

    def update_stat(self, V, V_lambda):
        """Attach the scores ``V`` / ``V_lambda`` to the last sampled value.

        Raises:
            RuntimeError: if called before any ``sample()``.
        """
        idx = self.last_sampled
        if idx is None:
            raise RuntimeError("update_stat() called before sample()")
        entry = self.stat[idx]
        entry['V'].append(V)
        entry['V_lambda'].append(V_lambda)
        entry['freq'] += 1
        # keep the global history in sync so advantages have a baseline
        self.global_V.append(V)
        return