In [1]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-00
import sys, os
from setting import *

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
% matplotlib inline

## combinatorial bandit
- Combinatorial Bandit
- http://homes.dsi.unimi.it/~cesabian/Pubblicazioni/comband.pdf

In [2]:
class ComBand:
    def __init__(self, pipeline, decoder):
        # Pipeline Parameter
        self.decoder = decoder
        self.allalgs = sum(pipeline, [])
        self.algsInEachSteps = [len(step) for step in pipeline]
        self.NPipelineSteps = len(self.algsInEachSteps)

        # Combinatorial Bandit parameter
        self.K = len(self.allalgs)
        self.p = np.zeros(self.K)
        self.q = np.zeros(self.K)+(1.0/self.K)
        self.gamma = 0.2
        self.mu = 0.2

        # index for choice pipelines
        self.step_index = [0]
        for i, ni in enumerate(self.algsInEachSteps):
            self.step_index += [ni + self.step_index[i]]

        print("COMBAND, gamma:", self.gamma, self.step_index)

    def initialize(self, setting=None):
        if setting is None:
            pass
        else:
            # TODO
            ## 実験設定を外部ファイルに出力して
            ## 実験のパラメータを読み出せるようにする
            pass

    def save_parameter(self):
        filename = get_id(prefix="ComBandParams")
        # TODO
        ## パラメータの保存形式を決める

    def get_E(self, k_vector):
        edge = np.zeros([self.K, self.K])
        node = np.argwhere(k_vector==1).flatten().tolist()

        for i in range(self.NPipelineSteps-1):
            node_i, node_j = node[i], node[i+1]
            edge[node_i, node_j] = 1
            edge[node_j, node_i] = 1

        return edge

    def get_next_path(self):
        self.p = (1.0 - self.gamma)*self.q + self.gamma/self.K
        pathI = []

        for i in range(self.NPipelineSteps):
            left, right = self.step_index[i], self.step_index[i+1]
            temp = weighted_random_choice(self.p[left:right],\
                                          itemset=self.allalgs[left:right])
            pathI += [temp]

        return pathI

    def update(self, path, score):
        # calc loss for every edge
        k_vector = np.array(self.decoder.decode(path)) # 1s
        Eps = self.get_E(k_vector) # E{ps}
        ones_ones_T = self.NPipelineSteps # [1s.1s^T]
        InvP = np.linalg.pinv(Eps*ones_ones_T)

        # TODO
        ## lossが最小になるように計算を行っている
        xhat = (-1.0)*score*(InvP.dot(k_vector))

        # update parameter
        for k in range(self.K):
            q_tk = self.q[k] # q_t(k)
            v_k = np.eye(self.K, dtype=int)[k] # v(k)
            lt_vk = xhat.dot(v_k) # l_t v(k)
            exp = np.exp((-1)*self.mu*lt_vk)
            self.q[k] = q_tk * exp


## Decoder
パイプラインから $\{0, 1\}^d$ vector を作成

In [3]:
# {0, 1}^d vectorを取得
class VectorDecoder:
    def __init__(self, pipeline):
        self.algnames = sum(pipeline, [])
        self.algInEachSteps = [len(step) for step in pipeline]
        self.d = len(self.algnames)
        self.noneIndex = [i for i, x in enumerate(self.algnames) if x==NONE_NAME]
        print("EXISTING ALGORITHMS:", self.algnames)

    def decode(self, alglist):
        temp = np.zeros(self.d)

        for i, algname_i in enumerate(alglist):
            if algname_i==NONE_NAME:
                # (None in i step) = 1
                temp[self.noneIndex[i]] = 1
            else:
                # (algname in algnames) = 1
                temp[self.algnames.index(algname_i)] = 1

        return temp

## Bandit Food
### decoder のテスト + ComBandのテスト

In [4]:
from food.experimental_settings import *
from utils import *
from tqdm import tqdm

# import pipeline definition
pipeline = get_exp_setting(0)
print('Pipeline:', pipeline)

EXPERIMENTAL SETTINGS: 0
Pipeline: [['raw food', 'peel'], ['diced', 'grind', 'chopped'], ['wash', 'wash & boil'], ['simmer', 'bake', 'gril']]


In [5]:
# vector decoder
decoder = VectorDecoder(pipeline)

EXISTING ALGORITHMS: ['raw food', 'peel', 'diced', 'grind', 'chopped', 'wash', 'wash & boil', 'simmer', 'bake', 'gril']


In [6]:
# setting comband
bandit = ComBand(pipeline, decoder)

COMBAND, gamma: 0.2 [0, 2, 5, 7, 10]


### run bandit food

In [9]:
# history
score = 0
score_history = []
experiment_name = get_id(prefix="ComBand", suffix="")
logfile = open("report/"+experiment_name+".txt", 'w')

# exp setting
food = "yatsuhashi"
review = 0
metricfunc = FIVE_STAGE_EVALUATION

# random search
print("Start ComBand Pipeline Search!")
for i in tqdm(range(10)):

    # select pipeline
    pathI = bandit.get_next_path()

    # try to eval score
    # score = get_pipeline_score(pathI, food, review, metricfunc)
    score = np.random.randint(50, 100)
    logfile.write(str(score)+"\n")

    # update bandit parameter
    # FIX: socre have None if y has the same values.
    if score is not None:
        bandit.update(pathI, score)
        score_history += [score]

    # progress
    if i%10==0 and i>0:
        print("  Best score:", np.max(score_history), np.mean(score_history))

# close logfile
logfile.close()

100%|██████████| 10/10 [00:00<00:00, 1044.74it/s]

Start ComBand Pipeline Search!



