### Reimplementation of BASEFunc
From this repo, for multi armed batch bandit
https://github.com/Mathegineer/batched-bandit/blob/master/BASEFunc.m

#### TODO: Verify the below works with synthetic data


parameters

%     K: arms

%     TSeq: horizon

%     M: number of batches

%     b = T^(1/M); TGridAdaptive = floor(b.^(1:M));...,

%         TGridAdaptive = floor(TGridAdaptive/K) * K; TGridAdaptive(M) = T; ...,

%         TGridAdaptive = [0,TGridAdaptive]; % adaptive batch grids

%     a = T^(1/(2 - 2^(1-M))); TGridMinimax = floor(a.^(2.-1./2.^(0:M-1)));...,

%     TGridMinimax(M) = T; ...,

%     TGridMinimax = [0,TGridMinimax]; % minimax batch grids    

%     mu: batch mean

%     gamma: tunning parameter

In [3]:
K = 3 # number of arms
T = 10 # time horizon
M = 5 # number of batches
gridType = 'geometric' # grid type
mu = [0.7] + [0.5] * (K-1) # means
gamma = 0.5



In [6]:
import numpy as np
np.random.seed(3)
def BASEFunc(mu, K, T, M, gridType, gamma):
    # record
    regret = 0
    if gridType == 'minimax':
        a = T**(1/(2 - 2**(1-M)))
        TGrid = np.floor(a**(2 - 1/2**(np.arange(0, M))))
        TGrid[M-1] = T
        TGrid = np.concatenate(([0], TGrid))  # minimax batch grids
    elif gridType == 'geometric':
        b = T**(1/M)
        TGrid = np.floor(b**(np.arange(0, M)))
        TGrid[M-1] = T
        TGrid = np.concatenate(([0], TGrid))  # geometric batch grids
    elif gridType == 'arithmetic':
        TGrid = np.floor(np.linspace(0, T, M+1))
    
    # initialization
    activeSet = np.ones(K, dtype=bool)
    numberPull = np.zeros(K)
    averageReward = np.zeros(K)
    print(TGrid)
    for i in range(0, M):
        availableK = np.sum(activeSet)
        print(availableK)
        pullNumber = max(np.floor((TGrid[i] - TGrid[i-1])/availableK), 1)
        print(pullNumber)
        TGrid[i] = availableK * pullNumber + TGrid[i-1]
        print(TGrid)
        print(np.where(activeSet)[0])
        for j in np.where(activeSet)[0]:
            reward = np.mean(np.random.randn(int(pullNumber))) + mu[j]
            print(np.mean(np.random.randn(int(pullNumber))))
            print(reward)
            averageReward[j] = averageReward[j] * (numberPull[j]/(numberPull[j] + pullNumber)) + (reward * pullNumber/(numberPull[j] + pullNumber))
            print(averageReward)
            regret += pullNumber * (mu[0] - mu[j])
            print(regret)
            numberPull[j] += pullNumber
        maxArm = np.max(averageReward[activeSet])
        for j in np.where(activeSet)[0]:
            if (maxArm - averageReward[j]) >= np.sqrt(gamma * np.log(T*K) / numberPull[j]):
                activeSet[j] = False
    
    return regret, activeSet

In [7]:
BASEFunc(mu, K, T, M, gridType, gamma)

[ 0.  1.  1.  2.  3. 10.]
3
1
[13.  1.  1.  2.  3. 10.]
[0 1 2]
0.43650985051198943
2.4886284734303183
[2.48862847 0.         0.        ]
0.0
-1.8634927033644908
0.5964974680720087
[2.48862847 0.59649747 0.        ]
0.19999999999999996
-0.35475897926898675
0.22261179748560095
[2.48862847 0.59649747 0.2226118 ]
0.3999999999999999
1
1
[13. 14.  1.  2.  3. 10.]
[0]
-0.6270006768238473
0.6172585185175402
[1.5529435  0.59649747 0.2226118 ]
0.3999999999999999
1
1
[13. 14. 15.  2.  3. 10.]
[0]
-0.47721803035950267
0.6561818310240717
[1.25402294 0.59649747 0.2226118 ]
0.3999999999999999
1
1
[13. 14. 15. 16.  3. 10.]
[0]
0.8846223804995846
-0.6138647533626822
[0.78705102 0.59649747 0.2226118 ]
0.3999999999999999
1
1
[13. 14. 15. 16. 17. 10.]
[0]
1.7095730636529485
1.58131804220753
[0.94590442 0.59649747 0.2226118 ]
0.3999999999999999


(0.3999999999999999, array([ True, False, False]))