### Reimplementation of BASEFunc
Ported from the following repository, which implements the batched multi-armed bandit algorithm:
https://github.com/Mathegineer/batched-bandit/blob/master/BASEFunc.m

#### TODO: Verify the below works with synthetic data

In [None]:
import numpy as np

def BASEFunc(mu, K, T, M, gridType, gamma):
    """Simulate one run of batched arm elimination on K Gaussian arms.

    Python port of the MATLAB ``BASEFunc`` routine. The horizon ``T`` is
    split into ``M`` batches according to ``gridType``. Within a batch every
    still-active arm is pulled the same number of times; after the batch,
    each arm whose empirical mean trails the best active empirical mean by at
    least ``sqrt(gamma * log(T * K) / pulls)`` is removed from the active set.

    Args:
        mu: Sequence of K arm means. Regret is measured against ``mu[0]``,
            so the first arm is treated as the reference (best) arm.
        K: Number of arms.
        T: Time horizon (total pull budget).
        M: Number of batches.
        gridType: ``'minimax'``, ``'geometric'`` or ``'arithmetic'``.
        gamma: Multiplier inside the elimination threshold.

    Returns:
        A ``(regret, activeSet)`` tuple: the accumulated regret relative to
        ``mu[0]`` and a boolean array of length K marking the surviving arms.

    Raises:
        ValueError: If ``gridType`` is not one of the supported names.
    """
    if gridType == 'minimax':
        a = T ** (1 / (2 - 2 ** (1 - M)))
        ends = np.floor(a ** (2 - 1 / 2 ** np.arange(0, M)))
        # The array has M entries (indices 0..M-1); the last batch must end
        # exactly at T. Indexing with M (a 1-based MATLAB index) overflows.
        ends[M - 1] = T
        TGrid = np.concatenate(([0], ends))
    elif gridType == 'geometric':
        b = T ** (1 / M)
        ends = np.floor(b ** np.arange(1, M + 1))
        ends[M - 1] = T  # same 0-based correction as above
        TGrid = np.concatenate(([0], ends))
    elif gridType == 'arithmetic':
        TGrid = np.floor(np.linspace(0, T, M + 1))
    else:
        raise ValueError(
            "gridType must be 'minimax', 'geometric' or 'arithmetic', "
            f"got {gridType!r}"
        )

    regret = 0
    activeSet = np.ones(K, dtype=bool)
    numberPull = np.zeros(K)
    averageReward = np.zeros(K)
    log_term = gamma * np.log(T * K)  # loop-invariant part of the threshold

    for i in range(1, M + 1):
        active_idx = np.flatnonzero(activeSet)
        availableK = active_idx.size
        if availableK == 0:
            # Every arm has been eliminated (possible when the threshold is
            # zero); nothing is left to pull, so stop instead of dividing by 0.
            break

        # Split the batch evenly across active arms, pulling at least once.
        pullNumber = max(np.floor((TGrid[i] - TGrid[i - 1]) / availableK), 1)
        # Re-anchor the grid point to the number of pulls actually spent.
        TGrid[i] = availableK * pullNumber + TGrid[i - 1]

        for j in active_idx:
            # Mean of pullNumber unit-variance Gaussian rewards centred at mu[j].
            reward = np.mean(np.random.randn(int(pullNumber))) + mu[j]
            total = numberPull[j] + pullNumber
            averageReward[j] = (
                averageReward[j] * numberPull[j] + reward * pullNumber
            ) / total
            regret += pullNumber * (mu[0] - mu[j])
            numberPull[j] = total

        # Drop every active arm that falls too far behind the current leader.
        maxArm = np.max(averageReward[active_idx])
        radius = np.sqrt(log_term / numberPull[active_idx])
        eliminated = (maxArm - averageReward[active_idx]) >= radius
        activeSet[active_idx[eliminated]] = False

    return regret, activeSet