In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import os
workdir = 'ReLUBandit'
# Remember the directory the kernel started in the first time this cell runs.
# A bare relative os.chdir(workdir) only works once: on a re-run the process
# is already inside `workdir` and the relative path no longer resolves.
if '_notebook_root' not in globals():
    _notebook_root = os.getcwd()
# Absolute target, so re-running this cell is idempotent.
os.chdir(os.path.join(_notebook_root, workdir))

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors
from tqdm.auto import tqdm
import torch

# Record whether PyTorch reports an available CUDA device.
use_cuda = torch.cuda.is_available()

# Define functions

In [8]:
#Data generation
def gen_data(d, N, T, B, k, sig=0.05, seed=123):
    """Simulate ``B`` independent replications of a ReLU bandit problem.

    Parameters
    ----------
    d : int
        Dimension of every action vector and every ReLU parameter vector.
    N : int
        Number of candidate action vectors drawn per round (the number of
        pieces the whole space is discretized into).
    T : int
        Number of rounds per replication.
    B : int
        Number of replications.
    k : int
        Number of ReLU parameter vectors (rows of ``theta``).
    sig : float, optional
        Scale applied to the standard-normal noise draws.
    seed : int, optional
        Seed for NumPy's global random state; set once before any draw.

    Returns
    -------
    list
        ``B`` entries of the form ``[X, Err, theta]`` where ``X`` has shape
        ``(T, N, d)`` with unit-norm vectors along the last axis, ``Err`` has
        shape ``(T,)`` and ``theta`` has shape ``(k, d)`` with unit-norm rows.
    """
    np.random.seed(seed)
    zero_mean = np.zeros(d)
    identity_cov = np.eye(d)

    def _unit_gaussian(shape):
        # Standard Gaussian draws projected onto the unit sphere (last axis).
        raw = np.random.multivariate_normal(zero_mean, identity_cov, shape)
        return raw / np.linalg.norm(raw, axis=-1, keepdims=True)

    replications = []
    for _ in tqdm(range(B), desc='Data Generation'):
        # The draw order (actions, noise, parameters) fixes the random stream.
        actions = _unit_gaussian((T, N))
        noise = sig * np.random.randn(T)
        # Create ReLU parameters
        relu_params = _unit_gaussian(k)
        replications.append([actions, noise, relu_params])
    return replications

def rwdfun(x, theta, err=0):
    """Return the sum-of-ReLUs reward of action ``x`` plus an additive term.

    Parameters
    ----------
    x : array
        Action vector of length d.
    theta : array
        ReLU parameters, k by d.
    err : float, optional
        Value added to the reward; the default 0 gives the noiseless reward.

    Returns
    -------
    float
        ``sum(max(theta @ x, 0)) + err``.
    """
    # theta is k by d, so this yields one pre-activation per ReLU unit
    pre_activations = theta @ x
    rectified = np.maximum(pre_activations, 0)
    total = np.sum(rectified)
    return total + err

def alg_OFUL(data_B, paras, b):
    """Run OFUL on replication ``b`` and return its per-round regret.

    Parameters
    ----------
    data_B : sequence
        Indexed by ``b``; the selected entry unpacks into ``(X, Err, theta)``.
    paras : dict
        Its values are read in insertion order as ``d, lam, seed``.
    b : int
        Index of the replication to run.

    Returns
    -------
    numpy.ndarray
        One value per round: the largest noiseless reward among that round's
        candidate actions minus the noiseless reward of the chosen action.
    """
    from algbandit import OFUL
    import numpy as np
    from tqdm.auto import tqdm

    # Positional unpacking: the dict must hold exactly these three values in order.
    dim, ridge, rng_seed = paras.values()
    np.random.seed(rng_seed)

    contexts, noise, theta = data_B[b]
    horizon = len(noise)
    learner = OFUL(d=dim, lam=ridge)
    regret = np.zeros(horizon)

    for step in tqdm(range(horizon), leave=False):
        arms = contexts[step]  # N by d
        chosen = learner.choose_action(step, arms)
        # The learner is updated with the noisy reward of its choice ...
        noisy_reward = rwdfun(chosen, theta, noise[step])
        learner.update_model(noisy_reward)
        # ... while regret is measured on noiseless rewards.
        noiseless_rewards = np.apply_along_axis(rwdfun, 1, arms, theta)
        regret[step] = noiseless_rewards.max() - rwdfun(chosen, theta)
    return regret

def alg_OFUReLU(data_B, paras, b):
    """Run OFUReLU on replication ``b`` and return its per-round regret.

    Parameters
    ----------
    data_B : sequence
        Indexed by ``b``; the selected entry unpacks into ``(X, Err, theta)``.
    paras : dict
        Its values are read in insertion order as ``d, k, lam, te, seed``.
    b : int
        Index of the replication to run.

    Returns
    -------
    numpy.ndarray
        One value per round: the largest noiseless reward among that round's
        candidate actions minus the noiseless reward of the chosen action.
    """
    from algbandit import OFUReLU
    import numpy as np
    from tqdm.auto import tqdm

    # Positional unpacking: the dict must hold exactly these five values in order.
    dim, n_units, ridge, te, rng_seed = paras.values()
    # Seed both NumPy and PyTorch before the learner is constructed.
    np.random.seed(rng_seed)
    torch.manual_seed(rng_seed)

    contexts, noise, theta = data_B[b]
    horizon = len(noise)
    learner = OFUReLU(d=dim, k=n_units, lam=ridge, te=te)
    regret = np.zeros(horizon)

    for step in tqdm(range(horizon), leave=False):
        arms = contexts[step]  # N by d
        chosen = learner.choose_action(step, arms)
        # The learner is updated with the noisy reward of its choice ...
        noisy_reward = rwdfun(chosen, theta, noise[step])
        learner.update_model(step, noisy_reward)
        # ... while regret is measured on noiseless rewards.
        noiseless_rewards = np.apply_along_axis(rwdfun, 1, arms, theta)
        regret[step] = noiseless_rewards.max() - rwdfun(chosen, theta)
    return regret

def alg_NeuralUCB(data_B, paras, b):
    """Run NeuralUCBDiag on replication ``b`` and return its per-round regret.

    Parameters
    ----------
    data_B : sequence
        Indexed by ``b``; the selected entry unpacks into ``(X, Err, theta)``.
    paras : dict
        Its values are read in insertion order as ``d, N, k, lam, nu, l, seed``.
        The second value (``N``) is unpacked but not used by this function.
    b : int
        Index of the replication to run.

    Returns
    -------
    numpy.ndarray
        One value per round: the largest noiseless reward among that round's
        candidate actions minus the noiseless reward of the chosen action.
    """
    from algbandit import NeuralUCBDiag
    import numpy as np
    from tqdm.auto import tqdm

    # Positional unpacking: the dict must hold exactly these seven values in order.
    dim, _n_arms, width, ridge, explore, depth, rng_seed = paras.values()
    # Seed both NumPy and PyTorch before the learner is constructed.
    np.random.seed(rng_seed)
    torch.manual_seed(rng_seed)

    contexts, noise, theta = data_B[b]
    horizon = len(noise)
    regret = np.zeros(horizon)
    learner = NeuralUCBDiag(dim=dim, lamdba=ridge, nu=explore, layer=depth, hidden=width)

    for step in tqdm(range(horizon), leave=False):
        arms = contexts[step]  # N by d
        arm_idx, _, _, _ = learner.select(arms)
        chosen = arms[arm_idx]
        noisy_reward = rwdfun(chosen, theta, noise[step])
        # Train on every round for the first 2000 rounds, afterwards only on
        # rounds that are a multiple of 100; other rounds are not passed to train.
        if step < 2000 or step % 100 == 0:
            learner.train(chosen, noisy_reward)
        # Regret is measured on noiseless rewards.
        noiseless_rewards = np.apply_along_axis(rwdfun, 1, arms, theta)
        regret[step] = noiseless_rewards.max() - rwdfun(chosen, theta)
    return regret

def run_bdtalg(bdtalg, data_B, paras, savepath):
    """Run one bandit algorithm on every replication and save the regrets.

    Parameters
    ----------
    bdtalg : callable
        Called as ``bdtalg(data_B, paras, b)`` for each replication index
        ``b``; its ``__name__`` labels the progress bar.
    data_B : sequence
        Replications; only its length is used here, the entries are consumed
        by ``bdtalg``.
    paras : dict
        Passed through to ``bdtalg`` unchanged.
    savepath : str, path-like or file object
        Target handed to ``np.save``. For string/path targets, NumPy appends
        ``.npy`` when the name does not already end with it.

    Returns
    -------
    None
        The stacked per-replication results are written to ``savepath``.
    """
    # Create the output directory up front. Otherwise a missing directory only
    # surfaces in np.save after the whole simulation loop has run, and every
    # result is lost.
    if isinstance(savepath, (str, os.PathLike)):
        parent = os.path.dirname(os.fspath(savepath))
        if parent:
            os.makedirs(parent, exist_ok=True)

    # Run in a loop
    rgt = [bdtalg(data_B, paras, b)
           for b in tqdm(range(len(data_B)), desc=bdtalg.__name__)]
    # Save results
    np.save(savepath, np.array(rgt))

# Run experiments

In [None]:
# Problem size: vector dimension, number of ReLU parameter vectors, actions per round
d, k, N = 2, 3, 500
# Number of rounds and noise scale
T, sig = 1000, 0.01
# Number of replications and random seed
B, seed = 50, 123
data_B = gen_data(d=d, N=N, T=T, B=B, k=k, sig=sig, seed=seed)
# Savepath
presavepath = f'Interput/sim_d{d}k{k}N{N}T{T}_'

In [None]:
# Run simulations
# One row per experiment: (runner, hyper-parameters, file-name suffix).
# The runners unpack the dict values positionally, so key order matters.
experiments = [
    (alg_OFUL, {'d': d, 'lam': 1e-2, 'seed': seed}, 'OFUL'),
    (alg_OFUReLU, {'d': d, 'k': k, 'lam': 1e-2, 'te': 20, 'seed': seed}, 'OFUReLU'),
    (alg_NeuralUCB, {'d': d, 'N': N, 'k': 20, 'lam': 1e-2, 'nu': sig, 'l': 2, 'seed': seed}, 'NeuralUCBfull'),
    (alg_NeuralUCB, {'d': d, 'N': N, 'k': k, 'lam': 1e-2, 'nu': sig, 'l': 1, 'seed': seed}, 'NeuralUCBtrue'),
    (alg_NeuralUCB, {'d': d, 'N': N, 'k': 2*k, 'lam': 1e-2, 'nu': sig, 'l': 1, 'seed': seed}, 'NeuralUCBtruewd'),
]
for runner, hyper, suffix in experiments:
    run_bdtalg(runner, data_B, paras=hyper, savepath=presavepath + suffix)