In [1]:
import os

import numpy as np
import pandas as pd
import scipy as sp
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

# Plot styling: seaborn "ticks" theme with enlarged fonts. Text is rendered
# through LaTeX (text.usetex), which needs a working TeX installation.
sns.set_theme(style='ticks', font_scale=1.5)
mpl.rcParams["text.usetex"] = True
mpl.rcParams["mathtext.fontset"] = 'cm'
mpl.rcParams['font.family'] = ['sans-serif']

# Make sure the figure output directory exists. exist_ok=True replaces the
# separate exists() check, so there is no gap between checking and creating.
os.makedirs('../figures', exist_ok=True)

In [2]:
# Solver labels (they look like scipy.optimize.minimize method names - confirm).
# Later cells strip the hyphens and lowercase each label to build file tags,
# e.g. 'L-BFGS-B' -> 'lbfgsb'.
solvers = [
    'Nelder-Mead',
    'L-BFGS-B',
    'TNC',
    'SLSQP',
    'Powell',
    'trust-constr',
    'COBYLA',
    'COBYQA',
]

## 2-arm

In [3]:
# Ground-truth policy: the two stored value components (`_rs` and `_as`) are
# summed and turned into probabilities with a softmax over the last axis.
value_rs = np.load('../data/2arm/value_rs.npy')
value_as = np.load('../data/2arm/value_as.npy')
values = value_rs + value_as
# scipy's softmax subtracts the per-slice maximum before exponentiating, so
# large values cannot overflow to inf/nan as a raw exp(v) / sum(exp(v)) can.
pis = sp.special.softmax(values, axis=-1)

# (header, tags) pairs. The baseline estimators come first, print no header and
# are followed by a blank separator; each solver then adds three variants.
tag_groups = [(None, ['cvx', 'cvx_truc', 'mc'])]
for solver in solvers:
    s_tag = ''.join(solver.split('-')).lower()
    tag_groups.append((solver, [f'cvx_{s_tag}', f'cvx_truc_{s_tag}', f'direct_{s_tag}']))

for header, vtags in tag_groups:
    if header is not None:
        print(header)
    for vtag in vtags:
        htvalue_rs = np.load(f'../outputs/2arm/htvalue_rs_{vtag}.npy')
        htvalue_as = np.load(f'../outputs/2arm/htvalue_as_{vtag}.npy')
        htvalues = htvalue_rs + htvalue_as
        htpis = sp.special.softmax(htvalues, axis=-1)
        # KL(true || fitted): summed over the last axis, then averaged over the
        # next one, leaving one value per remaining leading index.
        kl = np.mean(np.sum(sp.special.kl_div(pis, htpis), axis=-1), axis=-1)
        # Reported as median (first quartile-third quartile).
        print(f'{vtag}: {np.median(kl)} ({np.quantile(kl, 0.25)}-{np.quantile(kl, 0.75)})')
    if header is None:
        print('\n')

cvx: 0.02242608209281092 (0.011870819473506983-0.07053034610659645)
cvx_truc: 0.01817947172723365 (0.010087837439312904-0.047579025796336716)
mc: 0.008596597310924659 (0.004947742760619697-0.013954268770744087)


Nelder-Mead
cvx_neldermead: 0.01940771069752556 (0.010464272407273503-0.036239824342810145)
cvx_truc_neldermead: 0.01848211294985041 (0.009796204900992145-0.034346544507991614)
direct_neldermead: 0.016629341161132603 (0.010730745872806162-0.025358163521944106)
L-BFGS-B
cvx_lbfgsb: 0.019342195466025183 (0.010464055968345494-0.03603443198356199)
cvx_truc_lbfgsb: 0.018482048886885948 (0.009753355229524805-0.03430199019505391)
direct_lbfgsb: 0.016632923934841375 (0.010564418283496285-0.025649939796666275)
TNC
cvx_tnc: 0.019342835274099104 (0.010464129002342087-0.03603324584700444)
cvx_truc_tnc: 0.018482046543328363 (0.009798447827468179-0.034156294050318906)
direct_tnc: 0.016739086011041977 (0.010286747854079493-0.02564594341880462)
SLSQP
cvx_slsqp: 0.019346054180760686 (0.0104299

In [4]:
# Ground-truth alphas: the `_rs` and `_as` arrays are stacked horizontally.
alpha_rs = np.load('../data/2arm/alpha_rs.npy')
alpha_as = np.load('../data/2arm/alpha_as.npy')
alphas = np.hstack((alpha_rs, alpha_as))

# (header, tags) pairs: the 'mc' baseline prints no header and is followed by a
# blank separator; every solver then contributes its three fitted variants.
tag_groups = [(None, ['mc'])]
for solver in solvers:
    s_tag = solver.replace('-', '').lower()
    tag_groups.append((solver, [f'cvx_{s_tag}', f'cvx_truc_{s_tag}', f'direct_{s_tag}']))

for header, tags in tag_groups:
    if header is not None:
        print(header)
    for tag in tags:
        htalpha_rs = np.load(f'../outputs/2arm/htalpha_rs_{tag}.npy')
        htalpha_as = np.load(f'../outputs/2arm/htalpha_as_{tag}.npy')
        htalphas = np.hstack((htalpha_rs, htalpha_as))
        # Euclidean distance between true and fitted alphas along the last axis.
        alpha_res = np.sqrt(np.sum((alphas - htalphas) ** 2, axis=-1))
        q1, q3 = np.quantile(alpha_res, [0.25, 0.75])
        # Reported as median (first quartile-third quartile).
        print(f'{tag}: {np.median(alpha_res)} ({q1}-{q3})')
    if header is None:
        print('\n')

mc: 0.4774536991156728 (0.3769632153487179-0.5805996486930426)


Nelder-Mead
cvx_neldermead: 0.8294340118749506 (0.6104819450727579-1.0156932215305012)
cvx_truc_neldermead: 0.7031852839355582 (0.4976829885483294-0.9075001586252932)
direct_neldermead: 0.810723399763808 (0.6019972059715849-1.010859650690306)
L-BFGS-B
cvx_lbfgsb: 0.730906573083064 (0.5025720872042513-0.9143003658273203)
cvx_truc_lbfgsb: 0.7305070102721555 (0.5162117877740944-0.9350273704580165)
direct_lbfgsb: 0.7593066508879704 (0.563811928243529-0.9684300977546219)
TNC
cvx_tnc: 0.7396419649763584 (0.5306422087131474-0.9402262244633479)
cvx_truc_tnc: 0.7117100900247206 (0.49655872549238905-0.921239126139576)
direct_tnc: 0.7334929517095451 (0.5264033008363382-0.9407176747204503)
SLSQP
cvx_slsqp: 0.7404865569407447 (0.5039779171002968-0.9328198868729132)
cvx_truc_slsqp: 0.7291241015307623 (0.5175521865645635-0.9355909213328777)
direct_slsqp: 0.7747617569090743 (0.5727235245687228-0.9986455576320088)
Powell
cvx_powell: 0.778

In [5]:
# Ground-truth betas: the `_rs` and `_as` arrays are stacked horizontally.
beta_rs = np.load('../data/2arm/beta_rs.npy')
beta_as = np.load('../data/2arm/beta_as.npy')
betas = np.hstack((beta_rs, beta_as))

# (header, tags) pairs: the 'mc' baseline prints no header and is followed by a
# blank separator; every solver then contributes its three fitted variants.
tag_groups = [(None, ['mc'])]
for solver in solvers:
    s_tag = solver.replace('-', '').lower()
    tag_groups.append((solver, [f'cvx_{s_tag}', f'cvx_truc_{s_tag}', f'direct_{s_tag}']))

for header, tags in tag_groups:
    if header is not None:
        print(header)
    for tag in tags:
        htbeta_rs = np.load(f'../outputs/2arm/htbeta_rs_{tag}.npy')
        htbeta_as = np.load(f'../outputs/2arm/htbeta_as_{tag}.npy')
        htbetas = np.hstack((htbeta_rs, htbeta_as))
        # Euclidean distance between true and fitted betas along the last axis.
        beta_res = np.sqrt(np.sum((betas - htbetas) ** 2, axis=-1))
        q1, q3 = np.quantile(beta_res, [0.25, 0.75])
        # Reported as median (first quartile-third quartile).
        print(f'{tag}: {np.median(beta_res)} ({q1}-{q3})')
    if header is None:
        print('\n')

mc: 1.1745131143494416 (0.840706573944782-1.5932698398721574)


Nelder-Mead
cvx_neldermead: 1.998632939293199 (1.4107477291548858-2.801341544806119)
cvx_truc_neldermead: 2.0211977569610977 (1.3150393528707138-2.996207735186613)
direct_neldermead: 2.0189802282547906 (1.4378153655649966-2.7331124004827134)
L-BFGS-B
cvx_lbfgsb: 1.9958030660567154 (1.400839957898953-2.8414725147205435)
cvx_truc_lbfgsb: 2.1133495617152946 (1.4338675231871045-2.9932061158818692)
direct_lbfgsb: 2.006757493201836 (1.3979147625383812-2.750587981621882)
TNC
cvx_tnc: 2.0078189832611253 (1.3964217032591297-2.831588067809371)
cvx_truc_tnc: 2.1298900628889625 (1.4432897515510317-3.031184720248518)
direct_tnc: 1.8358157629710576 (1.3062644147569658-2.582334781923806)
SLSQP
cvx_slsqp: 1.9954943556525786 (1.4069152478586018-2.801102949849411)
cvx_truc_slsqp: 2.0612023299096807 (1.4105094907680202-2.989903744919649)
direct_slsqp: 2.0503808087921485 (1.4493511264729655-2.7577275487166335)
Powell
cvx_powell: 2.00853626323

In [6]:
# Baseline runs: these log CSVs are read with their existing `time` column.
for logf in ['log_cvx', 'log_cvx_truc', 'log_mc']:
    df = pd.read_csv(f'../outputs/2arm/{logf}.csv')
    df['time'] *= 1000  # presumably seconds -> milliseconds; confirm units
    # One quantile call yields the same linearly interpolated quartiles that
    # describe() reports, without rebuilding the full summary three times.
    q1, med, q3 = df['time'].quantile([0.25, 0.5, 0.75])
    # Plain local names keep the f-string free of nested same-type quotes,
    # which are a SyntaxError on Python versions before 3.12.
    print(f'{logf}: {med} ({q1}-{q3})')
print('\n')

# Per-solver runs: header line, then the three variants of that solver.
for solver in solvers:
    print(solver)
    s_tag = ''.join(solver.split('-')).lower()
    for logf in [f'log_cvx_{s_tag}', f'log_cvx_truc_{s_tag}', f'log_direct_{s_tag}']:
        df = pd.read_csv(f'../outputs/2arm/{logf}.csv')
        if 'cvx' in logf:
            # For the cvx variants the total is the sum of the two logged
            # columns (presumably stage-1 and stage-2 times; confirm).
            df['time'] = df['s1_time'] + df['s2_time']
        df['time'] *= 1000
        q1, med, q3 = df['time'].quantile([0.25, 0.5, 0.75])
        print(f'{logf}: {med} ({q1}-{q3})')

log_cvx: 186.066522 (161.01731325-213.50452049999998)
log_cvx_truc: 5.865529 (4.86574875-7.516948)
log_mc: 2967.3807621002197 (2829.2201161384583-3176.9925355911255)


Nelder-Mead
log_cvx_neldermead: 214.9503733759155 (188.5601006124267-242.36353329858395)
log_cvx_truc_neldermead: 11.697701449645951 (10.6052183653564-13.27428346902465)
log_direct_neldermead: 4532.029867172241 (4060.0555539131165-4927.53928899765)
L-BFGS-B
log_cvx_lbfgsb: 300.60194121655275 (266.0239709883117-336.12088310644526)
log_cvx_truc_lbfgsb: 86.64922823986815 (82.03538234347528-104.94078083911124)
log_direct_lbfgsb: 1040.4133796691895 (764.5721435546875-1409.3273282051086)
TNC
log_cvx_tnc: 227.1252759854736 (200.4922174457702-254.73568212994377)
log_cvx_truc_tnc: 17.19650810162345 (15.607628432434026-19.2556656533508)
log_direct_tnc: 3025.087356567383 (2939.0783309936523-3042.7016615867615)
SLSQP
log_cvx_slsqp: 204.96542386267083 (178.34364410430902-232.90114655310052)
log_cvx_truc_slsqp: 11.73770294097895 (10.5

## 10-arm

In [7]:
# Ground-truth policy: the two stored value components (`_rs` and `_as`) are
# summed and turned into probabilities with a softmax over the last axis.
value_rs = np.load('../data/10arm/value_rs.npy')
value_as = np.load('../data/10arm/value_as.npy')
values = value_rs + value_as
# scipy's softmax subtracts the per-slice maximum before exponentiating, so
# large values cannot overflow to inf/nan as a raw exp(v) / sum(exp(v)) can.
pis = sp.special.softmax(values, axis=-1)

# (header, tags) pairs. The baseline estimators come first, print no header and
# are followed by a blank separator; each solver then adds its variants.
tag_groups = [(None, ['cvx', 'cvx_truc', 'mc'])]
for solver in solvers:
    s_tag = ''.join(solver.split('-')).lower()
    variants = [f'cvx_{s_tag}', f'cvx_truc_{s_tag}', f'direct_{s_tag}']
    # 'direct_cobyqa' is skipped for the 10-arm case (presumably no outputs
    # were produced for that run; confirm).
    tag_groups.append((solver, [v for v in variants if v != 'direct_cobyqa']))

for header, vtags in tag_groups:
    if header is not None:
        print(header)
    for vtag in vtags:
        htvalue_rs = np.load(f'../outputs/10arm/htvalue_rs_{vtag}.npy')
        htvalue_as = np.load(f'../outputs/10arm/htvalue_as_{vtag}.npy')
        htvalues = htvalue_rs + htvalue_as
        htpis = sp.special.softmax(htvalues, axis=-1)
        # KL(true || fitted): summed over the last axis, then averaged over the
        # next one, leaving one value per remaining leading index.
        kl = np.mean(np.sum(sp.special.kl_div(pis, htpis), axis=-1), axis=-1)
        # Reported as median (first quartile-third quartile).
        print(f'{vtag}: {np.median(kl)} ({np.quantile(kl, 0.25)}-{np.quantile(kl, 0.75)})')
    if header is None:
        print('\n')

cvx: 0.07837289201971652 (0.031999721700914086-0.17407202543062164)
cvx_truc: 0.07472319425318129 (0.028416029197078682-0.16691191240231876)
mc: 0.009067315138795615 (0.003314355932952438-0.01726269591416294)


Nelder-Mead
cvx_neldermead: 0.030410697362734315 (0.010485717701323581-0.07492210391441365)
cvx_truc_neldermead: 0.026141075494048607 (0.00971797180033388-0.05579119184990773)
direct_neldermead: 0.02339025963734761 (0.011019432721730848-0.039052969980530185)
L-BFGS-B
cvx_lbfgsb: 0.03074476492201665 (0.010594084295890972-0.07490017600780918)
cvx_truc_lbfgsb: 0.026141629055497807 (0.009718054987355312-0.05579151154459083)
direct_lbfgsb: 0.023990982012857417 (0.011168543625407985-0.04077897411542884)
TNC
cvx_tnc: 0.030586417842716554 (0.0105312868284293-0.07490018145672898)
cvx_truc_tnc: 0.026141629096494284 (0.009718054852853828-0.05579151119199764)
direct_tnc: 0.02306023698835538 (0.011059167191541801-0.03867621123233983)
SLSQP
cvx_slsqp: 0.030744797105636483 (0.01059408678147641

In [8]:
# Ground-truth alphas: the `_rs` and `_as` arrays are stacked horizontally.
alpha_rs = np.load('../data/10arm/alpha_rs.npy')
alpha_as = np.load('../data/10arm/alpha_as.npy')
alphas = np.hstack((alpha_rs, alpha_as))

# (header, tags) pairs: the 'mc' baseline prints no header and is followed by a
# blank separator; every solver then contributes its fitted variants.
tag_groups = [(None, ['mc'])]
for solver in solvers:
    s_tag = solver.replace('-', '').lower()
    variants = [f'cvx_{s_tag}', f'cvx_truc_{s_tag}', f'direct_{s_tag}']
    # 'direct_cobyqa' is never loaded in the 10-arm case.
    tag_groups.append((solver, [v for v in variants if v != 'direct_cobyqa']))

for header, tags in tag_groups:
    if header is not None:
        print(header)
    for tag in tags:
        htalpha_rs = np.load(f'../outputs/10arm/htalpha_rs_{tag}.npy')
        htalpha_as = np.load(f'../outputs/10arm/htalpha_as_{tag}.npy')
        htalphas = np.hstack((htalpha_rs, htalpha_as))
        # Euclidean distance between true and fitted alphas along the last axis.
        alpha_res = np.sqrt(np.sum((alphas - htalphas) ** 2, axis=-1))
        q1, q3 = np.quantile(alpha_res, [0.25, 0.75])
        # Reported as median (first quartile-third quartile).
        print(f'{tag}: {np.median(alpha_res)} ({q1}-{q3})')
    if header is None:
        print('\n')

mc: 1.2159341667559196 (1.119488953204121-1.3169607849419531)


Nelder-Mead
cvx_neldermead: 2.305520320102847 (2.0484336695253944-2.5233804070384966)
cvx_truc_neldermead: 2.1895795651396845 (1.901825232772401-2.440427252614312)
direct_neldermead: 2.170913400270115 (1.9284446814016858-2.372537435349317)
L-BFGS-B
cvx_lbfgsb: 2.2603259386192596 (2.02389750451427-2.475886164910063)
cvx_truc_lbfgsb: 2.2092538536423048 (1.943587148112028-2.4556856212688913)
direct_lbfgsb: 1.8929545008567858 (1.7100176203252144-2.0564464725054568)
TNC
cvx_tnc: 2.2437982734305226 (1.9981840616610385-2.4701173554297804)
cvx_truc_tnc: 2.1533897184295894 (1.864335254206228-2.42193494121768)
direct_tnc: 1.8536532942062243 (1.6790838252147329-2.0385856287699347)
SLSQP
cvx_slsqp: 2.2618133969279306 (2.0207291237689478-2.4756742258344215)
cvx_truc_slsqp: 2.20925347079777 (1.9435779077179791-2.4582677520141085)
direct_slsqp: 1.875777262347937 (1.7016470619035045-2.060081859233997)
Powell
cvx_powell: 2.2676534254119387

In [9]:
# Ground-truth betas: the `_rs` and `_as` arrays are stacked horizontally.
beta_rs = np.load('../data/10arm/beta_rs.npy')
beta_as = np.load('../data/10arm/beta_as.npy')
betas = np.hstack((beta_rs, beta_as))

# (header, tags) pairs: the 'mc' baseline prints no header and is followed by a
# blank separator; every solver then contributes its fitted variants.
tag_groups = [(None, ['mc'])]
for solver in solvers:
    s_tag = solver.replace('-', '').lower()
    variants = [f'cvx_{s_tag}', f'cvx_truc_{s_tag}', f'direct_{s_tag}']
    # 'direct_cobyqa' is never loaded in the 10-arm case.
    tag_groups.append((solver, [v for v in variants if v != 'direct_cobyqa']))

for header, tags in tag_groups:
    if header is not None:
        print(header)
    for tag in tags:
        htbeta_rs = np.load(f'../outputs/10arm/htbeta_rs_{tag}.npy')
        htbeta_as = np.load(f'../outputs/10arm/htbeta_as_{tag}.npy')
        htbetas = np.hstack((htbeta_rs, htbeta_as))
        # Euclidean distance between true and fitted betas along the last axis.
        beta_res = np.sqrt(np.sum((betas - htbetas) ** 2, axis=-1))
        q1, q3 = np.quantile(beta_res, [0.25, 0.75])
        # Reported as median (first quartile-third quartile).
        print(f'{tag}: {np.median(beta_res)} ({q1}-{q3})')
    if header is None:
        print('\n')

mc: 6.25123119754349 (5.779807382825902-6.723468382455703)


Nelder-Mead
cvx_neldermead: 12.219674087169118 (11.26189673854501-13.128446554977769)
cvx_truc_neldermead: 12.15566055749603 (11.137994148639548-13.151678178006643)
direct_neldermead: 10.885696709149984 (9.80900105380171-11.987402369014845)
L-BFGS-B
cvx_lbfgsb: 12.144143824536851 (11.1547431059587-13.0315893070069)
cvx_truc_lbfgsb: 11.728749604252542 (10.692677202170035-12.749155209734976)
direct_lbfgsb: 9.236782271463476 (8.29177824800636-10.168495811872944)
TNC
cvx_tnc: 11.910930821857137 (10.978117741949792-12.853869571424745)
cvx_truc_tnc: 11.957410271235045 (10.986521858493477-12.99669954270983)
direct_tnc: 9.232240058545898 (8.39782187673044-10.076535471838248)
SLSQP
cvx_slsqp: 12.012357917530892 (11.022769586184873-12.906287489535618)
cvx_truc_slsqp: 11.888367201716651 (10.911500940149528-12.91691009814173)
direct_slsqp: 9.276227506102888 (8.412306231762233-10.12324935613929)
Powell
cvx_powell: 12.220115972127287 (11.2

In [10]:
# Baseline runs: these log CSVs are read with their existing `time` column.
for logf in ['log_cvx', 'log_cvx_truc', 'log_mc']:
    df = pd.read_csv(f'../outputs/10arm/{logf}.csv')
    df['time'] *= 1000  # presumably seconds -> milliseconds; confirm units
    # One quantile call yields the same linearly interpolated quartiles that
    # describe() reports, without rebuilding the full summary three times.
    q1, med, q3 = df['time'].quantile([0.25, 0.5, 0.75])
    # Plain local names keep the f-string free of nested same-type quotes,
    # which are a SyntaxError on Python versions before 3.12.
    print(f'{logf}: {med} ({q1}-{q3})')
print('\n')

# Per-solver runs: header line, then the variants of that solver.
for solver in solvers:
    print(solver)
    s_tag = ''.join(solver.split('-')).lower()
    for logf in [f'log_cvx_{s_tag}', f'log_cvx_truc_{s_tag}', f'log_direct_{s_tag}']:
        # The direct COBYQA log is never read in the 10-arm case.
        if logf == 'log_direct_cobyqa':
            continue
        df = pd.read_csv(f'../outputs/10arm/{logf}.csv')
        if 'cvx' in logf:
            # For the cvx variants the total is the sum of the two logged
            # columns (presumably stage-1 and stage-2 times; confirm).
            df['time'] = df['s1_time'] + df['s2_time']
        df['time'] *= 1000
        q1, med, q3 = df['time'].quantile([0.25, 0.5, 0.75])
        print(f'{logf}: {med} ({q1}-{q3})')

log_cvx: 282.6718865 (241.06386675000002-374.24183474999995)
log_cvx_truc: 33.0779125 (28.976156000000003-38.007451499999995)
log_mc: 13044.139981269836 (12232.116281986235-13640.235483646393)


Nelder-Mead
log_cvx_neldermead: 312.0142064960937 (270.9530122575988-409.90603745318595)
log_cvx_truc_neldermead: 41.01120025085444 (36.91343437338255-45.892186061645454)
log_direct_neldermead: 28473.435282707214 (28010.84530353546-28686.91712617874)
L-BFGS-B
log_cvx_lbfgsb: 455.8803753737792 (352.8158801717834-645.8015110236511)
log_cvx_truc_lbfgsb: 197.91939407916254 (141.88021883978266-348.46841962802125)
log_direct_lbfgsb: 5942.281246185303 (2795.0963377952576-11514.635622501373)
TNC
log_cvx_tnc: 324.63707273455805 (283.62374259332273-421.04866123840327)
log_cvx_truc_tnc: 50.737052849853455 (46.5575631517028-55.82175297308342)
log_direct_tnc: 49086.334347724915 (20681.030750274655-56594.25950050354)
SLSQP
log_cvx_slsqp: 301.26967359851074 (258.61124342959596-397.26296040771473)
log_cvx_truc