In [368]:
import numpy as np
from numpy import transpose
from numpy.linalg import inv, det
from scipy.stats import norm

In [34]:
def eta(T):
    """ Generates the cutoff probabilities for exploration rounds in interval chaining. """
    return np.array([pow(t, -1/3) for t in range(1,T+1)])

In [257]:
def beta(k, d, c):
    """ Generates the scaled down feature weights for a true model. """
    return np.random.uniform(0, c+1, size=(k, d))

In [499]:
def top_interval(c, k, d, _delta, T):
    """
    Simulates T rounds of interval chaining.
    """
    X = np.random.uniform(0, 1, size=(k, T, d))  # 3-axis ndarray
#     X = np.random.normal(0.5, 0.5, (k, T, d))
    _eta = eta(T)                             # exploration cutoff probabilities
    B = beta(k, d, c)                         # true parameters. B[i]: params for arm i
    Y = np.array([X[i].dot(transpose(B[i])) for i in range(k)])  # not sure if there's a cleaner way to do this
    picks = []
    for t in range(T):
        print('Iteration [{0} / {1}]'.format(t, T))
        r = np.random.randint(k)
        if r <= _eta[t]:
            # Play uniformly at random from [1, k].
            picks.append(np.random.randint(0,k))
            print('Exploration round.')
        else:
            intervals = []
            for i in range(k):
                # Compute beta hat.
                _Xti = X[i][:t+1]
                _XtiT = transpose(_Xti)
                try:
                    _XTX = inv(_XtiT.dot(_Xti))
                except:
                    print('Encountered singular matrix. Ignoring.')
                    continue
                _Yti = Y[i][:t+1]
                Bh_t_i = _XTX.dot(_XtiT).dot(_Yti)  # Compute OLS estimators.
                yh_t_i = Bh_t_i.dot(X[i][t])
                _s2 = np.var(Y[i][:t+1])
                w_t_i = norm.ppf(1 - _delta/(2*T*k), loc=0, scale=np.sqrt(_s2 * X[i][t].dot(_XTX).dot(transpose(X[i][t]))))
                intervals.append([yh_t_i - w_t_i, yh_t_i + w_t_i])
            # Pick the agent with the largest upper bound.
            picks.append(np.argmax(np.array(intervals)[:,1]) if intervals else np.random.randint(0,k))
            print(intervals)
    # Compute sum of best picks over each iteration.
    best = [transpose(Y)[i].max() for i in range(2, T)]
    performance = [Y[picks[t-2]][t] for t in range(2, T)]
#     print(picks)
#     print('Best: {0}'.format(sum(best)))
#     print('Algorithm: {0}'.format(sum(performance)))
    print('Cumulative Regret: {0}'.format(sum(best) - sum(performance)))
    print('Final Regret: {0}'.format(best[-1] - performance[-1]))
            

In [500]:
top_interval(c=10, k=2, d=10, _delta=0.05, T=1000)

Iteration [0 / 1000]
Exploration round.
Iteration [1 / 1000]
Exploration round.
Iteration [2 / 1000]
Encountered singular matrix. Ignoring.
[[nan, nan]]
Iteration [3 / 1000]
Encountered singular matrix. Ignoring.
[[9.2233746247920294, 34.188451500623408]]
Iteration [4 / 1000]
Exploration round.
Iteration [5 / 1000]
Exploration round.
Iteration [6 / 1000]
[[21.063700732907808, 61.529255257979514], [-24.237699237213519, 3.805615023058925]]
Iteration [7 / 1000]
Exploration round.
Iteration [8 / 1000]
Exploration round.
Iteration [9 / 1000]
[[9.3997345331739197, 54.237998280099198], [17.451629509059359, 53.266561338430691]]
Iteration [10 / 1000]
[[10.992785170369221, 53.510376200752219], [10.068875302837434, 41.790435474480653]]
Iteration [11 / 1000]
[[24.426036051891522, 55.354379035185126], [14.297205693038482, 46.770606078885194]]
Iteration [12 / 1000]
Exploration round.
Iteration [13 / 1000]
[[-2.3342648500295908, 46.753307739477094], [7.1414676742128513, 35.122224802539208]]
Iteration



[[28.751713141353409, 39.08919562516877], [26.848572000618841, 37.362968102263793]]
Iteration [230 / 1000]
Exploration round.
Iteration [231 / 1000]
Exploration round.
Iteration [232 / 1000]
[[31.572971246380064, 42.611196546076641], [28.46894866296336, 37.289074530770506]]
Iteration [233 / 1000]
[[18.061493073652152, 32.025570851763447], [22.779656536249732, 32.750879781756773]]
Iteration [234 / 1000]
Exploration round.
Iteration [235 / 1000]
[[21.320458133552531, 33.346562619663139], [25.980230290662323, 36.342706380976843]]
Iteration [236 / 1000]
[[27.375167906879387, 36.925692319700843], [30.10011165879488, 39.835736392358037]]
Iteration [237 / 1000]
[[31.041622729410836, 41.512171493644708], [14.488426325451455, 21.249043260204999]]
Iteration [238 / 1000]
[[27.245949809528582, 40.473642875152777], [27.524108879542748, 35.40219038198817]]
Iteration [239 / 1000]
[[37.527128695305016, 49.392940267199059], [11.65267175868858, 22.860433035738101]]
Iteration [240 / 1000]
Exploration rou

### Testing

In [359]:
tmp = np.array([[ 0.41617394,  0.54593447,  0.10577638,  0.66642839,  0.21852587,
         0.12408503,  0.19740174,  0.16025822,  0.34112945,  0.79106745],
       [ 0.09355889,  0.68958286,  0.77347255,  0.02070936,  0.07084052,
         0.5991455 ,  0.45762994,  0.50857438,  0.17216335,  0.11820663]])

In [362]:
transpose(tmp).dot(tmp)

array([[ 0.18195401,  0.29172031,  0.11638661,  0.27928767,  0.09757253,
         0.10769634,  0.12496881,  0.11427695,  0.1580766 ,  0.34028094],
       [ 0.29172031,  0.77356897,  0.59112039,  0.37810705,  0.16815121,
         0.48090276,  0.42334218,  0.43819466,  0.30495522,  0.51338426],
       [ 0.11638661,  0.59112039,  0.60944843,  0.0865105 ,  0.07790807,
         0.47654786,  0.37484464,  0.41031986,  0.16924706,  0.17510583],
       [ 0.27928767,  0.37810705,  0.0865105 ,  0.44455568,  0.14709891,
         0.09510171,  0.14103135,  0.11733288,  0.23090374,  0.52963779],
       [ 0.09757253,  0.16815121,  0.07790807,  0.14709891,  0.05277194,
         0.06955957,  0.07555613,  0.07104824,  0.08674175,  0.18124252],
       [ 0.10769634,  0.48090276,  0.47654786,  0.09510171,  0.06955957,
         0.37437242,  0.29868152,  0.3245957 ,  0.14547995,  0.1689826 ],
       [ 0.12496881,  0.42334218,  0.37484464,  0.14103135,  0.07555613,
         0.29868152,  0.24839261,  0.26437411

In [191]:
k = 5
d = 10
T = 50
B = beta(k, d)
X = np.random.uniform(size=(k, T, d))
Y = np.array([X[i].dot(transpose(B[i])) for i in range(k)])

In [156]:
s2 = np.var(Y[0])
s2

0.54880669730882059

In [192]:
xTx = transpose(X[0][:10]).dot(X[0][:10])
Bh = inv(xTx).dot(transpose(X[0][:10])).dot(Y[0][:10])

In [152]:
Bh.dot(X[0][9])

2.9963963502844737

In [153]:
B[0].dot(X[0][9])

2.9963963502845434

In [90]:
#X[0].dot(np.transpose(sample_b[0]))

In [280]:
np.random.normal(0.5, 0.2)

0.691813602964692

In [438]:
sum([1,2])

3

In [498]:
[1,2,3][-1]

3