In [33]:
import numpy as np
from numpy import transpose
from numpy.linalg import inv, det
from scipy.stats import norm

In [41]:
def ridgeFair(X, Y, k, d, _delta, T, _lambda):
    """
    Simulates T rounds of ridgeFair.
    
    :param X: a 3-axis (T, k, d) ndarray of d-dimensional context vectors for each
              time-step and arm
    :param Y: a T x k ndarray of reward function output for each context vector
    :param k: the number of arms
    :param d: the number of features
    :param _delta: confidence parameter
    :param T: the number of iterations
    :param _lambda:   
    """
    picks = []
    for t in range (1,50): # for t >= 1 # right now its 1 - 49
        for i in range(k): # for 1 <= i <= k
            R = 1
            intervals = []
#             try:
            X_i = X[t,:i] # design matrix (taking all of axis i from X?)
            Y_i = Y[t,:i] # sem with Y
            Xti = X[:t+1,i] # feature vector for arm i in round t

            _X_iT = transpose(X_i)
            _idenD = np.identity(d)
            V_it = _X_iT.dot(X_i)+(_lambda*_idenD) # computing V_it as line 5

            B_it = inv(V_it).dot(_X_iT).dot(Y_i) # computing line 6
            y_ti = transpose(Xti).dot(B_it) #computing line 7

            _V_itI = inv(V_it) # inverse of V_it
            _Wti1 = norm(Xti,_V_itI) # computing first part of W_ti
            _Wti2 = R*sqrt(d*log((1+(t/_lambda))/_delta))+sqrt(_lambda) # computing second part of W_ti
            W_ti = _Wti1*_Wti2 # computing W_ti as line 8

            intervals.append([yh_t_i - w_t_i, yh_t_i + w_t_i]) # line 9
#             except:
#                     print('Error in assigning interval value.')
#                     intervals = None
#                     break
            if not intervals:
                picks.append(np.random.randint(0,k))
            else:
                i_st = np.argmax(np.array(intervals)[:,1]) # line 10
                chain = compute_chain(i_st, np.array(intervals), k) # line 11
                picks.append(np.random.choice(chain)) # play uniformly random from chain
            

In [42]:
def compute_chain(i_st, intervals, k):
    # Sort intervals by decreasing order.
    chain = [i_st]
    print(intervals[:,1])
    ordering = np.argsort(intervals[:,1])[::-1]
    intervals = intervals[ordering,:]
    
    lowest_in_chain = intervals[0][0]
    for i in range(1, k):
        if intervals[i][1] >= lowest_in_chain:
            chain.append(i)
            lowest_in_chain = min(lowest_in_chain, intervals[i][0])
        else:
            return chain
    return chain

In [43]:
def beta(k, d, c):
    """ 
    Generates the scaled down feature weights for a true model from the distribution
    β ∼ U[0, c]^d.
    
    :param k: the number of arms 
    :param d: the number of features
    :param c: the scale of the feature weights
    """
    return np.random.uniform(0, c+1, size=(k, d))

In [40]:
k = 2
c = 10
d = 10
T = 1000
X = np.random.uniform(0, 1, size=(T, k, d)) # 3-axis ndarray
B = beta(k, d, c)                           # true parameters. B[i]: params for arm i
Y = np.array([np.diag(X[t].dot(transpose(B))) for t in range(T)])
ridgeFair(X, Y, k, d, 0.05, T, 1)

ValueError: shapes (10,2) and (10,) not aligned: 2 (dim 1) != 10 (dim 0)