In [72]:
import numpy as np
import itertools
import random

In [73]:
class User():

    def __init__(self, locs, svr_locs, w, idx, 
                 max_dist = 7, threshold_dist = 6, self_weight = 0.5, P = None, ceiling = 20):
        # max dist - reward range
        # threshold dist - used for generating markov chain
        
        self.idx = idx
        self.locs = locs
        self.dists = self.get_dists()
        self.svr_locs = svr_locs
        self.ceiling = ceiling
        self.w = w # True weights
        self.t = 0 # Time-steps past
        self.mode = "blind" # oracle
        
        if P is None:
            self.P = self.make_P(threshold_dist, self_weight)
        else:
            self.P = P
            
        self.reward_dists = self.get_reward_dists()
        self.reward_scale = self.get_scales(max_dist)
        self.usr_place = self.init_loc()
        self.expected_time = self.get_expected_time()
        
        # Initialize learning parameters
        self.ucb_raw = np.zeros(len(svr_locs))
        self.pulls = np.zeros(len(svr_locs))
        self.param_summed = np.zeros(len(svr_locs))
        self.max_logs = np.zeros(len(svr_locs)) # Threshold value UCB idx must exceed to pull arm
        self.wait_times = np.zeros(len(svr_locs))
        
        
    
    def make_P(self, threshold_dist, self_weight):
        # Creating Markov Transition Probability Matrix 
        
        P = np.zeros(self.dists.shape)
        locs = self.locs
        for i in range(len(locs)):
            cut_list = self.dists[i,:]
            others = np.squeeze(np.argwhere((cut_list > 0) * (cut_list < threshold_dist) == True))
            num_others = others.shape[0]
        
            # Draw values to make up row of MC
            self_transition = np.random.exponential(scale=1/self_weight)
            others_transition = np.random.exponential(scale=1/((1-self_weight)*num_others),size=num_others)
            total = self_transition + np.sum(others_transition)
            
            P[i,i] = self_transition/total
            
            idx = 0
            for j in others:
                P[i,j] = others_transition[idx]/total
                idx += 1
            
        return P
    
    def get_dists(self):
        # Obtaining distance matrix (from loc to loc) 
        
        locs = self.locs
        
        num_locs = len(locs)
        dists = np.zeros([num_locs,num_locs])
        
        for i,j in itertools.product(range(num_locs), range(num_locs)):
            if dists[i,j] == 0 and i != j:
                a = np.array(locs[i])
                b = np.array(locs[j])
                dists[i,j] = np.linalg.norm(a-b)
                dists[j,i] = dists[i,j]
        
        return dists
    
    def get_reward_dists(self):
        
        locs = self.locs
        svr_locs = self.svr_locs
        
        dists = np.zeros([len(locs),len(svr_locs)])
        
        for i,j in itertools.product(range(len(locs)), range(len(svr_locs))):
            a = np.array(locs[i])
            b = np.array(svr_locs[j])
            dists[i,j] = np.linalg.norm(a-b)
        
        return dists
    
    def get_scales(self,max_dist):
        # Mapping reward to [0,1] based on distance and max acceptable distance
        
        reward_scale = np.ones(self.reward_dists.shape) - self.reward_dists/max_dist
        reward_scale[reward_scale < 0] = 0
        
        return reward_scale
    
    def init_loc(self):
        # Initial location user takes 
        curr_loc = np.random.randint(0, len(self.locs)-1)
        return curr_loc
    
    def next_loc(self):
        # Update user location based on markov chain
        weights = self.P[self.usr_place]
        population = range(weights.shape[0])
        self.usr_place =  random.choices(population, weights)[0]
        self.expected_time = self.get_expected_time()
        
    def get_expected_time(self):
        # Get number of expected ts user will stay at this location
        try:
            curr_prob = np.ceil( 1/(1 - self.P[self.usr_place, self.usr_place]) )
        except:
            curr_prob = self.ceiling
        
        return curr_prob
    
    def update_ucb(self, L=2):
        """
        Update decision variables for next round
        """

        reward_record = self.rewards_scaled
        pulls_record = self.pulls
        ucb = np.zeros(self.ucb_raw.shape)
        
        for s in range(reward_record.shape[0]):
            if pulls_record[s] > 0:
                mean = reward_record[s]/pulls_record[s]
            else:
                mean = 0

            cb = np.sqrt(L * np.log(self.t)/ pulls_record[s])

            ucb[s] = mean + cb

        self.ucb_raw = ucb
    
    def choose_arm(self):
        # Choose an arm to pull based on collision restriction and UCB info
        
        if self.mode is "blind": 
            ucb_scaled =  self.reward_scale[self.usr_place] * self.ucb_raw
        else:
            ucb_scaled = self.reward_scale[self.usr_place] * self.w
            
        for i in range(ucb_scaled.shape[0]):
            if self.wait_times[i] > 0 and ucb_scaled[i] > self.max_logs[i]:
                ucb_scaled[i] = -10 # Force arm out of consideration
        
        arm_id = np.argmax(ucb_scaled)
        
        return arm_id
    
    def receive_reward(self, arm_id, reward, collision_flag, max_reward, wait_time, chosen_idx):
        # Return information from server transaction
        if not collision_flag:
            scale = self.reward_scale[self.usr_place,arm_id]
            self.pulls[arm_id] += 1
            self.param_summed[arm_id] += reward/scale
            self.t += 1 # only update time used in UCB index when success
        elif chosen_idx != self.idx:
            self.max_logs[arm_id] = max_reward # Threshold value UCB idx must exceed to pull arm
            self.wait_times[arm_id] = wait_time
        else: # This arm is reserved
            pass
        
        self.update_waittime(arm_id, wait_time, max_reward)
    
    def update_waittime(self, arm_id, wait_time, max_reward):
        self.wait_times -= 1
        self.wait_times[self.wait_times < 0] = 0
        self.max_logs[self.wait_times <= 0] = 0
    


In [1]:
class Server():
    def __init__(self, loc, w, s_idx):
        self.locs = locs
        self.w = w
        self.s_idx = s_idx
       
    def receive_users(self, user_list, scales_list, w_est_list, stay_times_list):
        
        # if 1 pull
        if len(user_list) == 1:
            reserve_id = user_list[0]
            reward = np.array([scales_list[0] *  np.random.binomial(n=1,p=self.w[reserve_id, self.s_idx])])
            reserve_max_val = scales_list[0]* w_est_list[0]
            reserve_time = stay_times_list[0]
            collision_flag = False
        elif len(user_list) > 1:
            collision_flag = True
            reward = np.zeros(len(user_list))
            reserve_max_val_list = np.zeros(len(user_list))
            for i in range(len(user_list)):
                reward[i] = scales_list[i] *  np.random.binomial(n=1,p=self.w[user_list[i], self.s_idx])
                reserve_max_val_list[i] = scales_list[i] * w_est_list[i]
            
            reserve_id = user_list[np.argmax(reserve_max_val)]
            reserve_max_val = reserve_max_val_list[np.argmax(reserve_max_val)]
            reserve_time = stay_times_list[np.argmax(reserve_max_val)]
            
                
        else: # no users pull this arm
            reserve_id, reserve_max_val, reserve_time, reward, collision_flag = None, None, None, None, False
            
        
        return reserve_id, reserve_max_val, reserve_time, reward, collision_flag

SyntaxError: invalid syntax (<ipython-input-1-396dd768da70>, line 6)

In [75]:
locs = [(0,0),(2,2),(4,4),(5,5)]
svr_locs = [(1,1),(3,3),(6,6)]
w = np.zeros(3)
u = User(locs,svr_locs,w)

In [24]:
        self.ucb_raw = np.zeros(len(svr_locs))
        self.pulls = np.zeros(len(svr_locs))
        self.max_logs = np.zeros(len(svr_locs)) # Threshold value UCB idx must exceed to pull arm
        self.wait_times = np.zeros(len(svr_locs))
        

array([0.39840772, 0.22612264, 0.37546964, 0.        ])

In [25]:
u.reward_scale

array([[0.79796949, 0.39390847, 0.        ],
       [0.79796949, 0.79796949, 0.19187796],
       [0.39390847, 0.79796949, 0.59593898],
       [0.19187796, 0.59593898, 0.79796949]])

In [26]:
u.dists/7

array([[0.        , 0.40406102, 0.80812204, 1.01015254],
       [0.40406102, 0.        , 0.40406102, 0.60609153],
       [0.80812204, 0.40406102, 0.        , 0.20203051],
       [1.01015254, 0.60609153, 0.20203051, 0.        ]])

In [27]:
np.random.randint(0, 9)

2

In [41]:
weights = u.P[u.usr_place]
population = range(weights.shape[0])
random.choices(population, weights)[0]

3

In [31]:
population

range(0, 4)

In [33]:
weights

array([0.14406385, 0.66682787, 0.03831985, 0.15078843])

In [71]:
for i in range(10):
    u.next_loc()
    print(u.usr_place)

2
2
1
0
2
2
2
2
1
3


In [98]:
a = np.array([3,2,3])

In [99]:
np.argmax(a)

0

In [100]:
a-=1

In [101]:
a[a>1] = 0

In [102]:
a

array([0, 1, 0])