In [1]:
cd /home/ubuntu/DistMEC/

/home/ubuntu/DistMEC


In [2]:
import numpy as np
import matplotlib.pyplot as plt

from classes.User import *
from classes.solver import *
from classes.Server import *
from classes.distributed_utils import *

from tqdm import tqdm, tqdm_notebook
from time import sleep
import pickle
import os

In [14]:
# Fix P
def fix_P(user):
    """Renormalise every row of ``user.P`` in place so that it sums to 1.

    Parameters
    ----------
    user : object
        Any object exposing a 2-D NumPy array attribute ``P``. The array is
        modified in place; no copy is made.

    Returns
    -------
    None

    Notes
    -----
    * The loop runs over the rows (``P.shape[0]``). The previous version used
      the column count as the row bound, which skipped trailing rows of a tall
      matrix and raised ``IndexError`` for a wide one.
    * A row whose sum is exactly 0 is left untouched, because dividing it
      would fill the row with NaN.
    """
    P = user.P

    for i in range(P.shape[0]):
        tot = np.sum(P[i])
        # Dividing a row that already sums to 1.0 is a no-op, so only the
        # degenerate all-zero case needs to be guarded explicitly.
        if tot != 0:
            P[i] = P[i] / tot

    return


def obtain_w_stationary(Users, num_users, num_svrs):
    """Stack each user's ``stationary_reward_scale`` into one matrix.

    Parameters
    ----------
    Users : sequence
        Indexable collection of user objects; the first ``num_users`` entries
        are read.
    num_users : int
        Number of rows of the result.
    num_svrs : int
        Number of columns of the result.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(num_users, num_svrs)`` whose row ``i`` holds
        ``Users[i].stationary_reward_scale``.
    """
    stationary_w = np.zeros((num_users, num_svrs))

    # Iterating a 2-D array yields row views, so writing through `row`
    # fills the matrix in place.
    for idx, row in enumerate(stationary_w):
        row[...] = Users[idx].stationary_reward_scale

    return stationary_w

def extract_centralized_case(Users, num_users, num_svrs):
    """Gather per-user bandit statistics into three matrices.

    Parameters
    ----------
    Users : sequence
        Indexable collection of user objects exposing ``param_summed``,
        ``pulls`` and ``ucb_raw``; the first ``num_users`` entries are read.
    num_users : int
        Number of rows of each returned matrix.
    num_svrs : int
        Number of columns of each returned matrix.

    Returns
    -------
    tuple of numpy.ndarray
        ``(rewards_record, pulls_record, ucb)``, each a new float array of
        shape ``(num_users, num_svrs)``; row ``i`` is copied from
        ``Users[i].param_summed``, ``Users[i].pulls`` and ``Users[i].ucb_raw``
        respectively.
    """
    shape = (num_users, num_svrs)
    rewards_record = np.zeros(shape)
    pulls_record = np.zeros(shape)
    ucb = np.zeros(shape)

    for idx in range(num_users):
        usr = Users[idx]
        rewards_record[idx, :] = usr.param_summed
        pulls_record[idx, :] = usr.pulls
        ucb[idx, :] = usr.ucb_raw

    return rewards_record, pulls_record, ucb

In [18]:
# System Parameters
T = 5000  # number of rounds; every per-round record array below has length T
num_users = 10
num_svrs = 16
num_locs = 16
# One value per (user, server) pair, drawn uniformly from [0, 1).
# The draw is unseeded, so every execution of this cell uses a different mu.
mu = np.random.uniform(low=0, high = 1, size = [num_users, num_svrs])
# NOTE(review): `ceiling` is not passed to anything in this cell -- the User
# constructor below receives the literal `ceiling = 1`. Confirm which is intended.
ceiling = 20
# w = np.random.uniform(low=0, high=1, size= [num_users, num_svrs])
space_1d_dim = 10 #4 for small, 10 for big

num_explore = 1
trials = 1

# result dictionaries
result_dicts = []       # one result_dict per trial
performance_list = []   # per trial: 1 if cumulative regret_w > cumulative regret_s, else 0
user_dicts = []         # NOTE(review): never filled in this cell

save_name = 'results/22_06_16_ceiling_20/'
# Create the output directory if it does not exist yet.
os.makedirs(save_name, exist_ok=True)

# NOTE(review): `copy` is used below but is not imported explicitly in the
# notebook's import cell; presumably it arrives through one of the
# `from classes.* import *` lines -- confirm, or add `import copy` there.
for tri in range(trials):

    print("trial:", tri)
    result_dict = {}
    user_dict = {}

    # Generate visiting locations and server locations
    usr_locs = gen_eq_locs(space_1d=space_1d_dim, nums=num_locs)
    svr_locs = gen_eq_locs(space_1d=space_1d_dim, nums=num_svrs)

    # Create Users
    Users_w = []
    for i in range(num_users):
        Users_w.append(User(usr_locs,svr_locs,mu[i],i,
                            max_dist = 4, threshold_dist = 4, self_weight = 1 - 0.95, P = None, ceiling = 1,
                            sticky_mode = False, kick_mode=True))

    # Create Servers
    Servers = []
    for i in range(num_svrs):
        Servers.append(Server(svr_locs[i],mu,i))

    # Recorded Values - reservation
    regret_w = np.zeros(T)
    collision_count_w = np.zeros(T)
    optimal_reward = np.zeros(T)

    # Explore rounds are common/shared across all users.
    # Fix: the array allocated above is `optimal_reward`; the previous
    # `optimal_reward_w` was never assigned anywhere in this notebook and only
    # resolved when a stale variable happened to be left in the kernel.
    explore_rounds(Users_w, num_users, Servers, mu, regret_w, collision_count_w, optimal_reward,
                   usr_move_flag = True, rounds=num_explore)

    # Distributed - true w learning
    # Users_s starts as an independent copy of the post-exploration state of
    # Users_w, with sticky mode switched on.
    Users_s = copy.deepcopy(Users_w)
    regret_s = copy.deepcopy(regret_w)
    collision_count_s = copy.deepcopy(collision_count_w)
    for u in Users_s:
        u.sticky_mode = True
        u.kick_mode = True #false

    # Centralized - true w learning
    rewards_record_ct, pulls_record_ct, ucb_ct = extract_centralized_case(Users_w, num_users, num_svrs)
    regret_ct = copy.deepcopy(regret_w)

    # Centralized - stationary w learning
    rewards_record_cs, pulls_record_cs, ucb_cs = extract_centralized_case(Users_w, num_users, num_svrs)
    regret_cs = copy.deepcopy(regret_w)

    # Centralized - stationary w, known mu
    w_stat = obtain_w_stationary(Users_w, num_users, num_svrs)
    optimal_stat_arms = offline_optimal_action(w_stat,mu)[0]
    regret_cst = copy.deepcopy(regret_w)

    round_start = ((num_svrs)*num_explore)+1
    # A single progress-wrapped range replaces the former
    # zip(tqdm_notebook(range(T - round_start)), range(round_start, T));
    # the bar length and the values of t are unchanged.
    for t in tqdm_notebook(range(round_start, T)):
        w = obtain_w(Users_w, len(Users_w), len(Servers))
        optimal = offline_optimal_action(w, mu)
        optimal_reward[t] = optimal[1]

        # Distributed solution
        play_round(Users_w, Servers, mu, regret_w, collision_count_w,
                   usr_move_flag = True, debugger = False, reservation_mode = True, optimal =optimal)
        play_round(Users_s, Servers, mu, regret_s, collision_count_s,
                   usr_move_flag = False, debugger=False, reservation_mode = True, optimal=optimal)
        # Users_s is played with usr_move_flag = False and then synchronised
        # with Users_w through copy_usr_loc.
        copy_usr_loc(Users_w, Users_s)

        # Centralized Solution -  true w learning
        ucb_ct = update_ucb(rewards_record_ct, pulls_record_ct, ucb_ct, t, 1)
        arms = offline_optimal_action(w, ucb_ct)[0]
        rewards_record_ct, pulls_record_ct = pull_super_arm(arms, mu, rewards_record_ct, pulls_record_ct)
        regret_ct[t] = optimal[1] - expected_reward(arms, mu, w)

        # Centralized Solution -  stationary w learning
        # (arms are chosen with w_stat, but regret is evaluated against the current w)
        ucb_cs = update_ucb(rewards_record_cs, pulls_record_cs, ucb_cs, t, 1)
        arms = offline_optimal_action(w_stat, ucb_cs)[0]
        rewards_record_cs, pulls_record_cs = pull_super_arm(arms, mu, rewards_record_cs, pulls_record_cs)
        regret_cs[t] = optimal[1] - expected_reward(arms, mu, w)

        # centralized solution - stationary w, known mu
        regret_cst[t] = optimal[1] - expected_reward(optimal_stat_arms, mu, w)

    # Obtain reward values
    reward_w = optimal_reward - regret_w
    reward_s = optimal_reward - regret_s
    reward_cs = optimal_reward - regret_cs
    reward_cst = optimal_reward - regret_cst
    reward_ct = optimal_reward -regret_ct

    threshold = 1

    # reward[reward <=  0] = threshold
    # Rounds where reward_w is non-positive take the reward_s value instead.
    nonpositive_w = reward_w <= 0
    reward_w[nonpositive_w] = reward_s[nonpositive_w]# threshold

    # Log results
    result_dict["reward_w"] = reward_w
    result_dict["reward_s"] = reward_s
    result_dict["reward_cs"] = reward_cs
    result_dict["reward_ct"] = reward_ct
    result_dict["reward_cst"] = reward_cst

    result_dict["regret_w"] = regret_w
    result_dict["regret_s"] = regret_s
    result_dict["regret_cs"] = regret_cs
    result_dict["regret_ct"] = regret_ct
    result_dict["regret_cst"] = regret_cst

    result_dict["collision_w"] = collision_count_w
    result_dict["collision_s"] = collision_count_s

    # Fix: keep this trial's results; previously result_dict was overwritten
    # on the next trial without ever being stored.
    result_dicts.append(result_dict)

    # Fix: compare against regret_w; the bare name `regret` was never assigned.
    if np.cumsum(regret_w)[-1] > np.cumsum(regret_s)[-1]:
        performance_list += [1]
    else:
        performance_list += [0]

trial: 0


HBox(children=(IntProgress(value=0, max=4983), HTML(value='')))




KeyboardInterrupt: 

In [21]:
# Renormalise the matrix P of the first user in Users_w (fix_P mutates it in place).
fix_P(Users_w[0])

In [12]:
# NOTE(review): `optimal_stat` is not assigned in any visible cell -- the
# simulation cell keeps only `optimal_stat_arms` (element [0] of
# offline_optimal_action(w_stat, mu)). The displayed value presumably comes
# from an earlier kernel state; confirm before relying on it.
optimal_stat

([6, 2, 14, 7, 5, 13, 1, 9, 10, 8], 6.743168732199361)

In [160]:
# Small 3x3 example matrix whose rows each sum to 1.
# It is not referenced by the visible cells that follow (they read Users[0].P).
P = np.array([[0.7, 0.2, 0.1],[0.4,0.6,0],[0,1,0]])

In [161]:
# Eigen-decomposition of the transpose of the first user's matrix P.
# Eigenvectors of P.T are left eigenvectors of P; the one for eigenvalue 1 is
# a stationary distribution if P is a row-stochastic transition matrix
# (presumably it is -- confirm).
# NOTE(review): `Users` is not defined in the visible cells (only `Users_w`
# and `Users_s` are); this cell relies on earlier kernel state.
evals, evecs = np.linalg.eig(Users[0].P.T)
# Keep the eigenvector column(s) whose eigenvalue is numerically equal to 1.
evec1 = evecs[:,np.isclose(evals, 1)]

# The boolean mask from np.isclose keeps the second axis, so evec1 is still
# 2-D. Take the first matching column to get a 1-D vector (this assumes at
# least one eigenvalue matched).
evec1 = evec1[:,0]

# Rescale so the entries sum to 1.
stationary = evec1 / evec1.sum()

# np.linalg.eig may return complex eigenvalues/eigenvectors; keep the real part.
stationary = stationary.real

In [162]:
# View the 1-D `stationary` vector as a 1 x n row vector for the matrix product below.
s2 = stationary.reshape([1,stationary.shape[0]])

In [163]:
# Alias for the first user's reward_scale; not used by the visible cells that
# follow (they read Users[0].reward_scale directly).
r2 = Users[0].reward_scale

In [174]:
# Matrix product of the 1 x n stationary row vector with the first user's
# reward_scale (assumes reward_scale has n rows, one per state of P -- TODO confirm).
true_scaling = np.matmul(s2, Users[0].reward_scale)

In [178]:
# Display the normalised, real-valued eigenvector computed above.
stationary

array([0.01418415, 0.04076948, 0.07075752, 0.11449394, 0.01854684,
       0.17635562, 0.00885521, 0.01378636, 0.17506885, 0.07949673,
       0.03025502, 0.01886457, 0.01227082, 0.16134263, 0.01981257,
       0.0451397 ])

In [12]:
# (num_users x num_svrs) matrix whose row i is Users_w[i].stationary_reward_scale.
w2 = obtain_w_stationary(Users_w,  num_users, num_svrs)

In [13]:
# Display the full matrix; consider w2[:3] or w2.shape to keep the output short.
w2

array([[0.68966886, 0.70309462, 0.66513838, 0.58121115, 0.73974057,
        0.77837824, 0.72120058, 0.62917617, 0.72003593, 0.75355825,
        0.7159357 , 0.63020159, 0.64226993, 0.68186278, 0.65710858,
        0.57300329],
       [0.61681633, 0.68513614, 0.69012004, 0.63406074, 0.68178213,
        0.74668027, 0.74460606, 0.69157697, 0.70395873, 0.73939451,
        0.73369552, 0.6761866 , 0.61230602, 0.6642079 , 0.66341463,
        0.61814077],
       [0.57644049, 0.6620544 , 0.67724389, 0.6350632 , 0.65112569,
        0.75178164, 0.77793392, 0.72907838, 0.67127607, 0.77159917,
        0.80676505, 0.76935531, 0.62287109, 0.71523508, 0.7371109 ,
        0.68963639],
       [0.59506308, 0.6564949 , 0.67512521, 0.625099  , 0.66596378,
        0.73424469, 0.73989736, 0.69225131, 0.6884668 , 0.74150859,
        0.75422249, 0.69209454, 0.62961885, 0.68181368, 0.68199131,
        0.63592823],
       [0.662027  , 0.68969299, 0.66908196, 0.58861805, 0.70488892,
        0.76752797, 0.73191875, 

In [6]:
# Sanity check: shape of the `mu` attribute of the first user in Users_s.
Users_s[0].mu.shape

(16,)