In [1]:
cd /home/tkim/Academics/EdgeAlloc/

/home/tkim/Academics/EdgeAlloc


In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm, tqdm_notebook

# Import tools
import tools.solver_utils as ts
import tools.other_utils as to
import tools.sim_utils as tm

# Import classes
from classes.User import *
from classes.Server import *

import pdb

np.set_printoptions(suppress=True, precision=4)

In [4]:
def make_user_copies(Users, scale_settings, T):
    
    lst_lu = []
    lst_regret = []
    lst_collision_loss = []
    lst_collision_rate = []
    
    for i in range(len(scale_settings)):
        lst_lu += [copy.deepcopy(Users)]
        lst_regret += [np.zeros(T)]
        lst_collision_loss += [np.zeros(T)]
        lst_collision_rate += [np.zeros(T)]
    
    return lst_lu, lst_regret, lst_collision_loss, lst_collision_rate

def solve_dist_many(servers, opt_reward, lst_lu, scale_settings, mu2, C2, t, 
                    lst_regret, lst_collision_loss, lst_collision_rate):
    
    for i in range(len(lst_lu)):
        usersR = lst_lu[i]
        lrn_x_R = tm.dist_usr_arms(usersR)
        lrn_reward_R = ts.calculate_obj_val(lrn_x_R, mu2, C2)
        lst_regret[i][t] = opt_reward - lrn_reward_R
        tm.dist_receive_rewards(servers, usersR, lrn_x_R, rsv_flag = True, wait_scale = scale_settings[i])
        
        lst_collision_loss[i][t], lst_collision_rate[i][t] = tm.characterize_collision(lrn_x_R, mu2, C2)
    
    return
    
def update_usr_loc(users, lst_lu):
    
    for j in range(len(lst_lu)):
        usersR = lst_lu[j]
        for i in range(len(usersR)):
                usersR[i].next_step()
                usersR[i].usr_place = copy.deepcopy(users[i].usr_place)
                
    return

def record_stats(cumul_reg, cumul_coll_pc, exp_wait, trial,
                lst_regret, lst_collision_rate):
    
    num_U = len(lst_lu[0])
    
    for i in range(len(lst_lu)):
        cumul_reg[i,trial] = np.cumsum(lst_regret[i])[-1]
        cumul_coll_pc[i,trial] = (np.cumsum(lst_collision_rate[i])[-1])/num_U
        
        collect_timehist = []
        usersR = lst_lu[i]
        for j in range(num_U):
            a = usersR[j].waittime_history
            collect_timehist += [np.mean(a[a>0])]

        exp_wait[i,trial] = np.mean(collect_timehist)
    return
    

In [5]:
scale_settings = [0.5,1,1.5,2,3,5,10]
num_trials = 10

cumul_reg = np.zeros([len(scale_settings),num_trials])
cumul_coll_pc = np.zeros([len(scale_settings),num_trials])
exp_wait = np.zeros([len(scale_settings),num_trials])

cumul_reg_dist = np.zeros(num_trials)
cumul_coll_dist = np.zeros(num_trials)

In [6]:
# testing user markov chain 
U = 7
K = 4
T = 4000

space_1d_dim = 6
num_locs = 9
cap_multiply = 2

# Generate visitng locations and server locations
usr_locs = tm.gen_eq_locs(space_1d=space_1d_dim, nums=num_locs, offset = 1.7)
svr_locs = tm.gen_eq_locs(space_1d=space_1d_dim, nums=K, offset = 1.7)
C = np.ones(K) * cap_multiply
C2 = np.append(C,np.array([1]))

for tri in range(num_trials):

    mu = np.random.uniform(low=0.0, high=1.0, size=[U,K])
    mu2 = np.append(mu, np.zeros([U,1]), axis = 1)

    servers = []
    for s in range(len(svr_locs)):
        servers += [Server(C[s], s, mu, svr_locs[s])]

    users = []
    for i in range(U):
        users += [User(servers, T, usr_locs, lat_dist = 4.5, self_weight = 0.25, threshold_dist=3.5)]

    # Multiple users with different wait time scalings
    lst_lu, lst_regret, lst_collision_loss, lst_collision_rate = make_user_copies(users, scale_settings, T)

    # Naive
    regret_dist = np.zeros(T)
    collision_loss = np.zeros(T)
    collision_rate = np.zeros(T)

    # Central solver
    UCB = np.ones_like(mu)
    T_ka = np.ones_like(mu)
    rewards_sum = np.ones([U,K]) # Just for calculating UCB idx
    regret_central = np.zeros(T)

    pbar = tqdm(total=T - K)

    for t in range(T - K):

        # Obtain New Arm availability
        L = to.obtain_L_users(U,K,users)

        # Optimal placement and reward
        opt_x, opt_reward = ts.solve_optimization(mu, C, L)

        # Solve for Central
        lrn_x, _ = ts.solve_optimization(UCB, C, L)
        lrn_reward = ts.calculate_obj_val(lrn_x, mu, C)
        regret_central[t] = opt_reward - lrn_reward
        UCB, T_ka, rewards_sum, mu_bar = ts.play_UCB(UCB, T_ka, rewards_sum, t+1, mu, lrn_x)

        # Solve Naive - Distributed 
        lrn_x_dist = tm.dist_usr_arms(users)
        lrn_reward_dist = ts.calculate_obj_val(lrn_x_dist, mu2, C2)
        regret_dist[t] = opt_reward - lrn_reward_dist
        tm.dist_receive_rewards(servers, users, lrn_x_dist)
        # Obtain Collision Information for Dist Methods
        collision_loss[t], collision_rate[t] = tm.characterize_collision(lrn_x_dist, mu2, C2)

        # Solve Reserve - Distributed
        solve_dist_many(servers, opt_reward, lst_lu, scale_settings, mu2, C2, t, 
                    lst_regret, lst_collision_loss, lst_collision_rate)
        
        

        # Update for Next Step
        for u in users:
            u.next_step()
        update_usr_loc(users, lst_lu)
        pbar.update(1)
        
    record_stats(cumul_reg, cumul_coll_pc, exp_wait, tri,
                lst_regret, lst_collision_rate)
    cumul_reg_dist[tri] = np.cumsum(regret_dist)[-1]
    cumul_coll_dist[tri] = (np.cumsum(collision_rate)[-1])/U

    pbar.close()

  0%|          | 0/3996 [00:00<?, ?it/s]

Restricted license - for non-production use only - expires 2024-10-28


100%|██████████| 3996/3996 [04:00<00:00, 16.60it/s]
100%|██████████| 3996/3996 [03:50<00:00, 17.32it/s]
100%|██████████| 3996/3996 [01:22<00:00, 48.58it/s]
100%|██████████| 3996/3996 [03:33<00:00, 19.61it/s]
100%|██████████| 3996/3996 [01:23<00:00, 47.59it/s]
100%|██████████| 3996/3996 [02:31<00:00, 26.46it/s]
100%|██████████| 3996/3996 [03:31<00:00, 19.17it/s]
100%|██████████| 3996/3996 [04:22<00:00, 15.21it/s]
100%|██████████| 3996/3996 [03:26<00:00, 19.33it/s]
100%|██████████| 3996/3996 [05:43<00:00, 10.43it/s]


In [7]:
print(np.mean(cumul_reg, axis = 1))

[3604.9937 3086.8736 2937.0466 2937.2423 2874.1348 2785.2783 3205.6403]


In [8]:
np.mean(cumul_coll_pc, axis=1)

array([620.1706, 436.8145, 365.5006, 315.058 , 247.6585, 185.659 ,
       149.0452])

In [9]:
np.mean(exp_wait, axis=1)

array([ 1.8909,  4.2783,  6.184 ,  8.5484, 12.8645, 21.1511, 41.6044])

In [10]:
np.mean(cumul_reg_dist)

4075.080462156966

In [11]:
np.mean(cumul_coll_dist)

750.1841581428755

In [12]:
2785/4075

0.6834355828220859