### Import libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.layers import Layer, Dense, Attention, MultiHeadAttention, Flatten, Reshape, Concatenate, RepeatVector
from tensorflow import math
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD

### Set up simulation

- Rc: Available resources of each substrate-network node, drawn at random (arguments: minimum resource, exclusive maximum resource, number of nodes)
- ns: Number of slices requested per episode
- nv: Number of VNFs requested per slice

In [None]:
# --- simulation parameters ---
# Substrate network: one integer resource value per node, 100 nodes,
# each drawn from the half-open range [10, 30).
Rc = np.random.randint(low=10, high=30, size=100)
# Request dimensions: ns slices, each made of nv VNFs.
ns, nv = 20, 10

### Model building

In [None]:
####
# Hyper-parameters of the Q-network
n = Rc.shape[0] + ns                       # base width: substrate nodes + slices
R_dense = [Rc.shape[0] for _ in range(3)]  # layer widths of the substrate-resource dense stack
r_dense = [nv for _ in range(3)]           # layer widths of the per-slice request dense stack
decis_dense = [n*3 for _ in range(3)]      # layer widths of the decision dense stack
n_ah = 5                                   # number of attention heads
ah_size = nv                               # key dimension of each attention head


####
# Construct model
# Shapes are given as real tuples: `(k)` is just the int `k`, which newer
# Keras versions refuse to interpret as a shape.
R_in = tf.keras.Input(shape=(Rc.shape[0],))  # substrate resources, one value per node
r_in = tf.keras.Input(shape=(ns, nv))        # slice requests: ns slices x nv VNFs
msk = tf.keras.Input(shape=(ns,))            # per-slice mask multiplied into the Q output

# Substrate branch: dense stack, then the same vector repeated once per slice
# so it can be concatenated with the per-slice features.
Rx = R_in
for nn in R_dense:
    Rx = Dense(nn, activation='relu')(Rx)
Rx = RepeatVector(ns)(Rx)

# Request branch: dense stack applied to the last (VNF) axis of every slice.
rx = r_in
for nn in r_dense:
    rx = Dense(nn, activation='relu')(rx)

# Self-attention across the slice axis (query = value = request features).
mha = MultiHeadAttention(num_heads=n_ah, key_dim=ah_size, attention_axes=1)
rxa = mha(rx, rx)

# Merge the three per-slice feature sets with a Keras layer rather than a raw
# TensorFlow op, so the functional graph is built from layers only.
x = Concatenate(axis=-1)([Rx, rx, rxa])

# Decision dense stack
for nn in decis_dense:
    x = Dense(nn, activation='relu')(x)

# Output layer: one linear Q-value per slice, shape (batch, ns, 1)
q = Dense(units=1, activation='linear')(x)

# Masked Q-values, shape (batch, ns). The layer is named 'msk_q' because the
# compile() call below attaches the loss to the output of that name. A named
# Multiply layer is used instead of wrapping the product in a bare `Layer`,
# which has no call() implementation in newer Keras versions.
msk_q = tf.keras.layers.Multiply(name='msk_q')([Flatten()(q), msk])

# Build and compile: only the masked output carries a loss.
model = Model(inputs=[R_in, r_in, msk], outputs=[q, msk_q])
model.compile(loss={'msk_q': 'mse'}, optimizer='adam')

In [None]:
# Storage shared by the simulation cells. All six start empty and are
# independent list objects.
#   XR       - resources
#   Xr       - requested slices
#   Qs       - predicted Q
#   prd_acts - predicted actions
#   Ps, accs - additional result buffers
XR, Xr, Qs, prd_acts, Ps, accs = [], [], [], [], [], []

### Running simulation

In [None]:
####
# Training hyper-parameters
reward, penalty = 0.2, -0.2  # weights applied to allocated / unallocable slice counts
gamma = 0.9                  # discount factor
batch_size = 256             # experience replay: size of the sample drawn from the stored history
learning_rate = 1e-6         # NOTE(review): not referenced by the code visible in this file


####
# Training loop: play one allocation episode with the current model, store
# it in the replay buffers, then fit the model on the latest episode plus a
# random sample of earlier ones.
n_episodes = 200
for ep in range(n_episodes):
    #### one episode
    # Replay data for this episode, one entry per decision step.
    Rx = []  # substrate resources at the moment the action was chosen
    rx = []  # slice requests at the moment the action was chosen
    ax = []  # index of the slice chosen at that step
    Rs = []  # reward recorded for that step

    # Generate the slice requests for this episode.
    rq = np.random.randint(1, 29, (ns, nv))

    # Working copies of the environment state.
    Rt = Rc.copy()
    rt = rq.copy()
    allocated_slices = []
    unallocable_slices = set()
    allocated = np.zeros(rq.shape)
    unallocable = np.zeros(rq.shape)
    slice_mapping = {}

    # Dummy all-ones mask; it is constant, so it is built once per episode.
    mask = np.ones((1, ns))

    while len(allocated_slices) + len(unallocable_slices) < ns:
        # Current Q-values for every slice. verbose=0 keeps Keras from
        # printing a progress bar on every single decision step.
        qs = model.predict(
            [Rt.reshape(1, -1), rt.reshape(1, ns, nv), mask], verbose=0
        )[0]
        # Visit slices from highest to lowest Q and take the first one that
        # has not been handled yet.
        for si in np.argsort(-qs.flatten()):
            if (si in allocated_slices) or (si in unallocable_slices):
                continue
            # Snapshot the state the Q-values were computed on. The replay
            # buffer must pair the action with this pre-allocation state;
            # copying also protects against allocate_slice mutating its
            # arguments in place.
            state_R, state_r = Rt.copy(), rt.copy()
            # Allocate the selected slice.
            Rt, rt, allocated, unallocable, s0 = allocate_slice(Rt, rt, si, allocated, unallocable)
            # Update environment bookkeeping.
            # NOTE(review): si is recorded as allocated whatever allocate_slice
            # reports; confirm it cannot also land in unallocable_slices.
            slice_mapping.update(s0)
            allocated_slices.append(si)
            unallocable_slices.update(np.argwhere(unallocable.sum(axis=1) != 0).flatten())
            # Update replay data.
            Rx.append(state_R)
            rx.append(state_r)
            ax.append(si)
            # Reward: the penalty term is only added on the final step.
            if len(allocated_slices) < ns:  # episode has not ended
                Rs.append(len(allocated_slices)*reward)
            else:
                Rs.append(len(allocated_slices)*reward + len(unallocable_slices)*penalty)
            break

    # Discounted return for every step, accumulated from the last step back.
    rr = Rs.copy()
    for i in range(len(rr) - 2, -1, -1):
        rr[i] += gamma * rr[i + 1]

    Qs.append(rr)
    prd_acts.append(ax)
    Ps.append(slice_mapping)
    XR.append(Rx)
    Xr.append(rx)

    # Experience replay: always train on the newest episode (-1) and add up
    # to batch_size-1 distinct earlier episodes as soon as any exist, instead
    # of waiting for the buffer to outgrow batch_size.
    batch_indices = [-1]
    n_past = len(XR) - 1
    n_extra = min(batch_size - 1, n_past)
    if n_extra > 0:
        batch_indices += list(np.random.choice(n_past, n_extra, replace=False))

    Res = np.vstack(slice_eps(XR,batch_indices))
    Req = np.vstack(slice_eps(Xr,batch_indices)).reshape(Res.shape[0],-1,nv)
    Acts = slice_eps(prd_acts, batch_indices)
    Qy = slice_eps(Qs, batch_indices)
    masks = gen_mask(Acts, len(Acts), ns)
    ys = gen_y(Qy, masks)

    model.fit([Res,Req,masks], ys, epochs=1, verbose=True)

### Evaluation simulation

In [None]:
# Score the trained agent and four baseline allocators on the same 100
# freshly generated request sets, then show the mean score of each.
agents, max_dem, min_dem, tot_dem, ind_dem = [], [], [], [], []

# (result list, baseline allocator) pairs, evaluated in this order per trial.
baseline_runs = (
    (max_dem, allocate_slices_max_dem),
    (min_dem, allocate_slices_min_dem),
    (tot_dem, allocate_slices_tot_dem),
    (ind_dem, allocate_all_vnfs),
)

for _ in range(100):
    rq = np.random.randint(1, 29, (ns, nv))
    # Agent score: length of the first item returned by agent_allocate.
    agents.append(len(agent_allocate(Rc, rq, model)[0]))
    for dem_scores, baseline_fn in baseline_runs:
        # Second return value is presumably the per-slice unallocable matrix
        # (TODO confirm); the score counts rows whose sum is zero.
        second_result = baseline_fn(Rc, rq)[1]
        dem_scores.append((second_result.sum(axis=1) == 0).sum())

tuple(np.mean(scores) for scores in (agents, max_dem, min_dem, tot_dem, ind_dem))