In [1]:
#!pip install --upgrade tf_slim

import gym
import tensorflow as tf
import tensorflow.contrib.slim as slim
import tf_slim as slim
import numpy.random as rnd
import numpy as np
import random
import matplotlib as mpl
import matplotlib.pyplot as plt
from tensorflow import keras
from collections import deque # container em forma de lista, com rapida insercao e remocao nas duas extremidades 
from ns3gym import ns3env

#from sklearn.preprocessing import label_binarize
#from keras.layers import Dense, Flatten, Reshape

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
seed = 42
np.random.seed(seed)
tf.random.set_random_seed(seed)#tf.random.set_seed(seed)
random.seed(seed)

In [3]:
env = gym.make('ns3-v0') # environment ID 

ob_space = env.observation_space
ac_space = env.action_space
print("Observation space: ", ob_space,  ob_space.dtype)
print("Action space: ", ac_space, ac_space.dtype)

# numero de estados
state_size = ob_space.shape[0]

# numero de acoes
action_size = ac_space.shape[0]

print("\n")
print("Numero de Estados: ", state_size)
print("Numero de Acoes: ", action_size) 

Got new port for ns3gm interface:  5205
Observation space:  Box(0, 100, (5,), uint64) uint64
Action space:  Box(0, 100, (5,), uint64) uint64


Numero de Estados:  5
Numero de Acoes:  5


In [4]:
numOfEpisodes = 50 #300 #200
numTimeSlots = 50 #600 #495

discount_rate = 0.618 # gamma 0.618; 0.528
learning_rate = 0.7 # Learning rate 0.7

# Epsilon greedy parameters
epsilon = 1 # Epsilon-greedy algorithm in initialized at 1 meaning every step is random at the start
max_epsilon = 1 # You can't explore more than 100% of the time
min_epsilon = 0.01 # At a minimum, we'll always explore 1% of the time
decay = 0.01
epsilon_vec = np.zeros((numOfEpisodes,))

maxNofCollisions = 3

batch_size = 128 # tamanho do batch 128

MIN_REPLAY_SIZE = 1000 # tamanho minimo da memoria de repeticao

inputQueues = 1
cwSize = 100

In [5]:
def createModel(inputQueues, cwSize, loss='mse'):
    learning_rate = 0.01
    init = tf.keras.initializers.he_uniform()#tf.keras.initializers.HeUniform()
    model = keras.Sequential()
    # Input layer - número dos estados de entrada 
    model.add(keras.layers.Dense(inputQueues, input_shape=(inputQueues,), activation='relu', kernel_initializer=init))
    # Output layer - número de ações no ambiente
    model.add(keras.layers.Dense(cwSize, activation='linear', kernel_initializer=init))
    #model.add(keras.layers.Flatten())
    
    if(loss=='huber'):
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate), loss=tf.keras.losses.Huber(), metrics=['accuracy'])
        print('Using Huber loss...', "\n")
    elif(loss=='mse'):
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate), loss='mse', metrics=['accuracy'])
        print('Using MSE loss...', "\n")
    else:
        print('Loss not defined...', "\n")
    return model

def train(replay_memory, model, model_target, done, batch_size, acc_history=[], loss_history=[]):
    mini_batch = random.sample(replay_memory, batch_size)
    c_states = np.array([transition[0] for transition in mini_batch])    
    current_qs_list  = model.predict(c_states)
    #current_qs_list0 = model.predict(c_states[:,0]-c_states[:,1])
    #current_qs_list1 = model.predict(c_states[:,1]-c_states[:,2])
    #current_qs_list2 = model.predict(c_states[:,2]-c_states[:,3])
    #current_qs_list3 = model.predict(c_states[:,3]-c_states[:,4])
    n_states = np.array([transition[3] for transition in mini_batch]) 
    
    future_qs_list = model_target.predict(n_states)
    #future_qs_list0 = model_target.predict(n_states[:,0]-n_states[:,1])
    #future_qs_list1 = model_target.predict(n_states[:,1]-n_states[:,2])
    #future_qs_list2 = model_target.predict(n_states[:,2]-n_states[:,3])
    #future_qs_list3 = model_target.predict(n_states[:,3]-n_states[:,4])
    
    X = []
    Y = []
    for index, (c_state, actionVec, reward, n_state, done) in enumerate(mini_batch):
        
        if not done:
            max_future_q = reward + discount_rate * np.max(future_qs_list[index])
        else:
            max_future_q = reward

        
        current_qs = current_qs_list[index]
        current_qs[actionVec] = max_future_q

        X.append(c_state)
        Y.append(current_qs)

    # Train model with a mini-batch.
    history = model.fit(np.array(X), np.array(Y), batch_size=batch_size, verbose=0, shuffle=True)

    # Store metrics.
    acc_history.append(history.history['acc'][0])
    loss_history.append(history.history['loss'][0])

    return history

In [6]:
print("Prediction Network")
# The first model makes the predictions for Q-values which are used to make a action.
model = createModel(inputQueues, cwSize)

print("Target Network")
# Build a target model for the prediction of future rewards.
model_target = createModel(inputQueues, cwSize)
# Initialize target model's weights.
model_target.set_weights(model.get_weights())

Prediction Network
Using MSE loss... 

Target Network
Using MSE loss... 



In [7]:
model.metrics_names

['loss', 'acc']

In [8]:
acc_history = []
loss_history = []

episode_reward = np.zeros((numOfEpisodes,))
max_reward =  np.zeros((numOfEpisodes,))
replay_memory = deque(maxlen=50_000)

steps_to_update_target_model = 0


for episode in range(numOfEpisodes):
    
        
    print('Episode:', episode)

    collisionCnt = 0
    
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    #state = np.array(state)
    
                       
    for iteration in range(numTimeSlots):
        
                        
        steps_to_update_target_model += 1
        
        #print("Time Slot   :", iteration)
        #print("----------------------")
        
        #print("Estado atual:", state[0])
                  
        # Escolha da ação.
        if np.random.uniform() >= epsilon:
            #predicted = model.predict(state.reshape(1,4))
            #action = np.argmax(predicted[0])
            action0 = np.argmax(model.predict(state[:,0]-state[:,1])[0])
            action1 = np.argmax(model.predict(state[:,1]-state[:,2])[0])
            action2 = np.argmax(model.predict(state[:,2]-state[:,3])[0])
            action3 = np.argmax(model.predict(state[:,3]-state[:,4])[0])
            
            #print("Action (Q)") # action0, action1, action2, action3
        else:
            action0 = np.random.randint(0, cwSize)
            action1 = np.random.randint(0, cwSize)
            action2 = np.random.randint(0, cwSize)
            action3 = np.random.randint(0, cwSize)
            #print("Action (rnd)") # action0, action1, action2, action3
            
        
        actionVec = [action0, action1, action2, action3, 100]
        #print("ActionVec   :", actionVec)
        
        next_state, reward, done, _, _ = env.step(actionVec)
      
        next_state = np.reshape(next_state, [1, state_size])
        #next_state = np.array(next_state)
        #print("Next State  :", next_state[0])
        
        
        # Add information to replay buffer.
        replay_memory.append([state[0], actionVec, reward, next_state[0], done])          

        # Accumulate reward.
        episode_reward[episode] += reward
      
        # Plot received reward.
        #print("Reward      :", reward)
        #print("-------------", "\n")      

        
        
        # Train
        if len(replay_memory) >= MIN_REPLAY_SIZE:
            
            history = train(replay_memory, model, model_target, done, batch_size, acc_history, loss_history)

        # Is it the end of the episode?
        if done:
            # Update the the target network with new weights
            model_target.set_weights(model.get_weights())
            print('########## Game Over ##########')
            break
        
        # updates the current state
        state = next_state
        
    # Decrease epsilon along the way.
    epsilon = min_epsilon + (max_epsilon - min_epsilon) * np.exp(-decay * episode)
    epsilon_vec[episode] = epsilon 
        
    # Store maxmim possible reward per episode.
    max_reward[episode] = iteration + 1              
        
    #print('episode_reward:', episode_reward[episode])

Episode: 0
linear-wireless-mesh|123
Episode: 1
linear-wireless-mesh|123
Got new port for ns3gm interface:  7254
Episode: 2
linear-wireless-mesh|123
Got new port for ns3gm interface:  7007
Episode: 3
linear-wireless-mesh|123
Got new port for ns3gm interface:  6829
Episode: 4
linear-wireless-mesh|123
Got new port for ns3gm interface:  5840
Episode: 5
linear-wireless-mesh|123
Got new port for ns3gm interface:  9468
Episode: 6
linear-wireless-mesh|123
Got new port for ns3gm interface:  5713
Episode: 7
linear-wireless-mesh|123
Got new port for ns3gm interface:  9838
Episode: 8
linear-wireless-mesh|123
Got new port for ns3gm interface:  8457
Episode: 9
linear-wireless-mesh|123
Got new port for ns3gm interface:  5261
Episode: 10
linear-wireless-mesh|123
Got new port for ns3gm interface:  5245
Episode: 11
linear-wireless-mesh|123
Got new port for ns3gm interface:  5768
Episode: 12
linear-wireless-mesh|123
Got new port for ns3gm interface:  6792
Episode: 13
linear-wireless-mesh|123
Got new port

ValueError: Error when checking input: expected dense_input to have shape (1,) but got array with shape (5,)

In [None]:
from sklearn.preprocessing import label_binarize
state = label_binarize([0, 1, 2, 3], classes=[0, 1, 2, 3])

In [None]:
plt.plot(episode_reward, label='Actual reward')
plt.plot(max_reward, '.', label='Max. reward')
plt.xlabel('Episode')
plt.ylabel('Accumulated Reward')
plt.grid()
plt.legend()
plt.show()

In [None]:
max_reward

In [None]:
episode_reward

In [None]:
diff  = (max_reward - episode_reward)
plt.plot(diff, label='Diff')
plt.xlabel('Episode')
plt.ylabel('Difference')
plt.grid()
plt.legend()
plt.show()

In [None]:
plt.plot(epsilon_vec)
plt.xlabel('Episode')
plt.ylabel('Epsilon')
plt.grid()
plt.show()

In [None]:
# summarize history for accuracy
plt.plot(acc_history)
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend('train', loc='upper left')
plt.grid()
plt.show()

In [None]:
# summarize history for loss
plt.plot(loss_history)
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend('train', loc='upper left')
plt.grid()
plt.show()