# Importing Libs

Most of the libraries are imported here; this also helps to ensure that all the packages are correctly installed.

_gametoy_ is our file containing **FooEnv** which is the simulator itself.

In [None]:
from gametoy import FooEnv 

In [None]:
import random
import gym
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
import time
import os
import matplotlib.pyplot as plt

# Initialize RL Parameters

These are the key parameters when working with this RL technique. They define: the learning rate (how fast does the network change?), the memory size (how many time steps are kept in our memory?) and the exploration decay (for how long will we prioritize random solutions over the learned ones?). Check the PDF of the work for more information about these parameters.

In [None]:
#ENV_NAME = ""

# Discount factor applied to the best predicted next-state Q-value in
# DQNSolver.experience_replay.
GAMMA = 0.95
# Step size handed to the Adam optimizer when the network is compiled.
LEARNING_RATE = 0.001

# Maximum number of transitions kept in the replay memory (deque maxlen).
MEMORY_SIZE = 10000
# Number of transitions sampled from the replay memory per training call.
BATCH_SIZE = 20

# Exploration rate the agent starts with (probability of a random action).
EXPLORATION_MAX = 1.0
# Lower bound the exploration rate is clamped to.
EXPLORATION_MIN = 0.01
# Factor the exploration rate is multiplied by each time experience_replay
# trains on a batch.
EXPLORATION_DECAY = 0.9999

Here we initialize our simulator. The first number selects the reward used, the second selects how we change the main parameters of the aircraft, and the third is, in this case, the velocity. Check the function code for more information.

In [None]:
# Build the simulator: reward option 6, parameter-change option 5, velocity 100
# (argument meaning as described in the text above; see FooEnv for details).
env = FooEnv(6,5,100)

# Functions

## Neural Network Function

The next class defines really important functions.
- **init**: defines the neural network that will be used. Users are encouraged to customize it themselves: try new possibilities, simpler or even far more complex networks.
- remember: saves the step and all relevant information for the technique called **Replay Memory**.
- **act**: exploration vs. exploitation; it is at this point that the code decides, during training, whether it should take a random action or the best learned action.
- play: same as act, but ensures that the neural network will not take random actions, only the best one learned.
- **experience replay**: applies the **Replay Memory** to our neural network; it is this function that trains the neural network.

In [None]:
class DQNSolver:
    """Deep Q-learning agent: a dense Q-network, a replay memory and an
    epsilon-greedy action rule.

    Reads the module-level constants EXPLORATION_MAX, EXPLORATION_MIN,
    EXPLORATION_DECAY, MEMORY_SIZE, BATCH_SIZE, GAMMA and LEARNING_RATE.
    """

    def __init__(self, observation_space, action_space):
        """Build the Q-network and an empty replay memory.

        Args:
            observation_space: Length of the observation vector fed to the
                network (size of the input layer).
            action_space: Number of discrete actions (size of the output
                layer, one Q-value per action).
        """
        self.exploration_rate = EXPLORATION_MAX

        self.action_space = action_space
        # Oldest transitions are discarded automatically once the deque is full.
        self.memory = deque(maxlen=MEMORY_SIZE)

        self.model = Sequential()
        self.model.add(Dense(9, input_shape=(observation_space,), activation="relu"))
        for _ in range(14):
            self.model.add(Dense(18, activation="relu"))
        self.model.add(Dense(9, activation="relu"))
        # Linear output: raw Q-value estimates, one per action.
        self.model.add(Dense(self.action_space, activation="linear"))
        self.model.compile(loss="mse", optimizer=self._make_optimizer())

    @staticmethod
    def _make_optimizer():
        """Return an Adam optimizer configured with LEARNING_RATE.

        Newer Keras releases name the argument ``learning_rate``; older ones
        only accept ``lr`` and raise TypeError for unknown keywords, so the
        legacy spelling is used as a fallback.
        """
        try:
            return Adam(learning_rate=LEARNING_RATE)
        except TypeError:
            return Adam(lr=LEARNING_RATE)

    def remember(self, state, action, reward, next_state, done):
        """Append one transition tuple to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Pick an action for training: random with probability
        ``exploration_rate``, otherwise the greedy action from ``play``.
        """
        if np.random.rand() < self.exploration_rate:
            return random.randrange(self.action_space)
        return self.play(state)

    def play(self, state):
        """Return the index of the action with the highest predicted Q-value.

        Only the first row of the prediction is used, so ``state`` is expected
        to be a single observation in a batch of one.
        """
        q_values = self.model.predict(state)
        return np.argmax(q_values[0])

    def experience_replay(self):
        """Train the network on a random batch of stored transitions.

        Does nothing until at least BATCH_SIZE transitions are stored. After a
        training pass the exploration rate is decayed and clamped to
        EXPLORATION_MIN.
        """
        if len(self.memory) < BATCH_SIZE:
            return
        batch = random.sample(self.memory, BATCH_SIZE)
        for state, action, reward, state_next, terminal in batch:
            # Target for the taken action: the reward alone on terminal
            # transitions, otherwise reward plus discounted best next Q-value.
            q_update = reward
            if not terminal:
                q_update = (reward + GAMMA * np.amax(self.model.predict(state_next)[0]))
            # Keep the current predictions for the other actions so that only
            # the taken action contributes to the loss.
            q_values = self.model.predict(state)
            q_values[0][action] = q_update
            self.model.fit(state, q_values, verbose=0)
        self.exploration_rate *= EXPLORATION_DECAY
        self.exploration_rate = max(EXPLORATION_MIN, self.exploration_rate)

## Score Function

In [None]:
class scorelog:
    """Track per-run scores with their running mean and append them to a CSV log.

    Attributes:
        n: Number of runs the score buffers were initially sized for.
        i: Number of runs logged so far.
        scores: Score of each logged run; entries ``[:i]`` are valid.
        mean_scores: Running mean after each logged run; entries ``[:i]`` are valid.
        mean_score, score, run, duration: Values of the most recent ``log`` call.
        logFile: Open handle on ``logfile_path`` in append mode.
    """

    def __init__(self, number_runs, logfile_path):
        """Allocate the score buffers and open the log file.

        Args:
            number_runs: Expected number of runs; the buffers grow on demand
                if more runs are logged.
            logfile_path: Path of the CSV log. A header line is written only
                when the file does not exist yet; otherwise rows are appended.
        """
        self.n = number_runs
        self.i = 0
        self.mean_scores = np.zeros(self.n)
        self.mean_score = 0
        self.scores = np.zeros(self.n)
        self.run = 0
        self.score = 0
        self.duration = 0
        self.logfile_path = logfile_path
        # Decide about the header before opening: opening creates the file.
        is_new_file = not os.path.exists(logfile_path)
        self.logFile = open(logfile_path, 'a')
        if is_new_file:
            self.logFile.write("Step,Mean_Reward,score,Time \n")

    def _grow(self):
        """Double the capacity of the score buffers (at least one more slot)."""
        extra = max(1, len(self.scores))
        self.scores = np.concatenate((self.scores, np.zeros(extra)))
        self.mean_scores = np.concatenate((self.mean_scores, np.zeros(extra)))

    def log(self, score, run, duration):
        """Record the result of one run and update the running mean.

        Args:
            score: Score obtained in the run.
            run: Run counter, written to the ``Step`` column by ``logwrite``.
            duration: Duration of the run, written to the ``Time`` column.
        """
        if self.i >= len(self.scores):
            self._grow()
        self.run = run
        self.score = score
        self.duration = duration
        self.i += 1
        # Incremental mean: m_i = m_(i-1) + (x_i - m_(i-1)) / i
        self.mean_score += (score - self.mean_score) / self.i
        # The i-th logged run lives in slot i-1 so that [:i] holds all of them.
        self.mean_scores[self.i - 1] = self.mean_score
        self.scores[self.i - 1] = score

    def logwrite(self):
        """Append the most recently logged run as one CSV row."""
        self.logFile.write("%d,%.3f,%.3f,%.3f \n" % (self.run, self.mean_score, self.score, self.duration))

    def logclose(self):
        """Close the log so pending rows reach the disk, then reopen it for appending."""
        self.logFile.close()
        self.logFile = open(self.logfile_path,'a')

    def scoreplot(self):
        """Show two plots: the running mean and the raw score of every logged run."""
        plt.plot(self.mean_scores[:self.i])
        plt.show()
        plt.plot(self.scores[:self.i])
        plt.show()

## Play and tests functions

These functions were created to make it easier to play a single game and see its results or plot it into a file.

In [None]:
def play_one(Filename):
    """Let the agent play one greedy game and plot the result to *Filename*.

    Uses the notebook globals ``env``, ``dqn_solver`` and
    ``observation_space``. Every transition is also stored in the agent's
    replay memory.
    """
    first_obs = env.reset()
    env.zrefer()
    # The network input is the observation with env.beta placed in front.
    state = np.reshape(np.append(env.beta, first_obs), [1, observation_space])
    # Fixed-length rollout of 10001 steps; the terminal flag is only recorded.
    for _ in range(10001):
        action = dqn_solver.play(state)
        raw_next, reward, terminal, _info = env.step(action)
        state_next = np.reshape(np.append(env.beta, raw_next), [1, observation_space])
        dqn_solver.remember(state, action, reward, state_next, terminal)
        state = state_next
    env.plot(Filename)
    print('Score:',env.score)

def play_render():
    """Let the agent play one greedy game and render the result in the notebook.

    Uses the notebook globals ``env``, ``dqn_solver`` and
    ``observation_space``. Every transition is also stored in the agent's
    replay memory.
    """
    first_obs = env.reset()
    env.zrefer()
    # The network input is the observation with env.beta placed in front.
    state = np.reshape(np.append(env.beta, first_obs), [1, observation_space])
    # Fixed-length rollout of 10001 steps; the terminal flag is only recorded.
    for _ in range(10001):
        action = dqn_solver.play(state)
        raw_next, reward, terminal, _info = env.step(action)
        state_next = np.reshape(np.append(env.beta, raw_next), [1, observation_space])
        dqn_solver.remember(state, action, reward, state_next, terminal)
        state = state_next
    env.render()
    print('Score:',env.score)

def play_dumb():
    """Play one game that always sends action 0 and render the result.

    Uses the notebook globals ``env``, ``dqn_solver`` and
    ``observation_space``. The network is never queried, but every transition
    is still stored in the agent's replay memory.
    """
    first_obs = env.reset()
    env.zrefer()
    # Same state layout as the other play helpers: env.beta comes first.
    state = np.reshape(np.append(env.beta, first_obs), [1, observation_space])
    action = 0
    # Fixed-length rollout of 10001 steps; the terminal flag is only recorded.
    for _ in range(10001):
        raw_next, reward, terminal, _info = env.step(action)
        state_next = np.reshape(np.append(env.beta, raw_next), [1, observation_space])
        dqn_solver.remember(state, action, reward, state_next, terminal)
        state = state_next
    env.render()
    print('Score:',env.score)
        
def play_vel(Filename):
    """Let the agent play one greedy game and plot it to *Filename* + ``env.V``.

    The simulator's ``V`` attribute (the velocity, per the original comment)
    is appended to the file name. Uses the notebook globals ``env``,
    ``dqn_solver`` and ``observation_space``; every transition is also stored
    in the agent's replay memory.
    """
    first_obs = env.reset()
    env.zrefer()
    # The network input is the observation with env.beta placed in front.
    state = np.reshape(np.append(env.beta, first_obs), [1, observation_space])
    # Fixed-length rollout of 10001 steps; the terminal flag is only recorded.
    for _ in range(10001):
        action = dqn_solver.play(state)
        raw_next, reward, terminal, _info = env.step(action)
        state_next = np.reshape(np.append(env.beta, raw_next), [1, observation_space])
        dqn_solver.remember(state, action, reward, state_next, terminal)
        state = state_next
    env.plot(Filename+str(env.V))
    print('Score:',env.score)

# Preparing files

In [None]:
# Network input size: the simulator's observation size plus one extra entry
# for env.beta, which is prepended to every state before it is fed to the net.
observation_space = env.observation_space+1 #defines the observation space
# Number of discrete actions; becomes the size of the network's output layer.
action_space  = env.action_space # receives the action space

In [None]:
# Training-run counter used by the training loop; kept in its own cell so the
# loop cell can be re-executed without resetting it.
run = 0 #initialize the counting to 0

In [None]:
# Create the agent; this builds and compiles a fresh, untrained network.
dqn_solver = DQNSolver(observation_space, action_space) #generates the neural network
dqn_solver.model.summary() #prints the structure of the generated neural network

Initializes some values and states where the files will be saved.

In [None]:
# Naming scheme and bookkeeping used by the training loop.
DQN_SAVE = "dqn_PCC_test_"   # file-name prefix of every saved network
filepath = 'NN/'             # output folder for networks, plots and the log
counter_dqn = 0              # index of the most recently named checkpoint
counter_play = 0             # index of the next gameplay plot
duration = 0                 # duration of the previous run (none yet)

# Create the output folder on first use.
if not os.path.exists(filepath):
    os.makedirs(filepath)

# Store the untrained network as checkpoint 0.
DQN_SAVE_FILE = '/'.join((filepath, DQN_SAVE + '0.h5'))
dqn_solver.model.save(DQN_SAVE_FILE)

# Score logger pre-sized for 100000 runs, writing into the output folder.
logfile = filepath+'/log.txt'
scoreLog = scorelog(100000,logfile)

# Sub-folder that receives the plots of the greedy test games.
filename_play = '/'.join((filepath, 'Gameplay/'))
if not os.path.exists(filename_play):
    os.makedirs(filename_play)

# Training

Here we have the code responsible for training our neural network and keeping the log. It is important to note that this is an infinite loop which is only broken by an _if_ condition; if you erase that condition, it will keep running forever! So, pay attention to this.


In [None]:
# Main training loop: each outer iteration logs and checkpoints the previous
# run, plays one greedy test game, and then trains on one new run.
while True:
    # Timer starts here, so the logged duration of a run also includes the
    # logging, saving, plotting and greedy test game done below.
    start_time = time.time()
    # env.score is read before env.reset(), i.e. it still holds the score of
    # the training run that finished in the previous iteration (on the first
    # pass it is whatever the freshly built simulator reports).
    scoreLog.log(env.score, run, duration)
    scoreLog.logwrite()
    scoreLog.logclose()
    if (run%5 == 0):
        #plots in the jupyter notebook the result each 5 games
        env.render()
    # run%1 is always 0, so this branch runs on every iteration; raise the
    # modulus to checkpoint less often.
    if (run%1 == 0):
        # Save under the current name first, then advance the name for the
        # next save.
        dqn_solver.model.save(DQN_SAVE_FILE)
        counter_dqn +=1 
        DQN_SAVE_FILE = filepath + '/' + DQN_SAVE + str(counter_dqn) +'.h5'
        FILENAME = filepath + '/' + DQN_SAVE + str(counter_dqn)
        # Plot the simulator state left by the last training run ...
        env.plot(FILENAME)
        filename_play = filepath + '/' + 'Gameplay/' + DQN_SAVE + 'gameplay_'+ str(counter_play)
        # ... then play one greedy game (this resets and reuses the same env).
        play_one(filename_play) #saves plots of playing mode
        counter_play +=1

    run += 1
    state = env.reset()
    beta = env.beta
    # Network input: observation with env.beta prepended, as a batch of one.
    state = np.reshape(np.append(beta,state), [1, observation_space])
    step = 0
    # Stop condition of the otherwise endless loop.
    if run > 100000:
        break


    # One training run: act, store the transition, train on a replay batch.
    while True:
        step += 1
        #env.render()
        action = dqn_solver.act(state)
        #action = 0
        state_next, reward, terminal, info = env.step(action)
        reward = reward# if not terminal else -reward
        state_next = np.reshape(np.append(env.beta,state_next), [1, observation_space])
        dqn_solver.remember(state, action, reward, state_next, terminal)
        state = state_next
        # A run ends when the simulator reports terminal or after 1501 steps.
        if terminal or step>1500:
            end_time = time.time()
            duration = end_time-start_time
            print ("Run: " + str(run) + ", exploration: " + str(dqn_solver.exploration_rate) + ", steps: " + str(step) + ", score: " + str(env.score), ", time:" + str(duration))
            #score_logger.add_score(step, run)
            break
        #if step%10 == 0:
        # Train after every non-final step.
        dqn_solver.experience_replay()
print('End')

# Loading Neural Network

The code presented from here on is responsible for loading an already saved neural network.

In [None]:
from keras.models import load_model

# File-name prefix of the saved network to load (an alternative is kept
# commented out for reference).
#DQN_SAVE = "dqn_PCC_toy_R-06_"
DQN_SAVE = "dqn_PCC_toy_R6-1_"

# NOTE: this rebinds the global `filepath`, so the later cells that build
# paths from it now point into this folder.
filepath = 'NeuralNetworks'

# '65' is presumably the checkpoint counter appended by the training loop —
# adjust it to the checkpoint you want to load.
DQN_SAVE_FILE = filepath + '/' + DQN_SAVE + '65' + '.h5'
# Replace the agent's network with the one stored on disk.
dqn_solver.model = load_model(DQN_SAVE_FILE)

In [None]:
dqn_solver.model.summary() # print the layer-by-layer structure of the loaded neural network

# Tests

## Notebook tests

The following cells test the obtained neural networks under the desired conditions.

In [None]:
# Rebinds the global `env` that the play_* helpers use; same first two
# arguments as the training simulator, third argument 80 instead of 100.
env = FooEnv(6,5,80) #loads the simulator with the wanted parameters

In [None]:
# Greedy game on the current global `env`; the result is rendered inline.
play_render() #plays and plots the result in this notebook

In [None]:
# Evaluation criteria used in this work for the agent's performance.
# env.Z is the response of the last game played, env.zref the reference
# response it is compared against (labelled "no control" below).

# Peak magnitude of the response relative to the peak of the reference.
peak_z = np.max(np.abs(env.Z))
peak_ref = np.max(np.abs(env.zref))
print('Overshoot%', 100*peak_z/peak_ref)

# Stabilization time of the response: an index near the end of the samples
# lying outside a +/-3% band (3% of the peak) around the second-to-last value.
# NOTE(review): both [-2] indices are kept from the original — confirm they
# are intended rather than [-1].
margin = peak_z*.03
final = env.Z[-2]
band = np.abs(margin)
outside = (env.Z < final - band) | (env.Z > final + band)
t1 = np.nonzero(outside)[0][-2]
print('Time to stabilize', t1, 'ms' )

# Same measurement on the reference signal.
margin = peak_ref*.03
final = env.zref[-2]
band = np.abs(margin)
outside = (env.zref < final - band) | (env.zref > final + band)
t2 = np.nonzero(outside)[0][-2]
print('No control time to stabilize',t2, 'ms' )

print('Relative time to stabilize',t1/t2*100,)


# Peak-to-peak range of the response relative to that of the reference.
z_range = np.max(env.Z)-np.min(env.Z)
ref_range = np.max(env.zref)-np.min(env.zref)
print('Delta Max%', 100*z_range / ref_range  )


In [None]:
# Baseline game on the current global `env`: action 0 is sent at every step.
play_dumb() #plays always sending action 0.

# Log Loading

In [None]:
import pandas as pd
# Log file inside the current `filepath` folder (whatever that global was
# last set to by an earlier cell).
logfile_path = filepath + '/' + 'log.txt'

# Load the comma-separated training log and preview its first rows.
Log = pd.read_csv(logfile_path)
Log.head()

In [None]:
# Plot the "Episode" column, skipping the first five rows.
# NOTE(review): the header written by scorelog in this notebook is
# "Step,Mean_Reward,score,Time " and has no "Episode" column, so this only
# works for logs produced with the older header — confirm which log is loaded.
plt.plot(Log.Episode[5:])
plt.show()

# File saving tests

Run the tests that plot their results to a file.

In [None]:
# Output name for a single greedy test game; the 'Gameplay/' folder under the
# current `filepath` must already exist.
filename_play = filepath + '/' + 'Gameplay/' + DQN_SAVE + 'gameplay_'+ 'test_'
play_one(filename_play)

In [None]:
# Make sure the output folder for the velocity tests exists.
filename_play = filepath + '/' + 'Test_vel/' 
if not os.path.exists(filename_play):
    os.makedirs(filename_play)
# Common file-name prefix; play_vel appends the simulator's V to it.
filename_test = filepath + '/' + 'Test_vel/' + DQN_SAVE + 'Vel_'

# NOTE(review): FooEnv is called with two arguments here (three elsewhere);
# presumably the velocity then falls back to a default — confirm in FooEnv.
env = FooEnv(6,1)
# Ten greedy games, each plotted to a file named after env.V.
for i in range(10):
    play_vel(filename_test)