# 0. Install Dependencies

In [None]:
!pip install tensorflow==2.3.0
!pip install gym
!pip install keras
!pip install keras-rl2

# 1. Test Random Environment with OpenAI Gym

In [2]:
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random
import math
import tensorflow
import matplotlib.pyplot as plt
import PIL.Image as Image


In [3]:
class uavagent(Env):
    def __init__(self, density):
        # Actions we can left or right
        self.action_space = Discrete(2) #so 0,1,2
        # Uav allowed locations
        self.observation_space = Box(low=np.array([0]),high=np.array([9]),dtype=int) #where is the agent
        # Set start location
        self.state = 0 
        # Set start step
        self.steps = 0
        # Set max duration
        self.max_steps = 80
        # Density Functions counts 
        self.count = 10000 
        # Copy Density
        self.density = density
        # Dot path
        self.path = {}

    def take_step(self,action):
        if action ==0:
            if self.state == 0:
                return
            else:
                self.state -= 1
        else:
            if self.state == 9:
                return
            else:
                self.state += 1

    def find_reward(self, action):
            dist = 0
            x = self.state
            if action == 0:
                if x == 0:
                    return -200
                else:
                    for z in self.density:
                        dist += math.sqrt((x-z)**2)**(3/2)    
            else:
                if x == 9:
                    return -200
                else:
                    for z in self.density:
                        dist += math.sqrt((x-z)**2)**(3/2)
            return -1 * (dist / self.count)

    def step(self, action):
        # Increase steps by one
        self.steps += 1
        # Find reward
        reward = self.find_reward(action)
        # Apply Actions,  0 left, 1 right
        self.take_step(action)
        # Check if steps are done
        if self.steps <= self.max_steps:
            done = False     
        else:
            done = True
        self.path[self.steps] = self.state
        # Set place holder for info
        info = {} #required by open gym ai ?
        # Return step information
        return self.state, reward, done, info

    def render(self,mode="human"):
        pass
        
    def reset(self):
        # Reset all variables
        self.state = 0
        self.steps = 0
        self.path = {}
        return self.state
        

In [4]:
count = 10000
mean = 4
standard_deviation = 1
density = np.random.normal(mean,standard_deviation,count)
env = uavagent(density)
print(density)

[2.214387   4.56656818 2.89157706 ... 4.36259194 3.12203713 2.400624  ]


In [5]:
env.observation_space.sample()

array([1])

In [6]:
episodes = 10
for episode in range(1, episodes+1):
    print(env.path)
    state = env.reset()
    done = False
    score = 0 
    while not done:
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score+=reward
    print('Episode:{} Score:{}'.format(episode, score))

{}
Episode:1 Score:-1783.3058747843295
{1: 1, 2: 0, 3: 1, 4: 0, 5: 1, 6: 2, 7: 1, 8: 2, 9: 3, 10: 4, 11: 5, 12: 6, 13: 5, 14: 6, 15: 7, 16: 8, 17: 7, 18: 8, 19: 7, 20: 6, 21: 5, 22: 6, 23: 5, 24: 4, 25: 5, 26: 6, 27: 5, 28: 6, 29: 7, 30: 6, 31: 7, 32: 8, 33: 9, 34: 8, 35: 7, 36: 6, 37: 7, 38: 8, 39: 9, 40: 9, 41: 9, 42: 9, 43: 8, 44: 7, 45: 8, 46: 9, 47: 9, 48: 8, 49: 7, 50: 6, 51: 5, 52: 6, 53: 7, 54: 6, 55: 7, 56: 8, 57: 9, 58: 8, 59: 7, 60: 8, 61: 7, 62: 8, 63: 7, 64: 6, 65: 5, 66: 6, 67: 5, 68: 4, 69: 5, 70: 6, 71: 5, 72: 6, 73: 7, 74: 8, 75: 9, 76: 9, 77: 9, 78: 9, 79: 8, 80: 9, 81: 8}
Episode:2 Score:-1891.743150397695
{1: 1, 2: 0, 3: 1, 4: 2, 5: 1, 6: 0, 7: 0, 8: 0, 9: 1, 10: 0, 11: 0, 12: 1, 13: 0, 14: 1, 15: 0, 16: 1, 17: 2, 18: 3, 19: 2, 20: 1, 21: 2, 22: 3, 23: 4, 24: 3, 25: 4, 26: 3, 27: 2, 28: 1, 29: 0, 30: 1, 31: 2, 32: 3, 33: 4, 34: 5, 35: 4, 36: 3, 37: 4, 38: 3, 39: 4, 40: 3, 41: 4, 42: 3, 43: 2, 44: 1, 45: 0, 46: 1, 47: 0, 48: 0, 49: 1, 50: 2, 51: 1, 52: 0, 53: 0, 54: 

# 2. Create a Deep Learning Model with Keras

In [7]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

In [8]:
states = env.observation_space.shape
actions = env.action_space.n

In [9]:
actions

2

In [18]:
def build_model(states, actions):
    model = tensorflow.keras.Sequential()
    model.add(Dense(24, activation='relu', input_shape=states))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model

In [19]:
del model

In [20]:
model = build_model(states, actions)

In [21]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 24)                48        
_________________________________________________________________
dense_7 (Dense)              (None, 24)                600       
_________________________________________________________________
dense_8 (Dense)              (None, 2)                 50        
Total params: 698
Trainable params: 698
Non-trainable params: 0
_________________________________________________________________


# 3. Build Agent with Keras-RL

In [22]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [23]:
def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, 
                  nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
    return dqn

In [24]:
dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

Training for 50000 steps ...
Interval 1 (0 steps performed)
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
   10/10000 [..............................] - ETA: 3:03 - reward: -25.7624



123 episodes - episode_reward: -539.263 [-9245.250, -194.194] - loss: 488.126 - mae: 63.262 - mean_q: -99.720

Interval 2 (10000 steps performed)
123 episodes - episode_reward: -204.853 [-251.648, -196.456] - loss: 84.100 - mae: 60.612 - mean_q: -105.423

Interval 3 (20000 steps performed)
124 episodes - episode_reward: -199.508 [-208.283, -196.456] - loss: 66.541 - mae: 56.628 - mean_q: -99.050

Interval 4 (30000 steps performed)
123 episodes - episode_reward: -200.238 [-220.110, -196.456] - loss: 66.200 - mae: 56.511 - mean_q: -99.290

Interval 5 (40000 steps performed)
done, took 1250.943 seconds


<tensorflow.python.keras.callbacks.History at 0x2b0bf265eb0>

In [25]:

scores = dqn.test(env, nb_episodes=1,visualize=False)
print(env.path)

print(np.mean(scores.history['episode_reward']))

Testing for 1 episodes ...
Episode 1: reward: -196.456, steps: 81
{1: 1, 2: 2, 3: 3, 4: 2, 5: 3, 6: 2, 7: 3, 8: 2, 9: 3, 10: 2, 11: 3, 12: 2, 13: 3, 14: 2, 15: 3, 16: 2, 17: 3, 18: 2, 19: 3, 20: 2, 21: 3, 22: 2, 23: 3, 24: 2, 25: 3, 26: 2, 27: 3, 28: 2, 29: 3, 30: 2, 31: 3, 32: 2, 33: 3, 34: 2, 35: 3, 36: 2, 37: 3, 38: 2, 39: 3, 40: 2, 41: 3, 42: 2, 43: 3, 44: 2, 45: 3, 46: 2, 47: 3, 48: 2, 49: 3, 50: 2, 51: 3, 52: 2, 53: 3, 54: 2, 55: 3, 56: 2, 57: 3, 58: 2, 59: 3, 60: 2, 61: 3, 62: 2, 63: 3, 64: 2, 65: 3, 66: 2, 67: 3, 68: 2, 69: 3, 70: 2, 71: 3, 72: 2, 73: 3, 74: 2, 75: 3, 76: 2, 77: 3, 78: 2, 79: 3, 80: 2, 81: 3}
-196.45581904216175
