<h1>Setting up the Environment</h1>

In [1]:
# Parameters shared by the environment and the networks below.
nb_actions = 5                # length of the action vector (critic action input / DDPG agent)
i_width, i_height = 227, 227  # width and height camera frames are resized to

In [2]:
import sys
import time

import numpy as np
import scipy.misc
from IPython.display import clear_output
from rl.core import Env

sys.path.append('/home/arusia/fastai/airsim/AirSim/PythonClient')
from AirSimClient import *
class simulation(Env):
    """keras-rl environment that steers an AirSim multirotor towards a fixed GPS target.

    An action is a 5-vector ``[dx, dy, dz, speed, duration]``: the first three
    entries give a direction (normalised before use), the fourth a speed and
    the fifth a duration for the velocity command.
    An observation is the scene camera image resized to ``(i_height, i_width)``
    with the alpha channel dropped.
    """

    reward_range = (-np.inf, np.inf)
    observation_space = None
    # Target position as [latitude, longitude, altitude].
    solar_coordinates = [47.64192159915037, -122.13949407490249,131.7526092529297]
    # Distance to the target measured at the previous step (set by reset()).
    old_dist=0
    # Multiplier applied to the shaped (distance-delta) reward.
    scale_reward_1=1
    # Multiplier applied to the action's duration; also divides the shaped reward.
    scale_time=1
    # Last multirotor state fetched from the simulator.
    state=None
    # Distance below which the target counts as reached.
    goal_radius=1
    # Rewards returned on collision / on reaching the target.
    collision_reward=-10
    goal_reward=10

    def __init__(self):
        """Connect to the simulator, take off and cache the initial vehicle state."""
        self._connect_and_takeoff()
        self.state=self.client.getMultirotorState()

    def _connect_and_takeoff(self):
        """Create a fresh client, take API control, arm the vehicle and take off."""
        self.client = MultirotorClient()
        self.client.confirmConnection()
        self.client.enableApiControl(True)
        self.client.armDisarm(True)
        self.client.takeoff()

    def get_image(self):
        """Return the scene camera frame as an ``(i_height, i_width, 3)`` image array."""
        image_response = self.client.simGetImages([ImageRequest(0, AirSimImageType.Scene, False, False)])[0]
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        image1d = np.frombuffer(image_response.image_data_uint8, dtype=np.uint8)
        image_rgba = image1d.reshape(image_response.height, image_response.width, 4)
        image_rgb = image_rgba[:,:,:3]  # drop the 4th (alpha) channel
        # NOTE(review): scipy.misc.imresize was removed in SciPy 1.3; this call
        # needs an older SciPy (with Pillow) until it is ported to another resizer.
        return scipy.misc.imresize(image_rgb,[i_height, i_width])

    def get_distance(self):
        """Refresh ``self.state`` and return the scaled distance to ``solar_coordinates``."""
        self.state=self.client.getMultirotorState()
        gps=self.state.gps_location
        coord=np.array([gps.latitude,gps.longitude,gps.altitude])
        diff=coord-np.asarray(self.solar_coordinates)
        # Scaling factors for latitudes and longitudes, so that degree offsets
        # are comparable to the altitude offset (presumably roughly metres).
        diff[0:2]=diff[0:2]*1e5
        return np.linalg.norm(diff)

    def get_reward(self):
        """Compute the reward for the step that just finished.

        Returns ``collision_reward`` if the vehicle has collided,
        ``goal_reward`` if it is within ``goal_radius`` of the target, and
        otherwise the scaled reduction in distance since the previous step.
        ``self.new_dist`` and ``self.old_dist`` are updated on every call so
        that the shaped reward always spans exactly one step.
        """
        self.new_dist=self.get_distance()
        previous_dist=self.old_dist
        # Update unconditionally: the original skipped this on the collision /
        # goal branches, leaving a stale baseline for the next shaped reward.
        self.old_dist=self.new_dist
        if self.state.collision.has_collided:
            return self.collision_reward
        if self.new_dist<self.goal_radius:
            return self.goal_reward
        return self.scale_reward_1*(previous_dist-self.new_dist)/self.scale_time

    def tonative(self,array):
        """Convert a NumPy scalar/array (or native value) to native Python types.

        Scalars come back as a Python scalar, 1-D arrays as a list of Python
        scalars. Needed because msgpack cannot serialise NumPy float types.
        """
        return np.asarray(array).tolist()

    def step(self, action):
        """Apply ``action`` in the simulator and return ``(observation, reward, done, info)``.

        ``action`` may be any 5-element sequence ``[dx, dy, dz, speed, duration]``.
        The call blocks for the commanded duration. ``done`` is True only when
        the target has been reached.
        NOTE(review): a collision does not end the episode here — confirm that
        this is intended.
        """
        act=np.asarray(action)

        #find the direction to move to
        direction=act[0:3]
        normpara=np.linalg.norm(direction)
        if normpara>0:
            direc=direction/normpara
        else:
            # A zero direction has no defined heading; command zero velocity
            # instead of sending NaNs to the simulator.
            direc=np.zeros(3)

        #find the velocity in each direction
        velocity = direc * act[3]

        #Careful, need conversion from float32 to native float for msgpack module
        velocity=self.tonative(velocity)
        t=self.tonative(act[4])*self.scale_time

        self.client.moveByVelocity(velocity[0], velocity[1], velocity[2], t)

        # Let the command run for its full duration before observing.
        time.sleep(t)

        clear_output(wait=True)
        #Get observations (get_reward refreshes self.state via get_distance)
        observation=self.get_image()
        reward = self.get_reward()
        done=bool(self.new_dist<self.goal_radius)
        info={}

        print(' \n action = {} reward={}'.format(action,reward))
        return observation,reward,done,info

    def reset(self):
        """Reset the simulator, reconnect, take off and return the first observation."""
        self.client.reset()
        self._connect_and_takeoff()
        # Baseline distance for the first shaped reward of the new episode.
        self.old_dist=self.get_distance()
        observation=self.get_image()
        self.state=self.client.getMultirotorState()
        return observation

    def __delete__(self):
        """Reset the simulator and release API control.

        Despite its name this is an explicit cleanup hook that callers invoke
        as ``env.__delete__()``; Python does not call it when the object is
        destroyed.
        """
        self.client.reset()
        self.client.enableApiControl(False)


    class a_space():
        """Action space: direction in [-1, 1]^3 followed by speed and time in [0, 1]."""

        def sample(self,seed=None):
            """Return a random action as a 5-element list.

            When ``seed`` is given, the draw is reproducible; otherwise the
            global NumPy random state is used.
            """
            rng=np.random if seed is None else np.random.RandomState(seed)
            direction=-1+rng.random_sample(3)*2
            speed_time=rng.random_sample(2)
            return [direction[0],direction[1],direction[2],speed_time[0],speed_time[1]]

        def contains(self,x):
            """Return True when ``x[0:3]`` lies in [-1, 1] and ``x[3:5]`` lies in [0, 1]."""
            if(max(x[0:3])>1 or min(x[0:3])<-1):
                return False
            # Speed and time components; the original re-tested x[0:3] here,
            # which rejected every action with a negative direction component.
            if(max(x[3:5])>1 or min(x[3:5])<0):
                return False
            return True

    action_space = a_space()

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


<h1>Getting a vanilla SqueezeNet to use as the driver</h1>

In [3]:
import numpy as np
from keras_squeezenet import SqueezeNet
from keras.applications.imagenet_utils import preprocess_input, decode_predictions
from keras.preprocessing import image

In [4]:
from keras import layers
from keras.layers import Dense,Flatten,Input,Concatenate
from keras.models import Model

def get_actor():
    """Build the actor network: a SqueezeNet trunk feeding five scalar heads.

    The output of SqueezeNet's second-to-last layer goes through a shared
    512-unit ReLU layer. Three tanh heads and two sigmoid heads are then
    concatenated into a single 5-element action vector.

    Returns:
        A Keras ``Model`` mapping the SqueezeNet input to the action vector.
    """
    backbone = SqueezeNet()
    shared = Dense(512, activation='relu')(backbone.layers[-2].output)

    # Head order defines the action layout: three tanh outputs, then two sigmoid outputs.
    head_activations = ['tanh', 'tanh', 'tanh', 'sigmoid', 'sigmoid']
    heads = [Dense(1, activation=name)(shared) for name in head_activations]

    action_vector = Concatenate()(heads)
    return Model(inputs=backbone.layers[0].output, outputs=action_vector)

def get_critic():
    """Build the critic network estimating Q(observation, action).

    The output of SqueezeNet's second-to-last layer is concatenated with the
    action vector (``nb_actions`` entries, input named ``"input_action"``),
    passed through a 512-unit ReLU layer and reduced to a single value.

    Returns:
        A Keras ``Model`` with inputs ``[image, action]`` and one scalar output.
    """
    model = SqueezeNet()

    features=model.layers[-2].output
    action=Input(shape=(nb_actions,),name="input_action")
    conc=Concatenate()([features,action])
    hid=Dense(512,activation='relu')(conc)
    # The Q-value head must be unbounded: a sigmoid would confine estimates to
    # (0, 1), which cannot represent negative returns or returns above 1.
    output_layer=Dense(1, activation='linear')(hid)
    Driver = Model(inputs=[model.layers[0].output,action], outputs=output_layer)
    return Driver

<h1>Writing a reinforcement learning routine</h1>

In [None]:
from rl.random import OrnsteinUhlenbeckProcess
class random_proc(OrnsteinUhlenbeckProcess):
    """Noise process whose last component is always made non-negative."""

    def sample(self):
        """Advance the process by one step and return the new sample.

        The entry at index ``self.size - 1`` is replaced by its absolute value
        before the sample is stored as ``self.x_prev``.
        """
        drift = self.theta * (self.mu - self.x_prev) * self.dt
        diffusion = self.current_sigma * np.sqrt(self.dt) * np.random.normal(size=self.size)
        noise = self.x_prev + drift + diffusion

        last = self.size - 1
        noise[last] = abs(noise[last])

        self.x_prev = noise
        self.n_steps += 1
        return noise

from rl.core import Processor
class Sim_processor(Processor):
    """Processor that clips actions into the ranges the simulation expects."""

    def process_step(self, observation, reward, done, info):
        """Pass the environment's step result through unchanged."""
        return (observation, reward, done, info)

    def process_action(self, action):
        """Clip ``action`` in place and return it.

        Entries 0-2 are limited to [-1, 1] and entries 3-4 to [0, 1].
        """
        direction_part = slice(0, 3)
        speed_time_part = slice(3, 5)
        action[direction_part] = np.clip(action[direction_part], -1.0, 1.0)
        action[speed_time_part] = np.clip(action[speed_time_part], 0.0, 1.0)
        return action

    @property
    def metrics(self):
        """Metrics reported during training; this processor defines none.

        # Returns
            An empty list.
        """
        return list()

    @property
    def metrics_names(self):
        """Names matching `metrics`; empty because there are no metrics."""
        return list()

In [None]:
import numpy as np

from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Input, Concatenate
from keras.optimizers import Adam

from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess


# Name used only to build the weights file name below.
ENV_NAME='airsim'
# Constructing the environment connects to the simulator and takes off.
env=simulation()
# Get actor and critic
actor=get_actor()
critic=get_critic()

# Configure and compile the agent. Any built-in Keras optimizer and metric can be used.
# Memory that stores past experience for training (window_length=1: one observation per state).
memory = SequentialMemory(limit=100000, window_length=1)

# Exploration noise is disabled here; the commented-out call shows the
# random_proc configuration that could be used instead.
random_process = None #random_proc(size=nb_actions, theta=.15, mu=0., sigma=.3)
# Clips every action into the valid ranges before it reaches the environment.
processor_instance=Sim_processor()
# critic.input[1] is the critic's action input (the second entry of its input list).
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=critic.input[1],
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,batch_size=2,
                  random_process=random_process, gamma=.97, target_model_update=0.5,processor=processor_instance)
agent.compile(Adam(lr=.01), metrics=['mae'])

# Train the agent. Visualization is switched off (visualize=False).
# Training can reportedly be aborted prematurely with Ctrl + C - TODO confirm
# for the keras-rl version in use.
# Uncomment the next line to resume from previously saved weights.
#agent.load_weights('ddpg_{}_weights.h5f'.format(ENV_NAME))
agent.fit(env, nb_steps=50000, visualize=False, verbose=1, nb_max_episode_steps=200)

# After training is done, we save the final weights.
agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
# NOTE(review): __delete__ resets the simulator and releases API control
# *before* the test run; the test presumably works only because env.reset()
# reconnects and re-enables API control - confirm, or move the cleanup after
# agent.test.
env.__delete__()
agent.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=200)

 
 action = [ 0.59640276 -0.15872204  0.41836798  0.9087057   0.05655092] reward=-10


In [None]:
# Show the metric names exposed by the agent's processor.
print(agent.processor.metrics_names)

In [None]:
# Draw agent.batch_size entries from the agent's memory for manual inspection.
experiences=agent.memory.sample(agent.batch_size)

In [None]:
from rl.memory import zeroed_observation
# Start a list with the first stored observation, then prepend the result of
# zeroed_observation() applied to that same observation.
x=[agent.memory.observations[0]]
x.insert(0,zeroed_observation(x[0]))

In [None]:
# Shape of element 1 of `state1` for the sampled experience at index 31.
# NOTE(review): the agent above is built with batch_size=2 and window_length=1,
# so indices 31 and 1 look like leftovers from a different configuration and
# are presumably out of range - confirm before running.
experiences[31].state1[1].shape

In [None]:
# Explicit cleanup: resets the simulator and releases API control.
env.__delete__()

In [None]:

# Save the agent's current weights to 'ddpg_<ENV_NAME>_weights.h5f', replacing
# any existing file (same call as at the end of the training cell).
agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)