In [2]:

#* fundemental modules
import gymnasium as gym
import highway_env
import matplotlib.pyplot as plt
import numpy as np
import os
import random
import gc
import time
import pprint
from tqdm.notebook import trange

In [3]:

#* display visuals 
from utils import record_videos, show_videos

In [4]:

#* deep learning modules
from keras.optimizers import Adam
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import Adam
import tensorflow as tf
import keras.backend as K
from keras.utils import plot_model
tf.random.set_seed(43)


2023-10-06 20:54:45.603890: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-10-06 20:54:45.805671: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-10-06 20:54:45.842135: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-10-06 20:54:45.842159: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudar

In [5]:

#* PER

class ReplayBuffer(object):
    """
    * init the values
    * for DQN actions are discrete
    """
    def __init__(self, max_size, min_size, input_shape, n_actions, discrete=True):
        self.mem_size = max_size
        self.mem_cntr = 0
        self.min_size = min_size
        self.discrete = discrete
        self.index = 0
        
        self.state_memory = np.zeros((self.mem_size, *input_shape), dtype=np.float16)
        self.new_state_memory = np.zeros((self.mem_size, *input_shape), dtype=np.float16)
        dtype = np.int8 if self.discrete else np.float16
        self.action_memory = np.zeros((self.mem_size, n_actions), dtype=dtype)
        self.reward_memory = np.zeros(self.mem_size, dtype = np.float16)
        self.terminal_memory = np.zeros(self.mem_size)
        self.priorities = np.zeros(self.mem_size, dtype=np.float32)

    def store_transition(self, state, action, reward, state_, done):

        index = self.mem_cntr % self.mem_size                
        self.state_memory[index] = state
        self.new_state_memory[index] = state_

        #* store one hot encoding of actions, if appropriate
        if self.discrete:
            #* Create an zeros-array size of the number of actions
            actions = np.zeros(self.action_memory.shape[1])
            #* Make 1 the value of performed action
            actions[action] = 1.0
            #* Store in action memory
            self.action_memory[index] = actions
        else:
            self.action_memory[index] = action

        #* store reward and if it's terminal info 
        self.reward_memory[index] = reward
        #* we send inverse done info!!!
        self.terminal_memory[index] = 1 - done
        self.priorities[index] = max((self.priorities.max()), 1.0)
        self.mem_cntr +=1
        self.index = self.mem_cntr

    def get_probabilities(self, priority_scale):
        scaled_priorities = np.array(self.priorities) ** priority_scale
        sample_probabilities = scaled_priorities / sum(scaled_priorities)
        return sample_probabilities
        
    def get_importance(self, probabilities):
        importance = 1/(self.mem_cntr) * 1/probabilities
        importance_normalized = importance / max(importance)
        return importance_normalized

    def sample_buffer(self, batch_size, priority_scale=1.0):
        
        if self.mem_cntr >= self.mem_size:
            self.index = self.mem_size
            
        sample_size = batch_size
        sample_probs = self.get_probabilities(priority_scale)
        sample_indices = random.choices(range(self.index), k=sample_size, weights=sample_probs[:self.index])

        states = self.state_memory[sample_indices]
        actions = self.action_memory[sample_indices]
        rewards = self.reward_memory[sample_indices]
        states_ = self.new_state_memory[sample_indices]
        terminal = self.terminal_memory[sample_indices]

        # samples = np.array(self.buffer)[sample_indices]
        importance = self.get_importance(sample_probs[sample_indices])
        return states, actions, rewards, states_, terminal, sample_indices

    def set_priorities(self, indices, errors, offset=0.1):
        for i,e in zip(indices, errors):
            error = abs(e) + offset
            clipped_error = np.minimum(error, 1.0)
            self.priorities[i] = clipped_error

In [6]:

#*DDQN agent

class DDQNAgent:

    def __init__(self, alpha, gamma, epsilon, obs_shape,
                 batch_size, epsilon_dec, epsilon_end, mem_size, 
                 min_mem_size, learning_rate, replace_target):

        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_dec = epsilon_dec
        self.epsilon_end = epsilon_end
        self.batch_size = batch_size
        self.mem_size = mem_size
        self.min_mem_size = min_mem_size
        self.replace_target = replace_target
        self.obs_shape = obs_shape
        self.learning_rate = learning_rate

        self.discrete_action_space = np.array([-1.0, -0.75, -0.5, -0.25, 0.0, 0.25, 0.5, 0.75, 1.0])
        self.n_actions = len(self.discrete_action_space)
        self.action_space = [i for i in range(self.n_actions)]

        self.memory = ReplayBuffer(max_size=self.mem_size, min_size=self.min_mem_size,input_shape=self.obs_shape,
                             n_actions=self.n_actions,discrete=True)
                        
        self.q_eval = self._make_model()
        self.q_target = self._make_model()      #we keep a target model which we update every K timesteps
        self.q_eval.summary()
        plot_model(self.q_eval, to_file='./model_ddqn.png')

    def _make_model(self):
        
        model = Sequential()
        model.add( Dense(512, input_dim = self.obs_shape[0], activation='relu') )
        model.add( Dense(512, activation='relu') )
        model.add( Dense( self.n_actions))
        model.compile(loss='mse',optimizer= Adam(learning_rate = self.learning_rate),metrics=["accuracy"]) # type: ignore
 
        return model

    def epsilon_decay(self):
        self.epsilon = self.epsilon*self.epsilon_dec if self.epsilon > self.epsilon_end \
        else self.epsilon_end

    def remember(self, state, action, reward, new_state, done):
        self.memory.store_transition(state, action, reward, new_state, done)

    def update_network_parameters(self):
        self.q_target.set_weights(self.q_eval.get_weights())
        
    def get_action(self, observation):

        if np.random.random() > self.epsilon: # type: ignore
    
            # observation = tf.convert_to_tensor(observation, dtype = tf.float16)

            qs_= self.q_eval.predict(observation)
            action_index = np.argmax(qs_)
            action = self.discrete_action_space[action_index]
        else:
            action_index = np.random.randint(0, self.n_actions)
            action = self.discrete_action_space[action_index]
        
        return action, action_index

    def train(self):

        if (self.memory.mem_cntr) < self.min_mem_size:
            return
        #* and ELSE:
        #* sample minibatch and get states vs..
        state, action, reward, new_state, done, sample_indices = \
                            self.memory.sample_buffer(self.batch_size)

        action_values = np.array(self.action_space, dtype=np.int8)
        action_indices = np.dot(action, action_values)

        # state = tf.convert_to_tensor(state, dtype = tf.float16)
        # new_state = tf.convert_to_tensor(new_state, dtype = tf.float16)
        # reward = tf.convert_to_tensor(reward, dtype = tf.float16)
        # done = tf.convert_to_tensor(done)
        # action_indices = tf.convert_to_tensor(action_indices, dtype=np.int8)
        
        #* get the q values of current states by main network
        q_pred = self.q_eval.predict(state)

        #! for abs error
        target_old = np.array(q_pred)

        #* get the q values of next states by target network
        q_next = self.q_target.predict(new_state) #! target_val

        #* get the q values of next states by main network
        q_eval = self.q_eval.predict(new_state) #! target_next

        #* get the actions with highest q values
        max_actions = np.argmax(q_eval, axis=1)

        #* we will update this dont worry
        q_target = q_pred

        batch_index = np.arange(self.batch_size, dtype=np.int32)

        #* new_q = reward + DISCOUNT * max_future_q
        q_target[batch_index, action_indices] = reward + \
                    self.gamma*q_next[batch_index, max_actions.astype(int)]*done

        #* error
        error = target_old[batch_index, action_indices]-q_target[batch_index, action_indices]
        self.memory.set_priorities(sample_indices, error)

        #* now we fit the main model (q_eval)
        _ = self.q_eval.fit(state, q_target, verbose='auto')

        #* If counter reaches set value, update target network with weights of main network
        #* it will update it at the very beginning also
        if self.memory.mem_cntr & self.replace_target == 0:
            self.update_network_parameters()
            print("Target Updated")

        gc.collect()
        K.clear_session()
        self.epsilon_decay()

    def save_model(self):
        print("-----saving models------")
        self.q_eval.save_weights("q_net.h5")
        # self.q_target.save_weights(self.network.checkpoint_file)



In [7]:

#* envirenment set-up

env = gym.make('racetrack-v0', render_mode='rgb_array')
env.configure({
    'action': {'lateral': True,
            'longitudinal': False,
            'target_speeds': [0, 5],
            'type': 'ContinuousAction'},
    "observation": {
        "type": "Kinematics",
        "vehicles_count": 2,
        "features": ["presence", "x", "y", "vx", "vy", "cos_h", "sin_h",
                     "heading", "long_off", "lat_off", "ang_off"],
    },
    "other_vehicles": 1,
    'show_trajectories': True,
     'offroad_terminal': True,
})

  logger.warn(


In [8]:

#* prints env configs
#* obs is flattened to 1D array for nn

pprint.pprint(env.config)
(obs, info), done = env.reset(), False
obs = np.array(obs.flatten())
print("Environment is setted up.")

{'action': {'lateral': True,
            'longitudinal': False,
            'target_speeds': [0, 5],
            'type': 'ContinuousAction'},
 'action_reward': -0.3,
 'centering_position': [0.5, 0.5],
 'collision_reward': -1,
 'controlled_vehicles': 1,
 'duration': 300,
 'lane_centering_cost': 4,
 'lane_centering_reward': 1,
 'manual_control': False,
 'observation': {'features': ['presence',
                              'x',
                              'y',
                              'vx',
                              'vy',
                              'cos_h',
                              'sin_h',
                              'heading',
                              'long_off',
                              'lat_off',
                              'ang_off'],
                 'type': 'Kinematics',
                 'vehicles_count': 2},
 'offroad_terminal': True,
 'offscreen_rendering': False,
 'other_vehicles': 1,
 'other_vehicles_type': 'highway_env.vehicle.behavior.IDMVehi

  logger.warn(


In [9]:

#* setting-up agent

agent = DDQNAgent(alpha=0.001, gamma=0.999, epsilon=1.0, obs_shape=obs.shape,
                  batch_size=64, epsilon_dec=0.993, epsilon_end=0.05, mem_size=10000,
                  min_mem_size=100, replace_target=1000, learning_rate=0.001)

print("Agent is initialized.")


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 512)               11776     
                                                                 
 dense_1 (Dense)             (None, 512)               262656    
                                                                 
 dense_2 (Dense)             (None, 9)                 4617      
                                                                 
Total params: 279,049
Trainable params: 279,049
Non-trainable params: 0
_________________________________________________________________
You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.
Agent is initialized.


2023-10-06 20:55:17.522097: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-10-06 20:55:17.522697: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-10-06 20:55:17.522800: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2023-10-06 20:55:17.522841: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2023-10-06 20:55:17.522877: W tensorflow/c

In [10]:
#* basic params for training

best_score = -1000.0
score_history = []


In [11]:
env = record_videos(env)

for episode in trange(1000, desc='Test episodes'):
        (observation, info), done = env.reset(), False
        observation = np.array(observation.flatten())


        done_ = False
        score = 0
        step = 0
        # env.render()
        while True:
            action, action_index = agent.get_action(observation.reshape((1,observation.shape[0])))
            new_observation, reward, done, truncated, info = env.step(action=[action])
            new_observation = np.array(new_observation.flatten())

            if info["crashed"] == True or info["rewards"]["on_road_reward"] == False:
                done_ = True
                reward = -1.0
            else: done_ = False

            score += reward
            agent.remember(state=observation, action=action_index, done=done,
                           reward=reward, new_state=new_observation)
            agent.train()

            observation = new_observation

            if done or done_:
                break

        score_history.append(score)
        avg_score = np.mean(score_history[-100:])

        if avg_score > best_score:
            best_score = avg_score
            agent.save_model()

        print('episode ', episode, 'score %.1f' % score,
               'avg score %.1f' % avg_score)

env.close()

  logger.warn(


Test episodes:   0%|          | 0/1000 [00:00<?, ?it/s]

-----saving models------
episode  0 score 1.3 avg score 1.3
-----saving models------
episode  1 score 3.3 avg score 2.3
-----saving models------
episode  2 score 13.7 avg score 6.1
-----saving models------
episode  3 score 9.0 avg score 6.8
episode  4 score 4.8 avg score 6.4
episode  5 score 2.1 avg score 5.7
episode  6 score 4.2 avg score 5.5
episode  7 score 4.1 avg score 5.3
episode  8 score 4.3 avg score 5.2
episode  9 score 0.8 avg score 4.8
episode  10 score 2.6 avg score 4.6
episode  11 score 2.6 avg score 4.4
episode  12 score 3.2 avg score 4.3
episode  13 score 10.0 avg score 4.7
episode  14 score 3.8 avg score 4.6
episode  15 score 1.7 avg score 4.5
episode  16 score 9.6 avg score 4.8
episode  17 score 0.7 avg score 4.5
episode  18 score 0.6 avg score 4.3
episode  19 score 4.2 avg score 4.3
episode  20 score 2.5 avg score 4.2
episode  21 score 2.5 avg score 4.2
episode  22 score 2.5 avg score 4.1
episode  23 score 6.4 avg score 4.2
episode  24 score 3.4 avg score 4.1
episode 



Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/highway/videos/rl-video-episode-0.mp4
-----saving models------
episode  138 score 9.6 avg score 8.5
-----saving models------
episode  139 score 16.2 avg score 8.6
-----saving models------
episode  140 score 16.3 avg score 8.8
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
-----saving models------
episode  141 score 19.4 avg score 9.0
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
-----saving models------
episode  142 score 15.1 avg score 9.1
-----saving models------
episode  143 score 12.7 avg score 9.2
-----saving models------
episode  144 score 4.4 avg score 9.3
-----saving models------
episode  145 score 32.4 avg score 9.6
-----saving models------
episode  146 score 66.7 avg score 10.2
-----saving models------
episode  147 score 21.8 avg score 10.4
-----saving models------
epi



Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/highway/videos/rl-video-episode-1.mp4
-----saving models------
episode  207 score 17.5 avg score 17.5
-----saving models------
episode  208 score 40.6 avg score 17.8
-----saving models------
episode  209 score 29.8 avg score 18.0
episode  210 score 20.0 avg score 17.9
-----saving models------
episode  211 score 50.5 avg score 18.3
-----saving models------
episode  212 score 39.4 avg score 18.6
-----saving models------
episode  213 score 20.4 avg score 18.6
-----saving models------
episode  214 score 25.6 avg score 18.7
-----saving models------
episode  215 score 27.8 avg score 18.8
episode  216 score 4.6 avg score 18.8
-----saving models------
episode  217 score 28.4 avg score 18.9
Moviepy - Building video /home/o/Documents/thesis/highway/videos/rl-video-episode-2.mp4.
Moviepy - Writing video /home/o/Documents/thesis/highway/videos/rl-video-episode-2.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/highway/videos/rl-video-episode-2.mp4
episode  218 score 12.1 avg score 18.8
-----saving models------
episode  219 score 29.2 avg score 18.9
-----saving models------
episode  220 score 23.8 avg score 19.0
-----saving models------
episode  221 score 24.5 avg score 19.1
episode  222 score 4.5 avg score 19.0
-----saving models------
episode  223 score 31.5 avg score 19.2
episode  224 score 2.7 avg score 19.0
episode  225 score 25.2 avg score 19.1
-----saving models------
episode  226 score 29.9 avg score 19.3
-----saving models------
episode  227 score 34.2 avg score 19.4
episode  228 score 16.0 avg score 19.4
-----saving models------
episode  229 score 29.7 avg score 19.5
episode  230 score 10.1 avg score 19.4
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Moviepy - Building video /home/o/Documents/thesis/highway/videos/rl-video-episode-3.mp4.
Moviepy -



Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/highway/videos/rl-video-episode-3.mp4
-----saving models------
episode  231 score 33.8 avg score 19.6
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
-----saving models------
episode  232 score 17.7 avg score 19.6
-----saving models------
episode  233 score 29.2 avg score 19.7
-----saving models------
episode  234 score 13.0 avg score 19.7
-----saving models------
episode  235 score 28.9 avg score 19.8
-----saving models------
episode  236 score 25.4 avg score 19.9
-----saving models------
episode  237 score 17.0 avg score 19.9
-----saving models------
episode  238 score 28.3 avg score 20.1
episode  239 score 13.7 avg score 20.1
episode  240 score 18.2 avg score 20.1
episode  241 score 17.4 avg score 20.1
Moviepy - Building video /home/o/Documents/thesis/highway/videos/rl-video-episode-4.mp4.
Moviepy - Writing video /home/o/Documents/thesis/highway/vid



Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/highway/videos/rl-video-episode-4.mp4
-----saving models------
episode  242 score 47.8 avg score 20.4
-----saving models------
episode  243 score 20.9 avg score 20.5
-----saving models------
episode  244 score 21.6 avg score 20.7
episode  245 score 27.7 avg score 20.6
episode  246 score 33.9 avg score 20.3
episode  247 score 27.0 avg score 20.3
episode  248 score 32.2 avg score 20.6
episode  249 score 17.6 avg score 20.6
episode  250 score 23.6 avg score 20.6
-----saving models------
episode  251 score 16.1 avg score 20.7
episode  252 score 10.7 avg score 20.6
episode  253 score 21.8 avg score 20.7
episode  254 score 27.1 avg score 20.7
-----saving models------
episode  255 score 21.1 avg score 20.7
-----saving models------
episode  256 score 28.7 avg score 20.7
-----saving models------
episode  257 score 37.3 avg score 20.9
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Up



Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/highway/videos/rl-video-episode-5.mp4
episode  306 score 11.8 avg score 22.2
episode  307 score 39.1 avg score 22.5
episode  308 score 24.6 avg score 22.3
-----saving models------
episode  309 score 54.8 avg score 22.5
episode  310 score 19.4 avg score 22.5
episode  311 score 16.1 avg score 22.2
episode  312 score 28.3 avg score 22.1
episode  313 score 31.9 avg score 22.2
episode  314 score 24.4 avg score 22.2
episode  315 score 16.3 avg score 22.1
episode  316 score 22.6 avg score 22.3
episode  317 score 25.8 avg score 22.2
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
-----saving models------
episode  318 score 51.5 avg score 22.6
episode  319 score 27.0 avg score 22.6
episode  320 score 18.0 avg score 22.5
-----saving models-----



Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/highway/videos/rl-video-episode-6.mp4
-----saving models------
episode  352 score 13.3 avg score 23.0
Moviepy - Building video /home/o/Documents/thesis/highway/videos/rl-video-episode-7.mp4.
Moviepy - Writing video /home/o/Documents/thesis/highway/videos/rl-video-episode-7.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/highway/videos/rl-video-episode-7.mp4
-----saving models------
episode  353 score 47.8 avg score 23.2
-----saving models------
episode  354 score 42.6 avg score 23.4
Moviepy - Building video /home/o/Documents/thesis/highway/videos/rl-video-episode-8.mp4.
Moviepy - Writing video /home/o/Documents/thesis/highway/videos/rl-video-episode-8.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/highway/videos/rl-video-episode-8.mp4
-----saving models------
episode  355 score 40.8 avg score 23.6
-----saving models------
episode  356 score 46.3 avg score 23.8
episode  357 score 3.8 avg score 23.4
Moviepy - Building video /home/o/Documents/thesis/highway/videos/rl-video-episode-9.mp4.
Moviepy - Writing video /home/o/Documents/thesis/highway/videos/rl-video-episode-9.mp4





Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/highway/videos/rl-video-episode-9.mp4
episode  358 score 2.6 avg score 23.2
episode  359 score 41.1 avg score 23.3
episode  360 score 17.8 avg score 23.2
episode  361 score 46.7 avg score 23.4
episode  362 score 33.1 avg score 23.4
episode  363 score 38.8 avg score 23.6
-----saving models------
episode  364 score 40.0 avg score 23.8
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
-----saving models------
episode  365 score 56.6 avg score 24.2
-----saving models------
episode  366 score 40.5 avg score 24.4
-----saving models------
episode  367 score 24.4 avg score 24.5
-----saving models------
episode  368 score 40.4 avg score 24.7
-----saving models------
episode  369 score 35.8 avg score 24.9
-----saving models------
episode  370 sco



Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/highway/videos/rl-video-episode-10.mp4
episode  380 score 17.9 avg score 26.0
episode  381 score 24.6 avg score 26.0
-----saving models------
episode  382 score 27.4 avg score 26.0
-----saving models------
episode  383 score 26.3 avg score 26.1
-----saving models------
episode  384 score 35.5 avg score 26.1
-----saving models------
episode  385 score 15.5 avg score 26.2
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
Target Updated
-----saving models------
episode  386 score 56.2 avg score 26.6
-----saving models------
episode  387 score 27.6 avg score 26.7
episode  388 score 20.9 avg score 26.6
-----saving models------
episode  389 score 30.4 avg score 26.8
episode  390 score 14.2 avg score 26.8
-----saving models------
episode  391 score 30.1 avg 



Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/highway/videos/rl-video-episode-11.mp4
episode  778 score 6.9 avg score 5.2
episode  779 score 5.7 avg score 5.2
episode  780 score 3.8 avg score 5.3
episode  781 score 1.5 avg score 5.3
episode  782 score 0.3 avg score 5.3
episode  783 score -0.1 avg score 5.3
episode  784 score 6.2 avg score 5.3
episode  785 score 1.2 avg score 5.4
episode  786 score 4.7 avg score 5.3
episode  787 score 1.6 avg score 5.3
episode  788 score 0.9 avg score 5.3
episode  789 score 3.7 avg score 5.4
episode  790 score -0.1 avg score 5.3
episode  791 score 1.3 avg score 5.3
episode  792 score 2.5 avg score 5.3
episode  793 score -0.1 avg score 5.3
episode  794 score -0.6 avg score 5.2
episode  795 score 4.8 avg score 5.3
episode  796 score -0.1 avg score 5.2
episode  797 score 1.0 avg score 5.2
episode  798 score 4.7 avg score 5.2
episode  799 score 5.6 avg score 5.2
episode  800 score 2.5 avg score 5.1
episode  801 score 3.9 avg score 5.1
epis

                                                   

Moviepy - Done !
Moviepy - video ready /home/o/Documents/thesis/highway/videos/rl-video-episode-12.mp4




In [14]:
h = np.save("score_history", np.array(score_history))

In [19]:
print(best_score)
print(avg_score)

27.02133133098047
1.974266679505516
