In [None]:
!pip install gymnasium[atari] --quiet
!pip install gymnasium --quiet
!pip install -U gymnasium[atari] --quiet
!pip install imageio_ffmpeg --quiet
!pip install npy_append_array --quiet
!pip install pyTelegramBotAPI --quiet
!pip install gymnasium[accept-rom-license] --quiet
!pip install gymnasium[box2d] --quiet

In [None]:
import tensorflow as tf 
from tensorflow.keras.layers import Dense, Conv2D, Input
 
class PolicyNetwork2D(tf.keras.Model):
    """Fully connected policy network that outputs action probabilities.

    Two ReLU hidden layers (24 and 36 units) are followed by a softmax layer
    with ``action_dim`` units.
    """

    def __init__(self, action_dim=1):
        """Create the three dense layers; ``action_dim`` sizes the output layer."""
        super().__init__()
        self.fc1 = Dense(24, activation="relu")
        self.fc2 = Dense(36, activation="relu")
        self.fc3 = Dense(action_dim, activation="softmax")

    def call(self, state):
        """Pass ``state`` through fc1 -> fc2 -> fc3 and return the softmax output."""
        hidden = self.fc2(self.fc1(state))
        return self.fc3(hidden)


In [None]:
from tensorflow.keras.optimizers import Adam
import numpy as np 
import tensorflow_probability as tfp 

class PolicyGradientAgent:
  """Policy-gradient (REINFORCE-style) agent wrapping a ``PolicyNetwork2D``.

  Actions are sampled from the network's softmax output. ``learn`` takes the
  transitions of one episode and applies one optimizer step per transition
  using the loss ``-log_prob(action) * discounted_return``.
  """

  def __init__(self, input_dims, out_dims, lr, action_space, gamma, chpkt, algo_name):
    """Build the policy network and its optimizer.

    Args:
      input_dims: stored on the instance; not otherwise read by this class.
      out_dims: number of output units of the policy network.
      lr: learning rate of the Adam optimizer used for the policy updates.
      action_space: stored on the instance; not otherwise read by this class.
      gamma: discount factor applied when computing returns.
      chpkt: path prefix for saved models.
      algo_name: appended to ``chpkt`` (plus ``"_"``) to form the save path.
    """
    self.input_dims = input_dims
    self.out_dims = out_dims
    self.lr = lr
    self.action_space = action_space
    self.gamma = gamma
    # This is the optimizer `learn` steps. It used to be hard-coded to 1e-3,
    # which silently ignored the `lr` argument.
    self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.lr)

    self.policy_network = PolicyNetwork2D(self.out_dims)
    # compile() is not used by the custom update loop in `learn`; it is kept
    # as it was so the network is saved in a compiled state.
    self.policy_network.compile(optimizer=Adam(learning_rate=self.lr))

    self.fname = chpkt + algo_name + "_"

  def save_model(self):
    """Save the policy network to ``self.fname``."""
    self.policy_network.save(self.fname)
    print("[+] Saved the model!!")

  def load_model(self):
    """Replace the policy network with the model stored at ``self.fname``."""
    # `tf.models` does not exist; the loader lives under `tf.keras.models`.
    self.policy_network = tf.keras.models.load_model(self.fname)
    print("[+] Loaded the mode!!")

  def policy(self, state):
    """Sample one action for ``state`` and return it as a tensor.

    ``state`` is flattened into a single-row batch before the forward pass.
    """
    state = state.reshape(1, -1)
    state = tf.convert_to_tensor(state)
    # The network ends in a softmax, so its output is probabilities; the log
    # turns them into the logits that tf.random.categorical expects.
    action_probs = self.policy_network(state)
    action = tf.random.categorical(tf.math.log(action_probs), num_samples=1)
    return action

  def get_action(self, state):
    """Sample an action for ``state`` and return it as a squeezed numpy value."""
    action = self.policy(state).numpy()
    return action.squeeze()

  def _discounted_returns(self, rewards):
    """Return G_t = r_t + gamma * G_{t+1} for every step, in time order."""
    returns = []
    running = 0
    # Walk the episode backwards so each return reuses the one after it.
    for reward in rewards[::-1]:
      running = reward + self.gamma * running
      returns.append(running)
    return returns[::-1]

  def learn(self, rewards, actions, states):
    """Update the policy from one episode of experience.

    Args:
      rewards: per-step rewards, in time order.
      actions: the action taken at each step.
      states: the observation each action was taken from.

    One gradient step is applied per transition, so later transitions of the
    same episode are evaluated with already-updated weights.
    """
    discounted_rewards = self._discounted_returns(rewards)

    for discounted_reward, state, action in zip(discounted_rewards, states, actions):
      with tf.GradientTape() as tape:
        action_probs = self.policy_network(np.array(state).reshape(1, -1), training=True)
        loss = self.loss(action_probs, action, discounted_reward)

      params = self.policy_network.trainable_variables
      grads = tape.gradient(loss, params)
      self.optimizer.apply_gradients(zip(grads, params))

  def loss(self, action_probabilities, actions, rewards):
    """Return ``-log_prob(actions) * rewards`` under the given probabilities."""
    dist = tfp.distributions.Categorical(
            probs=action_probabilities, dtype=tf.float32
        )
    log_prob = dist.log_prob(actions)
    loss = -log_prob * rewards
    return loss
    

In [None]:
import collections
import cv2
import numpy as np
import matplotlib.pyplot as plt
import gymnasium as gym
import tensorflow as tf
from gymnasium.wrappers import *


def manage_memory():
    """Turn on memory growth for every GPU that TensorFlow can see.

    Does nothing when no GPU is listed. A ``RuntimeError`` raised while
    configuring the devices is printed rather than propagated.
    """
    devices = tf.config.list_physical_devices('GPU')
    if not devices:
        return
    try:
        for device in devices:
            tf.config.experimental.set_memory_growth(device, True)
    except RuntimeError as err:
        print(err)


def _running_average(scores, window=100):
    """Return, for each index, the mean of the last ``window`` scores up to it."""
    averages = np.zeros(len(scores))
    for i in range(len(scores)):
        # `i - window + 1` makes the slice exactly `window` long once enough
        # scores exist (the previous `i - 100` start averaged 101 values).
        start = max(0, i - window + 1)
        averages[i] = np.mean(scores[start:i + 1])
    return averages


def plot_learning_curve(scores, figure_file, window=100):
    """Plot the running average of ``scores`` and save it to ``figure_file``.

    Args:
        scores: sequence of per-episode scores.
        figure_file: path (or file-like object) passed to ``savefig``.
        window: number of most recent scores averaged at each point.

    Raises:
        ValueError: if ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be at least 1")

    running_avg = _running_average(scores, window)
    # A dedicated figure keeps repeated calls from drawing on top of each other.
    fig, ax = plt.subplots()
    ax.plot(range(len(scores)), running_avg)
    ax.set_title(f'Running average of previous {window} scores')
    fig.savefig(figure_file)


def make_env(env_name):
    """Create ``env_name`` with ``rgb_array`` rendering.

    Environments whose observation shape has three or more dimensions are
    additionally resized to 84, converted to grayscale, stacked four frames
    deep and normalized; all other environments are returned unwrapped.
    """
    env = gym.make(env_name, render_mode="rgb_array")

    obs_rank = len(env.observation_space.shape)
    if obs_rank < 3:
        return env

    env = ResizeObservation(env, 84)
    env = GrayScaleObservation(env, keep_dim=False)
    env = FrameStack(env, 4, lz4_compress=False)
    return NormalizeObservation(env)

In [None]:
class Writer:
    """Append-only text log bound to one file path."""

    def __init__(self, fname):
        """Remember ``fname`` as the file that ``write_to_file`` appends to."""
        self.fname = fname

    def write_to_file(self, content):
        """Append ``content`` followed by a newline to the log file.

        ``content`` is formatted with ``str()`` semantics, so non-string
        values (numbers, etc.) are accepted as well.
        """
        with open(self.fname, "a") as file:
            file.write(f"{content}\n")

    def read_file(self, fname=None):
        """Return the full text of ``fname``, or of this writer's own file.

        Args:
            fname: path to read; when omitted, ``self.fname`` is read.
        """
        path = self.fname if fname is None else fname
        with open(path, "r") as file:
            return file.read()
            

In [None]:
import time
from telebot import TeleBot
import datetime
import telebot

import os

# The bot token is a credential and must not live in the notebook: it is read
# from the TELEGRAM_BOT_TOKEN environment variable instead. The token that was
# previously hard-coded here has been exposed and should be revoked.
token = os.environ.get("TELEGRAM_BOT_TOKEN")
# The chat id can be overridden with TELEGRAM_CHAT_ID; the default keeps the
# value this notebook used before.
chat_id = os.environ.get("TELEGRAM_CHAT_ID", "1055055706")
# `bot` is None when no token is configured, so running this cell does not
# require Telegram credentials.
bot = TeleBot(token=token) if token else None

def telegram_send(message, bot, chat_id="1055055706"):
    """Send ``message`` to a Telegram chat through ``bot``.

    Args:
        message: text to send.
        bot: object exposing ``send_message(chat_id=..., text=...)``; when it
            is ``None`` a notice is printed and nothing is sent.
        chat_id: destination chat; defaults to the id this function has
            always used.
    """
    if bot is None:
        print("[!] Telegram bot is not configured; message not sent.")
        return
    bot.send_message(chat_id=chat_id, text=message)

def welcome_msg(multi_step, double_dqn, dueling):
    """Send the start-of-training announcement with the given settings."""
    settings = (multi_step, double_dqn, dueling)
    greeting = 'Hi! Starting learning with DQN Multi-step = %d, Double DQN = %r, Dueling DQN = %r' % settings
    telegram_send(greeting, bot)
    
def info_msg(episode, max_episode, reward, best_score, loss):
    """Send a one-line progress report for the current episode."""
    report = f"Current Episode: {episode}, Current Reward: {reward}, Max Episode: {max_episode}, Best Score: {best_score}, loss: {loss}"
    telegram_send(message=report, bot=bot)

def end_msg(learning_time):
    """Send and print the total learning time, given in seconds."""
    # Fractions of a second are dropped before formatting as H:MM:SS.
    elapsed = datetime.timedelta(seconds=int(learning_time))
    summary = 'Finished! Learning time: ' + str(elapsed)
    telegram_send(summary, bot)
    print(summary)


In [None]:
import numpy as np 
import imageio


class RecordVideo:
    """Buffers rendered frames and writes them out as ``.mp4`` clips."""

    def __init__(self, prefix_fname,  out_directory="videos/", fps=10):
        """Configure where and how clips are written.

        Args:
            prefix_fname: file-name prefix of every clip.
            out_directory: directory the clips are written to.
            fps: frame rate passed to the video writer.
        """
        self.prefix_fname = prefix_fname
        self.out_directory = out_directory
        self.fps = fps
        self.images = []

    def add_image(self, image):
        """Append one frame to the pending clip."""
        self.images.append(image)

    def save(self, episode_no):
        """Write the buffered frames to ``<prefix>_<episode_no>.mp4`` and reset.

        Nothing is written when no frames are buffered. The output directory
        is created if it does not exist yet.
        """
        import os  # local import: this cell's import list does not provide it

        if not self.images:
            return

        if self.out_directory:
            os.makedirs(self.out_directory, exist_ok=True)
        # os.path.join yields the same path as before for "videos/" and also
        # works when the directory is given without a trailing separator.
        name = os.path.join(
            self.out_directory,
            self.prefix_fname + "_" + str(episode_no) + ".mp4",
        )
        imageio.mimsave(name, [np.array(img) for img in self.images], fps=self.fps)
        self.images = []

In [None]:
class Trainer:
  """Runs episodic training of a ``PolicyGradientAgent`` on an environment."""

  def __init__(self, env, action_space, input_dims, out_dims, video_prefix, is_tg, noe, max_steps, record, lr, gamma, chkpt, algo_name):
    """Store the run settings and build the recorder and the agent.

    Args:
      env: environment exposing ``reset``, ``step`` and ``render``.
      action_space, input_dims, out_dims, lr, gamma, chkpt, algo_name:
        forwarded to ``PolicyGradientAgent``.
      video_prefix: file-name prefix handed to ``RecordVideo``.
      is_tg: stored on the instance; not read by this class.
      noe: number of episodes to train for.
      max_steps: upper bound on the number of steps per episode.
      record: when truthy, every 50th episode is recorded to video.
    """
    self.env = env
    self.noe = noe
    self.max_steps = max_steps

    self.recorder = RecordVideo(video_prefix)
    self.is_tg = is_tg
    self.record = record
    self.agent = PolicyGradientAgent(input_dims, out_dims, lr, action_space, gamma, chkpt, algo_name)

  def train(self):
    """Train for ``self.noe`` episodes.

    Returns:
      ``(ep_rewards, avg_rewards)``: the total reward of every episode and,
      for every episode, the mean over the last (up to) 100 totals.
    """
    ep_rewards = []
    avg_rewards = []
    best_reward = float("-inf")

    for episode in range(self.noe):
      # reset() may return either the observation or an (observation, info)
      # tuple; unpack once here instead of re-checking on every step.
      reset_result = self.env.reset()
      state = reset_result[0] if isinstance(reset_result, tuple) else reset_result

      rewards = []
      actions = []
      states = []

      # Uses the instance setting; the in-loop check previously read a
      # module-level `record` variable instead of `self.record`.
      record_episode = self.record and episode % 50 == 0

      if record_episode:
        self.recorder.add_image(self.env.render())

      for step in range(self.max_steps):
        states.append(state)
        action = self.agent.get_action(state)

        next_state, reward, terminated, truncated, _ = self.env.step(action)

        rewards.append(reward)
        actions.append(action)
        state = next_state

        if record_episode:
          self.recorder.add_image(self.env.render())

        if terminated or truncated:
          break

      # Learn from every episode, including one cut off by `max_steps`
      # (previously such an episode was discarded without an update).
      if rewards:
        self.agent.learn(rewards, actions, states)

      if record_episode:
        self.recorder.save(episode)

      episode_reward = sum(rewards)
      if episode_reward > best_reward:
        self.agent.save_model()
        best_reward = episode_reward

      ep_rewards.append(episode_reward)
      avg_reward = np.mean(ep_rewards[-100:])
      avg_rewards.append(avg_reward)
      print(f"Episode: {episode} Reward: {episode_reward} Best Score: {best_reward}, Average Reward: {avg_reward}")

    return ep_rewards, avg_rewards



In [None]:
import os
import pickle


# --- Run configuration -------------------------------------------------------
env = make_env("CartPole-v1")
action_space = list(range(env.action_space.n))
n_actions = len(action_space)
input_dims = env.observation_space.shape
noe = 1000
print(input_dims, n_actions)
max_steps = 100000000
video_prefix = "policy_gradient_reinforce"
is_tg = True
record = True
# 1e-3 is the step size the recorded run below was actually trained with: the
# agent's update optimizer was fixed at 1e-3 regardless of the 1e-5 that used
# to be configured here.
lr = 1e-3
gamma = 0.94
chpkt = 'models/'
algo_name = "cartpole_reinforce"

if __name__ == "__main__":
  # RecordVideo writes to "videos/" by default and checkpoints go under
  # `chpkt`; make sure both exist before training starts.
  os.makedirs("videos/", exist_ok=True)
  os.makedirs(chpkt, exist_ok=True)

  trainer = Trainer(env, action_space, input_dims, n_actions, video_prefix, is_tg, noe, max_steps, record, lr, gamma, chpkt, algo_name)
  ep_rewards, avg_rewards = trainer.train()

  # Persist both reward curves so they can be plotted without retraining.
  with open("pg_episode_rewards.obj", "wb") as f:
    pickle.dump(ep_rewards, f)

  with open("pg_avg_rewards.obj", "wb") as f:
    pickle.dump(avg_rewards, f)

(4,) 2




Episode: 0 Reward: 22.0 Best Score: 22.0, Average Reward: 22.0
Episode: 1 Reward: 15.0 Best Score: 22.0, Average Reward: 18.5
Episode: 2 Reward: 18.0 Best Score: 22.0, Average Reward: 18.333333333333332
Episode: 3 Reward: 13.0 Best Score: 22.0, Average Reward: 17.0
Episode: 4 Reward: 13.0 Best Score: 22.0, Average Reward: 16.2
Episode: 5 Reward: 17.0 Best Score: 22.0, Average Reward: 16.333333333333332
Episode: 6 Reward: 15.0 Best Score: 22.0, Average Reward: 16.142857142857142
Episode: 7 Reward: 13.0 Best Score: 22.0, Average Reward: 15.75
Episode: 8 Reward: 17.0 Best Score: 22.0, Average Reward: 15.88888888888889




Episode: 9 Reward: 33.0 Best Score: 33.0, Average Reward: 17.6
Episode: 10 Reward: 16.0 Best Score: 33.0, Average Reward: 17.454545454545453
Episode: 11 Reward: 19.0 Best Score: 33.0, Average Reward: 17.583333333333332




Episode: 12 Reward: 38.0 Best Score: 38.0, Average Reward: 19.153846153846153
Episode: 13 Reward: 22.0 Best Score: 38.0, Average Reward: 19.357142857142858
Episode: 14 Reward: 14.0 Best Score: 38.0, Average Reward: 19.0
Episode: 15 Reward: 25.0 Best Score: 38.0, Average Reward: 19.375
Episode: 16 Reward: 10.0 Best Score: 38.0, Average Reward: 18.823529411764707
Episode: 17 Reward: 12.0 Best Score: 38.0, Average Reward: 18.444444444444443




Episode: 18 Reward: 59.0 Best Score: 59.0, Average Reward: 20.57894736842105
Episode: 19 Reward: 29.0 Best Score: 59.0, Average Reward: 21.0
Episode: 20 Reward: 16.0 Best Score: 59.0, Average Reward: 20.761904761904763
Episode: 21 Reward: 40.0 Best Score: 59.0, Average Reward: 21.636363636363637
Episode: 22 Reward: 18.0 Best Score: 59.0, Average Reward: 21.47826086956522
Episode: 23 Reward: 15.0 Best Score: 59.0, Average Reward: 21.208333333333332
Episode: 24 Reward: 13.0 Best Score: 59.0, Average Reward: 20.88
Episode: 25 Reward: 20.0 Best Score: 59.0, Average Reward: 20.846153846153847
Episode: 26 Reward: 14.0 Best Score: 59.0, Average Reward: 20.59259259259259
Episode: 27 Reward: 38.0 Best Score: 59.0, Average Reward: 21.214285714285715
Episode: 28 Reward: 32.0 Best Score: 59.0, Average Reward: 21.586206896551722
Episode: 29 Reward: 36.0 Best Score: 59.0, Average Reward: 22.066666666666666
Episode: 30 Reward: 12.0 Best Score: 59.0, Average Reward: 21.741935483870968




Episode: 31 Reward: 86.0 Best Score: 86.0, Average Reward: 23.75
Episode: 32 Reward: 13.0 Best Score: 86.0, Average Reward: 23.424242424242426
Episode: 33 Reward: 29.0 Best Score: 86.0, Average Reward: 23.58823529411765
Episode: 34 Reward: 37.0 Best Score: 86.0, Average Reward: 23.97142857142857
Episode: 35 Reward: 56.0 Best Score: 86.0, Average Reward: 24.86111111111111
Episode: 36 Reward: 60.0 Best Score: 86.0, Average Reward: 25.81081081081081
Episode: 37 Reward: 34.0 Best Score: 86.0, Average Reward: 26.026315789473685
Episode: 38 Reward: 48.0 Best Score: 86.0, Average Reward: 26.58974358974359
Episode: 39 Reward: 65.0 Best Score: 86.0, Average Reward: 27.55
Episode: 40 Reward: 71.0 Best Score: 86.0, Average Reward: 28.609756097560975
Episode: 41 Reward: 45.0 Best Score: 86.0, Average Reward: 29.0
Episode: 42 Reward: 34.0 Best Score: 86.0, Average Reward: 29.11627906976744
Episode: 43 Reward: 56.0 Best Score: 86.0, Average Reward: 29.727272727272727
Episode: 44 Reward: 51.0 Best Sc



Episode: 45 Reward: 159.0 Best Score: 159.0, Average Reward: 33.0
Episode: 46 Reward: 61.0 Best Score: 159.0, Average Reward: 33.59574468085106
Episode: 47 Reward: 112.0 Best Score: 159.0, Average Reward: 35.229166666666664
Episode: 48 Reward: 107.0 Best Score: 159.0, Average Reward: 36.69387755102041
Episode: 49 Reward: 42.0 Best Score: 159.0, Average Reward: 36.8




Episode: 50 Reward: 59.0 Best Score: 159.0, Average Reward: 37.23529411764706
Episode: 51 Reward: 73.0 Best Score: 159.0, Average Reward: 37.92307692307692
Episode: 52 Reward: 60.0 Best Score: 159.0, Average Reward: 38.339622641509436
Episode: 53 Reward: 40.0 Best Score: 159.0, Average Reward: 38.370370370370374
Episode: 54 Reward: 54.0 Best Score: 159.0, Average Reward: 38.654545454545456
Episode: 55 Reward: 40.0 Best Score: 159.0, Average Reward: 38.67857142857143
Episode: 56 Reward: 39.0 Best Score: 159.0, Average Reward: 38.68421052631579
Episode: 57 Reward: 12.0 Best Score: 159.0, Average Reward: 38.224137931034484
Episode: 58 Reward: 60.0 Best Score: 159.0, Average Reward: 38.59322033898305
Episode: 59 Reward: 65.0 Best Score: 159.0, Average Reward: 39.03333333333333
Episode: 60 Reward: 54.0 Best Score: 159.0, Average Reward: 39.278688524590166
Episode: 61 Reward: 40.0 Best Score: 159.0, Average Reward: 39.29032258064516
Episode: 62 Reward: 43.0 Best Score: 159.0, Average Reward:



Episode: 78 Reward: 196.0 Best Score: 196.0, Average Reward: 47.278481012658226
Episode: 79 Reward: 152.0 Best Score: 196.0, Average Reward: 48.5875




Episode: 80 Reward: 245.0 Best Score: 245.0, Average Reward: 51.01234567901235
Episode: 81 Reward: 29.0 Best Score: 245.0, Average Reward: 50.74390243902439
Episode: 82 Reward: 169.0 Best Score: 245.0, Average Reward: 52.16867469879518
Episode: 83 Reward: 17.0 Best Score: 245.0, Average Reward: 51.75
Episode: 84 Reward: 33.0 Best Score: 245.0, Average Reward: 51.529411764705884
Episode: 85 Reward: 143.0 Best Score: 245.0, Average Reward: 52.593023255813954
Episode: 86 Reward: 113.0 Best Score: 245.0, Average Reward: 53.28735632183908
Episode: 87 Reward: 66.0 Best Score: 245.0, Average Reward: 53.43181818181818
Episode: 88 Reward: 11.0 Best Score: 245.0, Average Reward: 52.95505617977528
Episode: 89 Reward: 184.0 Best Score: 245.0, Average Reward: 54.41111111111111
Episode: 90 Reward: 114.0 Best Score: 245.0, Average Reward: 55.065934065934066
Episode: 91 Reward: 113.0 Best Score: 245.0, Average Reward: 55.69565217391305
Episode: 92 Reward: 183.0 Best Score: 245.0, Average Reward: 57.06



Episode: 100 Reward: 74.0 Best Score: 245.0, Average Reward: 60.29
Episode: 101 Reward: 222.0 Best Score: 245.0, Average Reward: 62.36
Episode: 102 Reward: 212.0 Best Score: 245.0, Average Reward: 64.3
Episode: 103 Reward: 185.0 Best Score: 245.0, Average Reward: 66.02
Episode: 104 Reward: 173.0 Best Score: 245.0, Average Reward: 67.62
Episode: 105 Reward: 184.0 Best Score: 245.0, Average Reward: 69.29
Episode: 106 Reward: 189.0 Best Score: 245.0, Average Reward: 71.03
Episode: 107 Reward: 65.0 Best Score: 245.0, Average Reward: 71.55
Episode: 108 Reward: 69.0 Best Score: 245.0, Average Reward: 72.07
Episode: 109 Reward: 155.0 Best Score: 245.0, Average Reward: 73.29
Episode: 110 Reward: 53.0 Best Score: 245.0, Average Reward: 73.66
Episode: 111 Reward: 187.0 Best Score: 245.0, Average Reward: 75.34
Episode: 112 Reward: 198.0 Best Score: 245.0, Average Reward: 76.94




Episode: 113 Reward: 279.0 Best Score: 279.0, Average Reward: 79.51
Episode: 114 Reward: 182.0 Best Score: 279.0, Average Reward: 81.19
Episode: 115 Reward: 269.0 Best Score: 279.0, Average Reward: 83.63
Episode: 116 Reward: 254.0 Best Score: 279.0, Average Reward: 86.07
Episode: 117 Reward: 52.0 Best Score: 279.0, Average Reward: 86.47
Episode: 118 Reward: 168.0 Best Score: 279.0, Average Reward: 87.56
Episode: 119 Reward: 181.0 Best Score: 279.0, Average Reward: 89.08
Episode: 120 Reward: 63.0 Best Score: 279.0, Average Reward: 89.55
Episode: 121 Reward: 55.0 Best Score: 279.0, Average Reward: 89.7
Episode: 122 Reward: 48.0 Best Score: 279.0, Average Reward: 90.0
Episode: 123 Reward: 47.0 Best Score: 279.0, Average Reward: 90.32
Episode: 124 Reward: 51.0 Best Score: 279.0, Average Reward: 90.7
Episode: 125 Reward: 41.0 Best Score: 279.0, Average Reward: 90.91
Episode: 126 Reward: 49.0 Best Score: 279.0, Average Reward: 91.26
Episode: 127 Reward: 45.0 Best Score: 279.0, Average Reward



Episode: 150 Reward: 30.0 Best Score: 279.0, Average Reward: 88.81
Episode: 151 Reward: 30.0 Best Score: 279.0, Average Reward: 88.38
Episode: 152 Reward: 68.0 Best Score: 279.0, Average Reward: 88.46
Episode: 153 Reward: 29.0 Best Score: 279.0, Average Reward: 88.35
Episode: 154 Reward: 26.0 Best Score: 279.0, Average Reward: 88.07
Episode: 155 Reward: 20.0 Best Score: 279.0, Average Reward: 87.87
Episode: 156 Reward: 26.0 Best Score: 279.0, Average Reward: 87.74
Episode: 157 Reward: 29.0 Best Score: 279.0, Average Reward: 87.91
Episode: 158 Reward: 131.0 Best Score: 279.0, Average Reward: 88.62
Episode: 159 Reward: 27.0 Best Score: 279.0, Average Reward: 88.24
Episode: 160 Reward: 61.0 Best Score: 279.0, Average Reward: 88.31
Episode: 161 Reward: 57.0 Best Score: 279.0, Average Reward: 88.48
Episode: 162 Reward: 147.0 Best Score: 279.0, Average Reward: 89.52
Episode: 163 Reward: 99.0 Best Score: 279.0, Average Reward: 90.38
Episode: 164 Reward: 82.0 Best Score: 279.0, Average Reward:



Episode: 199 Reward: 297.0 Best Score: 297.0, Average Reward: 120.79




Episode: 200 Reward: 252.0 Best Score: 297.0, Average Reward: 122.57
Episode: 201 Reward: 234.0 Best Score: 297.0, Average Reward: 122.69
Episode: 202 Reward: 260.0 Best Score: 297.0, Average Reward: 123.17
Episode: 203 Reward: 247.0 Best Score: 297.0, Average Reward: 123.79
Episode: 204 Reward: 275.0 Best Score: 297.0, Average Reward: 124.81
Episode: 205 Reward: 260.0 Best Score: 297.0, Average Reward: 125.57




Episode: 206 Reward: 357.0 Best Score: 357.0, Average Reward: 127.25
Episode: 207 Reward: 55.0 Best Score: 357.0, Average Reward: 127.15
Episode: 208 Reward: 123.0 Best Score: 357.0, Average Reward: 127.69
Episode: 209 Reward: 288.0 Best Score: 357.0, Average Reward: 129.02
Episode: 210 Reward: 150.0 Best Score: 357.0, Average Reward: 129.99
Episode: 211 Reward: 225.0 Best Score: 357.0, Average Reward: 130.37
Episode: 212 Reward: 249.0 Best Score: 357.0, Average Reward: 130.88
Episode: 213 Reward: 271.0 Best Score: 357.0, Average Reward: 130.8
Episode: 214 Reward: 253.0 Best Score: 357.0, Average Reward: 131.51
Episode: 215 Reward: 177.0 Best Score: 357.0, Average Reward: 130.59
Episode: 216 Reward: 219.0 Best Score: 357.0, Average Reward: 130.24
Episode: 217 Reward: 164.0 Best Score: 357.0, Average Reward: 131.36
Episode: 218 Reward: 103.0 Best Score: 357.0, Average Reward: 130.71
Episode: 219 Reward: 207.0 Best Score: 357.0, Average Reward: 130.97
Episode: 220 Reward: 239.0 Best Scor



Episode: 250 Reward: 47.0 Best Score: 357.0, Average Reward: 146.99
Episode: 251 Reward: 90.0 Best Score: 357.0, Average Reward: 147.59
Episode: 252 Reward: 76.0 Best Score: 357.0, Average Reward: 147.67
Episode: 253 Reward: 124.0 Best Score: 357.0, Average Reward: 148.62
Episode: 254 Reward: 92.0 Best Score: 357.0, Average Reward: 149.28
Episode: 255 Reward: 44.0 Best Score: 357.0, Average Reward: 149.52
Episode: 256 Reward: 15.0 Best Score: 357.0, Average Reward: 149.41
Episode: 257 Reward: 51.0 Best Score: 357.0, Average Reward: 149.63
Episode: 258 Reward: 52.0 Best Score: 357.0, Average Reward: 148.84
Episode: 259 Reward: 57.0 Best Score: 357.0, Average Reward: 149.14
Episode: 260 Reward: 59.0 Best Score: 357.0, Average Reward: 149.12
Episode: 261 Reward: 55.0 Best Score: 357.0, Average Reward: 149.1
Episode: 262 Reward: 56.0 Best Score: 357.0, Average Reward: 148.19
Episode: 263 Reward: 52.0 Best Score: 357.0, Average Reward: 147.72
Episode: 264 Reward: 52.0 Best Score: 357.0, Ave



Episode: 300 Reward: 62.0 Best Score: 357.0, Average Reward: 97.66
Episode: 301 Reward: 76.0 Best Score: 357.0, Average Reward: 96.08
Episode: 302 Reward: 23.0 Best Score: 357.0, Average Reward: 93.71
Episode: 303 Reward: 90.0 Best Score: 357.0, Average Reward: 92.14
Episode: 304 Reward: 69.0 Best Score: 357.0, Average Reward: 90.08
Episode: 305 Reward: 64.0 Best Score: 357.0, Average Reward: 88.12
Episode: 306 Reward: 18.0 Best Score: 357.0, Average Reward: 84.73
Episode: 307 Reward: 91.0 Best Score: 357.0, Average Reward: 85.09
Episode: 308 Reward: 85.0 Best Score: 357.0, Average Reward: 84.71
Episode: 309 Reward: 138.0 Best Score: 357.0, Average Reward: 83.21
Episode: 310 Reward: 94.0 Best Score: 357.0, Average Reward: 82.65
Episode: 311 Reward: 143.0 Best Score: 357.0, Average Reward: 81.83
Episode: 312 Reward: 64.0 Best Score: 357.0, Average Reward: 79.98
Episode: 313 Reward: 71.0 Best Score: 357.0, Average Reward: 77.98
Episode: 314 Reward: 145.0 Best Score: 357.0, Average Reward



Episode: 350 Reward: 230.0 Best Score: 357.0, Average Reward: 99.3
Episode: 351 Reward: 145.0 Best Score: 357.0, Average Reward: 99.85
Episode: 352 Reward: 190.0 Best Score: 357.0, Average Reward: 100.99
Episode: 353 Reward: 71.0 Best Score: 357.0, Average Reward: 100.46
Episode: 354 Reward: 84.0 Best Score: 357.0, Average Reward: 100.38
Episode: 355 Reward: 96.0 Best Score: 357.0, Average Reward: 100.9
Episode: 356 Reward: 209.0 Best Score: 357.0, Average Reward: 102.84
Episode: 357 Reward: 308.0 Best Score: 357.0, Average Reward: 105.41
Episode: 358 Reward: 278.0 Best Score: 357.0, Average Reward: 107.67
Episode: 359 Reward: 159.0 Best Score: 357.0, Average Reward: 108.69
Episode: 360 Reward: 264.0 Best Score: 357.0, Average Reward: 110.74
Episode: 361 Reward: 279.0 Best Score: 357.0, Average Reward: 112.98
Episode: 362 Reward: 192.0 Best Score: 357.0, Average Reward: 114.34
Episode: 363 Reward: 318.0 Best Score: 357.0, Average Reward: 117.0
Episode: 364 Reward: 102.0 Best Score: 357



Episode: 400 Reward: 30.0 Best Score: 357.0, Average Reward: 150.92
Episode: 401 Reward: 18.0 Best Score: 357.0, Average Reward: 150.34
Episode: 402 Reward: 33.0 Best Score: 357.0, Average Reward: 150.44
Episode: 403 Reward: 70.0 Best Score: 357.0, Average Reward: 150.24
Episode: 404 Reward: 15.0 Best Score: 357.0, Average Reward: 149.7
Episode: 405 Reward: 159.0 Best Score: 357.0, Average Reward: 150.65
Episode: 406 Reward: 13.0 Best Score: 357.0, Average Reward: 150.6
Episode: 407 Reward: 18.0 Best Score: 357.0, Average Reward: 149.87
Episode: 408 Reward: 82.0 Best Score: 357.0, Average Reward: 149.84
Episode: 409 Reward: 33.0 Best Score: 357.0, Average Reward: 148.79
Episode: 410 Reward: 152.0 Best Score: 357.0, Average Reward: 149.37
Episode: 411 Reward: 158.0 Best Score: 357.0, Average Reward: 149.52
Episode: 412 Reward: 167.0 Best Score: 357.0, Average Reward: 150.55
Episode: 413 Reward: 167.0 Best Score: 357.0, Average Reward: 151.51
Episode: 414 Reward: 151.0 Best Score: 357.0,



Episode: 450 Reward: 183.0 Best Score: 357.0, Average Reward: 146.04
Episode: 451 Reward: 80.0 Best Score: 357.0, Average Reward: 145.39
Episode: 452 Reward: 188.0 Best Score: 357.0, Average Reward: 145.37
Episode: 453 Reward: 145.0 Best Score: 357.0, Average Reward: 146.11
Episode: 454 Reward: 135.0 Best Score: 357.0, Average Reward: 146.62
Episode: 455 Reward: 140.0 Best Score: 357.0, Average Reward: 147.06
Episode: 456 Reward: 38.0 Best Score: 357.0, Average Reward: 145.35
Episode: 457 Reward: 89.0 Best Score: 357.0, Average Reward: 143.16
Episode: 458 Reward: 174.0 Best Score: 357.0, Average Reward: 142.12
Episode: 459 Reward: 71.0 Best Score: 357.0, Average Reward: 141.24
Episode: 460 Reward: 118.0 Best Score: 357.0, Average Reward: 139.78
Episode: 461 Reward: 81.0 Best Score: 357.0, Average Reward: 137.8
Episode: 462 Reward: 151.0 Best Score: 357.0, Average Reward: 137.39
Episode: 463 Reward: 172.0 Best Score: 357.0, Average Reward: 135.93
Episode: 464 Reward: 168.0 Best Score: 3



Episode: 500 Reward: 131.0 Best Score: 357.0, Average Reward: 129.69
Episode: 501 Reward: 13.0 Best Score: 357.0, Average Reward: 129.64
Episode: 502 Reward: 128.0 Best Score: 357.0, Average Reward: 130.59
Episode: 503 Reward: 20.0 Best Score: 357.0, Average Reward: 130.09
Episode: 504 Reward: 21.0 Best Score: 357.0, Average Reward: 130.15
Episode: 505 Reward: 75.0 Best Score: 357.0, Average Reward: 129.31
Episode: 506 Reward: 136.0 Best Score: 357.0, Average Reward: 130.54
Episode: 507 Reward: 126.0 Best Score: 357.0, Average Reward: 131.62
Episode: 508 Reward: 20.0 Best Score: 357.0, Average Reward: 131.0
Episode: 509 Reward: 128.0 Best Score: 357.0, Average Reward: 131.95
Episode: 510 Reward: 118.0 Best Score: 357.0, Average Reward: 131.61
Episode: 511 Reward: 143.0 Best Score: 357.0, Average Reward: 131.46
Episode: 512 Reward: 148.0 Best Score: 357.0, Average Reward: 131.27
Episode: 513 Reward: 150.0 Best Score: 357.0, Average Reward: 131.1
Episode: 514 Reward: 108.0 Best Score: 35



Episode: 550 Reward: 12.0 Best Score: 357.0, Average Reward: 105.77
Episode: 551 Reward: 10.0 Best Score: 357.0, Average Reward: 105.07
Episode: 552 Reward: 60.0 Best Score: 357.0, Average Reward: 103.79
Episode: 553 Reward: 47.0 Best Score: 357.0, Average Reward: 102.81
Episode: 554 Reward: 10.0 Best Score: 357.0, Average Reward: 101.56
Episode: 555 Reward: 61.0 Best Score: 357.0, Average Reward: 100.77
Episode: 556 Reward: 10.0 Best Score: 357.0, Average Reward: 100.49
Episode: 557 Reward: 44.0 Best Score: 357.0, Average Reward: 100.04
Episode: 558 Reward: 33.0 Best Score: 357.0, Average Reward: 98.63
Episode: 559 Reward: 13.0 Best Score: 357.0, Average Reward: 98.05
Episode: 560 Reward: 32.0 Best Score: 357.0, Average Reward: 97.19
Episode: 561 Reward: 64.0 Best Score: 357.0, Average Reward: 97.02
Episode: 562 Reward: 28.0 Best Score: 357.0, Average Reward: 95.79
Episode: 563 Reward: 12.0 Best Score: 357.0, Average Reward: 94.19
Episode: 564 Reward: 11.0 Best Score: 357.0, Average R



Episode: 600 Reward: 56.0 Best Score: 357.0, Average Reward: 66.66
Episode: 601 Reward: 95.0 Best Score: 357.0, Average Reward: 67.48
Episode: 602 Reward: 80.0 Best Score: 357.0, Average Reward: 67.0
Episode: 603 Reward: 100.0 Best Score: 357.0, Average Reward: 67.8
Episode: 604 Reward: 81.0 Best Score: 357.0, Average Reward: 68.4
Episode: 605 Reward: 12.0 Best Score: 357.0, Average Reward: 67.77
Episode: 606 Reward: 14.0 Best Score: 357.0, Average Reward: 66.55
Episode: 607 Reward: 81.0 Best Score: 357.0, Average Reward: 66.1
Episode: 608 Reward: 15.0 Best Score: 357.0, Average Reward: 66.05
Episode: 609 Reward: 110.0 Best Score: 357.0, Average Reward: 65.87
Episode: 610 Reward: 15.0 Best Score: 357.0, Average Reward: 64.84
Episode: 611 Reward: 116.0 Best Score: 357.0, Average Reward: 64.57
Episode: 612 Reward: 14.0 Best Score: 357.0, Average Reward: 63.23
Episode: 613 Reward: 17.0 Best Score: 357.0, Average Reward: 61.9
Episode: 614 Reward: 12.0 Best Score: 357.0, Average Reward: 60.



Episode: 650 Reward: 109.0 Best Score: 357.0, Average Reward: 71.11
Episode: 651 Reward: 163.0 Best Score: 357.0, Average Reward: 72.64
Episode: 652 Reward: 162.0 Best Score: 357.0, Average Reward: 73.66
Episode: 653 Reward: 155.0 Best Score: 357.0, Average Reward: 74.74
Episode: 654 Reward: 151.0 Best Score: 357.0, Average Reward: 76.15
Episode: 655 Reward: 165.0 Best Score: 357.0, Average Reward: 77.19
Episode: 656 Reward: 128.0 Best Score: 357.0, Average Reward: 78.37
Episode: 657 Reward: 167.0 Best Score: 357.0, Average Reward: 79.6
Episode: 658 Reward: 99.0 Best Score: 357.0, Average Reward: 80.26
Episode: 659 Reward: 153.0 Best Score: 357.0, Average Reward: 81.66
Episode: 660 Reward: 158.0 Best Score: 357.0, Average Reward: 82.92
Episode: 661 Reward: 167.0 Best Score: 357.0, Average Reward: 83.95
Episode: 662 Reward: 143.0 Best Score: 357.0, Average Reward: 85.1
Episode: 663 Reward: 166.0 Best Score: 357.0, Average Reward: 86.64
Episode: 664 Reward: 116.0 Best Score: 357.0, Avera



Episode: 700 Reward: 15.0 Best Score: 357.0, Average Reward: 116.85
Episode: 701 Reward: 12.0 Best Score: 357.0, Average Reward: 116.02
Episode: 702 Reward: 14.0 Best Score: 357.0, Average Reward: 115.36
Episode: 703 Reward: 197.0 Best Score: 357.0, Average Reward: 116.33
Episode: 704 Reward: 15.0 Best Score: 357.0, Average Reward: 115.67
Episode: 705 Reward: 98.0 Best Score: 357.0, Average Reward: 116.53
Episode: 706 Reward: 157.0 Best Score: 357.0, Average Reward: 117.96
Episode: 707 Reward: 19.0 Best Score: 357.0, Average Reward: 117.34
Episode: 708 Reward: 174.0 Best Score: 357.0, Average Reward: 118.93
Episode: 709 Reward: 137.0 Best Score: 357.0, Average Reward: 119.2
Episode: 710 Reward: 203.0 Best Score: 357.0, Average Reward: 121.08
Episode: 711 Reward: 128.0 Best Score: 357.0, Average Reward: 121.2
Episode: 712 Reward: 202.0 Best Score: 357.0, Average Reward: 123.08
Episode: 713 Reward: 16.0 Best Score: 357.0, Average Reward: 123.07
Episode: 714 Reward: 18.0 Best Score: 357.0



Episode: 750 Reward: 212.0 Best Score: 357.0, Average Reward: 149.69
Episode: 751 Reward: 215.0 Best Score: 357.0, Average Reward: 150.21
Episode: 752 Reward: 228.0 Best Score: 357.0, Average Reward: 150.87
Episode: 753 Reward: 223.0 Best Score: 357.0, Average Reward: 151.55
Episode: 754 Reward: 196.0 Best Score: 357.0, Average Reward: 152.0
Episode: 755 Reward: 199.0 Best Score: 357.0, Average Reward: 152.34
Episode: 756 Reward: 177.0 Best Score: 357.0, Average Reward: 152.83
Episode: 757 Reward: 223.0 Best Score: 357.0, Average Reward: 153.39
Episode: 758 Reward: 134.0 Best Score: 357.0, Average Reward: 153.74
Episode: 759 Reward: 240.0 Best Score: 357.0, Average Reward: 154.61
Episode: 760 Reward: 258.0 Best Score: 357.0, Average Reward: 155.61
Episode: 761 Reward: 233.0 Best Score: 357.0, Average Reward: 156.27
Episode: 762 Reward: 210.0 Best Score: 357.0, Average Reward: 156.94
Episode: 763 Reward: 159.0 Best Score: 357.0, Average Reward: 156.87
Episode: 764 Reward: 219.0 Best Sco



Episode: 800 Reward: 223.0 Best Score: 357.0, Average Reward: 168.1
Episode: 801 Reward: 249.0 Best Score: 357.0, Average Reward: 170.47
Episode: 802 Reward: 228.0 Best Score: 357.0, Average Reward: 172.61
Episode: 803 Reward: 232.0 Best Score: 357.0, Average Reward: 172.96
Episode: 804 Reward: 249.0 Best Score: 357.0, Average Reward: 175.3
Episode: 805 Reward: 237.0 Best Score: 357.0, Average Reward: 176.69
Episode: 806 Reward: 238.0 Best Score: 357.0, Average Reward: 177.5
Episode: 807 Reward: 181.0 Best Score: 357.0, Average Reward: 179.12
Episode: 808 Reward: 214.0 Best Score: 357.0, Average Reward: 179.52
Episode: 809 Reward: 194.0 Best Score: 357.0, Average Reward: 180.09
Episode: 810 Reward: 122.0 Best Score: 357.0, Average Reward: 179.28
Episode: 811 Reward: 270.0 Best Score: 357.0, Average Reward: 180.7
Episode: 812 Reward: 103.0 Best Score: 357.0, Average Reward: 179.71
Episode: 813 Reward: 198.0 Best Score: 357.0, Average Reward: 181.53
Episode: 814 Reward: 125.0 Best Score:



Episode: 850 Reward: 82.0 Best Score: 357.0, Average Reward: 164.3
Episode: 851 Reward: 78.0 Best Score: 357.0, Average Reward: 162.93
Episode: 852 Reward: 105.0 Best Score: 357.0, Average Reward: 161.7
Episode: 853 Reward: 130.0 Best Score: 357.0, Average Reward: 160.77
Episode: 854 Reward: 82.0 Best Score: 357.0, Average Reward: 159.63
Episode: 855 Reward: 64.0 Best Score: 357.0, Average Reward: 158.28
Episode: 856 Reward: 78.0 Best Score: 357.0, Average Reward: 157.29
Episode: 857 Reward: 66.0 Best Score: 357.0, Average Reward: 155.72
Episode: 858 Reward: 110.0 Best Score: 357.0, Average Reward: 155.48
Episode: 859 Reward: 107.0 Best Score: 357.0, Average Reward: 154.15
Episode: 860 Reward: 65.0 Best Score: 357.0, Average Reward: 152.22
Episode: 861 Reward: 84.0 Best Score: 357.0, Average Reward: 150.73
Episode: 862 Reward: 73.0 Best Score: 357.0, Average Reward: 149.36
Episode: 863 Reward: 60.0 Best Score: 357.0, Average Reward: 148.37
Episode: 864 Reward: 77.0 Best Score: 357.0, A



Episode: 900 Reward: 178.0 Best Score: 357.0, Average Reward: 137.62
Episode: 901 Reward: 202.0 Best Score: 357.0, Average Reward: 137.15
Episode: 902 Reward: 211.0 Best Score: 357.0, Average Reward: 136.98
Episode: 903 Reward: 185.0 Best Score: 357.0, Average Reward: 136.51
Episode: 904 Reward: 165.0 Best Score: 357.0, Average Reward: 135.67
Episode: 905 Reward: 157.0 Best Score: 357.0, Average Reward: 134.87
Episode: 906 Reward: 159.0 Best Score: 357.0, Average Reward: 134.08
Episode: 907 Reward: 149.0 Best Score: 357.0, Average Reward: 133.76
Episode: 908 Reward: 190.0 Best Score: 357.0, Average Reward: 133.52
Episode: 909 Reward: 157.0 Best Score: 357.0, Average Reward: 133.15
Episode: 910 Reward: 169.0 Best Score: 357.0, Average Reward: 133.62
Episode: 911 Reward: 131.0 Best Score: 357.0, Average Reward: 132.23
Episode: 912 Reward: 117.0 Best Score: 357.0, Average Reward: 132.37
Episode: 913 Reward: 118.0 Best Score: 357.0, Average Reward: 131.57
Episode: 914 Reward: 159.0 Best Sc



Episode: 950 Reward: 180.0 Best Score: 357.0, Average Reward: 138.38
Episode: 951 Reward: 186.0 Best Score: 357.0, Average Reward: 139.46
Episode: 952 Reward: 190.0 Best Score: 357.0, Average Reward: 140.31
Episode: 953 Reward: 183.0 Best Score: 357.0, Average Reward: 140.84
Episode: 954 Reward: 206.0 Best Score: 357.0, Average Reward: 142.08
Episode: 955 Reward: 184.0 Best Score: 357.0, Average Reward: 143.28
Episode: 956 Reward: 220.0 Best Score: 357.0, Average Reward: 144.7
Episode: 957 Reward: 193.0 Best Score: 357.0, Average Reward: 145.97
Episode: 958 Reward: 204.0 Best Score: 357.0, Average Reward: 146.91
Episode: 959 Reward: 199.0 Best Score: 357.0, Average Reward: 147.83
Episode: 960 Reward: 195.0 Best Score: 357.0, Average Reward: 149.13
Episode: 961 Reward: 177.0 Best Score: 357.0, Average Reward: 150.06
Episode: 962 Reward: 163.0 Best Score: 357.0, Average Reward: 150.96
Episode: 963 Reward: 163.0 Best Score: 357.0, Average Reward: 151.99
Episode: 964 Reward: 182.0 Best Sco

1

In [None]:
def greedy_policy(observation, model, action_space):
    """Return the index of the highest-scoring action for ``observation``.

    The observation is wrapped into a batch of one before the forward pass.
    ``action_space`` is accepted but not used.
    """
    batch = tf.convert_to_tensor([observation])
    scores = model(batch)
    best = tf.math.argmax(scores, axis=1)
    return best.numpy()[0]

In [None]:
import random 
import imageio
import tensorflow as tf 

class Eval:
    """Evaluates a saved model with the greedy policy, recording some episodes."""

    def __init__(self, env, model_path, number_of_episode=50):
        """Load the model and set up the recorder.

        Args:
            env: environment exposing ``reset``, ``step``, ``render`` and
                ``action_space``.
            model_path: path handed to ``tf.keras.models.load_model``.
            number_of_episode: how many evaluation episodes ``test`` runs.
        """
        self.env = env
        self.model = tf.keras.models.load_model(model_path)
        self.recorder = RecordVideo('dqn_lunarlander', 'test_videos/', 15)
        self.number_of_episode = number_of_episode

    def test(self):
        """Run the evaluation episodes.

        Every 10th episode is recorded and saved under its episode index.

        Returns:
            ``(rewards, steps)``: total reward and step count per episode.
        """
        rewards = []
        steps = []
        for episode in range(self.number_of_episode):
            record_episode = episode % 10 == 0
            done = False
            reward = 0
            step = 0
            # reset() returns (observation, info); only the observation may be
            # fed to the policy. All calls go through self.env, not a global.
            state, _ = self.env.reset(seed=random.randint(0, 500))
            if record_episode:
                self.recorder.add_image(self.env.render())

            while not done:
                action = greedy_policy(state, self.model, self.env.action_space)
                state, reward_prob, terminated, truncated, _ = self.env.step(action)
                done = terminated or truncated
                reward += reward_prob
                step += 1
                if record_episode:
                    self.recorder.add_image(self.env.render())

            rewards.append(reward)
            steps.append(step)
            # Save under the episode index so recorded clips do not overwrite
            # one another.
            if record_episode:
                self.recorder.save(episode)

        return rewards, steps
