Importing the Required Libraries

In [None]:
!pip install tensorflow==2.12.0
!pip install gym==0.25.2
!pip install keras
!pip install keras-rl2==1.0.5

In [2]:
import gym
import random


In [3]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

In [None]:
!pip install protobuf==3.20.*
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [None]:
# Build the CartPole environment with on-screen ('human') rendering.
# NOTE(review): with render_mode='human' the environment may draw every step, including
# during training, which would throttle agent.fit — consider a separate non-rendering
# environment for training; confirm against the gym 0.25 render behaviour.
env = gym.make(
    'CartPole-v1',
    render_mode='human',
)

# Network input/output sizes are read straight from the environment's spaces.
states, actions = env.observation_space.shape[0], env.action_space.n

In [6]:
# Q-network: flatten the (1, states) observation window, run it through two
# 24-unit ReLU layers, and emit one linear output per action.
model = Sequential([
    Flatten(input_shape=(1, states)),
    Dense(24, activation='relu'),
    Dense(24, activation='relu'),
    Dense(actions, activation='linear'),
])



In [7]:
# Assemble the DQN agent from its parts: a replay memory (window of 1 observation,
# matching the model's (1, states) input), a Boltzmann policy, and the Q-network.
replay_memory = SequentialMemory(limit=50000, window_length=1)
exploration_policy = BoltzmannQPolicy()

agent = DQNAgent(
    model=model,
    nb_actions=actions,
    memory=replay_memory,
    policy=exploration_policy,
    nb_steps_warmup=10,
    target_model_update=0.01,
)

In [8]:
# Rebinds `Adam` to the legacy optimizer, shadowing the earlier
# `tensorflow.keras.optimizers` import; the compile cell below uses this one.
# NOTE(review): presumably needed because keras-rl2 does not work with the
# new-style Keras optimizer shipped in TF 2.12 — confirm.
from tensorflow.keras.optimizers.legacy import Adam

In [9]:
# Compile the agent with Adam (tracking mean absolute error) and train it.
# NB_TRAIN_STEPS is the training budget in environment steps; 50k is the current
# setting and can be raised for a better-trained agent at the cost of a longer run.
NB_TRAIN_STEPS = 50000

agent.compile(Adam(learning_rate=0.001), metrics=["mae"])

# Keep the History object returned by fit: assigning it stops the cell from echoing
# a bare `<keras.callbacks.History ...>` repr and leaves the logged training metrics
# available for later inspection.
training_history = agent.fit(env, nb_steps=NB_TRAIN_STEPS, visualize=False, verbose=1)

Training for 50000 steps ...
Interval 1 (0 steps performed)
    7/10000 [..............................] - ETA: 3:40 - reward: 1.0000

  updates=self.state_updates,
  if not isinstance(terminated, (bool, np.bool8)):


   10/10000 [..............................] - ETA: 3:33 - reward: 1.0000

  batch_idxs = np.random.random_integers(low, high - 1, size=size)


   21/10000 [..............................] - ETA: 7:17 - reward: 1.0000

  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)


   30/10000 [..............................] - ETA: 6:10 - reward: 1.0000

  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)


97 episodes - episode_reward: 102.289 [8.000, 320.000] - loss: 3.238 - mae: 19.889 - mean_q: 40.428

Interval 2 (10000 steps performed)
41 episodes - episode_reward: 244.073 [179.000, 389.000] - loss: 4.689 - mae: 42.707 - mean_q: 86.568

Interval 3 (20000 steps performed)
43 episodes - episode_reward: 230.116 [157.000, 370.000] - loss: 3.817 - mae: 47.837 - mean_q: 96.611

Interval 4 (30000 steps performed)
38 episodes - episode_reward: 262.684 [162.000, 397.000] - loss: 2.588 - mae: 45.840 - mean_q: 92.448

Interval 5 (40000 steps performed)
done, took 1014.609 seconds


<keras.callbacks.History at 0x7accb1847820>

In [13]:
# Evaluate the trained agent over 50 rendered episodes, then report the mean
# reward per episode.
results = agent.test(env, nb_episodes=50, visualize=True)
episode_rewards = results.history["episode_reward"]
print(np.mean(episode_rewards))

Testing for 50 episodes ...


See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


Episode 1: reward: 224.000, steps: 224
Episode 2: reward: 500.000, steps: 500
Episode 3: reward: 195.000, steps: 195
Episode 4: reward: 224.000, steps: 224
Episode 5: reward: 433.000, steps: 433
Episode 6: reward: 241.000, steps: 241
Episode 7: reward: 214.000, steps: 214
Episode 8: reward: 438.000, steps: 438
Episode 9: reward: 243.000, steps: 243
Episode 10: reward: 189.000, steps: 189
Episode 11: reward: 477.000, steps: 477
Episode 12: reward: 390.000, steps: 390
Episode 13: reward: 500.000, steps: 500
Episode 14: reward: 288.000, steps: 288
Episode 15: reward: 246.000, steps: 246
Episode 16: reward: 389.000, steps: 389
Episode 17: reward: 373.000, steps: 373
Episode 18: reward: 253.000, steps: 253
Episode 19: reward: 293.000, steps: 293
Episode 20: reward: 234.000, steps: 234
Episode 21: reward: 404.000, steps: 404
Episode 22: reward: 305.000, steps: 305
Episode 23: reward: 207.000, steps: 207
Episode 24: reward: 500.000, steps: 500
Episode 25: reward: 499.000, steps: 499
Episode 2

In [14]:
# Persist the network to 'model.keras' (the Keras-native single-file format is
# selected by the '.keras' extension).
model.save(filepath='model.keras')

In [15]:
# Also persist the network to 'model.h5' (legacy HDF5 format, selected by the
# '.h5' extension).
model.save(filepath='model.h5')

In [16]:
# Close the environment now that training and evaluation are done; this is the
# last use of `env` in the notebook.
env.close()