In [None]:
!pip install gym[atari]
!pip install autorom[accept-rom-license]
!pip install autorom[accept-rom-license]
!pip install --upgrade gym==0.24.0
!pip install --upgrade ale_py==0.7.2
!pip install ale_py==0.7.2
!pip install keras-rl2
!pip install --upgrade tensorflow==2.8.0

In [None]:
import gym
import random
import ale_py
import matplotlib.pyplot as plt
from IPython.display import clear_output
import numpy as np


In [None]:
# Create the Space Invaders environment and record its observation/action sizes.
# The checker flag only has an effect when handed to gym.make; as a bare
# module-level variable it was never read by anything.
disable_env_checker=True
env=gym.make('SpaceInvaders-v0', disable_env_checker=disable_env_checker)
height, width, channels = env.observation_space.shape
actions=env.action_space.n
# NOTE(review): this only sets an attribute on the object returned by gym.make;
# the rest of the notebook renders with mode='rgb_array'. Confirm whether a
# 'human' render mode is actually wanted here.
env.render_mode='human'

In [None]:
import cv2

class ImageCompressionWrapper(gym.ObservationWrapper):
    """Observation wrapper that resizes every frame to a fixed, smaller shape.

    Args:
        env: The environment whose image observations should be resized.
        compressed_shape: Target observation shape as (height, width, channels).
            Defaults to (32, 32, 3). Only height and width are used for the
            resize itself; the channel entry is what gets advertised in
            ``observation_space``, so it should match the channel count of the
            wrapped environment's frames.
    """

    def __init__(self, env, compressed_shape=(32, 32, 3)):
        super().__init__(env)
        self.compressed_shape = tuple(compressed_shape)
        # Advertise the resized frames so downstream code sees the new shape.
        self.observation_space = gym.spaces.Box(
            low=0, high=255, shape=self.compressed_shape, dtype=np.uint8
        )

    def observation(self, observation):
        """Return the resized version of ``observation``."""
        return self.compress_image(observation)

    def compress_image(self, image):
        """Resize ``image`` to the configured height and width using area interpolation."""
        target_height, target_width = self.compressed_shape[:2]
        # cv2.resize expects the target size as (width, height).
        return cv2.resize(
            image, (target_width, target_height), interpolation=cv2.INTER_AREA
        )
# Wrap the environment once. The guard keeps this cell idempotent, so running
# it again does not stack a second resize wrapper on top of the first.
if not isinstance(env, ImageCompressionWrapper):
    env = ImageCompressionWrapper(env)
# Refresh the dimensions from the wrapped observation space.
height, width, channels = env.observation_space.shape
print(height,width)

In [None]:
# Display the names of the actions exposed by the underlying (unwrapped) environment.
env.unwrapped.get_action_meanings()

['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']

In [None]:
# Sanity check: play a few episodes with uniformly random actions and show the
# rendered frames inline.
episodes=5
fig, ax = plt.subplots()
img = None  # image artist; created from the first frame rendered after reset()
scores = []
for episode in range(episodes):
    state=env.reset()
    done=False
    score=0
    while not done:
        # Render only after reset() so the first frame comes from a started episode.
        frame = env.render(mode='rgb_array')
        if img is None:
            img = ax.imshow(frame)
        else:
            img.set_data(frame)
        display(fig)
        # wait=True defers the clear until the next output arrives, so each
        # frame replaces the previous one instead of piling up.
        clear_output(wait=True)
        # Sample from the environment's own action count instead of a hard-coded list.
        action=random.randrange(env.action_space.n)
        n_state, reward, done,info=env.step(action)
        score+=reward
    scores.append(score)
# Close the figure so it is not rendered a second time when the cell finishes.
plt.close(fig)
# Report all scores after the animation; printing inside the loop would be
# wiped by the pending clear_output as soon as the next frame is displayed.
for episode, score in enumerate(scores):
    print('Episode:{} Score:{}'.format(episode,score))
# The environment is intentionally left open: the training cell further down
# reuses this same `env` object.

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Convolution2D
from tensorflow.keras.optimizers import Adam

In [None]:
def build_model(height, width, channels, actions):
    """Assemble the convolutional Q-network.

    Args:
        height: Frame height in pixels.
        width: Frame width in pixels.
        channels: Number of colour channels per frame.
        actions: Number of discrete actions; sets the size of the output layer.

    Returns:
        An uncompiled ``Sequential`` model with two convolutional layers, a
        flatten step, two hidden dense layers and a linear output layer.
    """
    # The leading 1 presumably corresponds to the agent memory's
    # window_length=1 -- verify if the window length is ever changed.
    stacked_frame_shape = (1, height, width, channels)
    network_layers = [
        Convolution2D(32, (8, 8), strides=(4, 4), activation='relu',
                      input_shape=stacked_frame_shape),
        Convolution2D(32, (4, 4), strides=(2, 2), activation='relu'),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(actions, activation='linear'),
    ]
    return Sequential(network_layers)


In [None]:
# Discard a model left over from an earlier run before rebuilding it. On a
# fresh kernel there is nothing to delete, so the NameError is expected and
# ignored; this keeps the notebook runnable top to bottom after a restart.
try:
    del model
except NameError:
    pass

In [None]:
# Build the Q-network for the (resized) observation shape and the action count.
model = build_model(
    height=height,
    width=width,
    channels=channels,
    actions=actions,
)

In [None]:
# Print the layer-by-layer output shapes and parameter counts of the network.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 1, 7, 7, 32)       6176      
                                                                 
 conv2d_1 (Conv2D)           (None, 1, 2, 2, 32)       16416     
                                                                 
 flatten (Flatten)           (None, 128)               0         
                                                                 
 dense (Dense)               (None, 128)               16512     
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dense_2 (Dense)             (None, 6)                 390       
                                                                 
Total params: 47,750
Trainable params: 47,750
Non-traina

In [None]:
from tensorflow import keras
import rl
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy,EpsGreedyQPolicy

In [None]:
def build_agent(model, actions):
    """Create the DQN agent around ``model``.

    Args:
        model: The Q-network passed to the agent.
        actions: Number of discrete actions the agent can choose from.

    Returns:
        A ``DQNAgent`` (not yet compiled) configured with a linearly annealed
        epsilon-greedy policy, a sequential replay memory and an 'avg'
        dueling head.
    """
    # Epsilon settings: max 1.0, min 0.1, annealing over 10000 steps, 0.2 for testing.
    exploration_policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.2,
        nb_steps=10000,
    )
    replay_memory = SequentialMemory(limit=1000, window_length=1)
    return DQNAgent(
        model=model,
        nb_actions=actions,
        memory=replay_memory,
        policy=exploration_policy,
        nb_steps_warmup=1000,
        enable_dueling_network=True,
        dueling_type='avg',
    )

In [None]:
# Build the agent, attach the optimizer, then train on the wrapped environment.
dqn = build_agent(model, actions)
optimizer = Adam(learning_rate=1e-4)
dqn.compile(optimizer)
dqn.fit(
    env,
    nb_steps=10000,
    visualize=False,
    verbose=2,
)

Training for 10000 steps ...
  796/10000: episode: 1, duration: 2.029s, episode steps: 796, steps per second: 392, episode reward: 165.000, mean reward:  0.207 [ 0.000, 30.000], mean action: 2.521 [0.000, 5.000],  loss: --, mean_q: --, mean_eps: --


  deprecation(
  updates=self.state_updates,


 1678/10000: episode: 2, duration: 16.287s, episode steps: 882, steps per second:  54, episode reward: 215.000, mean reward:  0.244 [ 0.000, 30.000], mean action: 2.539 [0.000, 5.000],  loss: 1.555089, mean_q: 4.713728, mean_eps: 0.879490


  deprecation(


 3064/10000: episode: 3, duration: 30.790s, episode steps: 1386, steps per second:  45, episode reward: 630.000, mean reward:  0.455 [ 0.000, 200.000], mean action: 2.659 [0.000, 5.000],  loss: 14.960479, mean_q: 5.032790, mean_eps: 0.786655


  deprecation(


 3420/10000: episode: 4, duration: 8.362s, episode steps: 356, steps per second:  43, episode reward: 50.000, mean reward:  0.140 [ 0.000, 20.000], mean action: 2.517 [0.000, 5.000],  loss: 32.659678, mean_q: 6.266610, mean_eps: 0.708265


  deprecation(


 4423/10000: episode: 5, duration: 21.955s, episode steps: 1003, steps per second:  46, episode reward: 305.000, mean reward:  0.304 [ 0.000, 30.000], mean action: 2.693 [0.000, 5.000],  loss: 7.223173, mean_q: 5.270829, mean_eps: 0.647110


  deprecation(


 5243/10000: episode: 6, duration: 17.773s, episode steps: 820, steps per second:  46, episode reward: 110.000, mean reward:  0.134 [ 0.000, 25.000], mean action: 2.735 [0.000, 5.000],  loss: 1.865297, mean_q: 4.258023, mean_eps: 0.565075


  deprecation(


 5738/10000: episode: 7, duration: 11.458s, episode steps: 495, steps per second:  43, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 2.299 [0.000, 5.000],  loss: 0.679664, mean_q: 4.414666, mean_eps: 0.505900


  deprecation(


 6930/10000: episode: 8, duration: 26.902s, episode steps: 1192, steps per second:  44, episode reward: 160.000, mean reward:  0.134 [ 0.000, 30.000], mean action: 2.414 [0.000, 5.000],  loss: 0.988114, mean_q: 4.613723, mean_eps: 0.429985


  deprecation(


 7635/10000: episode: 9, duration: 15.650s, episode steps: 705, steps per second:  45, episode reward: 60.000, mean reward:  0.085 [ 0.000, 15.000], mean action: 2.328 [0.000, 5.000],  loss: 0.617496, mean_q: 4.156478, mean_eps: 0.344620


  deprecation(


 8516/10000: episode: 10, duration: 19.010s, episode steps: 881, steps per second:  46, episode reward: 305.000, mean reward:  0.346 [ 0.000, 30.000], mean action: 2.537 [0.000, 5.000],  loss: 1.168459, mean_q: 3.937995, mean_eps: 0.273250


  deprecation(


 8922/10000: episode: 11, duration: 9.747s, episode steps: 406, steps per second:  42, episode reward: 55.000, mean reward:  0.135 [ 0.000, 20.000], mean action: 2.436 [0.000, 5.000],  loss: 2.250030, mean_q: 4.317207, mean_eps: 0.215335


  deprecation(


 9809/10000: episode: 12, duration: 19.016s, episode steps: 887, steps per second:  47, episode reward: 200.000, mean reward:  0.225 [ 0.000, 30.000], mean action: 2.807 [0.000, 5.000],  loss: 1.449578, mean_q: 4.691851, mean_eps: 0.157150


  deprecation(


done, took 202.836 seconds


<keras.callbacks.History at 0x7ac4c21b9a50>