## Fixed Obstacle and fixed target position

In [1]:
import gym
import pygame
import numpy as np
import cv2
class AgentEnv(gym.Env):
    """Grid world with one fixed obstacle cell and one fixed target cell.

    The 800x600 window is split into 40-pixel cells (20 columns x 15 rows).
    The agent starts at cell (0, 0) and moves one cell per step. An episode
    ends when the agent enters the target cell (+2000) or the obstacle cell
    (-1000). Every rendered frame is appended to
    ``fixed_obstacle_and_target.mp4``; call ``close()`` to finalize the file.
    """

    def __init__(self):
        pygame.init()
        pygame.font.init()
        self.width = 800
        self.height = 600
        self.screen = pygame.display.set_mode((self.width, self.height))
        self.clock = pygame.time.Clock()
        self.fps = 15   # frames per second (render pacing and video frame rate)
        self.font = pygame.font.Font(None, 36)  # built once instead of once per frame

        # Create a VideoWriter object to save the video
        self.fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        self.out = cv2.VideoWriter('fixed_obstacle_and_target.mp4', self.fourcc, self.fps, (self.width, self.height))

        self.grid_size = 40
        self.grid_width = self.width // self.grid_size    # number of cells per row
        self.grid_height = self.height // self.grid_size  # number of cells per column

        self.action_space = gym.spaces.Discrete(4)
        # Bounds are given as float32 explicitly so the declared dtype matches
        # the observations returned by step() and reset().
        self.observation_space = gym.spaces.Box(
            low=np.zeros(2, dtype=np.float32),
            high=np.array([self.grid_width - 1, self.grid_height - 1], dtype=np.float32),
            dtype=np.float32,
        )

        # Raw string: '\T' is not a valid escape sequence in a normal literal.
        self.agent_image = pygame.image.load(r'Documents\TANK.png')
        self.agent_image = pygame.transform.scale(self.agent_image, (self.grid_size, self.grid_size))
        self.player_image = pygame.transform.rotate(self.agent_image, 180)
        self.player_pos = [0, 0]
        self.target_pos = [self.grid_width - 1, self.grid_height - 1]
        self.target_image = pygame.Rect(self.target_pos[0]*self.grid_size, self.target_pos[1]*self.grid_size, self.grid_size, self.grid_size)

        self.obstacle1_pos = [self.grid_width - 7, self.grid_height - 9]
        self.obstacle1_image = pygame.Rect(self.obstacle1_pos[0]*self.grid_size, self.obstacle1_pos[1]*self.grid_size, self.grid_size, self.grid_size)

    def _observation(self):
        """Return the agent's cell as a fresh float32 array ``[x, y]``."""
        return np.array(self.player_pos, dtype=np.float32)

    def step(self, action):
        """Move the agent one cell and return ``(obs, reward, done, info)``.

        Actions: 0 = up, 1 = down, 2 = left, 3 = right. Moves are clamped at
        the grid border. The reward is minus the Euclidean distance (in cells)
        to the target, plus 2000 on reaching the target, minus 1000 on hitting
        the obstacle, or minus 1 otherwise.

        Raises:
            ValueError: if ``action`` is not one of 0, 1, 2, 3.
        """
        x, y = self.player_pos
        if action == 0:  # up
            y = max(0, y - 1)
        elif action == 1:  # down
            y = min(self.grid_height - 1, y + 1)
        elif action == 2:  # left
            x = max(0, x - 1)
        elif action == 3:  # right
            x = min(self.grid_width - 1, x + 1)
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on `reward`.
            raise ValueError(f"invalid action {action!r}; expected 0, 1, 2 or 3")
        self.player_pos = [x, y]

        reward = -np.sqrt((x - self.target_pos[0])**2 + (y - self.target_pos[1])**2)

        if self.player_pos == self.target_pos:
            reward += 2000
            done = True
        # The earlier code also compared against `obstacle1_pos + [1, 0]` and
        # similar, but `+` on lists concatenates, so those tests were never
        # true. Collision is with the single drawn obstacle cell, as before.
        elif self.player_pos == self.obstacle1_pos:
            reward -= 1000
            done = True
        else:
            reward -= 1
            done = False
        return self._observation(), reward, done, {}

    def reset(self):
        """Put the agent back at cell (0, 0) and return the observation."""
        self.player_pos = [0, 0]
        return self._observation()

    def render(self, score, i, num_episodes, mode="rgb_array"):
        """Draw the scene, show it, and append the frame to the video.

        Args:
            score: value shown in the on-screen ``Score:`` label.
            i, num_episodes: kept for call compatibility. The video is no
                longer released here, because releasing on the first frame of
                the last episode dropped the rest of that episode. Use
                ``close()`` instead.
            mode: unused.
        """
        pygame.event.pump()  # let pygame process window events between frames
        self.screen.fill((255, 255, 255))
        self.screen.blit(self.player_image, (self.player_pos[0] * self.grid_size, self.player_pos[1] * self.grid_size))
        pygame.draw.rect(self.screen, (0, 255, 0), self.target_image)
        pygame.draw.rect(self.screen, (255, 0, 0), self.obstacle1_image)
        score_text = self.font.render(f'Score: {score}', True, (0, 0, 255))
        self.screen.blit(score_text, (600, 20))

        # Copy the pixels in memory instead of writing and re-reading a PNG.
        # surfarray is indexed (x, y, RGB); OpenCV expects (row, col, BGR).
        rgb = pygame.surfarray.array3d(self.screen)
        self.frame = np.ascontiguousarray(rgb.transpose(1, 0, 2)[:, :, ::-1])
        self.out.write(self.frame)

        pygame.display.update()
        self.clock.tick(self.fps)

    def close(self):
        """Finalize the video file and shut pygame down."""
        self.out.release()
        pygame.quit()


pygame 2.1.0 (SDL 2.0.16, Python 3.9.13)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
from stable_baselines3 import PPO

# Raw string: '\T' and '\A' are not valid escape sequences in a normal literal.
MODEL_PATH = r'Documents\Trained Model_2D\Agent_Target_Obstacle_100000_timesteps'
num_episodes = 10

env = AgentEnv()   # Create the environment
try:
    # To retrain instead of loading the saved agent, uncomment:
    # model = PPO('MlpPolicy', env, verbose=1)  # Create the PPO agent
    # model.learn(total_timesteps=100000)    # Train the agent for 100000 timesteps
    # model.save(MODEL_PATH)
    model = PPO.load(MODEL_PATH + '.zip', env=env)

    score_tot = 0
    for i in range(num_episodes):
        obs = env.reset()
        score = 0
        while True:
            # predict() samples from the policy by default, so scores vary run to run.
            action, _states = model.predict(obs)
            obs, reward, done, info = env.step(action)
            score += reward
            env.render(score, i, num_episodes)
            if done:
                break
        print('Score: ' + str(score))
        score_tot += score
    print('Avg Score = ' + str(score_tot/num_episodes))
finally:
    # Always release the environment, even if loading or an episode fails.
    env.close()


  logger.warn(


Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Score: 1512.1256042964453
Score: 1505.4965361771674
Score: 1466.1091481660133
Score: 1588.8615567518605
Score: 1560.060688774664
Score: 1512.05120368788
Score: 1532.8764925338244
Score: 1455.993730196706
Score: 1589.081491136875
Score: 1449.362357168195
Avg Score = 1517.201880888963


## Changing the obstacle position after training the agent with the obstacle at a different position

In [None]:
import gym
import pygame
import numpy as np
import cv2
class AgentEnv(gym.Env):
    """Grid world with a fixed target and an obstacle that is part of the observation.

    The 800x600 window is split into 40-pixel cells (20 columns x 15 rows).
    The observation is ``[player_x, player_y, obstacle_x, obstacle_y]``. The
    reward penalizes distance to the target and closeness to the obstacle.
    Every rendered frame is appended to
    ``Changing_obstacle_after_training_agent.mp4``; call ``close()`` to
    finalize the file.
    """

    def __init__(self):
        pygame.init()
        pygame.font.init()
        self.width = 800
        self.height = 600
        self.screen = pygame.display.set_mode((self.width, self.height))
        self.clock = pygame.time.Clock()
        self.fps = 15   # frames per second (render pacing and video frame rate)
        self.font = pygame.font.Font(None, 36)  # built once instead of once per frame

        # Create a VideoWriter object to save the video
        self.fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        self.out = cv2.VideoWriter('Changing_obstacle_after_training_agent.mp4', self.fourcc, self.fps, (self.width, self.height))

        self.grid_size = 40
        self.grid_width = self.width // self.grid_size    # number of cells per row
        self.grid_height = self.height // self.grid_size  # number of cells per column

        self.action_space = gym.spaces.Discrete(4)
        # Bounds are given as float32 explicitly so the declared dtype matches
        # the observations returned by step() and reset().
        max_x, max_y = self.grid_width - 1, self.grid_height - 1
        self.observation_space = gym.spaces.Box(
            low=np.zeros(4, dtype=np.float32),
            high=np.array([max_x, max_y, max_x, max_y], dtype=np.float32),
            dtype=np.float32,
        )

        # Raw string: '\T' is not a valid escape sequence in a normal literal.
        self.agent_image = pygame.image.load(r'Documents\TANK.png')
        self.agent_image = pygame.transform.scale(self.agent_image, (self.grid_size, self.grid_size))
        self.player_image = pygame.transform.rotate(self.agent_image, 180)
        self.player_pos = [0, 0]
        self.target_pos = [self.grid_width - 1, self.grid_height - 1]
        self.target_image = pygame.Rect(self.target_pos[0]*self.grid_size, self.target_pos[1]*self.grid_size, self.grid_size, self.grid_size)

        self.obstacle1_pos = [self.grid_width - 4, self.grid_height - 1]
        self.obstacle1_image = pygame.Rect(self.obstacle1_pos[0]*self.grid_size, self.obstacle1_pos[1]*self.grid_size, self.grid_size, self.grid_size)

    def _observation(self):
        """Return ``[player_x, player_y, obstacle_x, obstacle_y]`` as float32."""
        return np.array(
            [self.player_pos[0], self.player_pos[1], self.obstacle1_pos[0], self.obstacle1_pos[1]],
            dtype=np.float32,
        )

    def step(self, action):
        """Move the agent one cell and return ``(obs, reward, done, info)``.

        Actions: 0 = up, 1 = down, 2 = left, 3 = right, clamped at the grid
        border. Any other action leaves the agent in place and contributes no
        shaping reward (unchanged from the earlier behavior).

        Shaping reward for a valid action:
        ``-10 * dist(player, target) - (23 - dist(player, obstacle))``.
        Then +2000 on the target cell, -1000 on the obstacle cell, or -1
        otherwise.
        """
        x, y = self.player_pos
        valid_action = True
        if action == 0:  # up
            y = max(0, y - 1)
        elif action == 1:  # down
            y = min(self.grid_height - 1, y + 1)
        elif action == 2:  # left
            x = max(0, x - 1)
        elif action == 3:  # right
            x = min(self.grid_width - 1, x + 1)
        else:
            valid_action = False
        self.player_pos = [x, y]

        reward = 0
        if valid_action:
            reward = -np.sqrt((x - self.target_pos[0])**2 + (y - self.target_pos[1])**2)*10
            # 23 is presumably the (rounded-down) grid diagonal, so this term
            # grows as the agent nears the obstacle. TODO confirm intent.
            reward -= 23 - np.sqrt((x - self.obstacle1_pos[0])**2 + (y - self.obstacle1_pos[1])**2)

        if self.player_pos == self.target_pos:
            reward += 2000
            done = True
        elif self.player_pos == self.obstacle1_pos:
            reward -= 1000
            done = True
        else:
            reward -= 1
            done = False
        return self._observation(), reward, done, {}

    def reset(self):
        """Put the agent back at cell (0, 0) and return the observation."""
        self.player_pos = [0, 0]
        return self._observation()

    def render(self, score, i, num_episodes, mode="rgb_array"):
        """Draw the scene, show it, and append the frame to the video.

        Args:
            score: value shown in the on-screen ``Score:`` label.
            i, num_episodes: kept for call compatibility. The video is no
                longer released here, because releasing on the first frame of
                the last episode dropped the rest of that episode. Use
                ``close()`` instead.
            mode: unused.
        """
        pygame.event.pump()  # let pygame process window events between frames
        self.screen.fill((255, 255, 255))
        self.screen.blit(self.player_image, (self.player_pos[0] * self.grid_size, self.player_pos[1] * self.grid_size))
        pygame.draw.rect(self.screen, (0, 255, 0), self.target_image)
        pygame.draw.rect(self.screen, (255, 0, 0), self.obstacle1_image)
        score_text = self.font.render(f'Score: {score}', True, (0, 0, 255))
        self.screen.blit(score_text, (600, 20))

        # Copy the pixels in memory instead of writing and re-reading a PNG.
        # surfarray is indexed (x, y, RGB); OpenCV expects (row, col, BGR).
        rgb = pygame.surfarray.array3d(self.screen)
        self.frame = np.ascontiguousarray(rgb.transpose(1, 0, 2)[:, :, ::-1])
        self.out.write(self.frame)

        pygame.display.update()
        self.clock.tick(self.fps)

    def close(self):
        """Finalize the video file and shut pygame down."""
        self.out.release()
        pygame.quit()


In [None]:
from stable_baselines3 import PPO

# Raw string: '\T' and '\A' are not valid escape sequences in a normal literal.
MODEL_PATH = r'Documents\Trained Model_2D\Agent_Target_onlychange_Obstacle_100000_timesteps'
TOTAL_TIMESTEPS = 100000
num_episodes = 10

env = AgentEnv()   # Create the environment
try:
    model = PPO('MlpPolicy', env, verbose=1)  # Create the PPO agent
    model.learn(total_timesteps=TOTAL_TIMESTEPS)    # Train the agent for 100000 timesteps
    model.save(MODEL_PATH)
    # To reuse the saved agent instead of retraining:
    # model = PPO.load(MODEL_PATH + '.zip', env=env)

    score_tot = 0
    for i in range(num_episodes):
        obs = env.reset()
        score = 0
        while True:
            # predict() samples from the policy by default, so scores vary run to run.
            action, _states = model.predict(obs)
            obs, reward, done, info = env.step(action)
            score += reward
            env.render(score, i, num_episodes)
            if done:
                break
        print('Score: ' + str(score))
        score_tot += score
    print('Avg Score = ' + str(score_tot/num_episodes))
finally:
    # Always release the environment, even if training or an episode fails.
    env.close()

## Randomized Obstacle position

In [3]:
import gym
import pygame
import numpy as np
import cv2
class AgentEnv(gym.Env):
    """Grid world with a fixed target and an obstacle re-randomized on every reset.

    The 800x600 window is split into 40-pixel cells (20 columns x 15 rows).
    The observation is ``[player_x, player_y, obstacle_x, obstacle_y]``.
    Every rendered frame is appended to
    ``randomized_obstacle_and_fixed_target.mp4``; call ``close()`` to
    finalize the file.
    """

    def __init__(self):
        pygame.init()
        pygame.font.init()
        self.width = 800
        self.height = 600
        self.screen = pygame.display.set_mode((self.width, self.height))
        self.clock = pygame.time.Clock()
        self.fps = 15   # frames per second (render pacing and video frame rate)
        self.font = pygame.font.Font(None, 36)  # built once instead of once per frame
        self.grid_size = 40

        # Create a VideoWriter object to save the video
        self.fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        self.out = cv2.VideoWriter('randomized_obstacle_and_fixed_target.mp4', self.fourcc, self.fps, (self.width, self.height))

        self.grid_width = self.width // self.grid_size    # number of cells per row
        self.grid_height = self.height // self.grid_size  # number of cells per column

        self.action_space = gym.spaces.Discrete(4)
        # Bounds are given as float32 explicitly so the declared dtype matches
        # the observations returned by step() and reset().
        max_x, max_y = self.grid_width - 1, self.grid_height - 1
        self.observation_space = gym.spaces.Box(
            low=np.zeros(4, dtype=np.float32),
            high=np.array([max_x, max_y, max_x, max_y], dtype=np.float32),
            dtype=np.float32,
        )

        # Raw string: '\T' is not a valid escape sequence in a normal literal.
        self.agent_image = pygame.image.load(r'Documents\TANK.png')
        self.agent_image = pygame.transform.scale(self.agent_image, (self.grid_size, self.grid_size))
        self.player_image = pygame.transform.rotate(self.agent_image, 180)
        self.player_pos = [0, 0]
        self.target_pos = [self.grid_width - 7, self.grid_height - 1]
        self.target_image = pygame.Rect(self.target_pos[0]*self.grid_size, self.target_pos[1]*self.grid_size, self.grid_size, self.grid_size)

        # Initial obstacle; reset() replaces it with a random cell.
        self.obstacle1_pos = [self.grid_width - 7, self.grid_height - 9]
        self.obstacle1_image = pygame.Rect(self.obstacle1_pos[0]*self.grid_size, self.obstacle1_pos[1]*self.grid_size, self.grid_size, self.grid_size)

    def _observation(self):
        """Return ``[player_x, player_y, obstacle_x, obstacle_y]`` as float32."""
        return np.array(
            [self.player_pos[0], self.player_pos[1], self.obstacle1_pos[0], self.obstacle1_pos[1]],
            dtype=np.float32,
        )

    def step(self, action):
        """Move the agent one cell and return ``(obs, reward, done, info)``.

        Actions: 0 = up, 1 = down, 2 = left, 3 = right. Moves are clamped at
        the grid border. The reward is minus the Euclidean distance (in cells)
        to the target, plus 2000 on reaching the target, minus 1000 on hitting
        the obstacle, or minus 1 otherwise.

        Raises:
            ValueError: if ``action`` is not one of 0, 1, 2, 3.
        """
        x, y = self.player_pos
        if action == 0:  # up
            y = max(0, y - 1)
        elif action == 1:  # down
            y = min(self.grid_height - 1, y + 1)
        elif action == 2:  # left
            x = max(0, x - 1)
        elif action == 3:  # right
            x = min(self.grid_width - 1, x + 1)
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on `reward`.
            raise ValueError(f"invalid action {action!r}; expected 0, 1, 2 or 3")
        self.player_pos = [x, y]

        reward = -np.sqrt((x - self.target_pos[0])**2 + (y - self.target_pos[1])**2)

        if self.player_pos == self.target_pos:
            reward += 2000
            done = True
        elif self.player_pos == self.obstacle1_pos:
            reward -= 1000
            done = True
        else:
            reward -= 1
            done = False
        return self._observation(), reward, done, {}

    def reset(self):
        """Return the agent to (0, 0), pick a new random obstacle cell, and return the observation.

        The obstacle column is drawn from 3..15 and the row from 3..11
        (``randint``'s ``high`` is exclusive).
        """
        self.player_pos = [0, 0]
        self.obstacle1_pos = [np.random.randint(low=3, high=16, size=None), np.random.randint(low=3, high=12, size=None)]
        self.obstacle1_image = pygame.Rect(self.obstacle1_pos[0]*self.grid_size, self.obstacle1_pos[1]*self.grid_size, self.grid_size, self.grid_size)
        return self._observation()

    def render(self, score, i, num_episodes, mode="rgb_array"):
        """Draw the scene, show it, and append the frame to the video.

        Args:
            score: value shown in the on-screen ``Score:`` label.
            i, num_episodes: kept for call compatibility. The video is no
                longer released here, because releasing on the first frame of
                the last episode dropped the rest of that episode. Use
                ``close()`` instead.
            mode: unused.
        """
        pygame.event.pump()  # let pygame process window events between frames
        self.screen.fill((255, 255, 255))
        self.screen.blit(self.player_image, (self.player_pos[0] * self.grid_size, self.player_pos[1] * self.grid_size))
        pygame.draw.rect(self.screen, (0, 255, 0), self.target_image)
        pygame.draw.rect(self.screen, (255, 0, 0), self.obstacle1_image)
        score_text = self.font.render(f'Score: {score}', True, (0, 0, 255))
        self.screen.blit(score_text, (600, 20))

        # Copy the pixels in memory instead of writing and re-reading a PNG.
        # surfarray is indexed (x, y, RGB); OpenCV expects (row, col, BGR).
        rgb = pygame.surfarray.array3d(self.screen)
        self.frame = np.ascontiguousarray(rgb.transpose(1, 0, 2)[:, :, ::-1])
        self.out.write(self.frame)

        pygame.display.update()
        self.clock.tick(self.fps)

    def close(self):
        """Finalize the video file and shut pygame down."""
        self.out.release()
        pygame.quit()


In [4]:
from stable_baselines3 import PPO

# Raw string: '\T' and '\A' are not valid escape sequences in a normal literal.
MODEL_PATH = r'Documents\Trained Model_2D\Agent_Target_change_Obstacle_100000_timesteps'
num_episodes = 10

env = AgentEnv()   # Create the environment
try:
    # To retrain instead of loading the saved agent, uncomment:
    # model = PPO('MlpPolicy', env, verbose=1)  # Create the PPO agent
    # model.learn(total_timesteps=100000)    # Train the agent for 100000 timesteps
    # model.save(MODEL_PATH)
    model = PPO.load(MODEL_PATH + '.zip', env=env)

    score_tot = 0
    for i in range(num_episodes):
        obs = env.reset()
        score = 0
        while True:
            # predict() samples from the policy by default, so scores vary run to run.
            action, _states = model.predict(obs)
            obs, reward, done, info = env.step(action)
            score += reward
            env.render(score, i, num_episodes)
            if done:
                break
        print('Score: ' + str(score))
        score_tot += score
    print('Avg Score = ' + str(score_tot/num_episodes))
finally:
    # Always release the environment, even if loading or an episode fails.
    env.close()

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Score: 1680.266396766882
Score: 1659.2951965536179
Score: 1724.4217422082152
Score: 1705.854547561415
Score: 1712.7633934418836
Score: 1720.3203497977318
Score: 1677.5609863750146
Score: 1720.5029737894251
Score: 1674.238072876586
Score: 1704.6679544723759
Avg Score = 1697.989161384315


## Randomized obstacle and target positions

In [1]:
import gym
import pygame
import numpy as np
import cv2
class AgentEnv(gym.Env):
    """Grid world where both the obstacle and the target are re-randomized on reset.

    The 800x600 window is split into 40-pixel cells (20 columns x 15 rows).
    The observation is ``[player_x, player_y, obstacle_x, obstacle_y]``.
    Every rendered frame is appended to ``randomized_obstacle_and_target.mp4``;
    call ``close()`` to finalize the file.

    NOTE(review): the target position is randomized but is NOT part of the
    observation, so the policy cannot see where the goal is. This likely
    explains the low scores. Adding it would change the observation space and
    make previously saved models unloadable, so it is left as is here.
    """

    def __init__(self):
        pygame.init()
        pygame.font.init()
        self.width = 800
        self.height = 600
        self.screen = pygame.display.set_mode((self.width, self.height))
        self.clock = pygame.time.Clock()
        self.fps = 15   # frames per second (render pacing and video frame rate)
        self.font = pygame.font.Font(None, 36)  # built once instead of once per frame
        self.grid_size = 40

        # Create a VideoWriter object to save the video
        self.fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        self.out = cv2.VideoWriter('randomized_obstacle_and_target.mp4', self.fourcc, self.fps, (self.width, self.height))

        self.grid_width = self.width // self.grid_size    # number of cells per row
        self.grid_height = self.height // self.grid_size  # number of cells per column

        self.action_space = gym.spaces.Discrete(4)
        # Bounds are given as float32 explicitly so the declared dtype matches
        # the observations returned by step() and reset().
        max_x, max_y = self.grid_width - 1, self.grid_height - 1
        self.observation_space = gym.spaces.Box(
            low=np.zeros(4, dtype=np.float32),
            high=np.array([max_x, max_y, max_x, max_y], dtype=np.float32),
            dtype=np.float32,
        )

        # Raw string: '\T' is not a valid escape sequence in a normal literal.
        self.agent_image = pygame.image.load(r'Documents\TANK.png')
        self.agent_image = pygame.transform.scale(self.agent_image, (self.grid_size, self.grid_size))
        self.player_image = pygame.transform.rotate(self.agent_image, 180)
        self.player_pos = [0, 0]
        # Initial target and obstacle; reset() replaces both with random cells.
        self.target_pos = [self.grid_width - 7, self.grid_height - 1]
        self.target_image = pygame.Rect(self.target_pos[0]*self.grid_size, self.target_pos[1]*self.grid_size, self.grid_size, self.grid_size)

        self.obstacle1_pos = [self.grid_width - 7, self.grid_height - 9]
        self.obstacle1_image = pygame.Rect(self.obstacle1_pos[0]*self.grid_size, self.obstacle1_pos[1]*self.grid_size, self.grid_size, self.grid_size)

    def _observation(self):
        """Return ``[player_x, player_y, obstacle_x, obstacle_y]`` as float32."""
        return np.array(
            [self.player_pos[0], self.player_pos[1], self.obstacle1_pos[0], self.obstacle1_pos[1]],
            dtype=np.float32,
        )

    def _random_obstacle_pos(self):
        """Draw an obstacle cell with column in 3..18 and row in 3..13."""
        return [np.random.randint(low=3, high=19, size=None), np.random.randint(low=3, high=14, size=None)]

    def step(self, action):
        """Move the agent one cell and return ``(obs, reward, done, info)``.

        Actions: 0 = up, 1 = down, 2 = left, 3 = right, clamped at the grid
        border. Any other action leaves the agent in place and contributes no
        distance reward (unchanged from the earlier behavior).

        For a valid action the reward is minus the Euclidean distance (in
        cells) to the target. Then +2000 on the target cell, -1000 on the
        obstacle cell, or -1 otherwise.
        """
        x, y = self.player_pos
        valid_action = True
        if action == 0:  # up
            y = max(0, y - 1)
        elif action == 1:  # down
            y = min(self.grid_height - 1, y + 1)
        elif action == 2:  # left
            x = max(0, x - 1)
        elif action == 3:  # right
            x = min(self.grid_width - 1, x + 1)
        else:
            valid_action = False
        self.player_pos = [x, y]

        reward = 0
        if valid_action:
            reward = -np.sqrt((x - self.target_pos[0])**2 + (y - self.target_pos[1])**2)

        if self.player_pos == self.target_pos:
            reward += 2000
            done = True
        elif self.player_pos == self.obstacle1_pos:
            reward -= 1000
            done = True
        else:
            reward -= 1
            done = False
        return self._observation(), reward, done, {}

    def reset(self):
        """Return the agent to (0, 0), re-randomize obstacle and target, and return the observation.

        The obstacle is redrawn if it lands on the target cell, so the two
        never overlap (previously the obstacle could be drawn on top of the
        target).
        """
        self.player_pos = [0, 0]
        self.obstacle1_pos = self._random_obstacle_pos()
        # `high` is exclusive, so the target row is always 13.
        # NOTE(review): if the bottom row (14) was intended, confirm and adjust.
        self.target_pos = [np.random.randint(low=3, high=19, size=None), np.random.randint(low=13, high=14, size=None)]
        while self.obstacle1_pos == self.target_pos:
            self.obstacle1_pos = self._random_obstacle_pos()
        self.obstacle1_image = pygame.Rect(self.obstacle1_pos[0]*self.grid_size, self.obstacle1_pos[1]*self.grid_size, self.grid_size, self.grid_size)
        self.target_image = pygame.Rect(self.target_pos[0]*self.grid_size, self.target_pos[1]*self.grid_size, self.grid_size, self.grid_size)
        return self._observation()

    def render(self, score, i, num_episodes, mode="rgb_array"):
        """Draw the scene, show it, and append the frame to the video.

        Args:
            score: value shown in the on-screen ``Score:`` label.
            i, num_episodes: kept for call compatibility. The video is no
                longer released here, because releasing on the first frame of
                the last episode dropped the rest of that episode. Use
                ``close()`` instead.
            mode: unused.
        """
        pygame.event.pump()  # let pygame process window events between frames
        self.screen.fill((255, 255, 255))
        self.screen.blit(self.player_image, (self.player_pos[0] * self.grid_size, self.player_pos[1] * self.grid_size))
        pygame.draw.rect(self.screen, (0, 255, 0), self.target_image)
        pygame.draw.rect(self.screen, (255, 0, 0), self.obstacle1_image)
        score_text = self.font.render(f'Score: {score}', True, (0, 0, 255))
        self.screen.blit(score_text, (600, 20))

        # Copy the pixels in memory instead of writing and re-reading a PNG.
        # surfarray is indexed (x, y, RGB); OpenCV expects (row, col, BGR).
        rgb = pygame.surfarray.array3d(self.screen)
        self.frame = np.ascontiguousarray(rgb.transpose(1, 0, 2)[:, :, ::-1])
        self.out.write(self.frame)

        pygame.display.update()
        self.clock.tick(self.fps)

    def close(self):
        """Finalize the video file and shut pygame down."""
        self.out.release()
        pygame.quit()


pygame 2.1.0 (SDL 2.0.16, Python 3.9.13)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
from stable_baselines3 import PPO

# Raw string: '\T' and '\A' are not valid escape sequences in a normal literal.
MODEL_PATH = r'Documents\Trained Model_2D\Agent_rand_Target_rand_Obstacle_100000_timesteps'
num_episodes = 3

env = AgentEnv()   # Create the environment
try:
    # To retrain instead of loading the saved agent, uncomment:
    # model = PPO('MlpPolicy', env, verbose=1)  # Create the PPO agent
    # model.learn(total_timesteps=100000)    # Train the agent for 100000 timesteps
    # model.save(MODEL_PATH)
    model = PPO.load(MODEL_PATH + '.zip', env=env)

    score_tot = 0
    for i in range(num_episodes):
        obs = env.reset()
        score = 0
        while True:
            # predict() samples from the policy by default, so scores vary run to run.
            action, _states = model.predict(obs)
            obs, reward, done, info = env.step(action)
            score += reward
            env.render(score, i, num_episodes)
            if done:
                break
        print('Score: ' + str(score))
        score_tot += score
    print('Avg Score = ' + str(score_tot/num_episodes))
finally:
    # Always release the environment, even if loading or an episode fails.
    env.close()

  logger.warn(


Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Score: 1147.6649607755871
Score: -259.92833832087854
Score: -3867.385031306523
Avg Score = -993.2161362839382
