In [1]:
# set up code for pygame and reinforcement learning model
import numpy as np 
import gym
from gym import Env, spaces
import time
from gym.spaces import Discrete, Box, Dict, Tuple, MultiBinary, MultiDiscrete
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
import pygame
import random
from pygame.locals import RLEACCEL


# Bring up every pygame subsystem (display, font, mixer) before any of them is used.
pygame.init()

# Window size in pixels; the environment and the sprite classes read these.
SCREEN_WIDTH = 500
SCREEN_HEIGHT = 500

# Shared frame-rate limiter; the environment ticks it only while rendering.
clock = pygame.time.Clock()

# Font and audio assets are loaded once here, at notebook start-up.
myfont = pygame.font.SysFont("Comic Sans MS", 30)
pygame.mixer.music.load("BackgroundMusic.mp3")
LaserSound, ExplosionSound, IceCreamSound = (
    pygame.mixer.Sound(sound_file)
    for sound_file in ("LaserSound.mp3", "ExplosionSound.mp3", "IceCreamSound.mp3")
)

# Module-level sprite registries shared by the environment and the game objects.
# `all_sprites` holds everything that gets updated and drawn; the other groups
# index sprites by role.
(
    all_sprites,
    enemies_group,
    friends_group,
    laser_group,
    explosion_group,
) = (pygame.sprite.Group() for _ in range(5))


pygame 2.0.1 (SDL 2.0.14, Python 3.9.5)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
# sets up environment class
class SpaceLaserEnv(Env):
    """Gym environment for a pygame laser game.

    The player ship moves vertically, enemies spawn near the left edge and fly
    to the right, and lasers fired by the player fly to the left. The episode
    ends when an enemy touches the player.

    Actions (``Discrete(4)``):
        0 = move up, 1 = move down, 2 = stay, 3 = shoot.

    Observation (length-5 array):
        ``[tracked enemy y, player y, x distance to tracked enemy,
        lasers left, shots fired at the tracked enemy]``

    NOTE(review): ``observation_space`` declares bounds ``[0, SCREEN_WIDTH]``,
    but the observation uses the sentinel 3000 for "no enemy" and the x
    distance can go negative once an enemy passes the player. The bounds are
    left untouched here so that an already-trained model keeps matching its
    environment; confirm before tightening or widening them.
    """

    # Sentinel y position meaning "no enemy is currently tracked".
    NO_ENEMY_Y = 3000
    # Lasers the player holds at the start of every episode.
    STARTING_LASERS = 5

    def __init__(self):
        super().__init__()
        self.rendering = False  # flipped to True by the first render() call
        self.counter = 0  # frames elapsed in the current episode
        self.gameScore = 0  # total kills; drives how many enemies spawn
        self.numEnemies = 1  # unused, kept for backward compatibility
        self.numLasers = self.STARTING_LASERS  # lasers currently available
        # Horizontal / vertical distance from the player to the tracked enemy
        # (centre to centre); these are the "nothing tracked yet" defaults.
        self.distance_to_closest_obstacle_x = SCREEN_WIDTH
        self.distance_to_closest_obstacle_y = self.NO_ENEMY_Y
        # Initial observation; same layout as the one built in step().
        self.closest_obstacle_stats = np.array(
            [self.NO_ENEMY_Y, 250, self.distance_to_closest_obstacle_x, self.numLasers, 0]
        )
        self.observation_space = spaces.Box(low=0, high=SCREEN_WIDTH, shape=(5,))
        self.action_space = spaces.Discrete(4)
        self.screen = pygame.display.set_mode([SCREEN_WIDTH, SCREEN_HEIGHT])
        self.gameSpeed = -5  # unused, kept for backward compatibility
        self.player = Player()
        all_sprites.add(self.player)
        # numShot / prevDistance together stop the agent from emptying all of
        # its lasers on a single tracked enemy.
        self.numShot = 0
        self.prevDistance = self.NO_ENEMY_Y

    def createEnemy(self):
        """Spawn one enemy at a random height near the left edge of the screen."""
        x = random.randint(0, 10)
        y = random.randint(0, SCREEN_HEIGHT)
        enemy = Enemy(x, y)
        all_sprites.add(enemy)
        enemies_group.add(enemy)

    def createLaser(self):
        """Fire one laser from the player's centre and consume one laser."""
        laser = Laser(self.player.rect.center)
        self.numLasers -= 1
        all_sprites.add(laser)
        laser_group.add(laser)

    def step(self, action):
        """Advance the game by one frame.

        Args:
            action: 0 (up), 1 (down), 2 (stay) or 3 (shoot).

        Returns:
            ``(observation, reward, done, info, score2)`` where ``score2`` is
            the number of enemies destroyed during this frame. The same count
            is also available as ``info["killed"]``.

        NOTE: the fifth return value is kept because the notebook's rollout
        loops unpack five values. Wrappers that expect the usual 4-tuple
        cannot consume this method as-is (hence the original remark that
        score2 can only be returned when not training).
        """
        if self.rendering:
            clock.tick(60)  # cap at 60 FPS while drawing; training runs uncapped

        reward = 1  # survival bonus for every frame
        done = False
        score2 = 0  # enemies destroyed this frame
        self.counter += 1

        # Every 30 frames spawn one enemy, plus one more for each 5 points of
        # score, up to five extra (score >= 25).
        if self.counter % 30 == 0:
            for _ in range(1 + min(self.gameScore // 5, 5)):
                self.createEnemy()

        # One extra laser is granted every 180 frames.
        if self.counter % 180 == 0:
            self.numLasers += 1

        nearestEnemyYPos = self.NO_ENEMY_Y
        self.distance_to_closest_obstacle_y = abs(nearestEnemyYPos - self.player.rect.centery)

        # Iterating a Group walks a snapshot of its members, so a sprite that
        # is killed during this loop can still come up later in the same pass.
        for entity in all_sprites:
            if not entity.alive():
                # Destroyed earlier this frame: it must not move, be tracked,
                # or collide with the player any more.
                continue
            if isinstance(entity, Player):
                self.player.update(action)
            elif isinstance(entity, Enemy):
                entity.update()
                vertical_gap = abs(entity.rect.centery - self.player.rect.centery)
                # NOTE(review): the `centery < 45` filter means only enemies in
                # the top 45 px of the screen are ever tracked. Kept as-is
                # because a trained model may depend on it; confirm intent.
                if vertical_gap < self.distance_to_closest_obstacle_y and entity.rect.centery < 45:
                    nearestEnemyYPos = entity.rect.centery
                    self.distance_to_closest_obstacle_y = vertical_gap
                    self.distance_to_closest_obstacle_x = self.player.rect.centerx - entity.rect.centerx
                if entity.rect.colliderect(self.player.rect):
                    done = True  # touching an enemy ends the episode
                elif entity.rect.centerx > SCREEN_WIDTH:
                    entity.kill()  # flew past the right edge
            elif isinstance(entity, Laser):
                entity.update()
                for enemy in enemies_group:
                    if entity.rect.colliderect(enemy.rect):
                        enemy.kill()
                        score2 += 1  # count every kill, matching gameScore
                        self.gameScore += 1
                if entity.rect.right < 0:
                    # Fully past the left edge: drop it so the sprite list
                    # does not keep growing for the rest of the episode.
                    entity.kill()

        # NOTE(review): distance_to_closest_obstacle_x is only refreshed when
        # an enemy is tracked, so it keeps its previous value on frames where
        # none qualifies. Left unchanged; confirm this is intended.

        # A change of tracked enemy re-arms the "one shot per target" limit.
        if self.prevDistance != nearestEnemyYPos:
            self.numShot = 0
        self.prevDistance = nearestEnemyYPos

        aim_error = abs(nearestEnemyYPos - self.player.rect.centery)
        shooting = action == 3

        if shooting and self.numLasers == 0:
            reward -= 1  # tried to shoot with an empty magazine
        elif shooting and self.numLasers > 0 and self.numShot == 0:
            # Checked in this order so that firing the last laser is not also
            # penalised as "out of lasers" after createLaser() decrements.
            self.createLaser()
            if aim_error < 15:
                reward += 50  # shot lined up with the tracked enemy
            else:
                reward -= 1  # shot that cannot hit the tracked enemy
            self.numShot += 1

        in_enemy_lane = aim_error <= 28
        if in_enemy_lane and 85 < self.distance_to_closest_obstacle_x < 150:
            if action == 0 or action == 1:
                reward += 5  # dodging while the enemy is still far enough away
            elif action == 2:
                reward -= 20  # standing still in the enemy's path

        if in_enemy_lane and self.distance_to_closest_obstacle_x <= 85:
            reward -= 4  # enemy got too close

        self.closest_obstacle_stats = np.array([
            nearestEnemyYPos,
            self.player.rect.centery,
            self.distance_to_closest_obstacle_x,
            self.numLasers,
            self.numShot,
        ])

        info = {"killed": score2}

        return self.closest_obstacle_stats, reward, done, info, score2

    def reset(self):
        """Clear all sprites, restore the starting state and return the first observation."""
        for group in (all_sprites, enemies_group, friends_group, laser_group, explosion_group):
            group.empty()
        self.player = Player()
        all_sprites.add(self.player)
        self.canvas = pygame.surfarray.array3d(self.screen)
        self.distance_to_closest_obstacle_x = SCREEN_WIDTH
        self.distance_to_closest_obstacle_y = self.NO_ENEMY_Y
        self.numLasers = self.STARTING_LASERS
        self.gameScore = 0
        self.numEnemies = 1
        self.numShot = 0
        self.prevDistance = self.NO_ENEMY_Y
        self.counter = 0

        self.closest_obstacle_stats = np.array([
            self.NO_ENEMY_Y,
            250,
            self.distance_to_closest_obstacle_x,
            self.numLasers,
            self.numShot,
        ])

        return self.closest_obstacle_stats

    def render(self, mode="human"):
        """Draw the current frame to the pygame window.

        Calling this once also switches step() to the 60 FPS frame cap.
        """
        self.rendering = True
        # Service the window's event queue; without this the OS may mark the
        # window as not responding, since nothing else in the loop reads events.
        pygame.event.pump()

        self.screen.fill((0, 0, 0))
        # Concentric circles from black to blue form the radial background.
        for x in range(255):
            pygame.draw.circle(self.screen, (0, 0, x), (SCREEN_WIDTH/2, SCREEN_HEIGHT/2), 255 - x)

        for sprite in all_sprites:
            self.screen.blit(sprite.surf, sprite.rect)

        pygame.display.flip()



In [3]:
# all of the object classes (player, enemy, lasers)
class Player(pygame.sprite.Sprite):
    """The player's ship: a 40x26 sprite that only moves vertically."""

    def __init__(self):
        super().__init__()
        ship_image = pygame.image.load("PlayerShip.png").convert()
        self.surf = pygame.transform.scale(ship_image, (40, 26))
        self.surf.set_colorkey((0, 0, 0), RLEACCEL)  # black is treated as transparent
        self.rect = self.surf.get_rect()
        self.rect.center = (460, 250)  # starting position

    def getPos(self):
        """Return the (x, y) centre of the ship."""
        return self.rect.center

    def update(self, action):
        """Move 10 px up for action 0 or 10 px down for action 1.

        The ship's centre stays at least 10 px from the top and bottom edges.
        Every other action leaves the ship where it is.
        """
        if action == 0:
            if self.rect.centery > 10:
                self.rect.move_ip(0, -10)
        elif action == 1:
            if self.rect.centery < SCREEN_HEIGHT - 10:
                self.rect.move_ip(0, 10)
        # Any other action, including 2 (stay): no movement.

class Enemy(pygame.sprite.Sprite):
    """An enemy ship: a 30x30 sprite that flies to the right at a random speed."""

    def __init__(self, xPos, yPos):
        """Create an enemy centred at (xPos, yPos) with a speed of 4-6 px per frame."""
        super().__init__()
        self.speed = random.randint(4, 6)
        ship_image = pygame.image.load("EnemyShip.png").convert()
        self.surf = pygame.transform.scale(ship_image, (30, 30))
        self.surf.set_colorkey((0, 0, 0), RLEACCEL)  # black is treated as transparent
        self.rect = self.surf.get_rect()
        self.rect.center = (xPos, yPos)

    def update(self):
        """Advance the enemy to the right by its speed."""
        self.rect.move_ip(self.speed, 0)
        
class Laser(pygame.sprite.Sprite):
    """A laser bolt: a 20x10 sprite that flies to the left at 4 px per frame."""

    def __init__(self, pos):
        """Create a laser centred at `pos`, an (x, y) pair."""
        super().__init__()
        self.speed = 4
        bolt_image = pygame.image.load("BulletPic3.jpg").convert()
        self.surf = pygame.transform.scale(bolt_image, (20, 10))
        self.surf.set_colorkey((255, 255, 255), RLEACCEL)  # white is treated as transparent
        self.rect = self.surf.get_rect()
        self.rect.center = pos

    def update(self):
        """Advance the laser to the left by its speed."""
        self.rect.move_ip(-self.speed, 0)


In [4]:
env = SpaceLaserEnv()  # fresh environment for the random-policy baseline

tot = 0  # enemies destroyed, summed over every episode

# Baseline run: the agent picks uniformly random actions for 5 episodes.
episodes = 5
for episode in range(1, episodes + 1):
    # reset() returns the 5-value observation:
    # [tracked enemy y, player y, x distance, lasers left, shots fired]
    state = env.reset()

    done = False
    score = 0        # accumulated reward for this episode
    killedScore = 0  # enemies destroyed in this episode
    while True:
        # Uncomment to watch the game while it runs:
        #env.render()
        action = env.action_space.sample()  # one of 0, 1, 2, 3 (up, down, stay, shoot)
        n_state, reward, done, info, score2 = env.step(action)

        score += reward
        killedScore += score2
        if done:  # set once an enemy reaches the player
            break
    tot += killedScore

    print('Episode:{} Score:{} Killed:{}'.format(episode, score, killedScore))
print(tot/episodes)

Episode:1 Score:527 Killed:3
Episode:2 Score:279 Killed:0
Episode:3 Score:189 Killed:0
Episode:4 Score:514 Killed:2
Episode:5 Score:619 Killed:1
1.2


In [5]:
"""
# to run the program with the trained model (have computer play after learning) ...
# ... COMMENT OUT THIS WHOLE SECTION AND THEN RUN ALL CELLS
#TRAINING THE DATA

env4 = SpaceLaserEnv() # make environment
env4 = DummyVecEnv([lambda: env4]) # helps speed up learning
model = PPO("MlpPolicy", env4, verbose=1)  # use algorithm and policy to create model

model.learn(total_timesteps=1000000) # will train the model for 1,000,000 time steps

model.save('SpaceLaserGameModel7.6')

# 7.6 is final model!!
"""

'\n# to run the program with the trained model (have computer play after learning) ...\n# ... COMMENT OUT THIS WHOLE SECTION AND THEN RUN ALL CELLS\n#TRAINING THE DATA\n\nenv4 = SpaceLaserEnv() # make environment\nenv4 = DummyVecEnv([lambda: env4]) # helps speed up learning\nmodel = PPO("MlpPolicy", env4, verbose=1)  # use algorithm and policy to create model\n\nmodel.learn(total_timesteps=1000000) # will train the model for 2,000,000 time steps\n\nmodel.save(\'SpaceLaserGameModel7.6\')\n\n# 7.6 is final model!!\n'

In [None]:

# Evaluate the saved PPO agent: play a few rendered episodes, report the mean
# number of enemies destroyed, and plot the distribution of per-episode kills.
import matplotlib
import numpy as np
import matplotlib.pyplot as plt

model = PPO.load("SpaceLaserGameModel7.6")

env3 = SpaceLaserEnv()  # environment the trained agent plays in

tot = 0           # enemies destroyed, summed over every episode
finalScores = []  # enemies destroyed in each episode
highScore = 0     # best single-episode kill count

episodes = 5
for episode in range(1, episodes + 1):
    obs = env3.reset()
    done = False
    score = 0        # accumulated reward for this episode
    killedScore = 0  # enemies destroyed in this episode
    while not done:
        env3.render()
        # The second value returned by predict() is the recurrent state,
        # which this policy does not use.
        action, _ = model.predict(obs)
        obs, reward, done, info, score2 = env3.step(action)
        score += reward
        killedScore += score2
    tot += killedScore
    highScore = max(highScore, killedScore)
    finalScores.append(killedScore)
    #print('Episode:{} Score:{} Killed:{}'.format(episode, score, killedScore))
print("Mean: " + str(tot/episodes))
print("Number of Episodes: " + str(episodes))

# Histogram of kills per episode with one bin per integer score.
# Edges must run to highScore + 1: every bin is half-open except the last,
# which is closed on both sides, so stopping at highScore would merge the top
# score into the previous bin (and leave a single edge when highScore == 0).
bin_edges = list(range(0, highScore + 2))
plt.hist(finalScores, bins=bin_edges)
plt.show()