In [None]:
import pygame # import needed to run next few cells
# !pip install pygame

In [None]:
# import other required packages 
import numpy as np

import tensorflow as tf
from tensorflow import keras

from collections import deque

import random
from random import randint
from random import choice

import time

import matplotlib.pyplot as plt
from IPython import display

import warnings
warnings.filterwarnings('ignore')

In [None]:
# class to create paddle and update position

class Player(pygame.sprite.Sprite):
    """Rectangular paddle sprite that can be moved up and down.

    The paddle is drawn once onto its own surface; afterwards only
    ``self.rect`` changes. Vertical movement is clamped so the paddle stays
    between y = 0 and ``self.max_y``.

    Parameters
    ----------
    color : tuple
        RGB colour used to draw the paddle.
    width, height : int
        Size of the paddle in pixels.
    screen_height : int, optional
        Height of the playing field in pixels. Together with ``height`` it
        determines the lowest allowed paddle position. The default of 500
        gives a limit of 420 for an 80-pixel paddle, which is the value that
        used to be hard-coded here.
    """

    def __init__(self, color, width, height, screen_height=500):
        # parent class Sprite constructor
        super().__init__()

        # Surface holding the paddle; black is both the background fill and
        # the colour key, so anything left black is treated as transparent.
        self.image = pygame.Surface([width, height])
        self.image.fill((0, 0, 0))
        self.image.set_colorkey((0, 0, 0))

        # Draw the paddle over the whole surface
        pygame.draw.rect(self.image, color, [0, 0, width, height])

        # Rectangle used for positioning and collision detection
        self.rect = self.image.get_rect()

        # Largest y (top edge) at which the whole paddle is still on screen.
        # Never negative, even if the paddle is taller than the screen.
        self.max_y = max(0, screen_height - height)

    def moveUp(self, pixels):
        """Move the paddle up by ``pixels``, stopping at the top edge (y = 0)."""
        self.rect.y = max(self.rect.y - pixels, 0)

    def moveDown(self, pixels):
        """Move the paddle down by ``pixels``, stopping at ``self.max_y``."""
        self.rect.y = min(self.rect.y + pixels, self.max_y)

In [None]:
# extend Player class and create new function to execute action 0, 1 or 2

class AI(Player):
    """Paddle that is driven by a discrete action code instead of the keyboard."""

    def action_update(self, action):
        """Remember ``action`` and move the paddle accordingly.

        ``1`` moves the paddle down by 7 pixels and ``2`` moves it up by
        7 pixels. ``0`` (and any other value) leaves the paddle where it is.
        """
        self.action = action

        if action == 1:
            self.moveDown(7)
        elif action == 2:
            self.moveUp(7)
        # action 0 means "stay": the position is intentionally left untouched

In [None]:
# class to create and update ball object

class Ball(pygame.sprite.Sprite):
    """Ball sprite with a random serve velocity and a delayed start.

    After ``reset()`` the ball stays inactive (it does not move) until
    2000 ms of pygame time have elapsed since the reset; ``update()`` checks
    this on every call while the ball is waiting.
    """

    def __init__(self, color, width, height):
        # parent class Sprite constructor
        super().__init__()

        # Surface for the ball; black is the fill and the transparent colour key
        surface = pygame.Surface([width, height])
        surface.fill((0,0,0))
        surface.set_colorkey((0,0,0))

        # Draw the ball over the whole surface
        pygame.draw.rect(surface, color, [0, 0, width, height])
        self.image = surface

        # [horizontal, vertical] speed in pixels per update
        self.velocity = [0,0]
        self._serve_velocity()

        # Rectangle with the dimensions of the image; it is only given a
        # meaningful position once reset() is called.
        self.rect = self.image.get_rect()

        self.active = False
        self.score_time = 0

    def _serve_velocity(self):
        """Pick a fresh random serve velocity, updating ``self.velocity`` in place."""
        speed = randint(7,8)
        direction = random.choice([-1,1])
        self.velocity[0] = speed * direction
        self.velocity[1] = random.choice([-3, 3])

    def counter(self):
        """Activate the ball once 2000 ms have passed since ``score_time``."""
        elapsed = pygame.time.get_ticks() - self.score_time
        if elapsed >= 2000:
            self.active = True

    def update(self):
        """Advance the ball by one step.

        Returns ``True`` when the ball moved. While the ball is still waiting
        it only re-checks the timer and returns ``None``.
        """
        if not self.active:
            self.counter()
            return None

        dx, dy = self.velocity
        self.rect.x += dx
        self.rect.y += dy
        return True

    def reset(self):
        """Put the ball back at (345, 195), re-serve it and restart the wait timer."""
        self.active = False
        self.rect.x, self.rect.y = 345, 195
        self._serve_velocity()
        self.score_time = pygame.time.get_ticks()

    def bounce(self, side):
        """Reflect the ball off a paddle.

        With ``side == 'cpu'`` the ball is sent in the positive x direction;
        any other value sends it in the negative x direction. The vertical
        speed is re-drawn at random (4 to 8 pixels, up or down).
        """
        horizontal = abs(self.velocity[0])
        self.velocity[0] = horizontal if side == 'cpu' else -horizontal
        self.velocity[1] = randint(4,8) * random.choice([-1,1])

In [None]:
# main Pong Environment class
# partly built with the help of: https://www.101computing.net/pong-tutorial-using-pygame-getting-started/

class PongEnv():
    """Pong game with a small ``reset`` / ``step`` / ``render`` / ``close`` interface.

    The left paddle is controlled with the UP/DOWN keys, the right paddle by
    the action passed to ``step``. An observation is a length-5 numpy array:
    ball x-velocity, ball y-velocity, ball x, ball y, AI paddle y (scaled by
    ``norm_state``).

    NOTE: ``self.reward`` is initialised to 0 and never modified by this
    class, so ``step`` always reports a reward of 0. ``self.hit`` is likewise
    only ever set to False here.
    """

    # A game ends as soon as either side reaches this score
    # (equivalent to the previous "score > 4" check).
    WINNING_SCORE = 5

    def __init__(self):
        """Create the sprites and bookkeeping state; no window is opened yet."""
        self.black = (0,0,0)
        self.white = (255,255,255)
        self.red = (255, 0, 0)

        self.height = 500
        self.width = 700
        self.fps = 60 # set FPS to 80+ for faster training

        self.scorePlayer = 0
        self.scoreAI = 0
        self.reward = 0

        self.hit = False
        self.wait = False

        self.action_space = [0,1,2]
        self.observation_space = np.zeros(5) # for now only for obs count

        # The window and the score font are created lazily by render()
        self.screen = None
        self._font = None
        self.clock = pygame.time.Clock()
        self.state = None
        self.is_open = True

        self.paddlePlayer = Player(self.white, 10, 80)
        self.paddleAI = AI(self.white, 10, 80)
        self.ball = Ball(self.red,10,10)

        # list of all the sprites in the game
        self.all_sprites_list = pygame.sprite.Group()

        # add 2 paddles and the ball to the list of sprites
        self.all_sprites_list.add(self.paddlePlayer)
        self.all_sprites_list.add(self.paddleAI)
        self.all_sprites_list.add(self.ball)

    def render(self):
        """Draw the current frame, opening the window on first use.

        Returns
        -------
        bool
            ``self.is_open``: False once the user has asked to close the
            window, True otherwise.
        """
        if self.screen is None:
            # set new window
            pygame.init()
            pygame.display.set_caption("Pong")
            self.screen = pygame.display.set_mode((self.width, self.height))
            self.is_open = True

        if self._font is None:
            # created once and reused instead of building a Font every frame
            self._font = pygame.font.Font(None, 25)

        # set screen to black
        self.screen.fill(self.black)

        # draw the net and the centre circle (a 200x200 ellipse whose
        # bounding box starts at x = 250, y = 140)
        pygame.draw.line(self.screen, self.white, [349, 0], [349, 500], 5)
        pygame.draw.ellipse(self.screen, self.white, [self.height/2, self.width/5, 200, 200], 4)

        # draw all the sprites
        self.all_sprites_list.draw(self.screen)

        # display all scores
        self.screen.blit(self._font.render('Player: ' + str(self.scorePlayer), 1, self.white), (50,10))
        self.screen.blit(self._font.render('AI: ' + str(self.scoreAI), 1, self.white), (600,10))

        # Process pending window events (this also keeps the key state
        # current) and remember a close request so callers can stop.
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                self.is_open = False
        pygame.display.flip()

        return self.is_open

    def step(self, action):
        """Advance the game by one frame.

        Parameters
        ----------
        action : int
            Action for the AI paddle, forwarded to ``AI.action_update``.

        Returns
        -------
        list
            ``[reward, state, done]`` where ``state`` is the normalised
            observation and ``done`` is True once either score has reached
            ``WINNING_SCORE``.
        """
        # Move the player paddle with the UP/DOWN keys. The keyboard can only
        # be polled once the display is initialised (i.e. after the first
        # render()); without a window there is no keyboard input to read.
        if pygame.display.get_init():
            keys = pygame.key.get_pressed()

            if keys[pygame.K_UP]:
                self.paddlePlayer.moveUp(7)
            if keys[pygame.K_DOWN]:
                self.paddlePlayer.moveDown(7)

        # update ball position; the result is True if the ball moved
        self.wait = self.ball.update()
        # Player defines no update(), so this resolves to pygame's base
        # Sprite.update hook.
        self.paddlePlayer.update(self.ball)

        # A ball reaching the right edge (x >= 690) scores for the player,
        # one reaching the left edge (x <= 0) scores for the AI.
        if self.ball.rect.x>=690:
            self.scorePlayer += 1
            self.ball.reset()
        if self.ball.rect.x<=0:
            self.scoreAI += 1
            self.ball.reset()

        # bounce off the top and bottom walls
        self._bounce_off_walls()

        # Detect collisions between the ball and the paddles
        if(pygame.sprite.collide_mask(self.ball, self.paddlePlayer)):
            self.ball.bounce('cpu')

        if(pygame.sprite.collide_mask(self.ball, self.paddleAI)):
            self.ball.bounce(None)

        # update ai paddle using random or predicted action
        self.paddleAI.action_update(action)

        # the first side to reach WINNING_SCORE points ends the game
        done = bool(self.scorePlayer >= self.WINNING_SCORE or self.scoreAI >= self.WINNING_SCORE)

        self.state = self._observe()

        # tick / frames per second
        self.clock.tick(self.fps)

        return [self.reward, self.state, done]

    def _bounce_off_walls(self):
        """Point the ball's vertical velocity back into the field at the walls.

        The sign is forced rather than flipped: a ball that is still beyond a
        wall line on the next frame (for example because a paddle hit changed
        its vertical speed in between) keeps heading back into the field
        instead of being flipped outwards again and getting stuck on the wall.
        """
        velocity = self.ball.velocity
        if self.ball.rect.y>490:
            velocity[1] = -abs(velocity[1])
        if self.ball.rect.y<0:
            velocity[1] = abs(velocity[1])

    def _observe(self):
        """Build the normalised observation from the current sprite state.

        Used by both ``reset`` and ``step`` so the first observation of a game
        has the same layout (top-left ball coordinates) as all later ones.
        """
        raw = (self.ball.velocity[0], self.ball.velocity[1],
               self.ball.rect.x, self.ball.rect.y, self.paddleAI.rect.y)
        return self.norm_state(raw)

    # scale state values for ann training
    def norm_state(self, arr):
        """Return ``arr`` as a float numpy array with positions divided by 500.

        The two velocities are passed through unchanged. All three positions,
        including the ball's x position, are divided by 500.

        NOTE(review): the screen is 700 wide, so the scaled ball x can exceed
        1. The scaling is kept as is because the saved model presumably was
        trained on these exact values; confirm before changing.
        """
        state = np.zeros(len(arr))
        state[0] = arr[0]/1 # velocity 0
        state[1] = arr[1]/1 # velocity 1
        state[2] = arr[2]/500 # ball x
        state[3] = arr[3]/500 # ball y
        state[4] = arr[4]/500 # paddle y
        return state

    # reset ball and paddle positions and reset scores
    def reset(self):
        """Start a new game and return the initial normalised observation."""
        self.paddlePlayer.rect.x = 20
        self.paddlePlayer.rect.y = 200

        self.paddleAI.rect.x = 670
        self.paddleAI.rect.y = 200

        self.ball.reset()

        self.state = self._observe()

        self.scoreAI = 0
        self.scorePlayer = 0

        self.hit = False

        return self.state

    # close pong environment
    def close(self):
        """Shut pygame down and forget the window so render() can reopen it."""
        pygame.display.quit()
        pygame.quit()
        # The display surface and the font are unusable after quitting;
        # dropping them makes a later render() create fresh ones.
        self.screen = None
        self._font = None
        self.is_open = False

In [None]:
# play the game with trained model
# the ball waits 2 seconds before every serve; the paddles are only
# re-centred by env.reset(), i.e. once at the start of the game

env = PongEnv()
observation_count = env.observation_space.shape[0]
action_count = len(env.action_space)

model_trained = keras.models.load_model('pong_model')

try:
    observation = env.reset() # reset the environment
    observation = np.reshape(observation, [1, observation_count])

    done = False
    while not done:
        # render() returns the environment's is_open flag; stop once it is False
        if not env.render():
            break
        action_values = model_trained(observation) # run observation through the ANN Q(s,a)
        action = np.argmax(action_values[0]) # get the best action
        reward, observation, done = env.step(action)  # execute action
        observation = np.reshape(observation, [1, observation_count])
finally:
    # always shut pygame down, even if the loop is interrupted or raises,
    # so the game window does not stay open
    env.close()