<center>
    <h1> Arcanoid </h1>
</center> 

In [1]:
import turtle
import numpy as np
from colorama import Fore
from IPython.display import clear_output

## <center> Physics Core </center>
Only physics primitives, without RL-stuff and visualisation

In [2]:
class World:
    def __init__(self, window_width=28, window_height=28, cells_len=4):
        self.cells_len = cells_len
        self.window_width = window_width
        self.window_height = window_height
    
    @staticmethod
    def any_step_reward():
        return -0.1
    
    @staticmethod
    def hit_reward():
        return 1
    
    @staticmethod
    def lose_reward():
        return -10

In [3]:
class Paddle(World):
    def __init__(self, **kwargs):
        super().__init__()
        
        self.width = kwargs["width"] if "width" in kwargs else self.cells_len
        self.height = kwargs["height"] if "height" in kwargs else 1
        self.speed = self.cells_len
        self.x = self.window_width // self.cells_len
    
    def shift_left(self):
        if self.x >= 0 + self.speed:
            self.x = self.x - self.speed
            
    def shift_right(self):
        if self.x <= self.window_width - self.speed:
            self.x = self.x + self.speed
            
    def get_grid_coordinates(self):
        return self.x // self.cells_len

In [4]:
class Ball(World):
    def __init__(self, **kwargs):
        super().__init__()
        
        self.radius = kwargs["radius"] if "radius" in kwargs else self.cells_len//2
        self.dx = kwargs["speed"] if "speed" in kwargs else self.cells_len
        self.dy = -kwargs["speed"] if "speed" in kwargs else -self.cells_len
        
        self.x = self.window_width // 2
        self.y = self.window_height // 2
    
    def wall_checks(self):
        # If ball touch the right wall
        if self.x >= self.window_width:
            self.x = self.window_width
            # Reverse the x-axis velocity
            self.dx *= -1

        # If ball touch the left wall
        elif self.x <= 0:
            self.x = 0
            self.dx *= -1

        # If ball touch the upper wall
        if self.y >= self.window_height:
            self.y = self.window_height
            self.dy *= -1
            
    def get_grid_coordinates(self):
        grid_x = self.x // self.cells_len
        grid_y = self.y // self.cells_len
        
        return (grid_x, grid_y)

## <center> GUI </center>

In [5]:
class GUI_Paddle(turtle.Turtle):
    def __init__(self, paddle_start_position, paddle_width, paddle_height, paddle_speed, paddle_color="blue", ):
        super().__init__()

        self.shape('square')
        self.speed(paddle_speed)      
        self.color(paddle_color)
        self.shapesize(stretch_wid=paddle_width, stretch_len=paddle_height)
        self.goto(*paddle_start_position)
        self.penup()
        
    def new_position(self, x):
        self.setx(x)

In [6]:
class GUI_Ball(turtle.Turtle):
    def __init__(self, ball_start_position, ball_dx, ball_dy, speed):
        super().__init__()
        
        self.speed(speed)
        self.shape('circle')
        self.color('orange')
        self.dx = ball_dx
        self.dy = ball_dy
        self.goto(*ball_start_position)
        self.penup()
        
    def new_position(self, x, y, dx, dy):
        self.setx(x)
        self.sety(y)
        self.dx = dx
        self.dy = dy

In [7]:
# class GUI_Score(turtle.Turtle):
#     def __init__(self):
#         super().__init__()
        
#         self.hit = 0
#         self.miss = 0
        
#         self.speed(0)
#         self.color('white')      # Set the color to white
# #         self.hideturtle()        # Hide the shape of the object
#         self.goto(0, 250)        # Set scorecard to upper middle of the screen
#         self.penup()
#         self.write("Hit: {}   Missed: {}".format(self.hit, self.miss), align='center', font=('Courier', 24, 'normal'))

## <center> Environment API </center>

In [8]:
class Environment:
    
    def __init__(self, GUI=False):
        self.GUI = GUI
        self.ball = Ball()
        self.paddle = Paddle(height=2)
        self.hit = 0
        self.lose = 0
        
        if GUI is True:
            self.turn_on_GUI()
        
        
    def turn_on_GUI(self):

        self.win = turtle.Screen()
        self.win.title('RL Environment')
        self.win.bgcolor('black')
        self.win.tracer(0)
        self.win.setup(width=self.ball.window_width, height=self.ball.window_height)

        self.GUI_paddle = GUI_Paddle(
            paddle_start_position=(self.paddle.x, self.paddle.window_height),
            paddle_width=self.paddle.width,
            paddle_height=self.paddle.height,
            paddle_speed=self.paddle.speed
        )

        self.GUI_ball = GUI_Ball(
            ball_start_position=(self.ball.x, self.ball.y),
            ball_dx=self.ball.dx,
            ball_dy=self.ball.dy,
            speed=self.ball.dx
        )
        
        # self.GUI_score  = GUI_Score()
        
    
    def turn_off_GUI(self):
        pass  # TODO
    
    
    def reset(self):
        """ Reset a world к его первозданному состоянию
        """
        self.ball.x = self.ball.window_width // 2
        self.ball.y = self.ball.window_height // 2
        self.paddle.x = self.ball.window_width // 2
    
    def step(self, action: int):
        """ Move the ball
        
        if 0 then -1 then move paddle on -1 * speed
        if 1 then  0 then move paddle on 0 * speed
        if 2 then  1 then move paddle on 1 * speed
        """
        action = {0: -1, 1: 0, 2: 1}[action]
        if action == -1:
            self.paddle.shift_left()
        elif action == 1:
            self.paddle.shift_right()
        else:
            pass
        
        
        self.ball.x += self.ball.dx
        self.ball.y += self.ball.dy
        self.ball.wall_checks()
#         self.paddle.x += action * self.paddle.speed
        
            
        done = False
        
        # check if the ball hit the paddle
        if self.paddle.x - self.paddle.width/2 <= self.ball.x <= self.paddle.x + self.paddle.width/2 and \
            self.ball.y <= self.paddle.height:
            self.ball.dy *= -1
            reward = self.paddle.hit_reward()
            self.hit += 1

        # check if the ball fall into the lava
        elif self.ball.y <= self.paddle.height and \
            not (self.paddle.x - self.paddle.width/2 <= self.ball.x <= self.paddle.x + self.paddle.width/2):
            reward = self.paddle.lose_reward()
            done = True
            self.lose += 1
            
        # if not two last conditions, but the agent was step
        else:
            reward = self.paddle.any_step_reward()
            
        self.render()
        
        next_state = {
            "ball_dx": self.ball.dx,
            "ball_dy": self.ball.dy,
            "ball_grid": self.ball.get_grid_coordinates(),
            "paddle_x": self.paddle.get_grid_coordinates()
        }
        
        info = {
            "ball_x": self.ball.x,
            "ball_y": self.ball.y,
            "paddle_x": self.paddle.x,
        }
        
        return next_state, reward, done, info
    
    def render(self):
        if self.GUI is True:
            self.GUI_ball.new_position(self.ball.x, self.ball.y, self.ball.dx, self.ball.dy)
            self.GUI_paddle.new_position(self.paddle.x)
            self.win.update()

In [9]:
def np_render(env):
    scale = env.ball.cells_len
    field = [
        [" " for i in range(env.ball.window_height // scale + 1)]
        for i in range(env.ball.window_height // scale + 1)]
    
    y = env.ball.window_height // scale - env.ball.y // scale 
    x = env.ball.x // scale

    field[y][x] = Fore.LIGHTBLUE_EX + "x" + Fore.RESET
    
    x1 = (env.paddle.x + env.paddle.height) // scale
    y1 = env.paddle.window_height // scale


    field[y1][x1] = Fore.CYAN+"~"+Fore.RESET
    if x == x1 and y == y1:
        field[y1][x1] = Fore.CYAN+"+"+Fore.RESET
        
    for _ in field:
        print(Fore.LIGHTMAGENTA_EX + "~ " + Fore.RESET, end=" ")
    print("\n")
    for line in field:
        print(Fore.LIGHTMAGENTA_EX + "~" + Fore.RESET, end=" ")
        for i in line:
            print(i, end = " ")
        print(Fore.LIGHTMAGENTA_EX + "~" + Fore.RESET, end=" ")
        print("\n")
    print(
        "Hit: {}{}{}  /  Lose: {}{}{}\n\
Ball: {}\nPaddle: {}".format(Fore.GREEN, env.hit, Fore.RESET, Fore.RED, env.lose, Fore.RESET,
                         [env.ball.get_grid_coordinates()[0], env.ball.get_grid_coordinates()[1]],
                        env.paddle.get_grid_coordinates(),
                        )
    )
    


In [12]:
env = Environment(GUI=False)

In [13]:
import time
env.reset()

action = 2

done = False

while not done:
    np_render(env)
    time.sleep(0.5)
    next_state, reward, done, info = env.step(action)
#     print(env.ball.y // scale, env.ball.x // scale)
    time.sleep(0.5)
    clear_output(wait=True)

[95m~ [39m [95m~ [39m [95m~ [39m [95m~ [39m [95m~ [39m [95m~ [39m [95m~ [39m [95m~ [39m 

[95m~[39m                 [95m~[39m 

[95m~[39m                 [95m~[39m 

[95m~[39m                 [95m~[39m 

[95m~[39m                 [95m~[39m 

[95m~[39m           [94mx[39m     [95m~[39m 

[95m~[39m                 [95m~[39m 

[95m~[39m                 [95m~[39m 

[95m~[39m               [36m~[39m [95m~[39m 

Hit: [32m2[39m  /  Lose: [31m0[39m
Ball: [5, 3]
Paddle: 6


KeyboardInterrupt: 

 <i> TODO:  падл почему-то выезжает за границы, херовая визуализация - поправить; костыли с выезжанием за границы массива возможно из-за нормировки фиговой </i>

## <center> RL Core </center>