## Game Source Code

In [1]:
import turtle
import time
import random
import torch
import numpy as np

GAME_SPEED = 0.5 # MS
ACTIONS = [1,2,3,4]

class Head:
    def __init__(self):
        self.head = turtle.Turtle();
        self.head.speed(0)
        self.head.shape("square")
        self.head.color("black")
        self.head.penup()
        self.head.goto(0,0)
        self.head.direction = "stop"
    
    def go_up(self):
        if self.head.direction != "down":
            self.head.direction = "up"

    def go_down(self):
        if self.head.direction != "up":
            self.head.direction = "down"

    def go_left(self):
        if self.head.direction != "right":
            self.head.direction = "left"

    def go_right(self):
        if self.head.direction != "left":
            self.head.direction = "right"

    def move(self):
        if self.head.direction == "up":
            y = self.head.ycor()
            self.head.sety(y + 20)

        if self.head.direction == "down":
            y = self.head.ycor()
            self.head.sety(y - 20)

        if self.head.direction == "left":
            x = self.head.xcor()
            self.head.setx(x - 20)

        if self.head.direction == "right":
            x = self.head.xcor()
            self.head.setx(x + 20)

class GameLauncher:
    def __init__(self):
        self.turtle = turtle
        self.boundary = 140  # Game boundary is ±140
        self.cell_size = 20  # Each block in the snake segment is 20 x 20
        
        # Calculate grid size from boundaries and cell size
        # (-140 to +140 = 280 units total, divided by 20 = 14 cells)
        self.grid_size = (self.boundary * 2) // self.cell_size  # Equals 14``
        
        self.setup_window()
        self.segments = []
        self.high_score = 0
        self.head = Head()
        self.setup_pen()
        self.setup_food()
        self.score = 0


        
    def check_wall_collision(self):
        head = self.head
        segments = self.segments
        if head.head.xcor()>140 or head.head.xcor()<-140 or head.head.ycor()>140 or head.head.ycor()<-140:
            time.sleep(1)
            head.head.goto(0,0)
            head.head.direction = "stop"

            # Hide the segments
            for segment in segments:
                segment.goto(1000, 1000)
    
            # Clear the segments list
            segments.clear()

            # Reset the score
            self.score = 0

            # Reset the delay
            self.delay = GAME_SPEED

            self.pen.clear()
            self.pen.write("Score: {}  High Score: {}".format(self.score, self.high_score), align="center", font=("Courier", 24, "normal")) 
            return 1
        return 0

    def check_food_collision(self):
        if self.head.head.distance(self.food) < 20:
            # Move the self.food to a random spot
            x = random.randint(-140, 140)
            y = random.randint(-140, 140)
            self.food.goto(x,y)

            # Add a segment
            new_segment = turtle.Turtle()
            new_segment.speed(0)
            new_segment.shape("square")
            new_segment.color("grey")
            new_segment.penup()
            self. segments.append(new_segment)

            # Shorten the delay
            self.delay -= 0.001

            # Increase the score
            self.score += 10

            if self.score > self.high_score:
                self.high_score = self.score
            
            self.pen.clear()
            self.pen.write("Score: {}  High Score: {}".format(self.score, self.high_score), align="center", font=("Courier", 24, "normal")) 

    def state_to_array(self):
        def turtle_to_grid(x, y):
            # Convert from (-140, 140) range to (0, 13) range
            grid_x = int((x + self.boundary) // self.cell_size)
            grid_y = int((y + self.boundary) // self.cell_size)
            # Ensure coordinates are within grid bounds
            grid_x = max(0, min(grid_x, self.grid_size - 1))
            grid_y = max(0, min(grid_y, self.grid_size - 1))
            return grid_x, grid_y

        # Create empty grid
        grid = np.zeros((self.grid_size, self.grid_size, 3))

        # Set head
        head_x, head_y = turtle_to_grid(self.head.head.xcor(), self.head.head.ycor())
        grid[head_y, head_x, 0] = 1
        print(f"Setting head at [{head_y}, {head_x}], value: {grid[head_y, head_x, 0]}")

        # Set body segments
        for segment in self.segments:
            seg_x, seg_y = turtle_to_grid(segment.xcor(), segment.ycor())
            grid[seg_y, seg_x, 1] = 1
            print(f"Setting body segment at [{seg_y}, {seg_x}], value: {grid[seg_y, seg_x, 1]}")

        # Set food
        food_x, food_y = turtle_to_grid(self.food.xcor(), self.food.ycor())
        grid[food_y, food_x, 2] = 1  # Make sure this line is actually setting the value
        print(f"Setting food at [{food_y}, {food_x}], value: {grid[food_y, food_x, 2]}")


        return torch.from_numpy(grid.flatten()).float()

        


    def update_snake_body(self):
        # Move the end segments first in reverse order
        for index in range(len(self.segments)-1, 0, -1):
            x = self.segments[index-1].xcor()
            y = self.segments[index-1].ycor()
            self.segments[index].goto(x, y)

        # Move segment 0 to where the head is
        if len(self.segments) > 0:
            x = self.head.head.xcor()
            y = self.head.head.ycor()
            self.segments[0].goto(x,y)


    def get_random_action(self):
        # This is a placeholder. In a real RL setup, you'd get the action from your model
        return random.choice(ACTIONS)
    
    def get_reward(self, old_score, new_score, collision):
        if collision:
            return -1
        elif new_score > old_score:
            return 1
        else:
            return 0

    def step(self,action):
        old_score = self.score
        head = self.head
        self.delay = GAME_SPEED

        # Score
        segments = self.segments
         
        match action:
            case 0:
                self.head.go_up()
                print("UP")
            case 1:
                self.head.go_down()
            case 2:
                self.head.go_left()
            case _:
                self.head.go_right()

        self.wn.update()
        collision_check = self.check_wall_collision() or self.check_self_collision()
        self.check_food_collision()
        self.update_snake_body()
        head.move()
        new_state = self.state_to_array()
        reward = self.get_reward(old_score, self.score, collision_check)
        done = collision_check

        return new_state,reward,done
        
    def check_self_collision(self):
        segments = self.segments
        head = self.head
        pen = self.pen

    # Check for head collision with the body segments
        for segment in segments:
            if segment.distance(head.head) < 20:
                time.sleep(1)
                head.head.goto(0,0)
                head.direction = "stop"
            
                # Hide the segments
                for segment in segments:
                    segment.goto(1000, 1000)
            
                # Clear the segments list
                segments.clear()

                # Reset the score
                self.score = 0

                # Reset the delay
                self.delay = 0.1
            
                # Update the score display
                pen.clear()
                pen.write("Score: {}  High Score: {}".format(self.score, self.high_score), align="center", font=("Courier", 24, "normal"))

                return 1
        return 0

    def setup_window(self):
        self.wn = self.turtle.Screen()
        self.wn = turtle.Screen()
        self.wn.title("SnaKE")
        self.wn.bgcolor("green")
        self.wn.setup(width=300, height=300)
        self.wn.tracer(0) # Turns off the screen updates

    def setup_pen(self):
        self.pen = self.turtle.Turtle()
        self.pen.speed(0)
        self.pen.shape("square")
        self.pen.color("white")
        self.pen.penup()
        self.pen.hideturtle()
        self.pen.goto(0, 140)
        self.pen.write("Score: 0  High Score: 0", align="center", font=("Courier", 24, "normal"))

    def setup_food(self):
        self.food = turtle.Turtle()
        self.food.speed(0)
        self.food.shape("circle")
        self.food.color("red")
        self.food.penup()
        self.food.goto(0,100)


# game = GameLauncher()
# for i in range(20):
#     action = game.get_random_action()
#     new_state,reward,done =game.step(action)
#     print(f"Run {new_state.shape} |Reward is {reward} | Done is {done}")
#     time.sleep(2)


# Model and Replay Buffer definition

In [2]:
from collections import deque, namedtuple
import torch.nn as nn
import random

Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward'))
class ReplayBuffer:
    def __init__(self,max_capacity):
        self.memory = deque([],max_capacity)
        
    # Takes a named tuple of Transition
    def push(self,transition_):
        self.memory.append(transition_)
        
    def can_sample(self,batch_size):
        return len(self.memory) >= batch_size
        
    def sample(self,batch_size):
        return random.sample(self.memory,batch_size)
        
    def __len__(self):
        return len(self.memory)        

class DQN(nn.Module):
    def __init__(self,n_observations,actions):
        super(DQN,self).__init__()
        self.relu = nn.ReLU() 
        self.input = nn.Linear(n_observations,128)
        self.middle = nn.Linear(128,256)
        self.fc = nn.Linear(256,len(actions))
        
    def forward(self,x):
        x = self.relu(self.input(x))
        x = self.relu(self.middle(x))        
        return self.fc(x)
    


# Training

In [3]:
import torch
from torch.functional import F
import copy
epsilon = 1.0       # Start epsilon at 1.0 for exploration
epsilon_min = 0.01  # Minimum epsilon for a reasonable amount of exploitation
epsilon_decay = 0.995
device = torch.device("cpu")

def policy(state,action_list,inference_model):
    global epsilon  # Ensure epsilon is tracked across calls
    if torch.rand(1) < epsilon:
        epsilon = max(epsilon_min,epsilon * epsilon_decay)
        return random.randrange(len(action_list))
    else:
        return inference_model(state).detach().argmax().item()
    
    
    
def training_model(policy_net:DQN,game_instance:GameLauncher,lr,batch_size,episodes = 20,gamma=0.99,actions=[1,2,3,4]):
    optimizer = torch.optim.AdamW(policy_net.parameters(),lr=lr,)
    replay_buffer = ReplayBuffer(max_capacity=300)
    target_q_model = copy.deepcopy(policy_net).to(device).eval()
    
    for episode in range(1,episodes + 1):
        done = False
        total_loss = 0
        while not done:
            current_state = game_instance.state_to_array()
            action = policy(state=current_state,action_list=actions,inference_model=target_q_model)
            new_state,reward,done =game_instance.step(action)
            
            print(f"New state is {new_state.shape} | Reward is {reward} | Done {done} ")
            replay_buffer.push(Transition(state=current_state,action=action,next_state=new_state,reward=reward))
            
            if replay_buffer.can_sample(batch_size):
                state_b,action_b,next_state_b,reward_b = replay_buffer.sample(batch_size=200)
                action_b = policy_net(state_b).to(device).gather(1, action_b)
                
                with torch.no_grad():
                    maximum_next_q_value = torch.max(target_q_model(next_state_b),dim=1,keepdim=True)[0]
                    target_q_value =  reward_b + (gamma * maximum_next_q_value * reward_b)
                
                criterion = nn.SmoothL1Loss()
                
                loss = criterion(action_b,target_q_value)
                loss.backward() # compute gradients for all parameters
                total_loss += loss.item()
                
                torch.nn.utils.clip_grad_value_(policy_net.parameters(), 100)
                optimizer.zero_grad()
                optimizer.step()
        print(f"Episode {episode} done with average error | {total_loss / len(replay_buffer)}")
        target_q_model.load_state_dict(policy_net.state_dict())
        
        if episode % 10:
            torch.save(policy_net.state_dict(),f"policy_episode({episode}).pth")
                

game = GameLauncher()
policy_net = DQN(len(torch.rand(588,)),actions=ACTIONS)
training_model(policy_net=policy_net,game_instance=game,lr=0.001,batch_size=200,)
                
                
            

Setting head at [7, 7], value: 1.0
Setting food at [12, 7], value: 1.0
UP
Setting head at [8, 7], value: 1.0
Setting food at [12, 7], value: 1.0
New state is tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0.

ValueError: too many values to unpack (expected 4)

: 