## Setup Code

In [2]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# project files stored there can be read like local files.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
import os

# TODO: Fill in the Google Drive path (relative to "My Drive") where you uploaded the project.
# Example: if the files live in "My Drive/HAPPYSNAKE/SADSNAKE", use 'HAPPYSNAKE/SADSNAKE'.
GOOGLE_DRIVE_PATH_AFTER_MYDRIVE = 'newSnakeRL'

# Anchor the path at the absolute mount point used by drive.mount() rather than
# a path relative to the current working directory, so it (and the sys.path
# entry built from it) keeps working if the working directory changes.
GOOGLE_DRIVE_PATH = os.path.join('/content/drive', 'My Drive', GOOGLE_DRIVE_PATH_AFTER_MYDRIVE)

# Sanity check: the listing should show the project's files and folders.
print(os.listdir(GOOGLE_DRIVE_PATH))

['LICENSE', 'main.py', 'README.md', 'common', 'game', 'pngs', 'agent', 'model', '.git', 'main']


In [6]:
import sys

# Make the project's packages on Drive importable from this notebook.
# Guarded so that re-running the cell does not pile up duplicate sys.path entries.
if GOOGLE_DRIVE_PATH not in sys.path:
    sys.path.append(GOOGLE_DRIVE_PATH)

import os
import time

## Main Code

In [7]:
import torch

from common.settings import *
from agent.play_game_with_agent import *
from agent.dqn_agent import DQNAgent


pygame 2.6.0 (SDL 2.28.4, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [8]:
def main():
    """
    Interactive entry point: ask for a mode, then play or train the DQN agent.

    Creates a single ``DQNAgent`` on CUDA when available (CPU otherwise) and
    keeps prompting for a mode until the selected mode handler reports that it
    actually started (returns True). A handler returning False sends the user
    back to the mode prompt.

    Decision flow:

    Start
    │
    └─── Choose mode
        │
        ├─── Play
        │   │
        │   └─── Load previous model
        │       │
        │       ├─── Success
        │       │   └─── Play with loaded model
        │       │
        │       └─── Fail
        │           │
        │           └─── Continue without loaded model?
        │               │
        │               ├─── Yes
        │               │   └─── Play with untrained model
        │               │
        │               └─── No
        │                   └─── Return to mode selection
        │
        └─── Learn and Play
            │
            └─── Start training from scratch?
                │
                ├─── Yes
                │   └─── Start new training
                │
                └─── No
                    │
                    └─── Load previous model
                        │
                        ├─── Success
                        │   └─── Play and learn with loaded model
                        │
                        └─── Fail
                            │
                            └─── Continue without loaded model?
                                │
                                ├─── Yes
                                │   └─── Play and learn with untrained model
                                │
                                └─── No
                                    └─── Return to mode selection
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dqn_agent = DQNAgent(device=device)

    # Plot-update callback for training. Built on the first 'b' selection and
    # reused afterwards, so backing out to mode selection and choosing 'b'
    # again does not build a second plotter.
    # NOTE(review): assumes an unused plotter is safe to reuse -- confirm
    # against create_training_plotter.
    update_plot = None

    while True:
        # strip() so stray whitespace around the answer is not rejected as invalid.
        user_input = input("Choose mode: (a) Play or (b) Learn and Play (a/b): ").strip().lower()
        if user_input in ['a', 'play']:
            print("Play mode selected.")
            if play_mode(dqn_agent):
                break
        elif user_input in ['b', 'learn and play']:
            if update_plot is None:
                update_plot = create_training_plotter()

            print("Learn and Play mode selected.")
            if learn_and_play_mode(dqn_agent, update_plot):
                break
        else:
            print("Invalid input. Please enter 'a' for Play or 'b' for Learn and Play.")

def play_mode(dqn_agent: DQNAgent):
    """
    Run the agent in play-only mode.

    Saved weights are loaded first. If loading fails, the user is asked
    whether to go ahead with the untrained agent.

    Args:
        dqn_agent: The agent that will play.

    Returns:
        True once a play session has been run, False if the user declined to
        continue without loaded weights (caller returns to mode selection).
    """
    if load_model(dqn_agent):
        print("Previous model loaded successfully.")
    elif confirm_action("Without loaded weights, the agent's performance will be poor. Continue? (y/n): "):
        print("Continuing with untrained agent...")
    else:
        print("Returning to mode selection...")
        return False

    # Both accepted paths run the same session, so the call lives in one place.
    play_with_agent(dqn_agent,
                    playing_rounds_per_display=playing_rounds_per_display,
                    total_rounds=playing_total_rounds)
    return True

def learn_and_play_mode(dqn_agent: DQNAgent, update_plot):
    """
    Run the agent in training ("learn and play") mode.

    The user chooses between training from scratch and resuming from saved
    weights. If resuming is chosen but loading fails, the user may continue
    with the untrained model or back out.

    Args:
        dqn_agent: The agent to train.
        update_plot: Callback forwarded to the training loop as
            ``update_plot_callback``.

    Returns:
        True once training has been run, False if the user backed out
        (caller returns to mode selection).
    """
    start_fresh = confirm_action("Start training from scratch? (y/n): ")
    if not start_fresh:
        print("Attempting to load previous model...")
        model_loaded = load_model(dqn_agent)
        print("Previous model loaded successfully." if model_loaded
              else "Failed to load previous model.")
        # Only ask about continuing when there is nothing to resume from.
        if not model_loaded and not confirm_action("Continue with untrained model? (y/n): "):
            print("Returning to mode selection...")
            return False

    print("Starting training...")
    play_and_learn_with_learning_agent(
        dqn_agent,
        total_episodes=learning_total_episodes,
        update_plot_callback=update_plot,
        learning_episodes_per_display=learning_episodes_per_display,
    )
    return True

def load_model(dqn_agent: DQNAgent):
    """
    Ask the agent to load its saved model(s).

    Args:
        dqn_agent: Agent whose ``load()`` method is invoked.

    Returns:
        True if ``dqn_agent.load()`` returned a truthy value, otherwise False.
    """
    # Normalise whatever load() returns to a strict bool.
    return bool(dqn_agent.load())

def confirm_action(prompt: str):
    """
    Ask a yes/no question, re-prompting until a valid answer is given.

    Surrounding whitespace and letter case are ignored, and the full words
    'yes' / 'no' are accepted alongside 'y' / 'n'.

    Args:
        prompt: Text shown to the user on every attempt.

    Returns:
        True for an affirmative answer, False for a negative one.
    """
    affirmative = ('y', 'yes')
    negative = ('n', 'no')
    while True:
        # strip() so an answer like "y " is not rejected as invalid.
        answer = input(prompt).strip().lower()
        if answer in affirmative:
            return True
        if answer in negative:
            return False
        print("Invalid input. Please enter 'y' or 'n'.")

In [None]:
# Start the interactive session. The guard keeps main() from running if this
# code is imported as a module instead of being executed directly.
if __name__ == "__main__":
    main()

Agent initialized on device: cuda
Choose mode: (a) Play or (b) Learn and Play (a/b): b
Learn and Play mode selected.
Start training from scratch? (y/n): y
Starting training from scratch...
Play times: 1, Score: 1
Epsilon: 0.7990000000000002
Average loss: 1548.005615234375
Average score current 100 rounds: 1.0


Play times: 2, Score: 0
Epsilon: 0.7954000000000006
Average loss: 304.57593705919055
Average score current 100 rounds: 0.5


Play times: 3, Score: 0
Epsilon: 0.7946000000000006
Average loss: 316.0437240600586
Average score current 100 rounds: 0.3333333333333333


Play times: 4, Score: 0
Epsilon: 0.7838000000000018
Average loss: 114.0982185293127
Average score current 100 rounds: 0.25


Play times: 5, Score: 0
Epsilon: 0.773600000000003
Average loss: 32.803807800891356
Average score current 100 rounds: 0.2


Play times: 6, Score: 0
Epsilon: 0.7652000000000039
Average loss: 27.68225241842724
Average score current 100 rounds: 0.16666666666666666


Play times: 7, Score: 0
Epsilon: 0