In [1]:
import sys
import os

# Make the directory two levels above the working directory importable; the
# later `from automl...` imports rely on it (i.e. it is the folder containing "automl").
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
automl_parent_dir = os.path.abspath("../..")
if automl_parent_dir not in sys.path:
    sys.path.append(automl_parent_dir)


In [2]:
import torch

In [3]:
# Enable IPython's autoreload extension so that edits to imported project
# modules are picked up without restarting the kernel (mode 2 = reload all modules).
%load_ext autoreload
%autoreload 2

# Project components used further down: the trainer and the per-agent component.
from automl.rl_components.rl_trainer_component import RLTrainerComponent
from automl.rl_components.agent_components import AgentComponent

### Logger

In [4]:
%load_ext autoreload
%autoreload 2

sys.path.append(os.path.abspath("../../..")) #make the folder "project" part of this

from project.logger import Log

lg = Log.openLog()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Opening a log... Log Dir: data\logs Log Name:
Log directory did not exist, creating it at: data\logs\log_5


## Device: GPU or CPU?

In [14]:
# Pick the torch device used for the rest of the notebook.
# Preference order: CUDA, then Apple MPS, then CPU.
try:

    lg.writeLine("Trying to use cuda...")

    # torch.backends.mps does not exist on older PyTorch builds; look it up
    # defensively so the probe cannot fail with AttributeError.
    mps_backend = getattr(torch.backends, "mps", None)

    if torch.cuda.is_available():
        device = torch.device("cuda:0")
    elif mps_backend is not None and mps_backend.is_available():
        # Only reached when CUDA is unavailable, so MPS never overrides a CUDA choice.
        device = torch.device("mps")
    else:
        device = torch.device("cpu")

    lg.writeLine("The model will trained and evaluated on: " + str(device))

# NOTE(review): nothing in the try block appears to allocate GPU memory, so this
# handler may never fire — confirm whether a broader fallback is intended.
except torch.cuda.OutOfMemoryError:
    device = torch.device("cpu")

    lg.writeLine("The model will be trained and evaluated without caring for device ")

Trying to use cuda...
The model will trained and evaluated on: cuda:0


### Environment wrapper

In [15]:
from pettingzoo.butterfly import cooperative_pong_v5

In [16]:
def state_translator(state):
    """Turn a NumPy observation into a float32 tensor on the notebook's `device`.

    `device` is the module-level torch device chosen earlier in the notebook.
    """
    as_tensor = torch.from_numpy(state)
    as_float = as_tensor.to(torch.float32)
    return as_float.to(device)

class Env:
    """Thin adapter around the PettingZoo ``cooperative_pong_v5`` environment.

    Observations returned by ``observe`` and ``last`` are passed through
    ``state_translator``; every other method forwards to the wrapped
    environment unchanged.
    """

    def __init__(self):
        """Create the wrapped environment and reset it once."""
        # NOTE(review): this passes the string "none", not None — confirm that is
        # the value the environment expects for "no rendering".
        self.env = cooperative_pong_v5.env(render_mode="none")
        self.env.reset()

    def __str__(self):
        """Return a fixed human-readable name for this environment."""
        return "Petting zoo cooperative pong v5"

    def reset(self):
        """Reset the wrapped environment and return whatever its ``reset`` returns."""
        return self.env.reset()

    def observe(self, *args):
        """Return the wrapped environment's observation, converted by ``state_translator``."""
        return state_translator(self.env.observe(*args))

    def agents(self):
        """Return the wrapped environment's ``agents`` attribute."""
        return self.env.agents

    def action_space(self, *args):
        """Forward to the wrapped environment's ``action_space``."""
        return self.env.action_space(*args)

    def last(self):
        """Return ``(state, reward, done, info)`` from the wrapped environment's ``last()``.

        ``done`` is true when the wrapped environment reports either
        termination or truncation, so a truncated episode is also
        signalled as finished instead of being silently dropped.
        """
        observation, reward, termination, truncation, info = self.env.last()

        done = termination or truncation

        return state_translator(observation), reward, done, info

    def agent_iter(self):
        """Forward to the wrapped environment's ``agent_iter``."""
        return self.env.agent_iter()

    def step(self, *args):
        """Forward to the wrapped environment's ``step``."""
        return self.env.step(*args)

    def rewards(self):
        """Return the wrapped environment's ``rewards`` attribute."""
        return self.env.rewards

In [17]:
# Instantiate the wrapper; Env.__init__ builds the PettingZoo environment and resets it once.
env = Env()

### Define Hyperparameters

In [9]:
# Tunable settings for the run; the last four are forwarded to the RL trainer input below.
learning_rate = 0.001
# Misspelled original name, kept as an alias so any code that still refers to it keeps working.
leraning_rate = learning_rate
num_episodes = 10
state_memory_size = 1
limit_steps = 100
optimization_interval = 50

### Agents

In [None]:
%load_ext autoreload
%autoreload 2

from simple_rl import initialize_agents_components

agents = initialize_agents_components(lg=lg, env=env, agents_input={"device" : device}, learning_rate=0.001, state_memory_size=state_memory_size)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Opening a log... Log Dir: data\logs\log_5 Log Name:agent_1
Log directory did not exist, creating it at: data\logs\log_5\agent_1
State for agent agent_1 has shape: Z: 280 Y: 480 X: 3
Action spac of agent paddle_0: Discrete(3)
Initializing model with input{'board_x': 3, 'board_y': 480, 'board_z': 280, 'output_size': 3, 'device': ''}
Created agent in training agent_1
Opening a log... Log Dir: data\logs\log_5 Log Name:agent_2
Log directory did not exist, creating it at: data\logs\log_5\agent_2
State for agent agent_2 has shape: Z: 280 Y: 480 X: 3
Action spac of agent paddle_1: Discrete(3)
Initializing model with input{'board_x': 3, 'board_y': 480, 'board_z': 280, 'output_size': 3, 'device': ''}
Created agent in training agent_2
Initialized {'paddle_0': <automl.rl_components.agent_components.AgentComponent object at 0x00000191461FCA60>, 'paddle_1': <automl.rl_components.agent_components.AgentComponent ob

### RL Trainer Component

In [None]:
%load_ext autoreload
%autoreload 2

from automl.rl_components.rl_trainer_component import RLTrainerComponent

rl_trainer_input = {
    "agents" : agents,
    "device" : device,
    "logger" : lg,
    "num_episodes" : num_episodes,
    "state_memory_size" : state_memory_size,
    "environment" : env,
    "limit_steps" : limit_steps ,
    "optimization_interval" : optimization_interval
}


rl_trainer = RLTrainerComponent(input=rl_trainer_input)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Connect Agents to RL Trainer

In [12]:
# Hand every agent component the trainer's `values` under the "training_context" key.
for agent in agents.values():
    agent.pass_input(
        dict(training_context=rl_trainer.values)
    )

### Do the training

In [13]:
# Start the training loop on the configured trainer.
# NOTE(review): the output saved with this notebook ends in a RuntimeError
# (CPU FloatTensor input vs CUDA weights). This cell's execution count (13) is
# lower than the device cell's (14), so the run may have used stale kernel state —
# confirm with Restart Kernel -> Run All.
rl_trainer.run_episodes()

Starting to run episodes of training
Starting to run episode 0
Batch size: 64 Gamma: 0.95 Tau: 0.05 Learning rate: 0.01
Initializing policy model...
Initializing target model...
Initializing model with input{'board_x': 3, 'board_y': 480, 'board_z': 280, 'output_size': 3, 'device': device(type='cuda', index=0)}
Cloning model
Initializing model with input{'board_x': 3, 'board_y': 480, 'board_z': 280, 'output_size': 3, 'device': device(type='cuda', index=0)}
Instantiating an empty memory with size 200
In episode 0, optimizing at step 0 that is the total step 0
Optimizing agent <automl.rl_components.agent_components.AgentComponent object at 0x00000191461FCA60>
Optimization took 0.04602813720703125 seconds
Optimizing agent <automl.rl_components.agent_components.AgentComponent object at 0x00000191461FC700>
Batch size: 64 Gamma: 0.95 Tau: 0.05 Learning rate: 0.01
Initializing policy model...
Initializing target model...
Initializing model with input{'board_x': 3, 'board_y': 480, 'board_z': 28

RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor