In [1]:
import sys
import os

sys.path.append(os.path.abspath("../..")) #make the folder "automl" part of this


In [2]:
import torch

In [3]:
%load_ext autoreload
%autoreload 2

from automl.rl_components.rl_trainer_component import RLTrainerComponent
from automl.rl_components.agent_components import AgentComponent

### Logger

In [4]:
%load_ext autoreload
%autoreload 2

sys.path.append(os.path.abspath("../../..")) #make the folder "project" part of this

from project.logger import Log

lg = Log.openLog()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Opening a log... Log Dir: data\logs Log Name:
Log directory did not exist, creating it at: data\logs\log_20


### Environment wrapper

In [5]:
from pettingzoo.butterfly import cooperative_pong_v5

In [6]:
def state_translator(state, device):
    return torch.from_numpy(state).to(torch.float32).to(device)

class Env(object):
    
    def __init__(self):
        
        self.env = cooperative_pong_v5.env(render_mode="none")
        self.env.reset()
        
    def set_device(self, device):
        self.device = device
        
    def __str__(self):
        
        return "Petting zoo cooperative pong v5"
        
    def reset(self):
        return self.env.reset()
        
    def observe(self, *args):
        return state_translator(self.env.observe(*args), self.device)
        
    def agents(self):
        return self.env.agents
    
    def action_space(self, *args):
        return self.env.action_space(*args)
    
    def last(self):
        
        observation, reward, termination, truncation, info = self.env.last()
        
        #returns state, reward, done, info
        return state_translator(observation, self.device), reward, termination, info
    
    def agent_iter(self):
        
        return self.env.agent_iter()
    
    def step(self, *args):
        
        return self.env.step(*args)
    
    def rewards(self):
        return self.env.rewards

### Define Hyperparameters

In [7]:
num_episodes = 2
state_memory_size = 1
limit_steps = 60
optimization_interval = 50

### RL Trainer Component

In [8]:
%load_ext autoreload
%autoreload 2

from automl.rl_components.rl_pipeline import RLPipelineComponent

env = Env()

rl_pipeline_input = {
    "device" : "gpu",
    "logger" : lg,
    "num_episodes" : num_episodes,
    "state_memory_size" : state_memory_size,
    "environment" : env,
    "limit_steps" : limit_steps ,
    "optimization_interval" : optimization_interval
}


rl_pipeline = RLPipelineComponent(input=rl_pipeline_input)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Do the training

In [9]:
rl_pipeline.train()

RLPipelineComponent: Trying to use cuda...
RLPipelineComponent: There was an error trying to setup the device in 'gpu': Expected one of cpu, cuda, ipu, xpu, mkldnn, opengl, opencl, ideep, hip, ve, fpga, ort, xla, lazy, vulkan, mps, meta, hpu, mtia, privateuseone device type at start of device string: gpu
RLPipelineComponent: The model will trained and evaluated on: cpu
RLPipelineComponent: Creating agents
Opening a log... Log Dir: data\logs\log_20 Log Name:agent_1
Log directory did not exist, creating it at: data\logs\log_20\agent_1
RLPipelineComponent: State for agent agent_1 has shape: Z: 280 Y: 480 X: 3
Action space of agent paddle_0: Discrete(3)
RLPipelineComponent: Created agent in training agent_1
Opening a log... Log Dir: data\logs\log_20 Log Name:agent_2
Log directory did not exist, creating it at: data\logs\log_20\agent_2
RLPipelineComponent: State for agent agent_2 has shape: Z: 280 Y: 480 X: 3
Action space of agent paddle_1: Discrete(3)
RLPipelineComponent: Created agent in 

In [10]:
system_design = rl_pipeline.get_design()


Making design for RLPipelineComponent
Looking at input with key device and type <class 'str'>
not component nor connector
is int, float or str
Looking at input with key logger and type <class 'project.logger.Log.LogClass'>
not component nor connector
Looking at input with key num_episodes and type <class 'int'>
not component nor connector
is int, float or str
Looking at input with key state_memory_size and type <class 'int'>
not component nor connector
is int, float or str
Looking at input with key environment and type <class '__main__.Env'>
not component nor connector
Looking at input with key limit_steps and type <class 'int'>
not component nor connector
is int, float or str
Looking at input with key optimization_interval and type <class 'int'>
not component nor connector
is int, float or str
Looking at input with key agents and type <class 'dict'>
not component nor connector
Looking at input with key save_interval and type <class 'int'>
not component nor connector
is int, float or s

In [11]:
from automl.component_designer import print_design

print_design(system_design)

("RLPipelineComponent",
{
  "device" : gpu
  "num_episodes" : 2
  "state_memory_size" : 1
  "limit_steps" : 60
  "optimization_interval" : 50
  "save_interval" : 100
  "rl_trainer" : 
},
{
},
{
})


In [None]:
from automl.component_designer import design

reconstructued_component = design(system_design)

In [12]:

reconstructued_design = reconstructued_component.get_design()


print_design(reconstructued_design)

Making design for RLPipelineComponent
Looking at input with key device and type <class 'str'>
not component nor connector
is int, float or str
Looking at input with key num_episodes and type <class 'int'>
not component nor connector
is int, float or str
Looking at input with key state_memory_size and type <class 'int'>
not component nor connector
is int, float or str
Looking at input with key limit_steps and type <class 'int'>
not component nor connector
is int, float or str
Looking at input with key optimization_interval and type <class 'int'>
not component nor connector
is int, float or str
Looking at input with key save_interval and type <class 'int'>
not component nor connector
is int, float or str
Looking at input with key rl_trainer and type <class 'str'>
not component nor connector
is int, float or str
("RLPipelineComponent",
{
  "device" : gpu
  "num_episodes" : 2
  "state_memory_size" : 1
  "limit_steps" : 60
  "optimization_interval" : 50
  "save_interval" : 100
  "rl_trainer