In [1]:
# Logger
import logging
logging.basicConfig(level=logging.INFO)

# Import local paths
import sys, os
sys.path.append(os.path.abspath('../../..'))
sys.path.append(os.path.abspath('../../../../openai-envs'))

# Enable automatic module reload
%load_ext autoreload
%autoreload 2

# Load PyALCS module
from lcs.agents import EnvironmentAdapter
from lcs.agents.acs2 import ACS2, Configuration, ClassifiersList

# Load environments
import gym
import gym_corridor

# Corridor

Actions available to the agent:

    MOVE_LEFT = 0
    MOVE_RIGHT = 1


In [2]:
class CorridorAdapter(EnvironmentAdapter):
    """Environment adapter used for the Corridor experiments in this notebook."""

    @staticmethod
    def to_genotype(phenotype):
        """Wrap a raw observation in a one-element tuple.

        The value is passed through unchanged; only the container is added,
        which matches ``classifier_length=1`` used in the configuration.

        NOTE(review): ``phenotype`` is presumably the observation emitted by
        the Corridor environment -- confirm against ``gym_corridor``.
        """
        return (phenotype,)

In [3]:
# ACS2 configuration object; it is passed to every ACS2 agent created in this notebook.
cfg = Configuration(
    # Perception is a 1-tuple (see CorridorAdapter.to_genotype).
    classifier_length=1,
    # MOVE_LEFT (0) and MOVE_RIGHT (1).
    number_of_possible_actions=2,
    # NOTE(review): the values below are ACS2 hyperparameters; their exact
    # semantics are defined by PyALCS' Configuration -- confirm before tuning.
    epsilon=1.0,
    beta=0.05,
    gamma=0.95,
    theta_exp=50,
    theta_ga=50,
    do_ga=True,
    mu=0.03,
    u_max=1,
    # NOTE(review): presumably metrics are collected every 20th trial -- confirm.
    metrics_trial_frequency=20,
    environment_adapter=CorridorAdapter,
)

In [4]:
def print_simple_stats(population, metrics):
    """Print the population size and the average steps per recorded trial.

    Args:
        population: Sized collection of classifiers; only ``len()`` is used.
        metrics: Iterable of metric records, each providing a
            ``'steps_in_trial'`` entry.

    When ``metrics`` is empty the average is reported as ``nan`` instead of
    raising ``ZeroDivisionError``.
    """
    steps = [m['steps_in_trial'] for m in metrics]

    # Guard the division: an empty metrics collection has no meaningful mean.
    avg_steps = sum(steps) / len(steps) if steps else float('nan')

    print(f"Population of {len(population)}, avg steps {avg_steps}")

## Corridor 20

### Exploration

In [5]:
# Create the environment with id 'corridor-20-v0'; it is reused by both the
# exploration and the exploitation run.
# NOTE(review): the id is presumably registered by importing gym_corridor -- confirm.
corridor = gym.make('corridor-20-v0')

In [6]:
%%time
# Create a fresh ACS2 agent (no initial population) and run 1000 exploration
# trials on the corridor. `population` holds the resulting classifiers and
# `metrics` the recorded per-trial statistics (e.g. 'steps_in_trial').
agent = ACS2(cfg)
population, metrics = agent.explore(corridor, 1000)

INFO:lcs.agents.Agent:{'trial': 0, 'steps_in_trial': 33, 'reward': 1000}
INFO:lcs.agents.Agent:{'trial': 100, 'steps_in_trial': 200, 'reward': 0}
INFO:lcs.agents.Agent:{'trial': 200, 'steps_in_trial': 200, 'reward': 0}
INFO:lcs.agents.Agent:{'trial': 300, 'steps_in_trial': 200, 'reward': 0}
INFO:lcs.agents.Agent:{'trial': 400, 'steps_in_trial': 60, 'reward': 1000}
INFO:lcs.agents.Agent:{'trial': 500, 'steps_in_trial': 200, 'reward': 0}
INFO:lcs.agents.Agent:{'trial': 600, 'steps_in_trial': 200, 'reward': 0}
INFO:lcs.agents.Agent:{'trial': 700, 'steps_in_trial': 103, 'reward': 1000}
INFO:lcs.agents.Agent:{'trial': 800, 'steps_in_trial': 200, 'reward': 0}
INFO:lcs.agents.Agent:{'trial': 900, 'steps_in_trial': 200, 'reward': 0}


CPU times: user 39.1 s, sys: 46.8 ms, total: 39.1 s
Wall time: 39.5 s


In [7]:
# Summarise the exploration run: population size and average steps per recorded trial.
print_simple_stats(population, metrics)

Population of 38, avg steps 142.94


In [8]:
# Five classifiers with the highest fitness after exploration, best first.
sorted(population, key=lambda classifier: classifier.fitness, reverse=True)[:5]

[19 1 20               (empty)               q: 1.0   r: 1e+03  ir: 1e+03  f: 1e+03  exp: 567 tga: 129202 talp: 129204 tav: 2.2e+02 num: 19,
 18 1 19               (empty)               q: 1.0   r: 950.0  ir: 0.0    f: 950.0  exp: 1089 tga: 129179 talp: 129204 tav: 88.5   num: 19,
 17 1 18               (empty)               q: 1.0   r: 902.5  ir: 0.0    f: 902.5  exp: 1566 tga: 129178 talp: 129201 tav: 76.3   num: 19,
 19 0 18               (empty)               q: 1.0   r: 902.0  ir: 0.0    f: 902.0  exp: 567 tga: 129180 talp: 129195 tav: 1.83e+02 num: 19,
 16 1 17               (empty)               q: 1.0   r: 857.3  ir: 0.0    f: 857.3  exp: 1994 tga: 129177 talp: 129200 tav: 59.6   num: 19]

### Exploitation

In [9]:
%%time
# Create a new ACS2 agent seeded with the population obtained during
# exploration and run 100 exploitation trials on the same corridor.
agent = ACS2(cfg, population)
pop_exploit, metric_exploit = agent.exploit(corridor, 100)

INFO:lcs.agents.Agent:{'trial': 0, 'steps_in_trial': 21, 'reward': 1000}


CPU times: user 475 ms, sys: 0 ns, total: 475 ms
Wall time: 482 ms


In [10]:
# Summarise the exploitation run: population size and average steps per recorded trial.
print_simple_stats(pop_exploit, metric_exploit)

Population of 38, avg steps 14.4


In [11]:
# Five classifiers with the highest fitness after exploitation, best first.
sorted(pop_exploit, key=lambda classifier: classifier.fitness, reverse=True)[:5]

[19 1 20               (empty)               q: 1.0   r: 1e+03  ir: 1e+03  f: 1e+03  exp: 567 tga: 129202 talp: 129204 tav: 2.2e+02 num: 19,
 18 1 19               (empty)               q: 1.0   r: 950.0  ir: 0.0    f: 950.0  exp: 1089 tga: 129179 talp: 129204 tav: 88.5   num: 19,
 17 1 18               (empty)               q: 1.0   r: 902.5  ir: 0.0    f: 902.5  exp: 1566 tga: 129178 talp: 129201 tav: 76.3   num: 19,
 19 0 18               (empty)               q: 1.0   r: 902.2  ir: 0.0    f: 902.2  exp: 567 tga: 129180 talp: 129195 tav: 1.83e+02 num: 19,
 16 1 17               (empty)               q: 1.0   r: 857.4  ir: 0.0    f: 857.4  exp: 1994 tga: 129177 talp: 129200 tav: 59.6   num: 19]