In [1]:
import matplotlib.pyplot as plt

import gym
from gym.envs.registration import register

fl_env = gym.make('FrozenLake-v1')

# Reset the state
state = fl_env.reset()

# Render the environment
fl_env.render()
register(
    id='FrozenLakeNotSlippery-v0',
    entry_point='gym.envs.toy_text:FrozenLakeEnv',
    kwargs={'map_name': '4x4', 'is_slippery': False},
    max_episode_steps=100,
    reward_threshold=0.78,  # optimum = .8196
)

fl_ns_env = gym.make('FrozenLakeNotSlippery-v0')

# Reset the state
state = fl_ns_env.reset()

# Render the environment
fl_ns_env.render()
# Import PyALCS code from local path
import sys, os
sys.path.append(os.path.abspath('./'))

from lcs.agents import EnvironmentAdapter
from lcs.agents.acs2 import ACS2, Configuration



CLASSIFIER_LENGTH = 16  # Because we are operating in 4x4 grid
POSSIBLE_ACTIONS = fl_env.action_space.n  # 4

  "You are calling render method without specifying any render mode. "
  "You are calling render method without specifying any render mode. "


In [2]:
class FrozenLakeAdapter(EnvironmentAdapter):
    @classmethod
    def to_genotype(cls, phenotype):
        genotype = ['0' for i in range(CLASSIFIER_LENGTH)]
        genotype[phenotype] = 'X'
        return ''.join(genotype)

FrozenLakeAdapter().to_genotype(4)

TypeError: module() takes at most 2 arguments (3 given)

In [None]:
from lcs.metrics import population_metrics


# We assume if the final state was with number 15 that the algorithm found the reward. Otherwise not
def fl_metrics(pop, env):
    metrics = {
        'found_reward': env.env.s == 15,
    }

    # Add basic population metrics
    metrics.update(population_metrics(pop, env))

    return metrics
def print_performance(population, metrics):
    population.sort(key=lambda cl: -cl.fitness)
    population_count = len(population)
    reliable_count = len([cl for cl in population if cl.is_reliable()])
    successful_trials = sum(m['found_reward'] for m in metrics)

    print("Number of classifiers: {}".format(population_count))
    print("Number of reliable classifiers: {}".format(reliable_count))
    print("Percentage of successul trials: {:.2f}%".format(successful_trials / EXPLOIT_TRIALS  * 100))
    print("\nTop 10 classifiers:")
    for cl in population[:10]:
        print("{!r} \tq: {:.2f} \tr: {:.2f} \tir: {:.2f} \texp: {}".format(cl, cl.q, cl.r, cl.ir, cl.exp))


In [None]:
def plot_success_trials(metrics, ax=None):
    if ax is None:
        ax = plt.gca()

    trials = [m['trial'] for m in metrics]
    success = [m['found_reward'] for m in metrics]

    ax.plot(trials, success)
    ax.set_title("Successful Trials")
    ax.set_xlabel("Trial")
    ax.set_ylabel("Agent found reward")

def plot_population(metrics, ax=None):
    if ax is None:
        ax = plt.gca()

    trials = [m['trial'] for m in metrics]

    population_size = [m['numerosity'] for m in metrics]
    reliable_size = [m['reliable'] for m in metrics]

    ax.plot(trials, population_size, 'b', label='all')
    ax.plot(trials, reliable_size, 'r', label='reliable')

    ax.set_title("Population size")
    ax.set_xlabel("Trial")
    ax.set_ylabel("Number of macroclassifiers")
    ax.legend(loc='best')

def plot_population(metrics, ax=None):
    if ax is None:
        ax = plt.gca()

    trials = [m['trial'] for m in metrics]

    population_size = [m['numerosity'] for m in metrics]
    reliable_size = [m['reliable'] for m in metrics]

    ax.plot(trials, population_size, 'b', label='all')
    ax.plot(trials, reliable_size, 'r', label='reliable')

    ax.set_title("Population size")
    ax.set_xlabel("Trial")
    ax.set_ylabel("Number of macroclassifiers")
    ax.legend(loc='best')

In [None]:
cfg = Configuration(
    classifier_length=CLASSIFIER_LENGTH,
    number_of_possible_actions=POSSIBLE_ACTIONS,
    environment_adapter=FrozenLakeAdapter(),
    metrics_trial_frequency=1,
    user_metrics_collector_fcn=fl_metrics,
    theta_i=0.3,
    epsilon=0.7)

print(cfg)

In [None]:
EXPLORE_TRIALS = 2000
EXPLOIT_TRIALS = 100

In [None]:
def perform_experiment(cfg, env):
    # explore phase
    agent = ACS2(cfg)
    population_explore, metrics_explore = agent.explore(env, EXPLORE_TRIALS)

    # exploit phase, reinitialize agent with population above
    agent = ACS2(cfg, population=population_explore)
    population_exploit, metrics_exploit = agent.exploit(env, EXPLOIT_TRIALS)

    return (population_explore, metrics_explore), (population_exploit, metrics_exploit)


In [None]:
explore_results, exploit_results = perform_experiment(cfg, fl_env)
print_performance(explore_results[0], explore_results[1])
print_performance(exploit_results[0], exploit_results[1])
explore_results_2, exploit_results_2 = perform_experiment(cfg, fl_ns_env)
print_performance(explore_results_2[0], explore_results_2[1])
print_performance(exploit_results_2[0], exploit_results_2[1])

In [None]:
def plot_population(metrics, ax=None):
    if ax is None:
        ax = plt.gca()

    trials = [m['trial'] for m in metrics]

    population_size = [m['numerosity'] for m in metrics]
    reliable_size = [m['reliable'] for m in metrics]

    ax.plot(trials, population_size, 'b', label='all')
    ax.plot(trials, reliable_size, 'r', label='reliable')

    ax.set_title("Population size")
    ax.set_xlabel("Trial")
    ax.set_ylabel("Number of macroclassifiers")
    ax.legend(loc='best')

In [None]:
original = explore_results[1]
modified = explore_results_2[1]

ax = plt.gca()

trials = [m['trial'] for m in original]

original_numerosity = [m['numerosity'] for m in original]
modified_numerosity = [m['numerosity'] for m in modified]

ax.plot(trials, original_numerosity, 'r')
ax.text(1000, 350, "Original environment", color='r')

ax.plot(trials, modified_numerosity, 'b')
ax.text(1000, 40, 'No-slippery setting', color='b')


ax.set_title('Classifier numerosity in FrozenLake environment')
ax.set_xlabel('Trial')
ax.set_ylabel('Number of macroclassifiers')

In [None]:
def plot_checkerboard(plot_filename=None):
    import gym_checkerboard  # noqa: F401
    checkerboard_env = gym.make('checkerboard-2D-3div-v0')
    checkerboard_env.reset()

    np_board = checkerboard_env.env._board.board

    fig = plt.figure(figsize=(7, 7))
    ax = fig.add_subplot(111)

    ax.matshow(np_board, cmap=plt.get_cmap('gray_r'), extent=(0, 1, 0, 1), alpha=.5)
    ax.set_xlabel("x")
    ax.set_ylabel("y")

    if plot_filename:
        fig.savefig(plot_filename, dpi=PLOT_DPI)

    return fig

glue('checkerboard-env', plot_checkerboard(f'{plot_dir}/checkerboard-env-visualization.png'), display=False)