In [1]:
import copy
import itertools
import pathlib

import bitstring
import gym
import gym_multiplexer  # noqa: F401
import lcs.agents.acs as acs
import lcs.agents.acs2 as acs2
import lcs.agents.yacs as yacs
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np
import pandas as pd
from lcs import Perception

from src.basic_rl import dynaq
from src.decorators import repeat, get_from_cache_or_run
from src.visualization import discretized_algorithms_colors, PLOT_DPI

# Disable interactive plotting and apply the project style sheet.
plt.ioff()
plt.style.use('../../../src/phd.mplstyle')

# Cache and plot directories are resolved against the current working directory.
_working_dir = pathlib.Path().absolute()
cache_dir = f'{_working_dir}/cache'
plot_dir = f'{_working_dir}/plots'

# Experiment 3 - Single step

In [2]:
# Discretization settings: bins per attribute, the size used in the
# environment id, and the number of leading control bits.
RMPX_BINS = 10
RMPX_SIZE = 3
CTRL_BITS = 1

rmpx = gym.make(f'real-multiplexer-{RMPX_SIZE}bit-v0')

# Lower bound and width of each observation attribute.
_low = rmpx.observation_space.low
_range = rmpx.observation_space.high - _low

# Width of a single bin for each attribute.
RMPX_STEP = _range / RMPX_BINS


class RealMultiplexerUtils:
    """Discretization helpers for a real-valued multiplexer observation.

    An observation has ``size`` real-valued attributes followed by one extra
    attribute. Each of the first ``size`` attributes is mapped onto one of
    ``bins`` integer bins; the extra attribute only takes the discretized
    values ``0`` and ``bins``.

    Parameters
    ----------
    size : int
        Number of binned attributes (control bits + data bits).
    ctrl_bits : int
        How many leading attributes form the address of the data bit.
    bins : int
        Number of bins per attribute.
    _range : numpy.ndarray
        Width of every observation attribute (``high - low``), one entry per
        attribute including the extra last one.
    _threshold : float
        Real values above it are treated as bit 1.
    _low : float or numpy.ndarray
        Lower bound of the attributes. Defaults to 0; pass the observation
        space's lower bound if it differs.

    Attributes
    ----------
    state_mapping : dict
        Integer state id -> discretized state tuple.
    state_mapping_inv : dict
        Discretized state tuple -> integer state id.
    """

    def __init__(self, size, ctrl_bits, bins, _range, _threshold=0.5, _low=0.0):
        self._size = size
        self._ctrl_bits = ctrl_bits
        self._bins = bins
        # Kept on the instance so `discretize` does not rely on module globals.
        self._range = _range
        self._low = _low
        self._step = _range / bins
        self._threshold = _threshold

        self._attribute_values = [list(range(0, bins))] * size + [[0, bins]]
        # Materialised: a bare `itertools.product` would be exhausted after
        # building `state_mapping`, leaving the attribute unusable.
        self._input_space = list(itertools.product(*self._attribute_values))
        self.state_mapping = dict(enumerate(self._input_space))
        self.state_mapping_inv = {v: k for k, v in self.state_mapping.items()}

    def discretize(self, obs, _type=int):
        """Map a real-valued observation onto bin indices.

        Returns a list with one element per attribute, converted to ``_type``
        (e.g. ``int`` or ``str``).
        """
        relative = (np.asarray(obs) - self._low) / self._range
        bin_idx = (relative * self._bins).astype(int)
        return bin_idx.astype(_type).tolist()

    def reverse_discretize(self, discretized):
        """Return the lower edge of every bin in ``discretized`` as an array."""
        n = len(discretized)
        low = np.broadcast_to(self._low, np.shape(self._step))[:n]
        return low + np.asarray(discretized) * self._step[:n]

    def get_transitions(self):
        """List every ``(start, action, end)`` transition of the problem.

        Every start state has ``0`` as its last element. Taking the correct
        action changes the last element to ``bins``; the other action leaves
        the state unchanged.
        """
        transitions = []

        per_attribute = [range(self._bins)] * self._size
        for d_state in itertools.product(*per_attribute):
            start = d_state + (0,)
            solved = d_state + (self._bins,)
            correct_answer = self._get_correct_answer(d_state)

            for action in (0, 1):
                end = solved if action == correct_answer else start
                transitions.append((start, action, end))

        return transitions

    def _get_correct_answer(self, discretized):
        """Return the multiplexer output (0 or 1) for a discretized state.

        Bins are represented by their centre. Using the lower edge would put
        the bin that starts exactly at the threshold on the wrong side of a
        strict ``>`` comparison (or on an arbitrary side, depending on float
        rounding).
        """
        n = len(discretized)
        centres = self.reverse_discretize(discretized) + self._step[:n] / 2
        bits = [bool(b) for b in centres > self._threshold]
        ctrl = bits[:self._ctrl_bits]
        data = bits[self._ctrl_bits:]

        # Control bits form a big-endian unsigned address into the data bits.
        address = 0
        for bit in ctrl:
            address = (address << 1) | int(bit)

        return int(data[address])


# Shared helper instance used by the metrics and wrappers below.
rmpx_utils = RealMultiplexerUtils(
    size=RMPX_SIZE,
    ctrl_bits=CTRL_BITS,
    bins=RMPX_BINS,
    _range=_range)
print(f"Possible states: {len(rmpx_utils.state_mapping)}")

Possible states: 2000


In [3]:
# metrics
def generalization_score(pop):
    """Return the fraction of wildcard symbols over all classifier conditions.

    A condition element counts as a wildcard when it equals ``'#'`` or has a
    ``symbol`` attribute equal to ``'#'``.

    Parameters
    ----------
    pop : iterable
        Classifiers exposing a sized, iterable ``condition``.

    Returns
    -------
    float
        Wildcards divided by the total number of condition symbols, or
        ``0.0`` when there are no symbols at all (e.g. an empty population).
    """

    def is_wildcard(cond):
        return cond == '#' or getattr(cond, 'symbol', None) == '#'

    wildcards = 0
    all_symbols = 0
    # Single pass, so one-shot iterables are handled correctly as well.
    for cl in pop:
        all_symbols += len(cl.condition)
        wildcards += sum(1 for cond in cl.condition if is_wildcard(cond))

    if all_symbols == 0:
        return 0.0

    return wildcards / all_symbols


def rmpx_knowledge(population, env):
    """Return the fraction of transitions predicted by a reliable classifier.

    Parameters
    ----------
    population : iterable
        Classifiers exposing ``is_reliable()`` and
        ``predicts_successfully(p0, action, p1)``.
    env
        Unused; kept so the signature matches the other metric functions.

    Returns
    -------
    float
        Share of transitions from ``rmpx_utils.get_transitions()`` for which
        at least one reliable classifier predicts the outcome; ``0.0`` when
        there are no transitions.
    """
    # Enumerate the transitions once; building the list is the expensive part.
    transitions = rmpx_utils.get_transitions()
    if not transitions:
        return 0.0

    reliable = [c for c in population if c.is_reliable()]
    nr_correct = 0

    for start, action, end in transitions:
        p0 = Perception([str(el) for el in start])
        p1 = Perception([str(el) for el in end])

        # Generator form stops at the first classifier that matches.
        if any(cl.predicts_successfully(p0, action, p1) for cl in reliable):
            nr_correct += 1

    return nr_correct / len(transitions)


def rmpx_metrics_collector(agent, env):
    """Collect population size, knowledge and generalization for an agent.

    Returns a dict with the keys ``pop``, ``knowledge`` and
    ``generalization`` computed from ``agent.population``.
    """
    classifiers = agent.population

    collected = {'pop': len(classifiers)}
    collected['knowledge'] = rmpx_knowledge(classifiers, env)
    collected['generalization'] = generalization_score(classifiers)
    return collected


# DynaQ helpers
def rmpx_perception_to_int(p0, discretize=True):
    """Translate a perception into its integer state id.

    With ``discretize=True`` the perception is first binned with
    ``rmpx_utils.discretize``; otherwise it is used as given. The id is
    looked up in ``rmpx_utils.state_mapping_inv``.
    """
    state = rmpx_utils.discretize(p0) if discretize else p0
    return rmpx_utils.state_mapping_inv[tuple(state)]


def dynaq_rmpx_knowledge_calculator(model, env):
    """Return the fraction of transitions stored correctly in a DynaQ model.

    Parameters
    ----------
    model : mapping
        Indexed as ``model[state][action]``; element ``[0]`` of the stored
        value is compared with the expected next state id.
    env
        Unused; kept so the signature matches the other metric functions.

    Returns
    -------
    float
        Share of transitions from ``rmpx_utils.get_transitions()`` found in
        the model; ``0.0`` when there are no transitions.
    """
    # Enumerate the transitions once; building the list is the expensive part.
    transitions = rmpx_utils.get_transitions()
    if not transitions:
        return 0.0

    nr_correct = 0

    for p0, a, p1 in transitions:
        s0 = rmpx_perception_to_int(p0, discretize=False)
        s1 = rmpx_perception_to_int(p1, discretize=False)

        # NOTE(review): another comment in this notebook describes the model
        # entries as (reward, next_state) tuples, in which case index 0 would
        # be the reward - confirm the tuple order against `dynaq`.
        if s0 in model and a in model[s0] and model[s0][a][0] == s1:
            nr_correct += 1

    return nr_correct / len(transitions)


def parse_lcs_metrics(agent_name, metrics):
    """Convert a list of LCS metric records into a DataFrame.

    Each record must provide ``perf_time``, ``trial``, ``knowledge``,
    ``pop``, ``generalization`` and ``steps_in_trial``. The result has the
    columns ``agent``, ``time``, ``trial``, ``knowledge``, ``population``,
    ``generalization`` and ``trial_steps``, one row per record.
    """
    source_keys = ('perf_time', 'trial', 'knowledge', 'pop',
                   'generalization', 'steps_in_trial')
    column_names = ['agent', 'time', 'trial', 'knowledge', 'population',
                    'generalization', 'trial_steps']

    rows = []
    for record in metrics:
        rows.append([agent_name] + [record[key] for key in source_keys])

    return pd.DataFrame(rows, columns=column_names)


def parse_dyna_metrics(agent, metrics):
    """Convert the DynaQ metrics array into a DataFrame.

    ``metrics`` is transposed so that its four rows become the columns
    ``trial_steps``, ``population``, ``time`` and ``knowledge``. The columns
    ``trial`` (row index), ``agent`` and ``generalization`` (always 0) are
    added, and rows whose ``time`` equals 0.0 are removed.
    """
    frame = pd.DataFrame(
        metrics.T,
        columns=['trial_steps', 'population', 'time', 'knowledge'])

    # derived columns
    frame = frame.assign(trial=frame.index, agent=agent, generalization=0)

    # keep only the rows with a non-zero time
    has_time = ~(frame['time'] == 0.0)
    return frame[has_time]

In [4]:
def single_acs_experiment(
        env_provider,
        trials,
        classifier_length,
        possible_actions,
        learning_rate,
        metrics_trial_freq,
        metrics_fcn):
    """Run one ACS exploration experiment.

    An environment is obtained from ``env_provider()``, an ACS agent is
    configured from the remaining arguments and explores the environment for
    ``trials`` trials.

    Returns the agent and the value returned by ``agent.explore``.
    """
    environment = env_provider()

    settings = {
        'classifier_length': classifier_length,
        'number_of_possible_actions': possible_actions,
        'beta': learning_rate,
        'metrics_trial_frequency': metrics_trial_freq,
        'user_metrics_collector_fcn': metrics_fcn,
    }

    learner = acs.ACS(acs.Configuration(**settings))
    history = learner.explore(environment, trials)

    return learner, history


def single_acs2_experiment(
        env_provider,
        trials,
        classifier_length,
        possible_actions,
        learning_rate,
        do_ga,
        initial_q,
        metrics_trial_freq,
        metrics_fcn):
    """Run one ACS2 exploration experiment.

    An environment is obtained from ``env_provider()``, an ACS2 agent is
    configured from the remaining arguments (including ``do_ga`` and
    ``initial_q``) and explores the environment for ``trials`` trials.

    Returns the agent and the value returned by ``agent.explore``.
    """
    environment = env_provider()

    settings = {
        'classifier_length': classifier_length,
        'number_of_possible_actions': possible_actions,
        'beta': learning_rate,
        'do_ga': do_ga,
        'initial_q': initial_q,
        'metrics_trial_frequency': metrics_trial_freq,
        'user_metrics_collector_fcn': metrics_fcn,
    }

    learner = acs2.ACS2(acs2.Configuration(**settings))
    history = learner.explore(environment, trials)

    return learner, history


def single_yacs_experiment(
        env_provider,
        trials,
        classifier_length,
        possible_actions,
        learning_rate,
        trace_length,
        estimate_expected_improvements,
        feature_possible_values,
        metrics_trial_freq,
        metrics_fcn):
    """Run one YACS exploration experiment.

    An environment is obtained from ``env_provider()``, a YACS agent is
    configured from the remaining arguments and explores the environment for
    ``trials`` trials.

    Returns the agent and the value returned by ``agent.explore``.
    """
    environment = env_provider()

    settings = {
        'classifier_length': classifier_length,
        'number_of_possible_actions': possible_actions,
        'learning_rate': learning_rate,
        'trace_length': trace_length,
        'estimate_expected_improvements': estimate_expected_improvements,
        'feature_possible_values': feature_possible_values,
        'metrics_trial_frequency': metrics_trial_freq,
        'user_metrics_collector_fcn': metrics_fcn,
    }

    learner = yacs.YACS(yacs.Configuration(**settings))
    history = learner.explore(environment, trials)

    return learner, history


def single_dynaq_experiment(
        env_provider,
        trials,
        q_init,
        model_init,
        epsilon,
        learning_rate,
        knowledge_fcn,
        metrics_trial_freq,
        gamma=0.9,
        planning_steps=5):
    """Run one DynaQ experiment starting from the given initial Q and model.

    Parameters
    ----------
    env_provider : callable
        Zero-argument factory returning the environment.
    trials : int
        Passed to ``dynaq`` as ``episodes``.
    q_init : array-like
        Initial Q-table. It is copied, so the caller's array is never touched.
    model_init : dict
        Initial model. It is deep-copied for the same reason.
    epsilon, learning_rate, knowledge_fcn, metrics_trial_freq
        Forwarded to ``dynaq`` unchanged.
    gamma : float
        Forwarded to ``dynaq``; defaults to the previously hard-coded 0.9.
    planning_steps : int
        Forwarded to ``dynaq``; defaults to the previously hard-coded 5.

    Returns
    -------
    tuple
        ``(Q, MODEL, metrics)`` exactly as returned by ``dynaq``.
    """
    env = env_provider()

    # The initial table/model usually come from a shared module-level dict.
    # Hand `dynaq` private copies so repeated experiments cannot continue
    # from state left behind by a previous run.
    q_start = np.array(q_init, copy=True)
    model_start = copy.deepcopy(model_init)

    Q, MODEL, metrics = dynaq(
        env,
        episodes=trials,
        Q=q_start,
        MODEL=model_start,  # maps state to actions to (reward, next_state) tuples
        epsilon=epsilon,
        learning_rate=learning_rate,
        gamma=gamma,
        planning_steps=planning_steps,
        knowledge_fcn=knowledge_fcn,
        metrics_trial_freq=metrics_trial_freq)

    return Q, MODEL, metrics

In [5]:
class DiscretizedWrapper(gym.ObservationWrapper):
    """Observation wrapper returning discretized observations as strings."""

    def observation(self, obs):
        """Bin ``obs`` with ``rmpx_utils`` and return a list of ``str``."""
        binned = rmpx_utils.discretize(obs, _type=str)
        return binned


class SingleStateWrapper(DiscretizedWrapper):
    """Observation wrapper collapsing a discretized state into one integer."""

    def observation(self, obs):
        """Convert each element of ``obs`` to ``int`` and look up the state id."""
        key = tuple(int(attribute) for attribute in obs)
        return rmpx_utils.state_mapping_inv[key]


# Settings shared by every algorithm.
common_params = {
    'classifier_length': RMPX_SIZE + 1,
    'possible_actions': 2,
    'learning_rate': 0.1,
    'metrics_trial_freq': 100,
    'metrics_fcn': rmpx_metrics_collector,
    'trials': 15_000
}

# YACS-specific settings. The allowed feature values are derived from
# RMPX_BINS (instead of a literal '10') so they follow the discretization,
# and every feature gets its own set object rather than one shared instance.
yacs_params = {
    'trace_length': 3,
    'estimate_expected_improvements': False,
    'feature_possible_values': (
        [{str(i) for i in range(RMPX_BINS)} for _ in range(RMPX_SIZE)]
        + [{'0', str(RMPX_BINS)}]
    )
}

# DynaQ-specific settings. The Q-table has one row per discretized state and
# one column per possible action.
dynaq_params = {
    'q_init': np.zeros((len(rmpx_utils.state_mapping),
                        common_params['possible_actions'])),
    'model_init': {},
    'knowledge_fcn': dynaq_rmpx_knowledge_calculator,
    'epsilon': 0.5
}

NUM_EXPERIMENTS = 50
USE_RAY = True


@get_from_cache_or_run(cache_path=f'{cache_dir}/discretized/rmpx_{RMPX_SIZE}bit/acs.dill')
@repeat(num_times=NUM_EXPERIMENTS, use_ray=USE_RAY)
def run_rmpx_with_acs():
    """Single ACS experiment on the discretized rMPX environment.

    Every key of ``common_params`` is forwarded as a keyword argument, so the
    dict must keep matching the ``single_acs_experiment`` signature.
    """
    def make_env():
        return DiscretizedWrapper(rmpx)

    return single_acs_experiment(env_provider=make_env, **common_params)


@get_from_cache_or_run(cache_path=f'{cache_dir}/discretized/rmpx_{RMPX_SIZE}bit/acs2.dill')
@repeat(num_times=NUM_EXPERIMENTS, use_ray=USE_RAY)
def run_rmpx_with_acs2():
    """Single ACS2 experiment without GA, using ``initial_q=0.5``.

    Every key of ``common_params`` is forwarded as a keyword argument, so the
    dict must keep matching the ``single_acs2_experiment`` signature.
    """
    def make_env():
        return DiscretizedWrapper(rmpx)

    return single_acs2_experiment(
        env_provider=make_env,
        do_ga=False,
        initial_q=0.5,
        **common_params)


@get_from_cache_or_run(cache_path=f'{cache_dir}/discretized/rmpx_{RMPX_SIZE}bit/acs2_oiq.dill')
@repeat(num_times=NUM_EXPERIMENTS, use_ray=USE_RAY)
def run_rmpx_with_acs2_oiq():
    """Single ACS2 experiment without GA, using ``initial_q=0.8``.

    Every key of ``common_params`` is forwarded as a keyword argument, so the
    dict must keep matching the ``single_acs2_experiment`` signature.
    """
    def make_env():
        return DiscretizedWrapper(rmpx)

    return single_acs2_experiment(
        env_provider=make_env,
        do_ga=False,
        initial_q=0.8,
        **common_params)


@get_from_cache_or_run(cache_path=f'{cache_dir}/discretized/rmpx_{RMPX_SIZE}bit/acs2_ga.dill')
@repeat(num_times=NUM_EXPERIMENTS, use_ray=USE_RAY)
def run_rmpx_with_acs2_ga():
    """Single ACS2 experiment with GA enabled, using ``initial_q=0.5``.

    Every key of ``common_params`` is forwarded as a keyword argument, so the
    dict must keep matching the ``single_acs2_experiment`` signature.
    """
    def make_env():
        return DiscretizedWrapper(rmpx)

    return single_acs2_experiment(
        env_provider=make_env,
        do_ga=True,
        initial_q=0.5,
        **common_params)


@get_from_cache_or_run(cache_path=f'{cache_dir}/discretized/rmpx_{RMPX_SIZE}bit/acs2_ga_oiq.dill')
@repeat(num_times=NUM_EXPERIMENTS, use_ray=USE_RAY)
def run_rmpx_with_acs2_ga_oiq():
    """Single ACS2 experiment with GA enabled, using ``initial_q=0.8``.

    Every key of ``common_params`` is forwarded as a keyword argument, so the
    dict must keep matching the ``single_acs2_experiment`` signature.
    """
    def make_env():
        return DiscretizedWrapper(rmpx)

    return single_acs2_experiment(
        env_provider=make_env,
        do_ga=True,
        initial_q=0.8,
        **common_params)


@get_from_cache_or_run(cache_path=f'{cache_dir}/discretized/rmpx_{RMPX_SIZE}bit/yacs.dill')
@repeat(num_times=NUM_EXPERIMENTS, use_ray=USE_RAY)
def run_rmpx_with_yacs():
    """Single YACS experiment on the discretized rMPX environment.

    Every key of ``common_params`` and ``yacs_params`` is forwarded as a
    keyword argument, so both dicts must keep matching the
    ``single_yacs_experiment`` signature.
    """
    def make_env():
        return DiscretizedWrapper(rmpx)

    return single_yacs_experiment(
        env_provider=make_env,
        **common_params,
        **yacs_params)


@get_from_cache_or_run(cache_path=f'{cache_dir}/discretized/rmpx_{RMPX_SIZE}bit/dynaq.dill')
@repeat(num_times=NUM_EXPERIMENTS, use_ray=USE_RAY)
def run_rmpx_with_dynaq():
    """Single DynaQ experiment on the integer-encoded rMPX environment.

    Every key of ``dynaq_params`` is forwarded as a keyword argument; only
    the trial count, learning rate and metrics frequency are taken from
    ``common_params``.
    """
    def make_env():
        # Discretize first, then collapse the state into a single integer.
        return SingleStateWrapper(DiscretizedWrapper(rmpx))

    return single_dynaq_experiment(
        env_provider=make_env,
        trials=common_params['trials'],
        learning_rate=common_params['learning_rate'],
        metrics_trial_freq=common_params['metrics_trial_freq'],
        **dynaq_params)


# Run computations
rmpx_acs_runs = run_rmpx_with_acs()
rmpx_acs2_runs = run_rmpx_with_acs2()
rmpx_acs2_oiq_runs = run_rmpx_with_acs2_oiq()
rmpx_acs2_ga_runs = run_rmpx_with_acs2_ga()
rmpx_acs2_ga_oiq_runs = run_rmpx_with_acs2_ga_oiq()
rmpx_yacs_runs = run_rmpx_with_yacs()
rmpx_dynaq_runs = run_rmpx_with_dynaq()

# Collect metrics to single dataframe: LCS runs are (agent, metrics) pairs,
# DynaQ runs are (Q, MODEL, metrics) triples.
_lcs_runs = {
    'acs': rmpx_acs_runs,
    'acs2': rmpx_acs2_runs,
    'acs2_oiq': rmpx_acs2_oiq_runs,
    'acs2_ga': rmpx_acs2_ga_runs,
    'acs2_ga_oiq': rmpx_acs2_ga_oiq_runs,
    'yacs': rmpx_yacs_runs,
}
_metric_frames = [
    parse_lcs_metrics(agent_name, metrics)
    for agent_name, runs in _lcs_runs.items()
    for _, metrics in runs
]
_metric_frames.extend(
    parse_dyna_metrics('dynaq', metrics) for _, _, metrics in rmpx_dynaq_runs)

metrics_df = pd.concat(_metric_frames).set_index(['agent', 'trial'])

# Average them by agent and trial
metrics_averaged_df = metrics_df.groupby(['agent', 'trial']).mean()

## Plot

In [6]:
# style
def plot_comparison(df, plot_filename=None):
    """Draw a 2x2 comparison figure for all evaluated algorithms.

    The panels show, per trial, the population size, the knowledge and the
    generalization (both scaled to percent), plus a bar chart with the mean
    ``time`` of every agent.

    Parameters
    ----------
    df : pandas.DataFrame
        Metrics indexed by ``(agent, trial)`` with the columns
        ``population``, ``knowledge``, ``generalization`` and ``time``.
        The frame itself is not modified.
    plot_filename : str, optional
        When given, the figure is saved to this path; missing parent
        directories are created first.
    """
    colors = discretized_algorithms_colors()

    # (agent key used in `df` and in the colour map, legend label, marker)
    algorithms = [
        ('acs', 'ACS', 'x'),
        ('acs2', 'ACS2', 'v'),
        ('acs2_oiq', 'ACS2_OIQ', 'v'),
        ('acs2_ga', 'ACS2_GA', 's'),
        ('acs2_ga_oiq', 'ACS2_GA_OIQ', 's'),
        ('yacs', 'YACS', 'o'),
        ('dynaq', 'DynaQ', 'D'),
    ]

    # Percent columns live on a copy so the caller's frame gains no columns.
    data = df.assign(
        knowledge_100=df['knowledge'] * 100,
        generalization_100=df['generalization'] * 100)

    fig, axs = plt.subplots(2, 2, figsize=(22, 16))

    # Every algorithm gets its own marker interval.
    marker = 10
    mark_every = (np.linspace(0, marker, len(colors)) + marker).astype(int)

    line_props = {
        agent: {
            'label': label,
            'color': colors[agent],
            'marker': symbol,
            'markevery': int(mark_every[idx]),
            'linewidth': 3,
            'markersize': 9,
        }
        for idx, (agent, label, symbol) in enumerate(algorithms)
    }

    def plot_metric(ax, column):
        # One line per algorithm, always in the same order and style.
        for agent, _, _ in algorithms:
            data.loc[agent][column].plot(ax=ax, **line_props[agent])

    # Population
    plot_metric(axs[0, 0], 'population')
    axs[0, 0].set_title('Population size')
    axs[0, 0].set_ylabel('Number of rules/classifiers')

    # Knowledge
    axs[0, 1].set_title('Knowledge')
    plot_metric(axs[0, 1], 'knowledge_100')
    axs[0, 1].yaxis.set_major_formatter(mtick.PercentFormatter())

    # Generalization
    axs[1, 0].set_title('Generalization')
    plot_metric(axs[1, 0], 'generalization_100')
    axs[1, 0].yaxis.set_major_formatter(mtick.PercentFormatter())

    # Trial time
    times = data.groupby('agent')['time'].mean().to_dict()

    bar_labels = [label for _, label, _ in algorithms]
    bar_values = [times[agent] for agent, _, _ in algorithms]
    bar_colors = [colors[agent] for agent, _, _ in algorithms]

    axs[1, 1].bar(bar_labels, bar_values, color=bar_colors)
    # Rotate the existing tick labels; calling `set_xticklabels` without
    # fixing the tick positions first makes matplotlib emit a UserWarning.
    axs[1, 1].tick_params(axis='x', labelrotation=60)
    axs[1, 1].set_title('Average trial time')
    axs[1, 1].set_ylabel('Seconds [s]')

    # create some space below the plots by increasing the bottom-value
    fig.subplots_adjust(top=0.9, left=0.1, right=0.9, bottom=0.16)

    # Global legend
    handles, legend_labels = axs[1, 0].get_legend_handles_labels()
    fig.legend(handles, legend_labels, ncol=len(bar_values), loc='lower center', prop={'size': 23})

    if plot_filename:
        pathlib.Path(plot_filename).parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(plot_filename, dpi=PLOT_DPI, bbox_inches='tight')


# The file name follows RMPX_SIZE, like the cache paths, so results for a
# different multiplexer size cannot overwrite this figure.
plot_comparison(
    metrics_averaged_df,
    plot_filename=f'{plot_dir}/rmpx_{RMPX_SIZE}bit_discretized_performance.png')

  axs[1, 1].set_xticklabels(labels, rotation=60)


:::{figure-md} rmpx3bit-discretized-performance-fig
<img src="plots/rmpx_3bit_discretized_performance.png">

Performance of the compared algorithms on the discretized 3-bit Real Multiplexer (rMPX).
:::