In [1]:
import sys

# put custom scripts to module path
sys.path.append('../../..')

In [3]:
import pathlib

import gym
import gym_multiplexer  # noqa: F401
import lcs.agents.racs as racs
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
from lcs.representations.RealValueEncoder import RealValueEncoder
from src.decorators import repeat, get_from_cache_or_run

from typing import List

plt.ioff()  # turn off interactive plotting

# Both directories are resolved against the current working directory
# (`pathlib.Path().absolute()`), i.e. wherever the kernel was started.
cache_dir = f'{pathlib.Path().absolute()}/cache'
plot_dir = f'{pathlib.Path().absolute()}/plots'
# Resolution passed to `savefig(dpi=...)` for every figure written by this notebook.
PLOT_DPI = 200


def encode(p, bits):
    """Encode a single perception value with a ``bits``-bit ``RealValueEncoder``.

    A fresh encoder is created on every call.

    :param p: perception value handed to ``RealValueEncoder.encode``
    :param bits: resolution passed to the ``RealValueEncoder`` constructor
    :return: the encoder output converted to a plain ``int``
    """
    encoder = RealValueEncoder(bits)
    encoded = encoder.encode(p)
    return int(encoded)

def metrics_to_df(metrics: List) -> pd.DataFrame:
    """Turn a list of per-trial metric dicts into a frame indexed by trial.

    Only the ``trial``, ``reward``, ``population`` and ``reliable`` keys are
    kept; any other key present in the dicts is ignored.

    :param metrics: iterable of dicts containing at least the four keys above
    :return: frame indexed by ``trial`` with an extra ``phase`` column that is
        ``"explore"`` for even trial numbers and ``"exploit"`` for odd ones
    """
    columns = ['trial', 'reward', 'population', 'reliable']
    rows = [[record[name] for name in columns] for record in metrics]

    frame = pd.DataFrame(rows, columns=columns).set_index('trial')

    def _phase_of(trial):
        # Even trial numbers are labelled as exploration, odd ones as exploitation.
        if trial % 2 == 0:
            return "explore"
        return "exploit"

    frame['phase'] = frame.index.map(_phase_of)
    return frame

def average_experiment_runs(runs_dfs: List[pd.DataFrame]) -> pd.DataFrame:
    """Average several per-run metric frames into a single frame.

    All runs are stacked, grouped by (``trial``, ``phase``) and averaged;
    ``phase`` is then moved back from the index into a regular column.

    :param runs_dfs: frames indexed by ``trial`` that contain a ``phase`` column
    :return: frame indexed by ``trial`` holding ``phase`` and the mean of the
        remaining columns
    """
    stacked = pd.concat(runs_dfs)
    grouped = stacked.groupby(['trial', 'phase'])
    averaged = grouped.mean()
    return averaged.reset_index(level='phase')

def single_rmpx_experiment(env_provider, encoder_bits, trials):
    """Run one rACS explore/exploit experiment and return its metrics as a frame.

    :param env_provider: zero-argument callable returning the environment
    :param encoder_bits: resolution of the ``RealValueEncoder`` given to the agent
    :param trials: number of trials passed to ``agent.explore_exploit``
    :return: the collected metrics converted with ``metrics_to_df``
    """
    rmpx_env = env_provider()
    rmpx_env.reset()

    def _metrics(agent, environment):
        # Custom per-trial statistics computed from the agent's classifier population.
        # NOTE: 'numerosity' is collected here but is not kept by `metrics_to_df`.
        classifiers = agent.population
        return {
            'population': len(classifiers),
            'numerosity': sum(cl.num for cl in classifiers),
            'reliable': sum(1 for cl in classifiers if cl.is_reliable()),
        }

    perception_length = rmpx_env.observation_space.shape[0]
    actions_count = rmpx_env.action_space.n

    cfg = racs.Configuration(
        classifier_length=perception_length,
        number_of_possible_actions=actions_count,
        encoder=RealValueEncoder(encoder_bits),
        metrics_trial_frequency=5,
        user_metrics_collector_fcn=_metrics,
        epsilon=1.0,  # no biased exploration
        do_ga=True,
        theta_r=0.9,
        theta_i=0.2,
        theta_ga=100,
        cover_noise=0,
        mutation_noise=0.25,
        chi=1.0,
        mu=0.1)

    # Build the agent and let it alternate between exploring and exploiting.
    agent = racs.RACS(cfg)
    collected = agent.explore_exploit(rmpx_env, trials)

    return metrics_to_df(collected)

def plot(df, fig_title, plot_filename=None):
    """Plot smoothed reward and classifier counts side by side.

    The left panel shows the rolling mean (window of 50 rows) of the
    ``reward`` column, separately for rows whose ``phase`` is ``'explore'``
    and ``'exploit'``. The right panel shows the ``population`` and
    ``reliable`` columns for all rows. The frame index is used as the x axis.

    :param df: frame with ``phase``, ``reward``, ``population`` and
        ``reliable`` columns (e.g. the output of ``average_experiment_runs``)
    :param fig_title: title placed above both panels
    :param plot_filename: optional target passed to ``Figure.savefig``; when it
        is a path, missing parent directories are created before saving
    """
    major_ticker_freq = 4000
    reward_window = 50

    # separate explore/exploit data
    explore_df = df[df['phase'] == 'explore']
    exploit_df = df[df['phase'] == 'exploit']

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))
    fig.suptitle(fig_title, fontsize=14)

    # plot 1 - average reward
    explore_df['reward'].rolling(window=reward_window).mean().plot(label='explore', ax=ax1)
    exploit_df['reward'].rolling(window=reward_window).mean().plot(label='exploit', ax=ax1)

    # horizontal reference lines at rewards of 500 and 1000
    for reward_level in (500, 1000):
        ax1.axhline(y=reward_level, color='black', linestyle='--', linewidth=1)
    ax1.set_title('Average reward')
    ax1.set_ylabel('Average reward')
    ax1.set_ylim(400, 1100)

    # plot 2 - number of classifiers
    df['population'].plot(label='population', ax=ax2)
    df['reliable'].plot(label='reliable', ax=ax2)

    ax2.set_title("Classifiers evolution")
    ax2.set_ylabel('# Classifiers')

    # settings shared by both panels
    for ax in (ax1, ax2):
        ax.set_xlabel('Trial')
        ax.xaxis.set_major_locator(ticker.MultipleLocator(major_ticker_freq))
        ax.legend()

    if plot_filename:
        # savefig does not create missing directories, so a fresh checkout
        # without the output folder would fail with FileNotFoundError.
        # File-like targets are passed through untouched.
        if isinstance(plot_filename, (str, pathlib.PurePath)):
            pathlib.Path(plot_filename).parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(plot_filename, dpi=PLOT_DPI)

# Experiment 1 - Encoding precision
Info about algorithms, environments used, metrics collected, methods, reproducibility, statistical verification

Because of the increased complexity of larger multiplexers, the question studied here (the impact of the number of encoding bits) is validated on a smaller instance, the 3-bit real multiplexer.


## Setup
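Each encoder resolution (1 to 4 bits) is evaluated in `NUM_EXPERIMENTS` independent runs of `TRIALS` alternating explore/exploit trials on the 3-bit real multiplexer; the runs are cached and averaged before plotting.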

In [4]:
# Number of repetitions passed to the `repeat` decorator for every encoding variant.
NUM_EXPERIMENTS = 50
# Number of trials each single run hands to `agent.explore_exploit`.
TRIALS = 20_000
# Forwarded to `repeat(use_ray=...)`; presumably runs the repetitions in
# parallel with Ray - confirm in `src.decorators`.
USE_RAY = True

def rmpx3bit_env_provider():
    """Create a new ``real-multiplexer-3bit-v0`` gym environment.

    :return: the environment built by ``gym.make``
    """
    # Imported locally for its side effect; presumably this registers the
    # multiplexer environments in whichever process executes the function
    # (e.g. a Ray worker) - TODO confirm.
    import gym_multiplexer  # noqa: F401
    env_id = 'real-multiplexer-3bit-v0'
    return gym.make(env_id)

@get_from_cache_or_run(cache_path=f'{cache_dir}/rmpx_3bit/encoding_1bit.dill')
@repeat(num_times=NUM_EXPERIMENTS, use_ray=USE_RAY)
def run_3bit_rmpx_1bit_encoding():
    """Run a single 3-bit rMPX experiment with a 1-bit encoder.

    The decorators presumably repeat the run ``NUM_EXPERIMENTS`` times and
    cache the collected results - see ``src.decorators``.
    """
    run_df = single_rmpx_experiment(
        rmpx3bit_env_provider,
        encoder_bits=1,
        trials=TRIALS,
    )
    return run_df

@get_from_cache_or_run(cache_path=f'{cache_dir}/rmpx_3bit/encoding_2bit.dill')
@repeat(num_times=NUM_EXPERIMENTS, use_ray=USE_RAY)
def run_3bit_rmpx_2bit_encoding():
    """Run a single 3-bit rMPX experiment with a 2-bit encoder.

    The decorators presumably repeat the run ``NUM_EXPERIMENTS`` times and
    cache the collected results - see ``src.decorators``.
    """
    run_df = single_rmpx_experiment(
        rmpx3bit_env_provider,
        encoder_bits=2,
        trials=TRIALS,
    )
    return run_df

@get_from_cache_or_run(cache_path=f'{cache_dir}/rmpx_3bit/encoding_3bit.dill')
@repeat(num_times=NUM_EXPERIMENTS, use_ray=USE_RAY)
def run_3bit_rmpx_3bit_encoding():
    """Run a single 3-bit rMPX experiment with a 3-bit encoder.

    The decorators presumably repeat the run ``NUM_EXPERIMENTS`` times and
    cache the collected results - see ``src.decorators``.
    """
    run_df = single_rmpx_experiment(
        rmpx3bit_env_provider,
        encoder_bits=3,
        trials=TRIALS,
    )
    return run_df

@get_from_cache_or_run(cache_path=f'{cache_dir}/rmpx_3bit/encoding_4bit.dill')
@repeat(num_times=NUM_EXPERIMENTS, use_ray=USE_RAY)
def run_3bit_rmpx_4bit_encoding():
    """Run a single 3-bit rMPX experiment with a 4-bit encoder.

    The decorators presumably repeat the run ``NUM_EXPERIMENTS`` times and
    cache the collected results - see ``src.decorators``.
    """
    run_df = single_rmpx_experiment(
        rmpx3bit_env_provider,
        encoder_bits=4,
        trials=TRIALS,
    )
    return run_df

# Run computations for every encoder resolution. Each call goes through
# `get_from_cache_or_run`, so an existing cache file is presumably reused
# instead of recomputing - confirm in `src.decorators`. The results are
# later passed to `average_experiment_runs` for plotting.
rmpx3bit_encoding_1bit_results = run_3bit_rmpx_1bit_encoding()
rmpx3bit_encoding_2bit_results = run_3bit_rmpx_2bit_encoding()
rmpx3bit_encoding_3bit_results = run_3bit_rmpx_3bit_encoding()
rmpx3bit_encoding_4bit_results = run_3bit_rmpx_4bit_encoding()

In [5]:
%%capture

# Average the repeated runs of each encoder resolution and save one figure per
# resolution under `plot_dir`; the markdown figures below embed these PNG files.
plot(average_experiment_runs(rmpx3bit_encoding_1bit_results), '3bit rMPX with 1bit UBR encoding', plot_filename=f'{plot_dir}/rmpx_3bit_encoding_1bit.png')
plot(average_experiment_runs(rmpx3bit_encoding_2bit_results), '3bit rMPX with 2bit UBR encoding', plot_filename=f'{plot_dir}/rmpx_3bit_encoding_2bit.png')
plot(average_experiment_runs(rmpx3bit_encoding_3bit_results), '3bit rMPX with 3bit UBR encoding', plot_filename=f'{plot_dir}/rmpx_3bit_encoding_3bit.png')
plot(average_experiment_runs(rmpx3bit_encoding_4bit_results), '3bit rMPX with 4bit UBR encoding', plot_filename=f'{plot_dir}/rmpx_3bit_encoding_4bit.png')

:::{figure-md} rmpx3bit-enc1bit-fig
:class: full-width
<img src="plots/rmpx_3bit_encoding_1bit.png">

Performance in the 3-bit rMPX with 1-bit UBR encoding
:::

:::{figure-md} rmpx3bit-enc2bit-fig
:class: full-width
<img src="plots/rmpx_3bit_encoding_2bit.png">

Performance in the 3-bit rMPX with 2-bit UBR encoding
:::

:::{figure-md} rmpx3bit-enc3bit-fig
:class: full-width
<img src="plots/rmpx_3bit_encoding_3bit.png">

Performance in the 3-bit rMPX with 3-bit UBR encoding
:::

:::{figure-md} rmpx3bit-enc4bit-fig
:class: full-width
<img src="plots/rmpx_3bit_encoding_4bit.png">

Performance in the 3-bit rMPX with 4-bit UBR encoding
:::

## Observations
What conclusions can be drawn?

In [6]:
def encode_array(arr, bits):
    """Encode every element of ``arr`` with ``encode`` and collect the results.

    :param arr: iterable of perception values
    :param bits: encoder resolution forwarded to ``encode``
    :return: one-dimensional integer NumPy array of encoded values
    """
    encoded_values = (encode(value, bits=bits) for value in arr)
    return np.fromiter(encoded_values, dtype=int)


def plot_checkerboard_splits(splits, bits, points=100, seed=None):
    """Scatter random perception values coloured and labelled by their encoding.

    Vertical dotted lines divide the ``[0, 1]`` range into ``splits`` equal
    parts. Each random point is coloured and annotated with the value returned
    by ``encode_array`` for its x coordinate; the y coordinate is random and
    only spreads the points out (the y axis is hidden). The figure is saved to
    ``{plot_dir}/checkerboard_{splits}_splits_{bits}_bits.png``.

    :param splits: number of equal-width splits of the ``[0, 1]`` range
    :param bits: encoder resolution forwarded to ``encode_array``
    :param points: number of random points to draw
    :param seed: optional seed making the sampled points reproducible; when
        ``None`` the global NumPy random state is used, as before
    """
    fig = plt.figure(figsize=(12, 5))
    ax = fig.add_subplot(111)

    # Visualize splits
    for k in np.linspace(0, 1, splits + 1):
        ax.axvline(x=k, ymin=0, ymax=1, linewidth=1, linestyle=':', color='black')

    # Add some points; a dedicated generator is used only when a seed is given
    # so that the default behaviour (global random state) stays unchanged.
    sampler = np.random if seed is None else np.random.RandomState(seed)
    x = sampler.random_sample(points)
    y = sampler.random_sample(points)
    colors = encode_array(x, bits)
    ax.scatter(x, y, c=colors, s=20, alpha=.8)

    for px, py, encoded in zip(x, y, colors):
        ax.annotate(str(encoded), xy=(px + .005, py + .005), size=8, alpha=.8)

    ax.set_title(f"{bits}-bit encoder mapping on {splits} checkerboard splits")
    ax.set_xlabel("Perception value")
    ax.set_ylim(-0.2, 1.2)
    ax.get_yaxis().set_visible(False)

    # savefig does not create missing directories; make sure the output
    # folder exists so the function also works on a fresh checkout.
    pathlib.Path(plot_dir).mkdir(parents=True, exist_ok=True)
    fig.savefig(f'{plot_dir}/checkerboard_{splits}_splits_{bits}_bits.png', dpi=PLOT_DPI)

In [7]:
%%capture
# Save the mapping of a 4-bit encoder against 3 checkerboard splits; the
# resulting PNG under `plot_dir` is embedded in the figure below.
plot_checkerboard_splits(splits=3, bits=4)

:::{figure-md} checkerboard-3-splits-4-bits
:class: full-width
<img src="plots/checkerboard_3_splits_4_bits.png">

Values of a 4-bit encoder for random perception values, shown against 3 checkerboard splits
:::

To illustrate the impact of choosing the number of encoding bits, the following table shows the encoded value of each perception value (in steps of `0.1`) for encoders of 1 to 7 bits.

In [8]:
# Perception values starting at 0 and increasing in steps of 0.1.
r = np.arange(0, 1.1, .1)

df = pd.DataFrame({'Perception': r})

# One column per encoder resolution holding the encoded perception values.
for bit in range(1, 8):
    df[f'{bit}-bit'] = df['Perception'].map(lambda p: encode(p, bit))

df = df.set_index('Perception')
df

Unnamed: 0_level_0,1-bit,2-bit,3-bit,4-bit,5-bit,6-bit,7-bit
Perception,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.0,0,0,0,0,0,0,0
0.1,0,0,0,1,3,6,12
0.2,0,0,1,3,6,12,25
0.3,0,1,2,4,9,19,38
0.4,0,1,3,6,12,25,51
0.5,1,2,4,8,16,32,64
0.6,1,2,4,9,19,38,76
0.7,1,2,5,11,22,44,89
0.8,1,3,6,12,25,51,102
0.9,1,3,7,14,28,57,115
