In [1]:
# ACS2 in Heist environment

In [2]:
from __future__ import unicode_literals
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as tkr
from pylab import cm
import pathlib
import numpy as np
import pandas as pd

import logging
logging.basicConfig(level=logging.WARN)

from lcs import Perception
from lcs.agents import EnvironmentAdapter
from lcs.agents.acs2 import ACS2, Configuration, ClassifiersList
from lcs.metrics import population_metrics

import gym

In [3]:
from utils.cache import get_from_cache_or_run

%run utils/experiments.py
%run utils/metrics.py
%run utils/plots.py

In [4]:
# Absolute path of the `cache/` directory under the current working directory,
# kept as a string with a trailing slash.
cache_prefix = str(pathlib.Path().absolute()) + '/cache/'

## Heist Environment

In [5]:
# Create the 'heist-auto-maxpool-v0' environment, reset it and render it.
# NOTE(review): this id is qualified with `gym_autoencoder.heist.envs`, whereas the
# other two environments in this notebook use the shorter `gym_autoencoder` prefix —
# confirm both module paths register the environments.
maxpool_env_id = 'gym_autoencoder.heist.envs:heist-auto-maxpool-v0'
env_maxpool = gym.make(maxpool_env_id)
env_maxpool.reset()
env_maxpool.render()

  "Future gym versions will require that `Env.reset` can be passed a `seed` instead of using `Env.seed` for resetting the environment random number generator."
  "Future gym versions will require that `Env.reset` can be passed `options` to allow the environment initialisation to be passed additional information."
  f"The result returned by `env.reset()` was not a tuple of the form `(obs, info)`, where `obs` is a observation and `info` is a dictionary containing additional information. Actual type: `{type(result)}`"
  "No render modes was declared in the environment (env.metadata['render_modes'] is None or not defined), you may have trouble when calling `.render()`."


In [6]:
# Create the 'heist-auto-maxpool-big-v0' environment, reset it and render it.
maxpoolbig_env_id = 'gym_autoencoder:heist-auto-maxpool-big-v0'
env_maxpoolbig = gym.make(maxpoolbig_env_id)
env_maxpoolbig.reset()
env_maxpoolbig.render()


In [7]:
# Create the 'heist-vae-alex-v0' environment, reset it and render it.
vaealex_env_id = 'gym_autoencoder:heist-vae-alex-v0'
env_vaealex = gym.make(vaealex_env_id)
env_vaealex.reset()
env_vaealex.render()


In [8]:
# Take one randomly sampled action to inspect what `step` returns
# (unpacked here as a 4-tuple: state, reward, done, extra).
random_action = env_maxpool.action_space.sample()
state, reward, done, _ = env_maxpool.step(random_action)

  "Core environment is written in old step API which returns one bool instead of two. "
  f"{pre} was expecting numpy array dtype to be {observation_space.dtype}, actual type: {obs.dtype}"


## Environment setup

In [9]:
class DiscretizedWrapper(gym.ObservationWrapper):
    """Observation wrapper that maps numeric observations onto integer bin indices.

    Each observation component is rescaled from ``[low, high]`` to ``[0, 1]``
    and assigned to one of ``bins`` equal-width buckets, so every emitted value
    is an integer in ``0 .. bins - 1``. Values outside ``[low, high]`` are
    clipped into the first / last bin.

    Parameters
    ----------
    env : gym.Env
        Environment whose observation space is a ``gym.spaces.Box`` or a
        ``gym.spaces.Discrete``.
    bins : int, default 10
        Number of buckets per observation component (must be >= 1).
    low, high : float or array-like, optional
        Explicit finite bounds used for the rescaling. They take precedence
        over the bounds declared by the wrapped observation space and must be
        broadcastable to its shape. Supply them when the space is unbounded.

    Raises
    ------
    NotImplementedError
        If the wrapped observation space is neither ``Box`` nor ``Discrete``.
    ValueError
        If ``bins`` is smaller than 1, an explicit bound is not finite, or a
        resolved ``high`` is smaller than the matching ``low``.

    Notes
    -----
    When the wrapped space declares non-finite bounds (e.g. ``Box(-inf, inf)``)
    and no explicit bound is given, the affected components fall back to the
    range ``[0, 1]`` and a warning is logged. This fallback only guarantees
    valid bin indices; pass ``low`` / ``high`` matching the real value range to
    get a meaningful discretization.
    """

    def __init__(self, env, bins=10, low=None, high=None):
        super().__init__(env)
        if int(bins) < 1:
            raise ValueError("`bins` must be a positive integer")
        self.bins = bins

        source_space = self.env.observation_space
        if isinstance(source_space, gym.spaces.Box):
            declared_low = np.asarray(source_space.low, dtype=np.float64)
            declared_high = np.asarray(source_space.high, dtype=np.float64)
            # One bin index per component of the original observation vector.
            self.observation_space = gym.spaces.MultiDiscrete(
                np.full(source_space.shape, bins)
            )
        elif isinstance(source_space, gym.spaces.Discrete):
            # Discrete spaces have no `low`/`high`; derive them from `start`
            # (absent in older gym releases, where values start at 0) and `n`.
            start = float(getattr(source_space, "start", 0))
            declared_low = np.asarray(start, dtype=np.float64)
            declared_high = np.asarray(start + source_space.n - 1, dtype=np.float64)
            self.observation_space = gym.spaces.Discrete(bins)
        else:
            raise NotImplementedError("Observation space type not supported for discretization")

        self._low = self._resolve_bound(low, declared_low, 0.0, "low")
        self._high = self._resolve_bound(high, declared_high, 1.0, "high")

        span = self._high - self._low
        if np.any(span < 0):
            raise ValueError("`high` must not be smaller than `low`")
        # Per-component range; degenerate (zero-width) components use 1 so the
        # division below is well defined and they always land in bin 0.
        self._range = np.where(span > 0, span, 1.0)

    @staticmethod
    def _resolve_bound(override, declared, fallback, label):
        """Return a finite bound array: the override, the declared bound, or the fallback."""
        if override is not None:
            bound = np.broadcast_to(
                np.asarray(override, dtype=np.float64), declared.shape
            )
            if not np.all(np.isfinite(bound)):
                raise ValueError(f"explicit `{label}` bound must be finite")
            return bound

        finite = np.isfinite(declared)
        if np.all(finite):
            return declared

        logging.warning(
            "DiscretizedWrapper: observation space declares a non-finite `%s` bound; "
            "falling back to %s for those components. Pass explicit `low`/`high` "
            "to control the discretization range.",
            label,
            fallback,
        )
        return np.where(finite, declared, fallback)

    def discretize(self, obs):
        """Convert a raw observation into bin indices in ``0 .. bins - 1``."""
        obs = np.asarray(obs, dtype=np.float64)

        # Normalize to [0, 1]; out-of-range values are clipped to the edges.
        normalized_obs = np.clip((obs - self._low) / self._range, 0.0, 1.0)

        # A normalized value of exactly 1.0 would yield index `bins`, so cap
        # the result at the last valid bin.
        discretized_obs = np.floor(normalized_obs * self.bins).astype(np.int64)
        return np.minimum(discretized_obs, self.bins - 1)

    def observation(self, obs):
        """Discretize the observation produced by the wrapped environment."""
        # Some environments hand back an (obs, info) tuple; keep only the observation.
        if isinstance(obs, tuple):
            obs = obs[0]

        return self.discretize(obs)


In [10]:
# Show the raw observation space of every environment before it is wrapped.
for raw_env in (env_maxpool, env_maxpoolbig, env_vaealex):
    print("Observation Space from environment:", raw_env.observation_space)

# Replace each environment with its discretized counterpart.
# NOTE: the names are rebound in place, so re-running this cell wraps the
# already-wrapped environments a second time.
env_maxpool = DiscretizedWrapper(env_maxpool)
env_maxpoolbig = DiscretizedWrapper(env_maxpoolbig)
env_vaealex = DiscretizedWrapper(env_vaealex)

Observation Space from environment: Box(-inf, inf, (32,), float64)
Observation Space from environment: Box(-inf, inf, (32,), float64)
Observation Space from environment: Box(-inf, inf, (64,), float64)


  out


## Basic Metrics

In [11]:
# Scalar hyper-parameters; they are forwarded below under the keys
# 'beta' (learning_rate), 'gamma' (discount_factor), 'epsilon' and 'zeta'.
learning_rate, discount_factor = 0.2, 0.95
epsilon, zeta = 0.2, 0.0001

# Parameter set handed to the experiment helpers via `params=`.
# NOTE(review): the raw observation spaces printed earlier in this notebook have
# 32 and 64 components, while 'perception_bits' is 16 — confirm how the
# experiment helpers use this value.
basic_cfg = dict(
    perception_bits=16,
    possible_actions=9,
    do_ga=False,
    beta=learning_rate,
    epsilon=epsilon,
    gamma=discount_factor,
    zeta=zeta,
    user_metrics_collector_fcn=common_metrics,
    biased_exploration_prob=0,
    metrics_trial_freq=1,
)

# Size of the run: trials per experiment and number of experiments.
trials, experiments = 4000, 5

In [12]:
# Build the cache location with pathlib so the separator matches the host OS.
# The previous raw literal r'..\cache\...' only points into a `cache`
# sub-directory on Windows; on Windows this expression yields the same string.
cache_file = str(
    pathlib.Path('..', 'cache', '4000trials-maxpool-maxpoolbig-vaealex-5exp.dill')
)

# Run (or load from cache) the experiments for the three discretized environments.
# NOTE(review): `avg_experiments(...)` is evaluated before `get_from_cache_or_run`
# is called; unless it returns a deferred callable, the experiments execute even
# when a cache file exists — confirm against utils/cache.py and utils/experiments.py.
# NOTE(review): the saved run of this cell ended with
# "ValueError: too many values to unpack (expected 2)"; the traceback is not
# part of the notebook, so the failing call still has to be located.
result = get_from_cache_or_run(
    cache_file,
    avg_experiments(
        experiments,
        envs={
            "MaxPool": env_maxpool,
            "MaxPoolBig": env_maxpoolbig,
            "VAEAlex": env_vaealex,
        },
        trials=trials,
        params=basic_cfg,
        fun=run_experiments_alternating,
    ),
)



ValueError: too many values to unpack (expected 2)

### MaxPool

In [None]:
# Display the 'agg_df' entry stored for the "MaxPool" environment.
result["MaxPool"]["agg_df"]

In [None]:
# Plot the "MaxPool" 'agg_df' entry with the helper loaded from utils/plots.py.
plot_exploit_results(result["MaxPool"]["agg_df"])

### MaxPoolBig

In [None]:
# Display the 'agg_df' entry stored for the "MaxPoolBig" environment.
result["MaxPoolBig"]["agg_df"]

In [None]:
# Plot the "MaxPoolBig" 'agg_df' entry with the helper loaded from utils/plots.py.
plot_exploit_results(result["MaxPoolBig"]["agg_df"])

### VAEAlex

In [None]:
# Display the 'agg_df' entry stored for the "VAEAlex" environment.
result["VAEAlex"]["agg_df"]

In [None]:
# Plot the "VAEAlex" 'agg_df' entry with the helper loaded from utils/plots.py.
plot_exploit_results(result["VAEAlex"]["agg_df"])