# Wrapper Debugging

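Debug and experiment with the action-space wrappers for the oil extraction environment (`OilField`). Each section below wraps a fresh environment, resets it, and takes a couple of steps so that the returned observation, reward, done flag, and info dict can be inspected.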

In [2]:
import os
import sys
# Make the `src` package importable: os.path.abspath("") is the current working
# directory, so its parent is added to the module search path (assumes the
# notebook is run from a direct subdirectory of the repository root — TODO confirm).
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
_parent_dir = os.path.dirname(os.path.abspath(""))
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

import numpy as np

from src.envs.oil_extraction import OilField
from src.utils.utils import point_from_gaussian_process
from src.wrapper.random_replacement import RandomReplacementWrapper
from src.wrapper.euclidean_projection import EuclideanProjectionWrapper
from src.wrapper.discretization import DiscretizationWrapper
from src.wrapper.continuous_masking import ContinuousMaskingWrapper
from src.wrapper.parametrized_discretization import ParametrizedDiscretizationWrapper
from src.wrapper.hierarchical import HierarchicalWrapper

In [2]:
from src.utils.utils import sample_from_gaussian_process
from sklearn.gaussian_process.kernels import RBF
import numpy as np

# Draw the sample that backs the reward function. Presumably this samples 600
# points on [0.0, 40.0] from a Gaussian process with an RBF(3) kernel and the
# trailing 18 is a seed — the helper's signature is not visible here, TODO confirm.
x_sample, y_sample = sample_from_gaussian_process(0.0, 40.0, 600, RBF(3), 18)

# Configuration handed to OilField. Built with keyword arguments; the resulting
# dict has the same keys, values and insertion order as a literal would.
env_config = dict(
    # Field and pump geometry
    LENGTH=40.0,
    LENGTH_PUMP=4.0,
    LENGTH_PUMP_STD=1.0,
    LENGTH_PUMP_MIN=1.0,
    LENGTH_PUMP_MAX=6.0,
    # Episode horizon
    STEPS_PER_EPISODE=100,
    # Pump effectiveness distribution
    EFFECTIVENESS_MEAN=1.0,
    EFFECTIVENESS_STD=0.3,
    EFFECTIVENESS_MIN=0.5,
    EFFECTIVENESS_MAX=2.0,
    # Pump duration distribution
    DURATION_MEAN=3,
    DURATION_MIN=1,
    DURATION_MAX=5,
    GAUSSIAN_NOISE=0.2,
    # Reward: linear interpolation (np.interp) of the first row of y_sample.T
    # over x_sample, evaluated at x. The globals are looked up at call time.
    valid_action_reward=lambda x: np.interp(x, x_sample, y_sample.T[0]),
    # Penalty: -10 times the argument (what x measures is decided by the caller).
    invalid_action_penalty=lambda x: -10 * x,
    END_ON_COLLISION=False,
)

### Random Replacement

In [3]:
# Wrap a fresh OilField in the random-replacement wrapper and reset it.
# In the recorded output, reset() returns a dict with the initial 'observation'
# and an 'allowed_actions' array holding one [low, high] interval.
env = RandomReplacementWrapper(OilField(env_config))
env.reset()

{'observation': {'effectiveness': array([1.2475559], dtype=float32),
  'duration': 5,
  'length': array([3.7147079], dtype=float32)},
 'allowed_actions': array([[ 1.8573539, 38.142647 ]], dtype=float32)}

In [4]:
# Request 5.0, which lies inside the allowed interval returned by reset() in the
# recorded run; the info dict accordingly shows 'Executed': [5.] and 'invalid': False.
env.step(np.array([5.0], dtype=np.float32))

({'observation': {'effectiveness': array([0.7881487], dtype=float32),
   'duration': 4,
   'length': array([4.6994157], dtype=float32)},
  'allowed_actions': array([[ 9.207062, 37.65029 ]], dtype=float32)},
 -1.8503860215094703,
 False,
 {'fraction_allowed_actions': 0.9071323,
  'allowed_interval_length': 36.285294,
  'number_intervals': 1,
  'interval_avg': 36.28529357910156,
  'interval_min': 36.285294,
  'interval_max': 36.285294,
  'interval_variance': 0.0,
  'Executed': array([5.], dtype=float32),
  'invalid': False})

In [5]:
# Request 6.0, which is below the allowed interval reported by the previous step
# in the recorded run (about [9.21, 37.65]). The info dict then shows a different
# 'Executed' value (11.08) and 'invalid': True. Presumably the substitute is drawn
# at random from the allowed set — inferred from the wrapper's name, TODO confirm.
env.step(np.array([6.0], dtype=np.float32))

({'observation': {'effectiveness': array([0.88949645], dtype=float32),
   'duration': 3,
   'length': array([5.2511244], dtype=float32)},
  'allowed_actions': array([[16.055637, 37.37444 ]], dtype=float32)},
 -2.4930010216514273,
 False,
 {'fraction_allowed_actions': 0.71108073,
  'allowed_interval_length': 28.44323,
  'number_intervals': 1,
  'interval_avg': 28.44322967529297,
  'interval_min': 28.44323,
  'interval_max': 28.44323,
  'interval_variance': 0.0,
  'Executed': array([11.080368], dtype=float32),
  'invalid': True})

### Euclidean Projection

In [7]:
# Replace env with a fresh OilField wrapped in the Euclidean-projection wrapper
# and reset it. The recorded reset() output has the same layout as before: an
# 'observation' dict plus an 'allowed_actions' array of [low, high] intervals.
env = EuclideanProjectionWrapper(OilField(env_config))
env.reset()

{'observation': {'effectiveness': array([0.763445], dtype=float32),
  'duration': 3,
  'length': array([4.317965], dtype=float32)},
 'allowed_actions': array([[ 2.1589825, 37.84102  ]], dtype=float32)}

In [8]:
# Request 5.0, inside the allowed interval from reset() in the recorded run;
# it is executed unchanged ('Executed': [5.], 'invalid': False).
env.step(np.array([5.0], dtype=np.float32))

({'observation': {'effectiveness': array([0.7028257], dtype=float32),
   'duration': 1,
   'length': array([3.2217672], dtype=float32)},
  'allowed_actions': array([[ 8.769866, 38.38912 ]], dtype=float32)},
 -1.2091880357407405,
 False,
 {'fraction_allowed_actions': 0.8920509,
  'allowed_interval_length': 35.682037,
  'number_intervals': 1,
  'interval_avg': 35.682037353515625,
  'interval_min': 35.682037,
  'interval_max': 35.682037,
  'interval_variance': 0.0,
  'Executed': array([5.], dtype=float32),
  'invalid': False})

In [9]:
# Request 6.0, below the allowed interval reported by the previous step in the
# recorded run (about [8.77, 38.39]). The recorded 'Executed' value equals that
# interval's lower bound (8.769866) with 'invalid': True, which is consistent
# with projecting the request onto the nearest allowed action.
env.step(np.array([6.0], dtype=np.float32))

({'observation': {'effectiveness': array([1.3312049], dtype=float32),
   'duration': 5,
   'length': array([3.3980525], dtype=float32)},
  'allowed_actions': array([[ 8.858008, 38.300972]], dtype=float32)},
 -7.287262704380737e-07,
 False,
 {'fraction_allowed_actions': 0.74048126,
  'allowed_interval_length': 29.619251,
  'number_intervals': 1,
  'interval_avg': 29.619251251220703,
  'interval_min': 29.619251,
  'interval_max': 29.619251,
  'interval_variance': 0.0,
  'Executed': array([8.769866], dtype=float32),
  'invalid': True})

### Discretization

In [10]:
# Replace env with a fresh OilField wrapped in the discretization wrapper.
# The second argument (9) is presumably the number of discrete actions — the
# recorded env.discrete_actions output in this notebook has 9 entries; TODO confirm.
env = DiscretizationWrapper(OilField(env_config), 9)

In [11]:
# Show the grid of atomic actions behind the discrete indices; in the recorded
# output these are 9 evenly spaced values from 0 to 40 (step 5).
env.discrete_actions

array([ 0.,  5., 10., 15., 20., 25., 30., 35., 40.])

In [18]:
# Reset the discretized environment. In the recorded output 'allowed_actions' is
# a 0/1 vector with one entry per discrete action instead of an interval list
# (presumably 1 marks an allowed index — TODO confirm).
env.reset()

{'observation': {'effectiveness': array([1.1463504], dtype=float32),
  'duration': 5,
  'length': array([3.3532012], dtype=float32)},
 'allowed_actions': array([0., 1., 1., 1., 1., 1., 1., 1., 0.])}

In [19]:
# Step with discrete index 3; the recorded info dict reports
# 'atomic_action': [15.0], and entry 3 of the returned mask becomes 0.
env.step(3)

({'observation': {'effectiveness': array([1.2515774], dtype=float32),
   'duration': 2,
   'length': array([4.2507544], dtype=float32)},
  'allowed_actions': array([0., 1., 1., 0., 1., 1., 1., 1., 0.])},
 -0.5668426757953623,
 False,
 {'fraction_allowed_actions': 0.91616994,
  'allowed_interval_length': 36.646797,
  'number_intervals': 1,
  'interval_avg': 36.64679718017578,
  'interval_min': 36.646797,
  'interval_max': 36.646797,
  'interval_variance': 0.0,
  'invalid': False,
  'atomic_action': [15.0]})

In [20]:
# Step with discrete index 4; the recorded info dict reports
# 'atomic_action': [20.0], and entry 4 of the returned mask becomes 0 as well.
env.step(4)

({'observation': {'effectiveness': array([1.0555031], dtype=float32),
   'duration': 1,
   'length': array([4.2611084], dtype=float32)},
  'allowed_actions': array([0., 1., 1., 0., 0., 1., 1., 1., 0.])},
 -1.5146670967368172,
 False,
 {'fraction_allowed_actions': 0.70363224,
  'allowed_interval_length': 28.14529,
  'number_intervals': 2,
  'interval_avg': 14.07264518737793,
  'interval_min': 9.072645,
  'interval_max': 19.072645,
  'interval_variance': 25.0,
  'invalid': False,
  'atomic_action': [20.0]})

### Parametrized Discretization

In [21]:
# Replace env with a fresh OilField wrapped in the parametrized-discretization
# wrapper. The second argument (9) presumably sets the number of discrete bins —
# the recorded 'allowed_actions' mask after reset has 9 entries; TODO confirm.
env = ParametrizedDiscretizationWrapper(OilField(env_config), 9)

In [22]:
# Reset the wrapped environment; the recorded output contains the initial
# 'observation' and a 9-entry 0/1 'allowed_actions' vector.
env.reset()

{'observation': {'effectiveness': array([0.9077971], dtype=float32),
  'duration': 4,
  'length': array([3.5186286], dtype=float32)},
 'allowed_actions': array([0., 1., 1., 1., 1., 1., 1., 1., 0.])}

In [23]:
# Step with a continuous value in a plain list (not a discrete index, unlike the
# DiscretizationWrapper cells). How the wrapper maps 28.61 to an executed action
# is not visible in this notebook; the recorded info dict shows 'invalid': False.
env.step([28.61])

({'observation': {'effectiveness': array([0.9116158], dtype=float32),
   'duration': 2,
   'length': array([2.9137537], dtype=float32)},
  'allowed_actions': array([0., 1., 1., 1., 1., 0., 0., 0., 0.])},
 1.0752163814665883,
 False,
 {'fraction_allowed_actions': array([0.9120342], dtype=float32),
  'allowed_interval_length': 36.48137,
  'number_intervals': 1,
  'interval_avg': 36.48136901855469,
  'interval_min': 36.48137,
  'interval_max': 36.48137,
  'interval_variance': 0.0,
  'invalid': False})

In [24]:
# Second step with another continuous value; in the recorded run it is accepted
# ('invalid': False) and the allowed set splits into two intervals
# ('number_intervals': 2).
env.step([12.5])

({'observation': {'effectiveness': array([1.2406287], dtype=float32),
   'duration': 1,
   'length': array([3.3321443], dtype=float32)},
  'allowed_actions': array([0., 1., 0., 0., 1., 0., 0., 0., 0.])},
 0.42905687725091135,
 False,
 {'fraction_allowed_actions': array([0.7663466], dtype=float32),
  'allowed_interval_length': 30.653862,
  'number_intervals': 2,
  'interval_avg': 15.32693099975586,
  'interval_min': 6.7169304,
  'interval_max': 23.936932,
  'interval_variance': 74.13211,
  'invalid': False})

### Continuous Action Masking

In [25]:
# Replace env with a fresh OilField wrapped in the continuous action-masking wrapper.
env = ContinuousMaskingWrapper(OilField(env_config))

In [26]:
# Reset the wrapped environment; the recorded output contains the initial
# 'observation' and an 'allowed_actions' array with one [low, high] interval.
env.reset()

{'observation': {'effectiveness': array([1.0422693], dtype=float32),
  'duration': 2,
  'length': array([3.2395933], dtype=float32)},
 'allowed_actions': array([[ 1.6197966, 38.380203 ]], dtype=float32)}

In [27]:
# Request 1.0, which is below the allowed interval from reset() in the recorded
# run (about [1.62, 38.38]). The step is still reported as 'invalid': False with
# 'Executed': 2.5388, which matches low + (1.0 / 40) * interval_length — i.e. it
# looks like the request is rescaled from [0, LENGTH] onto the allowed interval
# (inferred from the recorded numbers, TODO confirm in the wrapper).
env.step([1.0])

({'observation': {'effectiveness': array([1.1286398], dtype=float32),
   'duration': 4,
   'length': array([2.6818686], dtype=float32)},
  'allowed_actions': array([[ 5.499538, 38.659065]], dtype=float32)},
 -1.1564225538015311,
 False,
 {'fraction_allowed_actions': 0.91901016,
  'allowed_interval_length': 36.760406,
  'number_intervals': 1,
  'interval_avg': 36.760406494140625,
  'interval_min': 36.760406,
  'interval_max': 36.760406,
  'interval_variance': 0.0,
  'Executed': array([2.5388068]),
  'invalid': False})

In [28]:
# Request 39.0, above the allowed interval reported by the previous step in the
# recorded run (about [5.50, 38.66]). The recorded 'Executed' value (37.83) again
# matches low + (39.0 / 40) * interval_length, and 'invalid' stays False
# (rescaling inferred from the recorded numbers, TODO confirm in the wrapper).
env.step([39.0])

({'observation': {'effectiveness': array([1.4951355], dtype=float32),
   'duration': 5,
   'length': array([2.571849], dtype=float32)},
  'allowed_actions': array([[ 1.2859246, 35.20322  ]], dtype=float32)},
 0.20850990220575127,
 False,
 {'fraction_allowed_actions': 0.8289882,
  'allowed_interval_length': 33.159527,
  'number_intervals': 1,
  'interval_avg': 33.15952682495117,
  'interval_min': 33.159527,
  'interval_max': 33.159527,
  'interval_variance': 0.0,
  'Executed': array([37.83007753]),
  'invalid': False})