In [2]:
import collections
import numpy as np 
import gymnasium as gym

#### $ Discretization : $ 
- Discretization is the process of transforming continuous data or variables into discrete or categorical data or variables. In other words, it involves breaking down a continuous variable into distinct groups or categories. This is often done in order to simplify data analysis, as working with discrete values can be more efficient and easier to interpret than working with continuous values. Discretization is commonly used in fields such as statistics, data analysis, and machine learning, where continuous data must be converted into a form that can be processed by algorithms or models. There are various methods of discretization, including binning, clustering, and decision trees.

#### $ Classes \ and \ Functions $

In [55]:
class DiscretizedEnvironment(gym.Wrapper):
    """
    Wrapper that turns a continuous, bounded observation into one integer state.

    Every observation dimension is cut into ``n_bins`` equal-width bins between
    the lower and upper bound of the wrapped observation space. The
    per-dimension bin indices are then combined into a single integer by
    treating them as the digits of a base-``n_bins`` number (dimension 0 is the
    least significant digit), so states lie in ``[0, n_bins ** n_dims)``.

    Actions are passed through to the wrapped environment unchanged, and the
    wrapper exposes the wrapped environment's own action space.

    arguments:
    - env : environment to wrap (e.g. the object returned by gym.make)
    - n_bins: int, number of bins per observation dimension (must be >= 1)

    attributes:
    - n_bins : the number of bins per dimension
    - observation_bins : list with one array of inner bin edges per dimension
    - observation_space : gym.spaces.Discrete(n_bins ** n_dims)

    raises:
    - ValueError if n_bins < 1, or if the observation bounds do not produce
      finite bin edges (e.g. an unbounded observation space)
    """

    def __init__(self, env: gym.Env, n_bins: int = 10):
        super().__init__(env)
        if n_bins < 1:
            raise ValueError(f"n_bins must be >= 1, got {n_bins}")
        self.n_bins = n_bins

        # Lower / upper bound of every observation dimension.
        high = env.observation_space.high
        low = env.observation_space.low

        # Inner bin edges for each dimension.
        # np.linspace(low, high, n_bins + 1) returns the n_bins + 1 edge points
        # that delimit n_bins equal-width intervals. The first and last edge are
        # dropped with [1:-1] so that np.digitize yields indices 0 .. n_bins - 1
        # and values at or beyond the bounds fall into the outermost bins.
        self.observation_bins = [
            np.linspace(low[i], high[i], n_bins + 1)[1:-1] for i in range(len(low))
        ]

        # Infinite (or overflowing) bounds give NaN/inf edges, which would make
        # every discretized state meaningless; fail loudly instead.
        if not all(np.all(np.isfinite(edges)) for edges in self.observation_bins):
            raise ValueError(
                "observation space bounds must be finite to build equal-width bins"
            )

        # Total number of states = number of bins raised to the number of
        # dimensions, e.g. 10 bins and 2 dimensions -> 10 ** 2 = 100 states.
        self.observation_space = gym.spaces.Discrete(n_bins ** len(high))

        # The action space is intentionally not overridden: gym.Wrapper exposes
        # the wrapped environment's action space, and step() forwards actions
        # unchanged, so the two always agree.

    def _discretize_observation(self, obs):
        """ map one continuous observation to its integer state

        arguments:
        - obs : sequence with one continuous value per observation dimension

        return
        - state : int in [0, n_bins ** n_dims)

        """
        state = 0
        for i, edges in enumerate(self.observation_bins):
            # The bin index of dimension i is the i-th base-n_bins digit.
            state += int(np.digitize(obs[i], edges)) * self.n_bins ** i
        return state

    def reset(self, *, seed=None, options=None):
        """ reset the wrapped environment and return the initial discrete state

        arguments:
        - seed : optional seed forwarded to the wrapped environment's reset
        - options : optional dict forwarded to the wrapped environment's reset

        return
        - state : int, discretized initial observation

        NOTE: only the state is returned (the info dict of the wrapped reset is
        dropped), which keeps existing callers of this wrapper working.
        """
        obs, _info = self.env.reset(seed=seed, options=options)
        return self._discretize_observation(obs)

    def step(self, action):
        """ apply ``action`` and return the discretized transition

        arguments:
        - action : an element of the wrapped environment's action space

        return
        - (state, reward, terminated, truncated, info), where state is the
          discretized observation and the other items come from the wrapped
          environment unchanged
        """
        # Forward the action as-is; shifting it (e.g. action - 1) would send
        # values outside the wrapped environment's action space.
        observation, reward, terminated, truncated, info = self.env.step(action)
        return self._discretize_observation(observation), reward, terminated, truncated, info
        


#### $ Solving \ MountainCar \ Problem $

In [56]:
# Keyword arguments shared by both gym.make calls below.
ENV = dict(id="MountainCar-v0", render_mode="human")

# Plain environment: observations stay continuous.
env = gym.make(**ENV)

# A second, separately created environment, wrapped so that observations
# come back as a single discrete state (default number of bins).
env_ = DiscretizedEnvironment(gym.make(**ENV))

In [57]:
# Smoke test of the discretizing wrapper: start an episode, then take one step
# with action 1. The cell displays the tuple returned by step().
env_.reset()
env_.step(1)

  if not isinstance(terminated, (bool, np.bool8)):


(43, -1.0, False, False, {})

In [53]:
# Same two calls on the environment without the discretizing wrapper, so the
# continuous observation can be compared with the wrapper's integer state.
env.reset()
env.step(1)

  if not isinstance(terminated, (bool, np.bool8)):


(array([-5.2406925e-01,  3.5550747e-06], dtype=float32),
 -1.0,
 False,
 False,
 {})