A reinforcement learning environment is a simulator or a real-world system that the agent interacts with; here it is designed mainly to support a model-based algorithm in making optimal decisions. Model-based reinforcement learning uses a representation of the environment's dynamics to plan actions. The environment must therefore provide a structure that enables the agent to simulate future outcomes and make informed decisions based on predictions.

In [20]:
import gym
import numpy as np
from gym import spaces 
import matplotlib.pyplot as plt


In [None]:
class mountain_car(gym.Env):
    """Minimal skeleton of a custom gym environment.

    The base class is ``gym.Env`` (capital ``E``); ``gym.env`` does not
    exist and raises ``AttributeError`` as soon as the class is defined.
    """

    def __init__(self):
        """Initialise the base ``gym.Env``; no spaces or state are defined."""
        super().__init__()

In [22]:
import numpy as np
import gym
from gym import spaces

class MountainCarEnv(gym.Env):
    """Simple mountain-car environment with a discrete push action.

    State is ``[position, velocity]``. Each step the car is pushed left,
    not at all, or right (actions 0, 1, 2), while a position-dependent
    gravity term ``gravity * cos(3 * position)`` acts on the velocity.
    The episode ends when ``position >= goal_position``.

    The class follows the old gym API: ``reset()`` returns the observation
    and ``step()`` returns ``(observation, reward, done, info)``.
    """

    def __init__(self):
        """Set the physical constants, the initial state and the spaces."""
        super().__init__()
        self.gravity = -0.0025   # Signed gravity coefficient used in step()
        self.force = 0.001       # Velocity change per step from a push
        self.mass = 1.0          # Kept for compatibility; not used by step()
        self.max_speed = 0.07    # Velocity is clipped to [-max_speed, max_speed]
        self.max_position = 0.6  # Rightmost position
        self.min_position = -1.2 # Leftmost position (wall)
        self.goal_position = 0.5 # Episode ends at or beyond this position

        # State: [position, velocity]
        self.state = np.array([0.0, 0.0])

        # Action space: 0 = push left, 1 = no push, 2 = push right
        self.action_space = spaces.Discrete(3)

        # Observation space: position and velocity. The bounds are built as
        # float32 so they match the declared dtype exactly.
        self.observation_space = spaces.Box(
            low=np.array([self.min_position, -self.max_speed], dtype=np.float32),
            high=np.array([self.max_position, self.max_speed], dtype=np.float32),
            dtype=np.float32,
        )

    def _get_obs(self):
        """Return the current state as a float32 array.

        The observation space is declared as float32, so the observation is
        cast to that dtype instead of returning the float64 state directly.
        """
        return np.asarray(self.state, dtype=np.float32)

    def step(self, action):
        """Advance the simulation by one time step.

        Args:
            action: 0 (push left), 1 (no push) or 2 (push right).

        Returns:
            Tuple ``(observation, reward, done, info)``: the float32
            ``[position, velocity]`` observation, ``1`` if the goal was
            reached and ``-1`` otherwise, whether the episode ended, and an
            empty info dict.

        Raises:
            ValueError: if ``action`` is not 0, 1 or 2.
        """
        if action not in (0, 1, 2):
            # Previously any unknown action was silently treated as "no push".
            raise ValueError(f"Invalid action {action!r}; expected 0, 1 or 2")

        position, velocity = self.state

        # Map action {0, 1, 2} to a push direction {-1, 0, +1}.
        acceleration = (action - 1) * self.force

        # Update velocity and position based on gravity and action
        velocity += acceleration + self.gravity * np.cos(3 * position)
        velocity = np.clip(velocity, -self.max_speed, self.max_speed)
        position += velocity
        position = np.clip(position, self.min_position, self.max_position)

        # Inelastic left wall: without this the car keeps a negative velocity
        # while pinned at min_position.
        if position == self.min_position and velocity < 0:
            velocity = 0.0

        self.state = np.array([position, velocity])

        # Reward: +1 on reaching the goal, -1 per step otherwise to encourage
        # faster solutions.
        done = bool(position >= self.goal_position)
        reward = 1 if done else -1

        return self._get_obs(), reward, done, {}

    def reset(self):
        """Start a new episode and return the initial observation.

        The position is drawn uniformly from [-0.6, -0.4]; the velocity
        always starts at zero.
        """
        self.state = np.array([np.random.uniform(-0.6, -0.4), 0.0])
        return self._get_obs()

    def render(self, mode="human"):
        """Print the current position and velocity; ``mode`` is ignored."""
        position, velocity = self.state
        print(f"Position: {position:.3f}, Velocity: {velocity:.3f}")
        
# Example usage: roll out one episode with a random policy.
env = MountainCarEnv()

# Reset the environment
state = env.reset()
done = False

# The push force (0.001) is weaker than gravity (0.0025), so a random policy
# is unlikely to reach the goal; cap the episode length so the loop cannot
# run forever.
max_steps = 200

for step_index in range(max_steps):
    # Random action selection (for illustration)
    action = env.action_space.sample()
    state, reward, done, _ = env.step(action)
    env.render()
    if done:
        break




Position: -0.579, Velocity: 0.001
Position: -0.578, Velocity: 0.002
Position: -0.576, Velocity: 0.001
Position: -0.576, Velocity: 0.001
Position: -0.575, Velocity: 0.001
Position: -0.574, Velocity: 0.000
Position: -0.573, Velocity: 0.001
Position: -0.572, Velocity: 0.001
Position: -0.572, Velocity: 0.001
Position: -0.571, Velocity: 0.001
Position: -0.571, Velocity: 0.000
Position: -0.570, Velocity: 0.001
Position: -0.568, Velocity: 0.002
Position: -0.566, Velocity: 0.002
Position: -0.562, Velocity: 0.004
Position: -0.558, Velocity: 0.004
Position: -0.553, Velocity: 0.005
Position: -0.547, Velocity: 0.006
Position: -0.540, Velocity: 0.007
Position: -0.534, Velocity: 0.007
Position: -0.526, Velocity: 0.008
Position: -0.517, Velocity: 0.009
Position: -0.507, Velocity: 0.010
Position: -0.499, Velocity: 0.009
Position: -0.491, Velocity: 0.007
Position: -0.483, Velocity: 0.008
Position: -0.476, Velocity: 0.007
Position: -0.469, Velocity: 0.008
Position: -0.462, Velocity: 0.007
Position: -0.4