<a href="https://colab.research.google.com/github/aayu-7/MountainCarGame-Using-ReinforcementLearning/blob/main/MountainCarGame.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

 **Problem Description**

In the MountainCar environment, a car starts at the bottom of a valley. The agent must learn to drive up a steep hill to reach the flag. Since the car’s engine is not powerful enough to climb the hill directly, it must first build momentum by driving back and forth.
A typical successful strategy is to first drive left, up the opposite slope, to gain speed, and then drive right with enough momentum to reach the goal.

    State Space: Continuous values for position and velocity.
    Action Space: Three discrete actions:
        0: Push left
        1: No push
        2: Push right
    Reward: The agent gets -1 for each time step until it reaches the goal. An episode is cut off after 200 steps, so the worst possible return is -200.

Training:

    The agent is trained over 5000 episodes.
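    An epsilon-greedy policy explores heavily at first; epsilon starts at 1.0 and is multiplied by 0.995 after every episode (down to a floor of 0.01), so the agent increasingly exploits what it has learned.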

Testing:

    The trained agent is evaluated and its performance recorded as a video.

Video Display:

    The RecordVideo wrapper saves the video, and it is displayed using HTML in Colab.

In [None]:
# Install the required library
!pip install gym

# Import required libraries
import gym
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import HTML
import glob
import io
from base64 import b64encode
from gym.wrappers import RecordVideo

# Function to discretize state space
def discretize_state(state, bins):
    """Map a continuous (position, velocity) observation to Q-table indices.

    Args:
        state: Indexable pair whose first element is the car position and
            whose second element is the car velocity.
        bins: Number of bin edges per dimension; the returned indices are
            always valid for an axis of length ``bins``.

    Returns:
        Tuple ``(position_idx, velocity_idx)`` of plain ints, each in
        ``[0, bins - 1]``.
    """
    position_bins = np.linspace(-1.2, 0.6, bins)  # Position bin edges
    velocity_bins = np.linspace(-0.07, 0.07, bins)  # Velocity bin edges
    position_idx = np.digitize(state[0], position_bins) - 1
    velocity_idx = np.digitize(state[1], velocity_bins) - 1

    # A value below the first edge yields -1, which NumPy indexing would
    # silently treat as "last row". This does happen in practice: a float32
    # observation of -1.2 is marginally smaller than the float64 edge -1.2.
    # Clamp so out-of-range values land in the nearest edge bin instead.
    position_idx = int(np.clip(position_idx, 0, bins - 1))
    velocity_idx = int(np.clip(velocity_idx, 0, bins - 1))
    return (position_idx, velocity_idx)

# Function to display video
def show_video(video_dir="./video"):
    """Return an inline HTML player for a recorded ``.mp4`` video.

    Args:
        video_dir: Directory that is searched for ``*.mp4`` files. Defaults
            to ``./video``.

    Returns:
        An ``HTML`` display object embedding the video as a base64 data URI.

    Raises:
        FileNotFoundError: If ``video_dir`` contains no ``.mp4`` file.
    """
    # Sort so the chosen file is deterministic; glob order is arbitrary.
    video_paths = sorted(glob.glob(f"{video_dir}/*.mp4"))
    if not video_paths:
        raise FileNotFoundError(f"No .mp4 video found in {video_dir!r}")

    # Read-only binary mode is sufficient, and the context manager makes
    # sure the file handle is released.
    with open(video_paths[0], "rb") as video_file:
        encoded = b64encode(video_file.read()).decode("ascii")

    return HTML(data=f'''
        <video width="640" height="480" controls>
            <source src="data:video/mp4;base64,{encoded}" type="video/mp4">
        </video>''')

# Environment: classic-control MountainCar with three discrete actions
env = gym.make("MountainCar-v0")
n_actions = env.action_space.n

# Training schedule
n_episodes = 5000

# State discretization: number of bins per observation dimension
n_bins = 20

# Q-learning update parameters
alpha = 0.1  # Learning rate
gamma = 0.99  # Discount factor

# Epsilon-greedy exploration schedule
epsilon = 1.0  # Initial exploration rate
epsilon_min = 0.01  # Threshold below which epsilon is no longer decayed
epsilon_decay = 0.995  # Multiplicative decay applied after each episode

# Q-table indexed as [position_bin, velocity_bin, action], initialised to zero
q_table = np.zeros(shape=(n_bins, n_bins, n_actions))

# Training loop: tabular Q-learning with an epsilon-greedy behaviour policy
for episode in range(n_episodes):
    state = discretize_state(env.reset(), n_bins)
    done = False
    total_reward = 0

    while not done:
        # Choose action: explore with probability epsilon, otherwise exploit
        if np.random.random() < epsilon:
            action = env.action_space.sample()
        else:
            action = np.argmax(q_table[state])

        # Perform action
        next_state_raw, reward, done, info = env.step(action)
        next_state = discretize_state(next_state_raw, n_bins)

        # `done` is set both when the goal is reached and when the step
        # limit cuts the episode off. Only a real termination has zero
        # future value; gym's TimeLimit wrapper marks time-outs in `info`,
        # in which case we still bootstrap from the next state.
        timed_out = info.get("TimeLimit.truncated", False)
        reached_terminal = done and not timed_out
        future_value = 0.0 if reached_terminal else np.max(q_table[next_state])

        # Update Q-value towards the one-step TD target
        td_target = reward + gamma * future_value
        q_table[state][action] += alpha * (td_target - q_table[state][action])

        state = next_state
        total_reward += reward

    # Decay epsilon, never letting it drop below the configured floor
    epsilon = max(epsilon_min, epsilon * epsilon_decay)

    if episode % 100 == 0:
        print(f"Episode {episode}, Total Reward: {total_reward}")

print("Training Complete!")

# Wrap environment for recording; the trigger records every episode
env = RecordVideo(env, "./video", episode_trigger=lambda x: True)

# Test the trained agent with a purely greedy policy (no exploration).
# try/finally guarantees env.close() runs even if the rollout fails, so the
# recording wrapper is always closed before the video is read.
try:
    state = discretize_state(env.reset(), n_bins)
    done = False
    while not done:
        action = np.argmax(q_table[state])
        next_state_raw, _, done, _ = env.step(action)
        state = discretize_state(next_state_raw, n_bins)
finally:
    env.close()

# Display the video
show_video()




  deprecation(
  deprecation(
  if not isinstance(terminated, (bool, np.bool8)):


Episode 0, Total Reward: -200.0
Episode 100, Total Reward: -200.0
Episode 200, Total Reward: -200.0
Episode 300, Total Reward: -200.0
Episode 400, Total Reward: -200.0
Episode 500, Total Reward: -200.0
Episode 600, Total Reward: -200.0
Episode 700, Total Reward: -200.0
Episode 800, Total Reward: -200.0
Episode 900, Total Reward: -200.0
Episode 1000, Total Reward: -200.0
Episode 1100, Total Reward: -164.0
Episode 1200, Total Reward: -151.0
Episode 1300, Total Reward: -200.0
Episode 1400, Total Reward: -200.0
Episode 1500, Total Reward: -200.0
Episode 1600, Total Reward: -200.0
Episode 1700, Total Reward: -200.0
Episode 1800, Total Reward: -160.0
Episode 1900, Total Reward: -200.0
Episode 2000, Total Reward: -121.0
Episode 2100, Total Reward: -200.0
Episode 2200, Total Reward: -152.0
Episode 2300, Total Reward: -200.0
Episode 2400, Total Reward: -168.0
Episode 2500, Total Reward: -162.0
Episode 2600, Total Reward: -200.0
Episode 2700, Total Reward: -200.0
Episode 2800, Total Reward: -188

  logger.warn(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
