<a href="https://colab.research.google.com/github/simonchen1108/AI_projects/blob/main/Another_copy_of_Deep_Q_Learning_for_Lunar_Landing_Partial_Code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Deep Q-Learning for Lunar Landing

## Part 0 - Installing the required packages and importing the libraries

### Installing Gymnasium

In [None]:
!pip install gymnasium
!pip install "gymnasium[atari, accept-rom-license]"
!apt-get install -y swig
!pip install gymnasium[box2d]

### Importing the libraries

In [None]:
import os
import random
import numpy as np
import torch
import torch.nn as nn   #//import the NN Neural Network Module
import torch.optim as optim
import torch.nn.functional as F
import torch.autograd as autograd
from torch.autograd import Variable
from collections import deque, namedtuple

## Part 1 - Building the AI

### Creating the architecture of the Neural Network

In [None]:
# Neural network that serves as the "brain" of the AI.
class Network(nn.Module):
  """Fully connected network mapping a state vector to one output per action.

  Layout: state_size -> 64 -> 64 -> action_size, with a ReLU activation after
  each of the two hidden layers and no activation on the output layer.
  """

  def __init__(self, state_size, action_size, seed =42):
    """Build the three linear layers.

    Args:
      state_size: number of input features in one state.
      action_size: number of outputs (one per available action).
      seed: value passed to torch.manual_seed so that weight initialisation
        is reproducible.
    """
    super(Network,self).__init__()
    # torch.manual_seed seeds PyTorch's default RNG and returns its Generator;
    # the Generator is kept on the instance under the original attribute name.
    self.seed=torch.manual_seed(seed)

    # fc1: input layer -> first hidden layer (64 neurons)
    # fc2: first hidden layer -> second hidden layer (64 neurons)
    # fc3: second hidden layer -> output layer (action_size neurons)
    self.fc1=nn.Linear(state_size,64)
    self.fc2=nn.Linear(64,64)
    self.fc3=nn.Linear(64,action_size)

  def forward(self,state):
    """Propagate `state` through the network and return the output-layer values.

    Args:
      state: tensor whose last dimension equals state_size.

    Returns:
      Tensor whose last dimension equals action_size.
    """
    # F.relu must be *called* on the layer output; assigning the bare function
    # would discard the activations and hand a function object to the next layer.
    x=F.relu(self.fc1(state))
    x=F.relu(self.fc2(x))
    return self.fc3(x)




## Part 2 - Training the AI

### Setting up the environment

In [None]:
import gymnasium as gym

# Create the Lunar Lander environment and read the sizes of its
# observation and action spaces.
env = gym.make('LunarLander-v2')

state_shape = env.observation_space.shape   # full shape tuple of one observation
state_size = state_shape[0]                 # length of the first (only) axis
number_action = env.action_space.n          # count of discrete actions

print("State Shape: ", state_shape)
print("State size: ", state_size)
print("Number of actions: ", number_action)

State Shape:  (8,)
State size:  8
Number of actions:  4


### Initializing the hyperparameters

In [None]:
# Training hyperparameters.
#   learning_rate            - learning rate (0.0005)
#   minibatch_size           - number of observations used in one training step
#                              to update the model parameters
#   discount_factor          - discount factor (gamma)
#   replay_buffer_size       - size of the AI's memory (replay buffer)
#   interpolation_parameter  - presumably the target-network soft-update rate;
#                              not used in the cells shown here - TODO confirm

replay_buffer_size = 100_000
minibatch_size = 100
learning_rate = 0.0005
discount_factor = 0.99
interpolation_parameter = 0.001

### Implementing Experience Replay

In [None]:
class ReplayMemory(object):
  """Bounded first-in-first-out store of events.

  Attributes:
    device: torch.device, "cuda:0" when CUDA is available, otherwise "cpu".
    capacity: maximum number of events retained.
    memory: list of stored events, oldest first.
  """

  def __init__(self,capacity):
    """Create an empty memory that keeps at most `capacity` events."""
    cuda_available = torch.cuda.is_available()
    self.device=torch.device("cuda:0" if cuda_available else "cpu")
    self.capacity=capacity
    self.memory=[]

  def push(self,event):
    """Append `event`; if the memory then exceeds capacity, drop the oldest entry."""
    stored = self.memory
    stored.append(event)
    if len(stored) > self.capacity:
      # Removing index 0 evicts the oldest event.
      stored.pop(0)




### Implementing the DQN class

### Initializing the DQN agent

### Training the DQN agent

## Part 3 - Visualizing the results

In [None]:
import glob
import io
import base64
import imageio
from IPython.display import HTML, display
from gym.wrappers.monitoring.video_recorder import VideoRecorder

def show_video_of_model(agent, env_name, video_path='video.mp4', fps=30):
    """Run one episode with `agent` and save the rendered frames as a video.

    Args:
        agent: object with an `act(state)` method whose return value exposes
            `.item()` to yield the action passed to `env.step`.
        env_name: environment id passed to `gym.make`.
        video_path: output file written by `imageio.mimsave`
            (defaults to 'video.mp4').
        fps: frame rate of the saved video (defaults to 30).
    """
    env = gym.make(env_name, render_mode='rgb_array')
    frames = []
    try:
        state, _ = env.reset()
        done = False
        while not done:
            # Capture the frame for the current state before acting on it.
            frames.append(env.render())
            action = agent.act(state)
            state, _reward, terminated, truncated, _info = env.step(action.item())
            # An episode ends either by termination or by truncation (e.g. a
            # time limit); ignoring `truncated` would keep stepping a finished
            # environment and the loop might never stop.
            done = terminated or truncated
    finally:
        # Release the environment even if the agent or the env raises.
        env.close()
    imageio.mimsave(video_path, frames, fps=fps)

# NOTE(review): `agent` is not defined in any cell visible in this notebook
# (the DQN class / agent sections above are empty) - it must be created
# before this cell can run.
show_video_of_model(agent, 'LunarLander-v2')

def show_video():
    """Display the first '*.mp4' file found in the working directory inline.

    The file is embedded in an HTML <video> tag as a base64 data URI.
    Prints "Could not find video" when no '.mp4' file is present.
    """
    mp4list = glob.glob('*.mp4')
    if len(mp4list) > 0:
        mp4 = mp4list[0]
        # Read-only binary mode inside a context manager: the file is only
        # read, and the handle is closed as soon as the bytes are loaded.
        with open(mp4, 'rb') as video_file:
            video = video_file.read()
        encoded = base64.b64encode(video)
        display(HTML(data='''<video alt="test" autoplay
                loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii'))))
    else:
        print("Could not find video")

# Embed the first .mp4 found in the working directory in the notebook output.
show_video()