### Lunar Lander Environment
Observation Space: 8-dimensional vector: the coordinates of the lander, its linear velocities, its angle, angular velocity, and bools representing whether each leg is touching the ground  
[x, y, vx, vy, angle, angle_vel, left_leg_on_ground, right_leg_on_ground]  
Action Space: 4 discrete actions: do nothing, fire left orientation engine, fire main engine, fire right orientation engine.



### Theory
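$V(s) = \max_a Q(s, a)$  
$Q(s, a) = R(s, a) + \gamma V(s')$  
$\pi(s) = \arg\max_a Q(s, a)$  
where $\gamma$ is the discount factor, $s'$ is the state reached after taking action $a$ in state $s$, and $\pi$ is the greedy policy.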

In [7]:
import gym

### Imports


In [19]:
import numpy as np
import gym
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [36]:
class LLAgent:
    """A learning agent for the Lunar Lander environment.

    ``get_action`` and ``train`` are placeholders: they currently do nothing
    and return ``None``. Only the Q-network builder is implemented.
    """

    def __init__(self):
        """
        Create a new LLAgent
        """

    def get_action(self, state):
        """
        Return an optimal action based on the state

        Not implemented yet: currently returns None.
        """

    def train(self, history):
        """
        Based on new history data, train the model

        Not implemented yet: currently returns None.
        """

    # HELPER METHODS
    def create_q_function(self,
                          input_size: "int | tuple",
                          output_size: int,
                          num_layers: int = 3,
                          layer_sizes: list[int] = [64, 32, 16],
                          activation: str = "relu",
                          output_activation: str = "linear") -> "keras.Model":
        """
        Create a neural net to represent the q-function

        The network is a stack of fully connected layers: one Dense layer per
        entry of ``layer_sizes`` followed by a Dense output layer with
        ``output_size`` units (one value per action).

        Params:
        input_size: int | tuple
            the size/dimensions of the function input (should be the shape of
            the observation space); a bare int ``n`` is treated as ``(n,)``
        output_size: int
            the number of output units (should be the number of actions)
        num_layers: int
            the number of hidden layers in the neural network
        layer_sizes: list[int]
            the sizes of each hidden layer:
            [hidden layer 1 size, hidden layer 2 size ... hidden layer num_layers size]
            (the list is only read, never modified)
        activation: str
            the activation function of the hidden layers
        output_activation: str
            the activation function of the output layer; defaults to "linear"
            because Q-values are unbounded real numbers, which a softmax
            output (values in [0, 1] summing to 1) cannot represent

        Returns:
            an uncompiled keras.Model mapping an observation to one value per action

        Raises:
            AssertionError: if ``num_layers`` differs from ``len(layer_sizes)``
        """
        # Assertions
        assert num_layers == len(layer_sizes), f"Number of layers must be the same as the length of layer sizes: num: {num_layers} != sizes: {len(layer_sizes)}"

        # Accept both a scalar dimension and a full shape: a scalar is not
        # iterable, so fall back to wrapping it in a 1-tuple.
        try:
            input_shape = tuple(input_size)
        except TypeError:
            input_shape = (input_size,)

        # Build Neural Net
        inputs = layers.Input(shape=input_shape)
        hidden = inputs
        for units in layer_sizes:
            hidden = layers.Dense(units, activation=activation)(hidden)
        output = layers.Dense(output_size, activation=output_activation)(hidden)

        return keras.Model(inputs=inputs, outputs=output)
        
        
        
    
        

In [38]:
# Smoke test: build a Q-network with a 4-dimensional input and 5 outputs,
# then print its layer-by-layer summary.
a = LLAgent()
m = a.create_q_function(input_size=4, output_size=5)
m.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 4)]               0         
                                                                 
 dense_8 (Dense)             (None, 64)                320       
                                                                 
 dense_9 (Dense)             (None, 32)                2080      
                                                                 
 dense_10 (Dense)            (None, 16)                528       
                                                                 
 dense_11 (Dense)            (None, 5)                 85        
                                                                 
Total params: 3,013
Trainable params: 3,013
Non-trainable params: 0
_________________________________________________________________
