## Single agent Model

This script contains only the single-agent model and a means to collect trajectories.

#### 'Continuous_car' 

Instantiated with a controller gain K and two boolean flags that select whether system (model) noise and/or controller noise are applied.

#### 'RunEpisode'

Returns lists of states, costs, inputs, positions and velocities for a trajectory that follows the policy defined by K above.




In [1]:
import numpy as np
import pdb
import matplotlib.pyplot as plt


# class for the agent
class Continuous_car():
    """Discrete-time linear model of a single agent driven by a linear policy.

    The state ``x`` is a 2x1 column vector (position, velocity). One step of
    the model is ``x_next = A x + B u + w``, where ``w`` is zero-mean Gaussian
    noise applied to the velocity component only. The one-step cost is
    ``x'Ex + u'Fu`` and the policy is ``u = K x``, optionally perturbed by
    zero-mean Gaussian noise.

    Parameters
    ----------
    K : array-like
        Feedback gain, shape (1, 2) or (2,).
    noisy_model : bool
        If truthy, the model noise standard deviation is 0.25; otherwise 0.
    noisy_controller : bool
        If truthy, Gaussian noise with standard deviation 0.2 is added to
        every policy input.

    Attributes
    ----------
    A, B : system matrices (2x2 and 2x1).
    E, F : cost weights on the state (2x2) and on the input (length 1).
    disc_fact, n_states : constants stored on the instance; they are not
        used by any method of this class.
    """

    def __init__(self, K, noisy_model=False, noisy_controller=False):
        # system
        self.A = np.array([[1, 1], [0, 1]])
        self.B = np.array([[0.5], [1]])

        # cost matrices
        self.E = np.array([[1, 0], [0, 0.5]])
        self.F = np.array([1])

        # Arbitrary gain choice used (stability checked)
        self.K = K

        # Optimal Gains found using dLQR on Matlab:
        # K_Optimal = [ 0.4634, 1.0170 ]

        self.disc_fact = 0.99
        self.n_states = 2

        # Standard deviation of the model noise; 0 disables it.
        self.sigma_model = 0.25 if noisy_model else 0

        # The controller noise level is always stored; `noisy_controller`
        # decides whether GetPolicyInput actually applies it.
        self.sigma_controller = 0.2
        self.noisy_controller = noisy_controller

    def GetPolicyInput(self, x):
        """Return the scalar input ``u = K x`` for state ``x`` (2x1 array).

        When ``noisy_controller`` is set, one sample of controller noise is
        added to the result.
        """
        # Flatten before indexing so that both a (1, 2) and a (2,) gain give a
        # genuine scalar rather than a 1-element array.
        inp = float(np.ravel(np.matmul(self.K, x))[0])

        if self.noisy_controller:
            # Plain (not in-place) addition: an in-place add on an integer
            # array cannot absorb floating-point noise.
            inp = inp + self.GetControllerNoise()

        return inp

    def GetControllerNoise(self):
        """Return one scalar sample of Gaussian controller noise."""
        return self.sigma_controller * np.random.randn(1)[0]

    def GetCost(self, x, u):
        """Return the one-step cost ``x'Ex + u'Fu``.

        x is a 2x1 array, u is a scalar.
        """
        # x'Ex -- the product is a 1x1 array, so unwrap it.
        state_cost = np.matmul(np.matmul(x.transpose(), self.E), x)[0][0]

        # u'Fu
        input_cost = u * u * self.F[0]

        return state_cost + input_cost

    def GetNoise(self):
        """Return a 2x1 model-noise vector; only the velocity entry is noisy."""
        # A sample is drawn even when sigma_model is 0, so the random stream
        # advances identically with and without model noise.
        velocity_noise = self.sigma_model * np.random.randn(1)[0]
        return np.array([0, velocity_noise]).reshape(2, 1)

    def GetNextState(self, current_state, current_input):
        """Return the next state ``A x + B u + w`` as a 2x1 array."""
        drift = np.matmul(self.A, current_state)
        forced = self.B * current_input
        return drift + forced + self.GetNoise()

    def RunEpisode(self, episode_length, state_initial):
        """Simulate a trajectory under the policy and record it.

        Parameters
        ----------
        episode_length : int
            Number of steps to simulate.
        state_initial : sequence of two numbers
            Initial state, e.g. ``[3, 2]`` for position 3 and velocity 2.

        Returns
        -------
        state_list : list of 2x1 arrays, length ``episode_length + 1``
            (includes the initial state).
        cost_list : list of scalars, length ``episode_length``.
        input_list : list of scalars, length ``episode_length``.
        pos_list, vel_list : lists of scalars, length ``episode_length + 1``.
        """
        x = np.array(state_initial).reshape(2, 1)

        state_list = [x]
        cost_list = []
        input_list = []
        pos_list = [x[0][0]]
        vel_list = [x[1][0]]

        for _ in range(episode_length):
            u = self.GetPolicyInput(x)
            input_list.append(u)

            x = self.GetNextState(x, u)
            # The cost is evaluated at the state just reached, paired with the
            # input that produced it.
            cost_list.append(self.GetCost(x, u))

            state_list.append(x)
            pos_list.append(x[0][0])
            vel_list.append(x[1][0])

        return state_list, cost_list, input_list, pos_list, vel_list