## Libraries

In [23]:
from gym import Env
import gym
import numpy as np
import os
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from gym.spaces import Discrete, Box, Dict, Tuple, MultiBinary, MultiDiscrete
from cargame import Car, Track, Vector


## Load Environment

In [59]:
# Scratch check: flatten part of a Dict observation space into one 1-D Box
# and draw a random sample from it.
tt = Dict({
    'Grid': Box(low=0, high=255, shape=(96, 96)),
    'CarAngle': Box(low=0, high=1, shape=(1, 1)),
    'TireAngle': Box(low=-1, high=1, shape=(1, 1)),
})

speed_size = 1
angle = 1

# Only these two components are concatenated; 'TireAngle' is declared above
# but is not part of the flattened space.
flattened_keys = ('Grid', 'CarAngle')
flat_low = np.concatenate([tt[key].low.flatten() for key in flattened_keys])
flat_high = np.concatenate([tt[key].high.flatten() for key in flattened_keys])

# 1 + 1 + 96*96 - 1 == 96*96 + 1, i.e. the grid cells plus the car angle.
flat_shape = (speed_size + angle + 96 * 96 - 1,)
te = Box(low=flat_low, high=flat_high, shape=flat_shape)

print(te.sample())

[2.2773973e+02 2.2283751e+02 2.3580814e+02 ... 7.3112976e+01 1.8350497e+02
 5.2181561e-02]


In [9]:
from time import time

class CarEnvironment(Env):
    """Gym environment wrapping a ``cargame`` ``Car`` driving on a ``Track``.

    Observations are one flat ``Box`` laid out as the three colour grids
    (``GridR``, ``GridG``, ``GridB``) followed by ``CarPositionX``,
    ``CarPositionY``, ``CarSpeed``, ``CarAngle`` and ``TireAngle``.
    Actions are a ``(tire angle, speed)`` pair, each bounded to ``[-1, 1]``.
    """

    X_pixels = 96
    Y_pixels = 96
    # Wall-clock seconds after which step() reports the episode as truncated.
    MAX_EPOCH_TIME = 3 * 60

    def __init__(self, track=None):
        """Create the action/observation spaces and the simulated car.

        Args:
            track: Optional ``Track`` for the car to drive on. When omitted,
                a track is built from a blank ``X_pixels`` x ``Y_pixels`` grid.
        """
        super().__init__()
        self.action_space = Box(-1.0, 1.0, shape=(1, 2), dtype=np.float32)

        # TODO Connect to environment
        grid_shape = (CarEnvironment.X_pixels, CarEnvironment.Y_pixels)
        components = Dict({
            'GridR': Box(low=0, high=255, shape=grid_shape),
            'GridG': Box(low=0, high=255, shape=grid_shape),
            'GridB': Box(low=0, high=255, shape=grid_shape),
            'CarPositionX': Box(low=0, high=CarEnvironment.X_pixels, shape=(1, 1)),
            'CarPositionY': Box(low=0, high=CarEnvironment.Y_pixels, shape=(1, 1)),
            'CarSpeed': Box(low=0, high=1, shape=(1, 1)),
            'CarAngle': Box(low=0, high=1, shape=(1, 1)),
            'TireAngle': Box(low=-1, high=1, shape=(1, 1)),
        })

        # Explicit order of the components inside the flat observation vector.
        # Listing the keys once guarantees the low and high bounds are built
        # from the same components in the same order.
        layout = (
            'GridR', 'GridG', 'GridB',
            'CarPositionX', 'CarPositionY',
            'CarSpeed', 'CarAngle', 'TireAngle',
        )
        low = np.concatenate([components[key].low.flatten() for key in layout])
        high = np.concatenate([components[key].high.flatten() for key in layout])
        self.observation_space = Box(low=low, high=high, shape=low.shape)

        if track is None:
            # np.array() cannot be called without data, so start from an
            # all-zero grid.
            # NOTE(review): this is a placeholder - confirm the pixel layout
            # Track expects and load the real track image here.
            track = Track(np.zeros(grid_shape))

        self.timestep = 0.1
        self.car = Car(10, 10, 10, 30, track, 0.1, Vector(0, 5), Vector(0, -5))
        self.start_time = time()

    def step(self, action):
        """Apply ``action`` to the car and update it once.

        Args:
            action: Array-like holding ``(tire angle, speed)``. It is
                flattened first, so both ``(2,)`` and the ``(1, 2)`` shape
                declared by ``action_space`` are accepted.

        Returns:
            The tuple ``(car state, car reward, check_if_done(),
            check_if_terminated(), truncated)``, where ``truncated`` is True
            once more than ``MAX_EPOCH_TIME`` seconds have elapsed since the
            last reset.

        Raises:
            ValueError: If ``action`` holds fewer than two values.
        """
        # Indexing a (1, 2) action with [1] is out of bounds, so flatten it
        # before unpacking the two controls.
        controls = np.asarray(action).reshape(-1)
        if controls.size < 2:
            raise ValueError(
                f"action must contain (angle, speed); got shape {np.shape(action)}"
            )
        angle, speed = controls[0], controls[1]

        self.car.set_front_tire_angle(angle)
        self.car.set_speed(speed)
        self.car.update()

        truncated = (time() - self.start_time) > CarEnvironment.MAX_EPOCH_TIME
        # NOTE(review): Gym's step contract is either
        # (obs, reward, done, info) or (obs, reward, terminated, truncated, info).
        # This 5-tuple matches neither (no info dict, two "finished" flags) -
        # confirm the targeted Gym version and reshape the return accordingly.
        return (
            self.car.get_state(),
            self.car.reward,
            self.car.check_if_done(),
            self.car.check_if_terminated(),
            truncated,
        )

    def reset(self):
        """Reset the car, restart the episode clock and return the car state.

        Returns:
            ``self.car.get_state()``, the same value ``step`` returns as its
            first element, rather than the ``Car`` object itself.
        """
        self.car.reset()
        self.start_time = time()

        # NOTE(review): newer Gym versions expect reset() to return
        # (observation, info) - confirm against the installed version.
        return self.car.get_state()
    

## Train RL Model

In [None]:
# Build the raw environment under its own name, then give the vectorised
# wrapper a factory returning that instance.
car_env = CarEnvironment()
env = DummyVecEnv([lambda: car_env])

In [None]:
# TensorBoard logs are written under Training/Logs.
log_path = os.path.join('Training', 'Logs')

# The environment exposes a flat 1-D Box observation (grids and scalar car
# readings concatenated), not an image, so the MLP policy is used; SB3's CNN
# policy only accepts image-shaped observation spaces.
model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=log_path)

# NOTE(review): FlattenObservationWrapper is not defined or imported in the
# visible notebook cells, and wrapped_env is created after the model and not
# passed to it - confirm where the wrapper comes from and whether the model
# should be built on wrapped_env instead.
wrapped_env = FlattenObservationWrapper(env)

## Save Model