# waypoints

In [1]:
import numpy as np

# Load the waypoint array stored in 'ss1-wp.np'.
with open('ss1-wp.np', 'rb') as wp_file:
    wps = np.load(wp_file)

# Straight-line distance between waypoint 0 and waypoint 100, using only the
# first two columns of each row (the same columns the environment compares
# against the car's (x, y) position).
first_xy = wps[0][:2]
hundredth_xy = wps[100][:2]
dist = np.linalg.norm(first_xy - hundredth_xy)
print(dist)

# Number of whole 100-waypoint segments in the file.
n_segments = len(wps) // 100
print(n_segments)

3.43225402646071
93


# environment

In [5]:
import gym
import numpy as np
import time
import yaml
import gym
import numpy as np


class F110RaceEnv(gym.Env):
    """Single-agent racing environment wrapping the ``f110_gym:f110-v0`` simulator.

    Observation (shape ``(110,)``, float32): 108 min-pooled scan buckets
    normalised by ``scan_range``, followed by ``linear_vels_x`` scaled by
    ``max_v`` and ``ang_vels_z`` scaled by 3.0, with Gaussian noise
    (sigma = 0.03) added to every component.

    Action (shape ``(2,)``, each in [-1, 1]): ``action[0]`` is mapped linearly
    to a speed command in ``[0, max_v]``; ``action[1]`` is scaled by ``pi / 6``
    to give the steering command.

    Reward: ``cp_reward`` whenever the car comes within ``min_cp_dist`` of the
    next checkpoint, and -1 on collision (which also ends the episode).
    """

    def __init__(self, env_config,
                 deterministic=False,
                 test_map_name=None,
                 scan_range=10.0,
                 max_v=12.0,
                 n_cps = 100,
                 cp_reward = 0.1,
                 min_cp_dist=2.0,
                 train_map_name='SILVERSTONE_TRAIN'
                ):
        """Store the configuration and declare the observation/action spaces.

        Args:
            env_config: Accepted but not read by this class.
            deterministic: Stored on the instance; no method of this class
                reads it.
            test_map_name: When not None, ``reset`` loads the test map and
                waypoints instead of a random training map, and ``step``
                prints crash / lap-time diagnostics.
            scan_range: Upper clip value and normaliser for the scan readings.
            max_v: Maximum speed command; also normalises the velocity feature.
            n_cps: Target number of checkpoints; the waypoint stride is
                ``len(wps) // n_cps``.
            cp_reward: Reward granted for reaching the next checkpoint.
            min_cp_dist: Distance below which a checkpoint counts as reached.
            train_map_name: Prefix of the training map name; a map number
                (1 or 2) is appended in ``load_train_setup``.
        """
        self.deterministic = deterministic
        self.test_map_name = test_map_name
        self.train_map_name = train_map_name
        self.scan_range = scan_range
        self.max_v = max_v
        self.n_cps = n_cps
        self.cp_reward = cp_reward
        self.min_cp_dist = min_cp_dist

        # 108 scan buckets + 1 linear velocity + 1 angular velocity = 110.
        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(110,), dtype=np.float32)
        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(2,), dtype=np.float32)

    def _render_callback(self, env_renderer):
        """Extra drawing hook: keep the camera and score label on the first car.

        Registered on the simulator in ``reset`` via ``add_render_callback``.
        """
        e = env_renderer

        # Vertices are stored interleaved: even entries are x, odd entries are y.
        x = e.cars[0].vertices[::2]
        y = e.cars[0].vertices[1::2]
        top, bottom, left, right = max(y), min(y), min(x), max(x)

        e.score_label.x = left
        e.score_label.y = top - 700

        # Update the camera window so that it follows the car.
        e.left = left - 800
        e.right = right + 800
        e.top = top + 800
        e.bottom = bottom - 800

    def load_train_setup(self):
        """Pick training map 1 or 2 at random and load its waypoint file.

        Returns:
            ``(map_path, wps)``: the map path (without extension) and the
            waypoint array read from ``./ss{no}-wp.np``.
        """
        no = np.random.choice([1, 2])
        map_path = f"./f1tenth_gym/examples/{self.train_map_name}{no}"
        with open(f'./ss{no}-wp.np', 'rb') as f:
            wps = np.load(f)

        return map_path, wps

    def load_test_setup(self):
        """Load the test map named by ``test_map_name`` and ``./ss-wp.np``.

        Returns:
            ``(map_path, wps)``: the map path (without extension) and the
            waypoint array.
        """
        map_path = f"./f1tenth_gym/examples/{self.test_map_name}"
        with open('./ss-wp.np', 'rb') as f:
            wps = np.load(f)

        return map_path, wps

    def build_checkpoints(self, wps, cp_dist):
        """Sub-sample the waypoints into checkpoints, starting at a random waypoint.

        Args:
            wps: Sequence of waypoints, treated as a closed loop.
            cp_dist: Stride, in waypoints, between consecutive checkpoints.
                Values below 1 are treated as 1 so that every waypoint
                becomes a checkpoint instead of dividing by zero.

        Returns:
            List of waypoint rows, one every ``cp_dist`` waypoints, wrapping
            around the end of ``wps``.

        Raises:
            ValueError: If ``wps`` is empty.
        """
        n_wps = len(wps)
        if n_wps == 0:
            raise ValueError("cannot build checkpoints from an empty waypoint list")

        stride = max(1, int(cp_dist))
        # randint's upper bound is exclusive, so this covers every waypoint.
        start = np.random.randint(n_wps)

        return [wps[(start + offset) % n_wps] for offset in range(0, n_wps, stride)]

    def reset(self):
        """Create a simulator for the selected map and start a new episode.

        The car is placed on a randomly chosen checkpoint, and the checkpoint
        after it becomes the first target.

        Returns:
            The initial observation vector (see ``to_vector_state``).
        """
        if self.test_map_name is not None:
            map_path, wps = self.load_test_setup()
        else:
            map_path, wps = self.load_train_setup()

        # A new simulator is built on every reset; in training the map may
        # differ from one episode to the next.
        self.env = gym.make('f110_gym:f110-v0', map=map_path, map_ext='.png', num_agents=1)
        self.env.add_render_callback(self._render_callback)

        # Guard the division so n_cps <= 0 does not raise; build_checkpoints
        # clamps a zero stride (fewer waypoints than n_cps) to 1.
        cp_dist = len(wps) // max(1, self.n_cps)
        self.checkpoints = self.build_checkpoints(wps, cp_dist)

        n_checkpoints = len(self.checkpoints)
        # Upper bound is exclusive: any checkpoint, including the last, can be
        # the starting point.
        random_idx = np.random.randint(n_checkpoints)
        start_point = self.checkpoints[random_idx]

        obs, step_reward, done, info = self.env.reset(
            np.array([
                start_point
            ])
        )

        # Wrap around so that starting on the last checkpoint targets the first.
        self.next_cp_idx = (random_idx + 1) % n_checkpoints
        self.t = 0

        return self.to_vector_state(obs)

    def to_vector_state(self, obs):
        """Convert a simulator observation dict into the flat observation vector.

        Args:
            obs: Simulator observation; the keys ``'scans'``,
                ``'linear_vels_x'`` and ``'ang_vels_z'`` are read, each for
                the first agent only.

        Returns:
            float32 array of 110 values: 108 normalised scan buckets, scaled
            linear velocity and scaled angular velocity, all with additive
            Gaussian noise.
        """
        scanner = np.clip(obs['scans'][0], 0, self.scan_range)

        # Min-pool the scan in buckets of 10 readings (1080 // 10 = 108
        # buckets). Assumes the scan holds at least 1080 readings.
        buck = 10
        size = 1080//buck
        agg_scanner = np.zeros(size,)
        for i in range(size):
            agg_scanner[i] = np.min(scanner[i*buck: i*buck+buck])

        agg_scanner /= self.scan_range
        state = np.concatenate([
            agg_scanner,
            np.array(obs['linear_vels_x'][:1])/self.max_v,
            np.array(obs['ang_vels_z'][:1])/3.0,
        ])

        noise = np.random.normal(loc=0.0, scale=0.03, size=state.shape)
        state = state + noise

        # Match the dtype declared by observation_space.
        return state.astype(np.float32)

    def checkpoint(self, position):
        """Reward reaching the next checkpoint and advance the target.

        Args:
            position: Array ``[x, y]`` with the car's current position.

        Returns:
            ``cp_reward`` if the car is within ``min_cp_dist`` of the next
            checkpoint (the target then moves to the following checkpoint,
            wrapping around), otherwise 0.
        """
        dist = np.linalg.norm(position - self.checkpoints[self.next_cp_idx][:2])
        reward = 0
        if dist < self.min_cp_dist:
            reward = self.cp_reward

            self.next_cp_idx = (self.next_cp_idx + 1)%len(self.checkpoints)
        return reward

    def step(self, action):
        """Apply one action to the simulator and compute the shaped reward.

        Args:
            action: Two values in [-1, 1]; ``action[0]`` is mapped to a speed
                in ``[0, max_v]`` and ``action[1]`` to a steering command in
                ``[-pi/6, pi/6]``.

        Returns:
            ``(next_state, reward, done, info)``, where ``reward`` is -1 on
            collision plus any checkpoint reward, and ``done`` is forced to
            True on collision.
        """
        reward = 0

        act_v = action[0]*(self.max_v/2)+(self.max_v/2)
        th = action[1]*np.pi/6
        # The simulator is given [steering, speed] for the single agent.
        act = np.array([[th, act_v]])

        obs, step_reward, done, info = self.env.step(act)
        pose_x = obs['poses_x'][0]
        pose_y = obs['poses_y'][0]

        position = np.array([pose_x, pose_y])

        if obs['collisions'][0] == 1.0:
            reward = -1
            done = True
            if self.test_map_name is not None:
                print('CRASHED')

        cp_reward = self.checkpoint(position)
        next_state = self.to_vector_state(obs)
        reward += cp_reward
        self.t += 1

        if done and self.test_map_name is not None:
            print(obs['lap_times'])

        return next_state, reward, done, info

    def render(self):
        """Render the wrapped simulator."""
        self.env.render()

# Smoke test: build the environment with an empty config and reset it once.
# The observation vector returned by reset() is shown as the cell output.
e = F110RaceEnv({})
e.reset()

array([0.33133683, 0.33715568, 0.2789363 , 0.26730084, 0.27783558,
       0.25131348, 0.18717651, 0.26272024, 0.20131201, 0.21702657,
       0.22933016, 0.27686221, 0.20493082, 0.19033509, 0.21765053,
       0.25655249, 0.26454268, 0.18620252, 0.22913007, 0.19603247,
       0.27394254, 0.22005166, 0.24040377, 0.23238348, 0.17177792,
       0.25728935, 0.21784019, 0.26235549, 0.25749418, 0.26047834,
       0.2614054 , 0.27636353, 0.25049943, 0.21066345, 0.30938045,
       0.3193564 , 0.31234092, 0.34076173, 0.32729207, 0.31842827,
       0.46620134, 0.39337403, 0.43864189, 0.48565297, 0.53889876,
       0.63525426, 0.70439087, 0.78542394, 1.02013366, 1.03592106,
       1.0172587 , 1.01931304, 1.05088912, 0.9953354 , 1.00799235,
       1.0053505 , 0.97386807, 1.01849488, 0.95585187, 0.82723608,
       0.68907613, 0.63591835, 0.59702855, 0.56926326, 0.55046791,
       0.43649147, 0.43212818, 0.41601932, 0.38496124, 0.37786633,
       0.32285136, 0.34055656, 0.24469266, 0.30046297, 0.24067

# training

In [6]:
# Set CUDA_VISIBLE_DEVICES=0 in the kernel's environment before Ray starts.
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_VISIBLE_DEVICES=0


In [None]:
import ray
from ray.rllib.agents import ppo
from ray.rllib.agents.ppo.ppo import DEFAULT_CONFIG

# Restart Ray so that re-running this cell starts from a clean cluster state.
ray.shutdown()
ray.init()

# PPO hyperparameters, based on the HalfCheetah configs.
configs = {
    'framework': 'torch',
    'num_workers': 16,
    'num_gpus': 1.0,
    'kl_coeff': 1.0,
    'clip_param': 0.2,
    'num_envs_per_worker': 16,
    'train_batch_size': 100000,
    'sgd_minibatch_size': 4096,
    'batch_mode': 'truncate_episodes',
    'lr': .0003,
}


trainer = ppo.PPOTrainer(env=F110RaceEnv, config=configs)
# To fine-tune from a saved checkpoint instead of training from scratch,
# uncomment the two lines below.
# print("FINE TUNING")
# trainer.restore('./checkpoints/race_vfh/checkpoint_000031/checkpoint-31')

print(trainer.config)
rewards = []
best_reward = -100

import os
import pickle

# The reward history is written under ./checkpoints on every iteration, before
# trainer.save() has had a chance to create the directory, so create it now.
os.makedirs('./checkpoints', exist_ok=True)

for i in range(300):
    result = trainer.train()
    episode_r = result['episode_reward_mean']
    print(f"episode: {i} reward:{episode_r}")
    rewards.append(episode_r)

    # Rewrite the full reward history each iteration so that an interrupted
    # run still leaves an up-to-date file behind.
    with open('./checkpoints/race_v1.0_r', 'wb') as f:
        pickle.dump(rewards, f)

    # Save only on improvement: at most every 5th iteration during the first
    # 51 iterations, and on every improving iteration after that.
    if episode_r > best_reward and (i % 5 == 0 or i > 50):
        best_reward = episode_r
        cp = trainer.save("./checkpoints/v1.0")
        print("checkpoint saved at", cp)


2022-04-06 17:47:11,067	INFO trainable.py:125 -- Trainable.setup took 13.091 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


{'num_workers': 16, 'num_envs_per_worker': 16, 'create_env_on_driver': False, 'rollout_fragment_length': 390, 'batch_mode': 'truncate_episodes', 'gamma': 0.99, 'lr': 0.0003, 'train_batch_size': 100000, 'model': {'_use_default_native_models': False, '_disable_preprocessor_api': False, 'fcnet_hiddens': [256, 256], 'fcnet_activation': 'tanh', 'conv_filters': None, 'conv_activation': 'relu', 'post_fcnet_hiddens': [], 'post_fcnet_activation': 'relu', 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'use_lstm': False, 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action': False, 'lstm_use_prev_reward': False, '_time_major': False, 'use_attention': False, 'attention_num_transformer_units': 1, 'attention_dim': 64, 'attention_num_heads': 1, 'attention_head_dim': 32, 'attention_memory_inference': 50, 'attention_memory_training': 50, 'attention_position_wise_mlp_dim': 32, 'attention_init_gru_gate_bias': 2.0, 'attention_use_n_prev_actions': 0, 'attention_use_n_



episode: 0 reward:-0.910286320254507
checkpoint saved at ./checkpoints/v1.0/checkpoint_000001/checkpoint-1
episode: 1 reward:-0.8899699097291878
episode: 2 reward:-0.8301075268817206
episode: 3 reward:-0.7534579439252337
episode: 4 reward:-0.6063953488372092
episode: 5 reward:-0.4241635687732342
checkpoint saved at ./checkpoints/v1.0/checkpoint_000006/checkpoint-6
episode: 6 reward:-0.29508928571428567
episode: 7 reward:-0.1158730158730158
episode: 8 reward:0.20441176470588257
episode: 9 reward:0.6104838709677423
episode: 10 reward:0.4284671532846717
checkpoint saved at ./checkpoints/v1.0/checkpoint_000011/checkpoint-11
episode: 11 reward:0.5104761904761906
episode: 12 reward:0.6412280701754386
episode: 13 reward:0.9219999999999999
episode: 14 reward:0.981
episode: 15 reward:1.296
checkpoint saved at ./checkpoints/v1.0/checkpoint_000016/checkpoint-16
episode: 16 reward:1.5739999999999998
