# waypoints

In [1]:
import numpy as np

# Load the waypoint array stored for track 1.
with open('ss1-wp.np', 'rb') as wp_file:
    wps = np.load(wp_file)

# Distance between waypoint 0 and waypoint 100, using only the first two
# components of each waypoint.
span = wps[0][:2] - wps[100][:2]
dist = np.linalg.norm(span)
print(dist)

# Number of whole 100-waypoint strides contained in the file.
print(len(wps) // 100)

3.43225402646071
93


# environment

In [2]:
import gym
import numpy as np
import time
import yaml
import gym
import numpy as np


class F110RaceEnv(gym.Env):
    """Single-agent gym wrapper around ``f110_gym:f110-v0`` with checkpoint rewards.

    Observations are a 110-element float32 vector: 108 min-pooled, normalised
    laser-scan buckets followed by the scaled forward velocity and the scaled
    yaw rate, with Gaussian noise added. Actions are two values in ``[-1, 1]``
    that ``step`` maps to a velocity command and a steering command.

    The reward is ``cp_reward`` each time the car comes within ``min_cp_dist``
    of the next checkpoint, and ``-1`` (with episode termination) on collision.
    """

    # The scan is consumed as N_SCAN_BEAMS values, min-pooled SCAN_BUCKET at a
    # time; 1080 / 10 = 108 buckets + 2 velocity terms = observation size 110.
    N_SCAN_BEAMS = 1080
    SCAN_BUCKET = 10

    def __init__(self, env_config,
                 deterministic=False,
                 test_map_name=None,
                 scan_range=10.0,
                 max_v=12.0,
                 n_cps = 100,
                 cp_reward = 0.1,
                 min_cp_dist=2.0,
                 train_map_name='SILVERSTONE_TRAIN'
                ):
        """Store the configuration and declare the observation/action spaces.

        Args:
            env_config: Config object passed by the caller (e.g. ``{}``).
                NOTE(review): it is accepted but never read by this class.
            deterministic: Stored on the instance.
                NOTE(review): no method of this class consults it; observation
                noise is always applied. Confirm the intended meaning.
            test_map_name: When not ``None``, ``reset`` uses the test map and
                ``step`` prints crash / lap-time information.
            scan_range: Upper clip value for scan readings; also the divisor
                used to normalise them.
            max_v: Maximum commanded velocity; also the divisor used to
                normalise the observed forward velocity.
            n_cps: Target number of checkpoints sampled along the waypoints.
            cp_reward: Reward granted for reaching the next checkpoint.
            min_cp_dist: Distance below which a checkpoint counts as reached.
            train_map_name: Map name prefix used by ``load_train_setup``.
        """
        self.deterministic = deterministic
        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(110,), dtype=np.float32)
        self.max_v = max_v
        self.test_map_name = test_map_name
        self.min_cp_dist = min_cp_dist
        self.n_cps = n_cps
        self.cp_reward = cp_reward
        self.scan_range = scan_range

        self.train_map_name = train_map_name
        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(2,), dtype=np.float32)

        # One simulator per map path, created lazily by ``_get_env``.
        self._env_cache = {}

    def _render_callback(self, env_renderer):
        """Extra drawing hook: keep the camera window centred on the first car."""
        e = env_renderer

        # The vertex list interleaves x and y coordinates.
        x = e.cars[0].vertices[::2]
        y = e.cars[0].vertices[1::2]
        top, bottom, left, right = max(y), min(y), min(x), max(x)
        e.score_label.x = left
        e.score_label.y = top - 700
        # Pad the car's bounding box by 800 on every side.
        e.left = left - 800
        e.right = right + 800
        e.top = top + 800
        e.bottom = bottom - 800

    def load_train_setup(self):
        """Pick training map 1 or 2 at random and load its waypoint file.

        Returns:
            ``(map_path, wps)``: the map path (without extension) and the
            waypoint array loaded from ``./ss{no}-wp.np``.
        """
        no = np.random.choice([1, 2])
        map_path = f"./f1tenth_gym/examples/{self.train_map_name}{no}"
        with open(f'./ss{no}-wp.np', 'rb') as f:
            wps = np.load(f)

        return map_path, wps

    def load_test_setup(self):
        """Return the test map path and the waypoints loaded from ``./ss-wp.np``."""
        map_path = f"./f1tenth_gym/examples/{self.test_map_name}"
        with open('./ss-wp.np', 'rb') as f:
            wps = np.load(f)

        return map_path, wps

    def build_checkpoints(self, wps, cp_dist):
        """Sample every ``cp_dist``-th waypoint, starting from a random offset.

        Args:
            wps: Sequence of waypoints.
            cp_dist: Stride, in waypoints, between consecutive checkpoints.
                Values below 1 are treated as 1 so that a short waypoint list
                cannot cause a modulo-by-zero.

        Returns:
            List of waypoints, in track order, wrapping around the end of
            ``wps``.
        """
        cp_dist = max(1, int(cp_dist))
        n_wps = len(wps)
        # randint's upper bound is exclusive, so this covers every waypoint.
        start = np.random.randint(0, n_wps)
        return [wps[(start + offset) % n_wps] for offset in range(0, n_wps, cp_dist)]

    def _get_env(self, map_path):
        """Return the simulator for ``map_path``, creating it on first use.

        Re-using the simulator avoids calling ``gym.make`` and registering the
        render callback again on every episode.
        """
        # setdefault keeps this working for instances built without __init__.
        cache = self.__dict__.setdefault('_env_cache', {})
        env = cache.get(map_path)
        if env is None:
            env = gym.make('f110_gym:f110-v0', map=map_path, map_ext='.png', num_agents=1)
            env.add_render_callback(self._render_callback)
            cache[map_path] = env
        return env

    def reset(self):
        """Start a new episode at a random checkpoint and return the first observation."""
        if self.test_map_name is not None:
            map_path, wps = self.load_test_setup()
        else:
            map_path, wps = self.load_train_setup()

        self.env = self._get_env(map_path)

        # At least one waypoint per stride, even when there are fewer
        # waypoints than requested checkpoints.
        cp_dist = max(1, len(wps) // self.n_cps)
        self.checkpoints = self.build_checkpoints(wps, cp_dist)
        n_checkpoints = len(self.checkpoints)

        # Any checkpoint may be the start; the target index wraps around.
        random_idx = np.random.randint(0, n_checkpoints)
        start_point = self.checkpoints[random_idx]

        obs, step_reward, done, info = self.env.reset(
            np.array([
                start_point
            ])
        )

        self.next_cp_idx = (random_idx + 1) % n_checkpoints
        self.t = 0  # step counter for the current episode

        return self.to_vector_state(obs)

    def to_vector_state(self, obs):
        """Convert a simulator observation dict into the 110-element state vector.

        Reads ``obs['scans'][0]``, ``obs['linear_vels_x']`` and
        ``obs['ang_vels_z']``. Returns a float32 array so that it matches the
        dtype declared by ``observation_space``.
        """
        scanner = np.clip(obs['scans'][0], 0, self.scan_range)

        # Min-pool the scan in consecutive buckets: the closest obstacle in
        # each bucket is what the policy sees.
        n_buckets = self.N_SCAN_BEAMS // self.SCAN_BUCKET
        agg_scanner = (
            np.asarray(scanner)[:n_buckets * self.SCAN_BUCKET]
            .reshape(n_buckets, self.SCAN_BUCKET)
            .min(axis=1)
        ) / self.scan_range

        state = np.concatenate([
            agg_scanner,
            np.array(obs['linear_vels_x'][:1])/self.max_v,
            np.array(obs['ang_vels_z'][:1])/3.0,  # fixed scale factor
        ])

        # Additive Gaussian observation noise (sigma = 0.03 in normalised units).
        noise = np.random.normal(loc=0.0, scale=0.03, size=state.shape)

        return (state + noise).astype(np.float32)

    def checkpoint(self, position):
        """Return ``cp_reward`` if ``position`` reached the next checkpoint, else 0.

        On success the target advances to the following checkpoint, wrapping
        around the end of the list.
        """
        dist = np.linalg.norm(position - self.checkpoints[self.next_cp_idx][:2])
        reward = 0
        if dist < self.min_cp_dist:
            reward = self.cp_reward
            self.next_cp_idx = (self.next_cp_idx + 1)%len(self.checkpoints)
        return reward

    def step(self, action):
        """Apply ``action`` for one simulator step.

        Args:
            action: Two values expected in ``[-1, 1]``. ``action[0]`` is mapped
                to a velocity in ``[0, max_v]`` and ``action[1]`` to a steering
                value in ``[-pi/6, pi/6]``.

        Returns:
            ``(next_state, reward, done, info)`` where ``reward`` is ``-1`` on
            collision plus any checkpoint reward earned in this step.
        """
        reward = 0

        act_v = action[0]*(self.max_v/2)+(self.max_v/2)
        th = action[1]*np.pi/6
        act = np.array([[th, act_v]])

        obs, step_reward, done, info = self.env.step(act)
        pose_x = obs['poses_x'][0]
        pose_y = obs['poses_y'][0]

        position = np.array([pose_x, pose_y])

        if obs['collisions'][0] == 1.0:
            reward = -1
            done = True
            if self.test_map_name is not None:
                print('CRASHED')

        cp_reward = self.checkpoint(position)
        next_state = self.to_vector_state(obs)
        reward += cp_reward
        self.t += 1

        if done and self.test_map_name is not None:
            print(obs['lap_times'])

        return next_state, reward, done, info

    def render(self):
        """Render the wrapped simulator; ``reset`` must have been called first."""
        self.env.render()

# Smoke test: build the environment with an empty config and display the
# first observation returned by reset().
e = F110RaceEnv(env_config={})
e.reset()

array([ 0.66834791,  0.45441887,  0.37898251,  0.32896951,  0.31620638,
        0.27483334,  0.33416766,  0.23812504,  0.2521027 ,  0.26175057,
        0.17213161,  0.21549836,  0.17158347,  0.20795971,  0.25238498,
        0.1802596 ,  0.22524195,  0.22698367,  0.25730639,  0.24837562,
        0.19735248,  0.2229526 ,  0.20620509,  0.23651499,  0.22596061,
        0.26767855,  0.19399068,  0.22036053,  0.20009244,  0.29340423,
        0.28707077,  0.27332401,  0.25395602,  0.29050788,  0.35961271,
        0.40669155,  0.40930535,  0.58597246,  0.98624991,  1.01788384,
        0.9837994 ,  1.01559468,  1.00275337,  0.98993134,  0.97820224,
        0.97972696,  0.99959854,  0.97479915,  0.9541768 ,  0.80846333,
        0.75679768,  0.73159477,  0.69905846,  0.67649177,  0.64022776,
        0.56434261,  0.57882954,  0.47979701,  0.51126869,  0.48456299,
        0.47277399,  0.42561587,  0.43929064,  0.47143743,  0.4485709 ,
        0.39184885,  0.35991331,  0.41677637,  0.37890016,  0.31

# training

In [3]:
# Set CUDA_VISIBLE_DEVICES=0 in the kernel's environment before Ray is started.
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_VISIBLE_DEVICES=0


In [4]:
import os
import pickle

import ray
from ray.rllib.agents import ppo
from ray.rllib.agents.ppo.ppo import DEFAULT_CONFIG

# Tear down any Ray runtime left over from a previous run of this cell,
# then start a fresh one.
ray.shutdown()
ray.init()

# with HalfCheetah configs

configs = {
    'framework': 'torch',
    'num_workers': 16,
    'num_gpus': 1.0,
    'kl_coeff': 1.0,
    'clip_param': 0.2,
    'num_envs_per_worker': 16,
    'train_batch_size': 100000,
    'sgd_minibatch_size': 4096,
    'batch_mode': 'truncate_episodes',
    'lr': .0003,
}


trainer = ppo.PPOTrainer(env=F110RaceEnv, config=configs)
# print("FINE TUNING")
# trainer.restore('./checkpoints/race_vfh/checkpoint_000031/checkpoint-31')

print(trainer.config)
rewards = []
best_reward = -100

# Create the output directory up front: otherwise the first attempt to write
# the reward history fails only after a full training iteration has been spent.
os.makedirs('./checkpoints', exist_ok=True)

for i in range(300):
    result = trainer.train()
    episode_r = result['episode_reward_mean']
    print(f"episode: {i} reward:{episode_r}")
    rewards.append(episode_r)

    # Rewrite the full reward history every iteration so that an interrupted
    # run still leaves an up-to-date file behind.
    with open('./checkpoints/race_v1.0_r', 'wb') as f:
        pickle.dump(rewards, f)

    # Save only on a new best mean reward; during the first 51 iterations
    # additionally require a multiple-of-5 iteration to limit checkpoint churn.
    if episode_r > best_reward and (i % 5 == 0 or i > 50):
        best_reward = episode_r
        cp = trainer.save("./checkpoints/v1.0")
        print("checkpoint saved at", cp)


2022-04-06 17:45:40,223	INFO ppo.py:249 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
2022-04-06 17:45:40,224	INFO trainer.py:790 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
2022-04-06 17:45:56,502	INFO trainable.py:125 -- Trainable.setup took 16.281 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


{'num_workers': 16, 'num_envs_per_worker': 16, 'create_env_on_driver': False, 'rollout_fragment_length': 390, 'batch_mode': 'truncate_episodes', 'gamma': 0.99, 'lr': 0.0003, 'train_batch_size': 100000, 'model': {'_use_default_native_models': False, '_disable_preprocessor_api': False, 'fcnet_hiddens': [256, 256], 'fcnet_activation': 'tanh', 'conv_filters': None, 'conv_activation': 'relu', 'post_fcnet_hiddens': [], 'post_fcnet_activation': 'relu', 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'use_lstm': False, 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action': False, 'lstm_use_prev_reward': False, '_time_major': False, 'use_attention': False, 'attention_num_transformer_units': 1, 'attention_dim': 64, 'attention_num_heads': 1, 'attention_head_dim': 32, 'attention_memory_inference': 50, 'attention_memory_training': 50, 'attention_position_wise_mlp_dim': 32, 'attention_init_gru_gate_bias': 2.0, 'attention_use_n_prev_actions': 0, 'attention_use_n_



RayTaskError(AttributeError): [36mray::RolloutWorker.par_iter_next()[39m (pid=46882, ip=10.64.91.46, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x2bbd74e095e0>)
  File "/projects/academic/kjoseph/navid/anaconda3/envs/py38/lib/python3.8/site-packages/ray/util/iter.py", line 1151, in par_iter_next
    return next(self.local_it)
  File "/projects/academic/kjoseph/navid/anaconda3/envs/py38/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 382, in gen_rollouts
    yield self.sample()
  File "/projects/academic/kjoseph/navid/anaconda3/envs/py38/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 761, in sample
    batches = [self.input_reader.next()]
  File "/projects/academic/kjoseph/navid/anaconda3/envs/py38/lib/python3.8/site-packages/ray/rllib/evaluation/sampler.py", line 104, in next
    batches = [self.get_data()]
  File "/projects/academic/kjoseph/navid/anaconda3/envs/py38/lib/python3.8/site-packages/ray/rllib/evaluation/sampler.py", line 266, in get_data
    item = next(self._env_runner)
  File "/projects/academic/kjoseph/navid/anaconda3/envs/py38/lib/python3.8/site-packages/ray/rllib/evaluation/sampler.py", line 683, in _env_runner
    base_env.send_actions(actions_to_send)
  File "/projects/academic/kjoseph/navid/anaconda3/envs/py38/lib/python3.8/site-packages/ray/rllib/env/vector_env.py", line 302, in send_actions
    self.vector_env.vector_step(action_vector)
  File "/projects/academic/kjoseph/navid/anaconda3/envs/py38/lib/python3.8/site-packages/ray/rllib/env/vector_env.py", line 233, in vector_step
    obs, r, done, info = self.envs[i].step(actions[i])
  File "/scratch/10072480/ipykernel_46602/1481093392.py", line 157, in step
  File "/scratch/10072480/ipykernel_46602/1481093392.py", line 130, in checkpoint
AttributeError: 'F110RaceEnv' object has no attribute 'min_cp_dist'

2022-04-06 17:46:07,158	ERROR worker.py:85 -- Unhandled error (suppress with RAY_IGNORE_UNHANDLED_ERRORS=1): [36mray::RolloutWorker.par_iter_next()[39m (pid=46868, ip=10.64.91.46, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x2b0ae04a85e0>)
  File "/projects/academic/kjoseph/navid/anaconda3/envs/py38/lib/python3.8/site-packages/ray/util/iter.py", line 1151, in par_iter_next
    return next(self.local_it)
  File "/projects/academic/kjoseph/navid/anaconda3/envs/py38/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 382, in gen_rollouts
    yield self.sample()
  File "/projects/academic/kjoseph/navid/anaconda3/envs/py38/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 761, in sample
    batches = [self.input_reader.next()]
  File "/projects/academic/kjoseph/navid/anaconda3/envs/py38/lib/python3.8/site-packages/ray/rllib/evaluation/sampler.py", line 104, in next
    batches = [self.get_data()]
  File "/projects/academ

2022-04-06 17:46:07,161	ERROR worker.py:85 -- Unhandled error (suppress with RAY_IGNORE_UNHANDLED_ERRORS=1): [36mray::RolloutWorker.par_iter_next()[39m (pid=46881, ip=10.64.91.46, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x2b6b754785e0>)
  File "/projects/academic/kjoseph/navid/anaconda3/envs/py38/lib/python3.8/site-packages/ray/util/iter.py", line 1151, in par_iter_next
    return next(self.local_it)
  File "/projects/academic/kjoseph/navid/anaconda3/envs/py38/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 382, in gen_rollouts
    yield self.sample()
  File "/projects/academic/kjoseph/navid/anaconda3/envs/py38/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 761, in sample
    batches = [self.input_reader.next()]
  File "/projects/academic/kjoseph/navid/anaconda3/envs/py38/lib/python3.8/site-packages/ray/rllib/evaluation/sampler.py", line 104, in next
    batches = [self.get_data()]
  File "/projects/academ

2022-04-06 17:46:07,163	ERROR worker.py:85 -- Unhandled error (suppress with RAY_IGNORE_UNHANDLED_ERRORS=1): [36mray::RolloutWorker.par_iter_next()[39m (pid=46874, ip=10.64.91.46, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x2b1a2178e5e0>)
  File "/projects/academic/kjoseph/navid/anaconda3/envs/py38/lib/python3.8/site-packages/ray/util/iter.py", line 1151, in par_iter_next
    return next(self.local_it)
  File "/projects/academic/kjoseph/navid/anaconda3/envs/py38/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 382, in gen_rollouts
    yield self.sample()
  File "/projects/academic/kjoseph/navid/anaconda3/envs/py38/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 761, in sample
    batches = [self.input_reader.next()]
  File "/projects/academic/kjoseph/navid/anaconda3/envs/py38/lib/python3.8/site-packages/ray/rllib/evaluation/sampler.py", line 104, in next
    batches = [self.get_data()]
  File "/projects/academ