In [1]:
import sys
import os
import pickle
import argparse
import pandas as pd
from sklearn.ensemble.forest import ExtraTreesRegressor

from fqi.et_tuning import run_tuning
from fqi.reward_function import *
from fqi.sars_creator import to_SARS
from fqi.utils import *

from trlib.policies.valuebased import ValueBased, EpsilonGreedy, Softmax
from trlib.policies.qfunction import ZeroQ
from trlib.algorithms.reinforcement.fqi_driver import FQIDriver, DoubleFQIDriver
from trlib.environments.trackEnv import TrackEnv
from trlib.utilities.ActionDispatcher import *

In [2]:
# Input locations: the directory holding the trajectory CSV files and the
# base names (without the '.csv' extension) of the two files read below.
data_path = './trajectory/'
track_file_name, rt_file_name = 'dataset_offroad', 'ref_traj'

In [3]:
# Read the simulated laps; the two flag columns are parsed as booleans.
simulations = pd.read_csv(
    os.path.join(data_path, track_file_name + '.csv'),
    dtype={'isReference': bool, 'is_partial': bool},
)

# Read the reference trajectory with pandas' default dtype inference.
ref_tr = pd.read_csv(
    os.path.join(data_path, rt_file_name + '.csv'),
)

In [57]:
# Lap number stored in the last row of the frame, as a plain Python scalar.
n_laps = simulations['NLap'].values.item(-1)

# Keep lap 17 plus every lap whose number is within 1000 of the last one.
# NOTE(review): lap 17 is flagged isReference in the sample rows shown in the
# cell output — presumably it is the reference lap; confirm.
simulations = simulations.loc[
    lambda laps: (laps.NLap == 17) | (laps.NLap >= n_laps - 1000)
]

Unnamed: 0,NLap,time,isReference,is_partial,xCarWorld,yCarWorld,nYawBody,nEngine,NGear,prevaSteerWheel,...,actualAccelerationX,actualAccelerationY,referenceAccelerationX,referenceAccelerationY,accelerationDiffX,accelerationDiffY,trackPos,aSteerWheel,pBrakeF,rThrottlePedal
11893,17.0,0.055996,True,False,632.738159,1167.866089,0.019937,18257.00928,7.0,-0.001475,...,1.355255,-3.966545,1.173112,-8.450459,-0.182143,-4.483914,-0.290883,-0.006909,0.0,1.0
11894,17.0,0.155996,True,False,641.394775,1168.036987,0.016761,18284.10156,7.0,-0.006909,...,1.315348,-2.214881,1.253555,-0.681178,-0.061793,1.533703,-0.259810,-0.002224,0.0,1.0
11895,17.0,0.255996,True,False,650.064575,1168.186279,0.016794,18309.77905,7.0,-0.002224,...,1.315140,-0.185807,1.173112,-8.450459,-0.142028,-8.264652,-0.232666,0.000000,0.0,1.0
11896,17.0,0.355996,True,False,658.747070,1168.332764,0.016888,18335.33691,7.0,0.000000,...,1.291191,0.008633,1.173112,-8.450459,-0.118079,-8.459092,-0.206033,0.000000,0.0,1.0
11897,17.0,0.455996,True,False,667.441956,1168.479248,0.016886,18360.43213,7.0,0.000000,...,1.266658,0.000317,1.173112,-8.450459,-0.093546,-8.450776,-0.179399,0.000000,0.0,1.0
11898,17.0,0.555996,True,False,676.149048,1168.625732,0.016886,18385.04517,7.0,0.000000,...,1.248418,-0.000128,1.173112,-8.450459,-0.075306,-8.450331,-0.152765,0.000000,0.0,1.0
11899,17.0,0.655996,True,False,684.868042,1168.772217,0.016886,18409.16504,7.0,0.000000,...,1.217150,-0.000074,1.173112,-8.450459,-0.044038,-8.450385,-0.126132,0.000000,0.0,1.0
11900,17.0,0.755996,True,False,693.598755,1168.918701,0.016886,18432.93701,7.0,0.000000,...,1.190268,-0.000060,1.173112,-8.450459,-0.017156,-8.450399,-0.099498,0.000000,0.0,1.0
11901,17.0,0.855996,True,False,702.340820,1169.065186,0.016886,18456.39771,7.0,0.000000,...,1.160735,-0.000112,1.173112,-8.450459,0.012377,-8.450347,-0.072865,0.000000,0.0,1.0
11902,17.0,0.955996,True,False,711.094299,1169.211670,0.016886,18478.44971,7.0,0.000000,...,1.174851,0.000094,1.173112,-8.450459,-0.001739,-8.450553,-0.046231,0.000000,0.0,1.0


In [34]:
# Lap numbers whose rows are used to fit the penalty model.
right_laps = np.array([1, 8, 9, 11, 14, 16, 17, 20, 45, 46, 49, 59, 62], dtype=float)

# Likelihood-based penalty with a Gaussian kernel and bandwidth 1.
penalty = LikelihoodPenalty(kernel='gaussian', bandwidth=1)

# Fit on the penalty columns of the selected laps only.
# NOTE(review): the meaning of the second positional argument (10) is not
# visible from this notebook — confirm against LikelihoodPenalty.fit.
penalty.fit(simulations.loc[simulations.NLap.isin(right_laps), penalty_cols].values, 10)

In [39]:
# Reward function built from the reference trajectory and the fitted penalty.
# The clip range spans (-inf, +inf).
# NOTE(review): presumably an infinite range means rewards are not clipped — confirm.
rf = Temporal_projection(
    ref_tr,
    clip_range=(-np.inf, np.inf),
    penalty=penalty,
)

# Convert the filtered simulations into the dataset consumed by the FQI driver
# below (it is later indexed by column lists and read through `.values`).
dataset = to_SARS(simulations, rf)

In [42]:
# Minimum number of samples per leaf for the tree regressor.
nmin = 1

# Environment: sized by the number of state and action columns.
# NOTE(review): 0.99999 is presumably the discount factor — confirm against TrackEnv.
state_dim, action_dim = len(state_cols), len(action_cols)
mdp = TrackEnv(state_dim, action_dim, 0.99999, 'continuous')

# Extra-trees regressor: the class is kept un-instantiated and its keyword
# arguments are collected separately so the driver can build it itself.
regressor = ExtraTreesRegressor
regressor_params = dict(
    n_estimators=100,
    criterion='mse',
    min_samples_split=2,
    min_samples_leaf=nmin,
    n_jobs=10,
    random_state=42,
)

In [43]:
# Policy object passed to the FQI driver as `policy`: a ValueBased policy
# constructed from an empty list and a ZeroQ instance.
# NOTE(review): the empty list is presumably the action set and ZeroQ an
# all-zero Q-function — confirm against trlib.policies.
pi = ValueBased([], ZeroQ())

In [44]:
# Column layout of the data matrix handed to the driver:
# time step, state, action, reward, next state, absorbing flag.
cols = ['t', *state_cols, *action_cols, 'r', *state_prime_cols, 'absorbing']

# Masks used by the action dispatcher: positions of the kNN state columns,
# first relative to the state vector, then relative to the full data row.
state_mask = [pos for pos, name in enumerate(state_cols) if name in knn_state_cols]
data_mask = [pos for pos, name in enumerate(cols) if name in knn_state_cols]

In [45]:
# Action values taken from the dataset's action columns, as a plain array.
alg_actions = dataset[action_cols].values

# Dispatcher class (not an instance) that the driver receives.
action_dispatcher = RadialKDTActionDispatcher

In [46]:
# Driver class used to build the algorithm. Only the class is bound here; it is
# called later with the environment, policy, data and regressor settings.
fqi = DoubleFQIDriver

In [53]:
# Instantiate the selected FQI driver with everything prepared above.
algorithm = fqi(
    # Problem definition
    mdp=mdp,
    policy=pi,
    actions=alg_actions,
    max_iterations=100,
    # Regressor class and training data (columns ordered as in `cols`)
    regressor_type=regressor,
    data=dataset[cols].values,
    # Action dispatcher and the column masks it works with
    action_dispatcher=action_dispatcher,
    state_mask=state_mask,
    data_mask=data_mask,
    # Driver options
    # NOTE(review): the exact meaning of ad_param is not visible here — confirm.
    s_norm=False,
    filter_a_outliers=False,
    ad_n_jobs=10,
    ad_param=10,
    verbose=True,
    # Remaining keywords are the regressor hyper-parameters
    **regressor_params
)

In [54]:
# Invoke the driver's step(). In the recorded run this printed per-iteration
# maxQ / fitQ timings and was interrupted by hand at iteration 8.
algorithm.step()

Step 1
Finding nearest actions for each state prime
Time for action list 0.01907205581665039
Time for action set 0.007899761199951172
Time for sprime a mat 0.021032333374023438
Iteration 0
fitQ 0.26702094078063965
Elapsed time 0.26702094078063965
Iteration 1
maxQ 0.4305572509765625
fitQ 0.39272427558898926
Elapsed time 0.8232815265655518
Iteration 2
maxQ 0.45353078842163086
fitQ 0.33298754692077637
Elapsed time 0.7865183353424072
Iteration 3
maxQ 0.4490773677825928
fitQ 0.3931453227996826
Elapsed time 0.8422226905822754
Iteration 4
maxQ 0.4480905532836914
fitQ 0.39978790283203125
Elapsed time 0.8478784561157227
Iteration 5
maxQ 0.46179914474487305
fitQ 0.4234282970428467
Elapsed time 0.8852274417877197
Iteration 6
maxQ 0.4478750228881836
fitQ 0.3959217071533203
Elapsed time 0.8437967300415039
Iteration 7
maxQ 0.5544466972351074
fitQ 0.5538663864135742
Elapsed time 1.1083130836486816
Iteration 8


KeyboardInterrupt: 