In [17]:
%load_ext autoreload
%aimport os, pandas, numpy, pickle
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [22]:
import sys
import os
import pickle
import argparse
import pandas as pd
from sklearn.ensemble.forest import ExtraTreesRegressor

from fqi.et_tuning import run_tuning
from fqi.reward_function import *
from fqi.sars_creator import to_SARS
from fqi.utils import *

from trlib.policies.valuebased import EpsilonGreedy, Softmax
from trlib.policies.qfunction import ZeroQ
from trlib.algorithms.reinforcement.fqi_driver import FQIDriver, DoubleFQIDriver
from trlib.environments.trackEnv import TrackEnv
from trlib.utilities.ActionDispatcher import *

from fqi.fqi_evaluate import run_evaluation

In [14]:
# Lap dataset (rows are identified by their NLap / time columns); the two flag
# columns are parsed explicitly as booleans.
simulations = pd.read_csv(
    './trajectory/dataset_70_laps.csv',
    dtype={'isReference': bool, 'is_partial': bool},
)

# Disabled filter: restrict the dataset to laps 1 and 9 only.
#simulations = simulations[(simulations['NLap'] == 1) | (simulations['NLap'] == 9)]

# Reference trajectory; its header is replaced with the column names below.
ref_tr = pd.read_csv('./trajectory/ref_traj.csv')
ref_tr.columns = [
    'time',
    'Acceleration_x',
    'Acceleration_y',
    'speed_x',
    'speed_y',
    'xCarWorld',
    'yCarWorld',
    'alpha_step',
]

In [49]:
# Preview only the first rows instead of displaying the whole DataFrame.
simulations.head(10)

Unnamed: 0,NLap,time,isReference,is_partial,xCarWorld,yCarWorld,nYawBody,nEngine,NGear,prevaSteerWheel,...,speedDifferenceOfModules,actualAccelerationX,actualAccelerationY,referenceAccelerationX,referenceAccelerationY,accelerationDiffX,accelerationDiffY,aSteerWheel,pBrakeF,rThrottlePedal
0,1,0.307996,False,False,654.799744,1169.202148,0.013237,18466.84082,7,-0.001959,...,-2.462826,1.172228,-0.845928,1.300892,0.003601,0.128664,0.849529,-0.001715,0.0,1.0
1,1,0.407996,False,False,663.557922,1169.316528,0.012194,18489.47144,7,-0.001715,...,-2.416658,1.150649,-0.874358,1.277093,0.001861,0.126444,0.876219,-0.001714,0.0,1.0
2,1,0.507996,False,False,672.327209,1169.422852,0.011190,18511.70898,7,-0.001714,...,-2.371231,1.130063,-0.905710,1.252617,-0.000061,0.122554,0.905649,-0.001469,0.0,1.0
3,1,0.607996,False,False,681.107422,1169.519897,0.010168,18533.46802,7,-0.001469,...,-2.326653,1.105262,-0.868164,1.226950,-0.000073,0.121688,0.868091,-0.001469,0.0,1.0
4,1,0.707996,False,False,689.898254,1169.608398,0.009167,18554.90967,7,-0.001469,...,-2.282955,1.080983,-0.905741,1.200744,-0.000058,0.119761,0.905683,-0.001955,0.0,1.0
5,1,0.807996,False,False,698.699524,1169.687988,0.008132,18575.24536,7,-0.001955,...,-2.229615,1.002264,-0.913655,1.175719,0.000528,0.173455,0.914183,-0.001467,0.0,1.0
6,1,0.907996,False,False,707.510803,1169.758301,0.006860,18595.27344,7,-0.001467,...,-2.187247,1.047858,-1.253670,1.159796,0.000023,0.111938,1.253693,-0.002725,0.0,1.0
7,1,1.007996,False,False,716.331848,1169.815918,0.004919,18614.62280,7,-0.002725,...,-2.131727,1.009651,-1.718997,1.122303,0.000020,0.112652,1.719017,-0.003268,0.0,1.0
8,1,1.107996,False,False,725.162415,1169.856689,0.002823,18634.58374,7,-0.003268,...,-2.053136,0.976601,-1.813381,1.112550,0.000780,0.135949,1.814161,-0.003267,0.0,1.0
9,1,1.207996,False,False,734.002502,1169.879517,0.000749,18653.26904,7,-0.003267,...,-2.012541,1.055580,-1.839402,1.078666,0.000126,0.023086,1.839528,-0.003265,0.0,1.0


In [9]:
# Maximum allowed deviation (in percent) of a lap's final time from the final
# time of the reference trajectory for the lap to be kept.
MAX_LAP_TIME_DELTA_PERC = 1.5

all_laps = np.unique(simulations.NLap)
ref_time = ref_tr['time'].values[-1]

# Final 'time' value of every lap, materialised as an array. (A lazy `map`
# object would be exhausted after its first use, so re-evaluating the
# comparison below on its own would silently select no laps.)
lap_times = np.array(
    [simulations.loc[simulations.NLap == lap, 'time'].values[-1] for lap in all_laps]
)

# Boolean mask: True where the lap time is within the tolerance of the reference.
perc_deltas = np.abs(lap_times - ref_time) / ref_time * 100 <= MAX_LAP_TIME_DELTA_PERC
right_laps = all_laps[perc_deltas]
right_laps

array([ 9, 14, 16, 17, 20, 47, 49, 55, 59, 60, 61, 62, 63, 65, 68])

In [None]:
# State columns of the rows that belong to the selected laps.
right_lap_states = simulations.loc[simulations.NLap.isin(right_laps), state_cols].values

# Fit the likelihood penalty on those states.
penalty = LikelihoodPenalty()
penalty.fit(right_lap_states, n_jobs=10)

In [None]:
# Persist the fitted penalty object next to the notebook.
file_name = 'penalty.pkl'
penalty_path = './' + file_name
with open(penalty_path, 'wb') as output:
    pickle.dump(penalty, output, protocol=pickle.HIGHEST_PROTOCOL)

In [15]:
# Reload the pickled penalty from disk.
# The file name is (re)defined here so that this cell also works when the
# fit/save cells above have not been executed in the current kernel session.
file_name = 'penalty.pkl'
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open('./' + file_name, 'rb') as pol:
    penalty = pickle.load(pol)

In [17]:
# Build the object later handed to to_SARS from the reference trajectory and
# the penalty.
# The clip range spans (-inf, +inf); presumably this means "no clipping" — confirm
# against Temporal_projection.
unbounded_clip = (-np.inf, np.inf)
rf = Temporal_projection(ref_tr, penalty=penalty, clip_range=unbounded_clip)

In [18]:
# Build `dataset` from the lap data, passing `rf` to the converter. The cells
# below index `dataset` by column name (action_cols, cols).
# NOTE(review): presumably one (state, action, reward, next-state) row per sample —
# confirm in fqi.sars_creator.
dataset = to_SARS(simulations, rf)

In [20]:
# Shared settings: `nmin` is used as min_samples_leaf below, `n_jobs` as the
# regressor's n_jobs.
nmin = 1
n_jobs = 10

# Create environment
state_dim, action_dim = len(state_cols), len(action_cols)
mdp = TrackEnv(state_dim, action_dim, 0.99999, 'continuous')

# Regressor class (not an instance) and the keyword arguments kept alongside it
regressor = ExtraTreesRegressor
regressor_params = dict(
    n_estimators=100,
    criterion='mse',
    min_samples_split=2,
    min_samples_leaf=nmin,
    n_jobs=n_jobs,
    random_state=42,
)

In [23]:
# First step: Softmax policy over an empty action list and a ZeroQ instance.
# NOTE(review): the previous comment on the temperature read "no exploration";
# whether a temperature of 0.5 really means that depends on trlib's Softmax — confirm.
temperature = 0.5
zero_q = ZeroQ()
pi = Softmax([], zero_q, temperature)

In [24]:
def _positions_of(columns, wanted):
    """Return the indices of the entries of `columns` that are contained in `wanted`."""
    return [idx for idx, name in enumerate(columns) if name in wanted]


# Column order of the data matrix handed to the algorithm
cols = ['t', *state_cols, *action_cols, 'r', *state_prime_cols, 'absorbing']

# Masks for the action dispatcher: positions of the knn state columns inside
# the state vector and inside the full data row, respectively
state_mask = _positions_of(state_cols, knn_state_cols)
data_mask = _positions_of(cols, knn_state_cols)

# Dispatcher class (not an instance) and the action values taken from the dataset
action_dispatcher = RadialKDTActionDispatcher
alg_actions = dataset[action_cols].values

In [25]:
# Driver class used to build the algorithm in the next cell (the class itself,
# not an instance). FQIDriver is imported from the same module.
fqi = DoubleFQIDriver

In [30]:
# Driver settings
max_iterations = 100
kdt_norm = False
filt_a_outliers = False
kdt_param = 10

# Keyword arguments for the driver itself; the regressor keyword arguments
# are forwarded in the same call.
driver_kwargs = dict(
    mdp=mdp,
    policy=pi,
    actions=alg_actions,
    max_iterations=max_iterations,
    regressor_type=regressor,
    data=dataset[cols].values,
    action_dispatcher=action_dispatcher,
    state_mask=state_mask,
    data_mask=data_mask,
    s_norm=kdt_norm,
    filter_a_outliers=filt_a_outliers,
    ad_n_jobs=n_jobs,
    ad_param=kdt_param,
    verbose=True,
)
algorithm = fqi(**driver_kwargs, **regressor_params)

In [31]:
def _dump_pickle(obj, directory, file_name):
    """Pickle `obj` to `directory/file_name` with the highest pickle protocol."""
    with open(os.path.join(directory, file_name), 'wb') as output:
        pickle.dump(obj, output, pickle.HIGHEST_PROTOCOL)


# Output location and file names
output_path = './model_file/'
reward_function = 'temporal_penalty'
output_name = reward_function + '_reward_model'
algorithm_name = output_name + '.pkl'
policy_name = 'policy_' + algorithm_name
AD_name = 'AD_' + algorithm_name

# Create the output directory BEFORE the long-running step, so a missing
# folder cannot make the saves fail after the training has finished.
os.makedirs(output_path, exist_ok=True)

print('Starting execution')
algorithm.step()

# save algorithm object
_dump_pickle(algorithm, output_path, algorithm_name)

# save policy object
_dump_pickle(algorithm._policy, output_path, policy_name)
print('Saved policy object')

# save action dispatcher object
_dump_pickle(algorithm._action_dispatcher, output_path, AD_name)
print('Saved Action Dispatcher')

Starting execution
Step 1
Finding nearest actions for each state prime
Time for action list 8.195560693740845
Time for action set 0.7957553863525391
Time for sprime a mat 18.168553590774536
Iteration 0
fitQ 10.200401544570923
Elapsed time 10.200401544570923
Iteration 1
maxQ 16.778680086135864
fitQ 10.902239799499512
Elapsed time 27.680919885635376
Iteration 2
maxQ 15.474468469619751
fitQ 10.63949203491211
Elapsed time 26.11396050453186
Iteration 3
maxQ 17.73989248275757
fitQ 10.590643167495728
Elapsed time 28.330535650253296
Iteration 4
maxQ 15.683396339416504
fitQ 10.171521663665771
Elapsed time 25.854918003082275
Iteration 5
maxQ 14.695566892623901
fitQ 10.16788935661316
Elapsed time 24.86345624923706
Iteration 6
maxQ 15.815593957901001
fitQ 10.28238034248352
Elapsed time 26.09797430038452
Iteration 7
maxQ 14.994332075119019
fitQ 9.851092100143433
Elapsed time 24.84542417526245
Iteration 8
maxQ 17.323466777801514
fitQ 10.115386009216309
Elapsed time 27.438852787017822
Iteration 9
max

maxQ 13.4150071144104
fitQ 9.100330114364624
Elapsed time 22.515337228775024
Iteration 89
maxQ 12.857937097549438
fitQ 9.670830965042114
Elapsed time 22.528768062591553
Iteration 90
maxQ 13.320350885391235
fitQ 9.225333452224731
Elapsed time 22.545684337615967
Iteration 91
maxQ 13.08823275566101
fitQ 9.025013446807861
Elapsed time 22.113246202468872
Iteration 92
maxQ 12.776918649673462
fitQ 9.542154312133789
Elapsed time 22.31907296180725
Iteration 93
maxQ 12.680822849273682
fitQ 9.90872859954834
Elapsed time 22.58955144882202
Iteration 94
maxQ 13.59368109703064
fitQ 8.893761396408081
Elapsed time 22.48744249343872
Iteration 95
maxQ 13.1796555519104
fitQ 9.20890212059021
Elapsed time 22.38855767250061
Iteration 96
maxQ 12.94585371017456
fitQ 9.017089128494263
Elapsed time 21.962942838668823
Iteration 97
maxQ 12.465965270996094
fitQ 9.251380205154419
Elapsed time 21.717345476150513
Iteration 98
maxQ 13.180299997329712
fitQ 9.97423791885376
Elapsed time 23.15453791618347
Iteration 99
max