In [8]:
import pandas as pd
# Load the dataset CSV and the reference-trajectory CSV (in that order) into DataFrames.
ds, rt = (
    pd.read_csv(csv_path)
    for csv_path in ('./trajectory/dataset.csv', './trajectory/ref_traj.csv')
)

In [9]:
import pandas as pd
import sys
import os
import pickle
import argparse
from trlib.policies.valuebased import EpsilonGreedy
from trlib.policies.qfunction import ZeroQ
from sklearn.ensemble.forest import ExtraTreesRegressor
from trlib.algorithms.reinforcement.fqi_driver import FQIDriver, DoubleFQIDriver
from trlib.environments.trackEnv import TrackEnv
from trlib.utilities.ActionDispatcher import *
from fqi.dataset_preprocessing import *
from fqi.fqi_evaluate import run_evaluation
"""from fqi.et_tuning import run_tuning"""
from fqi.utils import *
from fqi.reward_function import *
from fqi.sars_creator import *
# DataFrames for the recorded dataset and the reference trajectory.
data_df = pd.read_csv('./trajectory/dataset.csv')
ref_df = pd.read_csv('./trajectory/ref_traj.csv')

# Raise the interpreter recursion limit.
# NOTE(review): presumably needed for deeply nested objects handled later
# (e.g. when pickling the fitted model) — TODO confirm.
sys.setrecursionlimit(3000)

In [10]:
# Reward selector handed to run_experiment; it branches on 'speed' and 'progress'.
reward_function = 'speed'

# Base name used for the pickled artifacts (alternative name noted by the author: 'first_model').
output_name = 'model_r_speed_50laps_pc'

In [11]:
def run_experiment(track_file_name, rt_file_name, data_path, max_iterations, output_path, n_throttle,
                   n_brake, n_steer, n_jobs, output_name, reward_function, delta_t,
                   filter_actions, ad_type, tuning, kdt_norm, kdt_param, filt_a_outliers, double_fqi,
                   evaluation):
    """Build the SARS dataset, fit an FQI driver on it and pickle the results.

    Parameters
    ----------
    track_file_name : str
        Base name (without ``.csv``) of the dataset file inside ``data_path``.
    rt_file_name : str
        Base name (without ``.csv``) of the reference-trajectory file inside ``data_path``.
    data_path : str
        Directory holding the two CSV files; also forwarded to ``run_evaluation``.
    max_iterations : int
        Forwarded to the FQI driver as ``max_iterations``.
    output_path : str
        Directory where the pickles are written (created if missing).
    n_throttle, n_brake, n_steer : int
        Forwarded to ``create_action_combinations``; only used when ``ad_type == 'discrete'``.
    n_jobs : int
        Used for the regressor (``n_jobs``), the action dispatcher (``ad_n_jobs``)
        and ``run_evaluation``.
    output_name : str
        Base name of the saved files: ``<output_name>.pkl``, ``policy_<output_name>.pkl``
        and ``AD_<output_name>.pkl``.
    reward_function : str
        ``'speed'`` or ``'progress'``; selects how the SARS dataset is built.
    delta_t
        Forwarded to ``prepare_dataset``; only used when ``reward_function == 'progress'``.
    filter_actions
        Forwarded to ``create_action_combinations`` and ``run_evaluation``.
    ad_type : str
        ``'fkdt'``, ``'rkdt'`` or ``'discrete'``; any other value runs without an
        action dispatcher and without an explicit action set.
    tuning
        Not used by this function; kept so existing callers keep working.
    kdt_norm, kdt_param, filt_a_outliers
        Forwarded to the FQI driver as ``s_norm``, ``ad_param`` and ``filter_a_outliers``.
    double_fqi : bool
        Use ``DoubleFQIDriver`` when true, ``FQIDriver`` otherwise.
    evaluation : bool
        When true, call ``run_evaluation`` on the saved artifacts.

    Returns
    -------
    The fitted driver instance (after a single ``step()`` call).

    Raises
    ------
    ValueError
        If ``reward_function`` is neither ``'speed'`` nor ``'progress'``.
    """
    # Fail fast: previously an unknown value left `sars_data` unbound and the
    # function crashed later with an unrelated error.
    if reward_function not in ('speed', 'progress'):
        raise ValueError(
            "reward_function must be 'speed' or 'progress', got {!r}".format(reward_function))

    # Make sure the output directory exists before the (long) training run,
    # so the fitted model cannot be lost at save time.
    os.makedirs(output_path, exist_ok=True)

    # Resolve the input files from the arguments instead of relying on notebook
    # globals / hard-coded paths, so the function honours what the caller asked for.
    dataset_csv = os.path.join(data_path, track_file_name + '.csv')
    ref_traj_csv = os.path.join(data_path, rt_file_name + '.csv')

    # build SARS
    if reward_function == 'speed':
        reward = Speed_projection(pd.read_csv(ref_traj_csv))
        sars_data = to_SARS(pd.read_csv(dataset_csv), reward)
    else:  # 'progress'
        sars_data = prepare_dataset(dataset_csv, ref_traj_csv,
                                    reward_function=reward_function, delta_t=delta_t)

    print('SARS prepared')
    min_samples_leaf = 5

    # Create environment
    # NOTE(review): state_cols / action_cols / state_prime_cols / knn_state_cols are not
    # defined here; presumably they come from the star imports — verify.
    state_dim = len(state_cols)
    action_dim = len(action_cols)
    # NOTE(review): 0.99999 is presumably the discount factor — confirm against TrackEnv.
    mdp = TrackEnv(state_dim, action_dim, 0.99999, 'continuous')

    # Parameters of ET regressor
    regressor_params = {'n_estimators': 100,
                        'criterion': 'mse',
                        'min_samples_split': 2,
                        'min_samples_leaf': min_samples_leaf,
                        'n_jobs': n_jobs,
                        'random_state': 42}
    regressor = ExtraTreesRegressor

    # Select the action dispatcher class and the action set handed to the algorithm.
    if ad_type == 'fkdt':
        action_dispatcher = FixedKDTActionDispatcher
        alg_actions = sars_data[action_cols].values

    elif ad_type == 'rkdt':
        action_dispatcher = RadialKDTActionDispatcher
        alg_actions = sars_data[action_cols].values

    elif ad_type == 'discrete':
        action_dispatcher = ConstantActionDispatcher
        actions, sub_actions = create_action_combinations(sars_data, n_throttle, n_brake, n_steer,
                                                          filter_actions)
        alg_actions = sub_actions
    else:
        action_dispatcher = None
        alg_actions = None

    # Create policy instance
    epsilon = 0  # no exploration
    pi = EpsilonGreedy([], ZeroQ(), epsilon)

    # Define the order of the columns to pass to the algorithm
    # state_prime_cols: columns of the next state
    cols = ['t'] + state_cols + action_cols + ['r'] + state_prime_cols + ['absorbing']
    # Define the masks used by the action dispatcher: positions of the kNN state
    # columns within the state vector and within the full data row.
    state_mask = [i for i, s in enumerate(state_cols) if s in knn_state_cols]
    data_mask = [i for i, c in enumerate(cols) if c in knn_state_cols]

    fqi = DoubleFQIDriver if double_fqi else FQIDriver

    algorithm = fqi(mdp=mdp,
                    policy=pi,
                    actions=alg_actions,
                    max_iterations=max_iterations,
                    regressor_type=regressor,
                    data=sars_data[cols].values,
                    action_dispatcher=action_dispatcher,
                    state_mask=state_mask,
                    data_mask=data_mask,
                    s_norm=kdt_norm,
                    filter_a_outliers=filt_a_outliers,
                    ad_n_jobs=n_jobs,
                    ad_param=kdt_param,
                    verbose=True,
                    **regressor_params)

    algorithm.step()

    # save algorithm object
    algorithm_name = output_name + '.pkl'
    algorithm_file = os.path.join(output_path, algorithm_name)
    with open(algorithm_file, 'wb') as output:
        pickle.dump(algorithm, output, pickle.HIGHEST_PROTOCOL)
    print('Saved algorithm object')

    # save policy object
    policy_file = os.path.join(output_path, 'policy_' + algorithm_name)
    with open(policy_file, 'wb') as output:
        pickle.dump(algorithm._policy, output, pickle.HIGHEST_PROTOCOL)
    print('Saved policy object')

    # save action dispatcher object
    dispatcher_file = os.path.join(output_path, 'AD_' + algorithm_name)
    with open(dispatcher_file, 'wb') as output:
        pickle.dump(algorithm._action_dispatcher, output, pickle.HIGHEST_PROTOCOL)
    print('Saved Action Dispatcher')

    if evaluation:
        print('*** Evaluation ***')
        run_evaluation(algorithm_file, track_file_name, data_path, n_jobs, output_path,
                       'eval_' + output_name, filter_actions,
                       dispatcher_file)

    return algorithm

In [12]:
# Train on ./trajectory/dataset.csv with the name and reward selected in the
# configuration cell; every argument is spelled out by keyword for readability.
algo = run_experiment(
    track_file_name='dataset',
    rt_file_name='ref_traj',
    data_path='./trajectory/',
    max_iterations=100,
    output_path='./model_file/',
    n_throttle=3,
    n_brake=3,
    n_steer=3,
    n_jobs=10,
    output_name=output_name,
    reward_function=reward_function,
    delta_t=2,
    filter_actions=False,
    ad_type='rkdt',
    tuning=False,
    kdt_norm=False,
    kdt_param=10,
    filt_a_outliers=False,
    double_fqi=True,
    evaluation=True,
)

SARS prepared
Step 1
Finding nearest actions for each state prime
Time for action list 0.8989112377166748
Time for action set 0.2554903030395508
Time for sprime a mat 3.3429853916168213
Iteration 0
fitQ 5.395434856414795
Elapsed time 5.395434856414795
Iteration 1
maxQ 6.660641670227051
fitQ 5.84182071685791
Elapsed time 12.502462387084961
Iteration 2
maxQ 6.250849723815918
fitQ 5.524359226226807
Elapsed time 11.775208950042725
Iteration 3
maxQ 6.030641794204712
fitQ 5.653618574142456
Elapsed time 11.684260368347168
Iteration 4
maxQ 6.404860019683838
fitQ 5.7829670906066895
Elapsed time 12.187827110290527
Iteration 5
maxQ 6.3289008140563965
fitQ 5.7648162841796875
Elapsed time 12.093717098236084
Iteration 6
maxQ 6.339550495147705
fitQ 5.805216312408447
Elapsed time 12.144766807556152
Iteration 7
maxQ 6.418385744094849
fitQ 5.735654830932617
Elapsed time 12.154040575027466
Iteration 8
maxQ 6.272672653198242
fitQ 5.7591872215271
Elapsed time 12.031859874725342
Iteration 9
maxQ 6.499958992

maxQ 6.112706184387207
fitQ 5.357100963592529
Elapsed time 11.469807147979736
Iteration 90
maxQ 6.2307446002960205
fitQ 5.487267732620239
Elapsed time 11.71801233291626
Iteration 91
maxQ 6.170833110809326
fitQ 5.409209251403809
Elapsed time 11.580042362213135
Iteration 92
maxQ 6.191063165664673
fitQ 5.316702365875244
Elapsed time 11.507765531539917
Iteration 93
maxQ 6.075950622558594
fitQ 5.666794061660767
Elapsed time 11.74274468421936
Iteration 94
maxQ 6.1401190757751465
fitQ 5.6621387004852295
Elapsed time 11.802257776260376
Iteration 95
maxQ 6.368204355239868
fitQ 5.645165681838989
Elapsed time 12.013370037078857
Iteration 96
maxQ 6.06588077545166
fitQ 5.612913370132446
Elapsed time 11.678794145584106
Iteration 97
maxQ 6.201336145401001
fitQ 5.7720677852630615
Elapsed time 11.973403930664062
Iteration 98
maxQ 6.16750168800354
fitQ 5.628684759140015
Elapsed time 11.796186447143555
Iteration 99
maxQ 6.066746711730957
fitQ 5.404998064041138
Elapsed time 11.471744775772095
Total elapse

In [1]:
# Reload a previously pickled policy and action dispatcher from model_file/.
# NOTE(review): these paths point at the 'first_model' artifacts, not at the
# `output_name` set in the configuration cell — confirm this is the intended model.
# NOTE(review): this cell needs `pickle` from the imports cell; its execution
# count (In [1]) suggests it was run out of order.
# pickle.load can execute arbitrary code: only load files produced by a trusted run.
with open('model_file/policy_first_model.pkl', 'rb') as pol:
    policy = pickle.load(pol)
with open('model_file/AD_first_model.pkl', 'rb') as adfm:
    action_disp = pickle.load(adfm)



In [14]:
# Display the format version string reported by this interpreter's pickle module.
pickle.format_version

'4.0'