# Parameter Optimizer 

In [1]:
import warnings
warnings.filterwarnings('ignore')
from csrank.dataset_reader import SyntheticDatasetGenerator
from sklearn.utils import check_random_state
from sklearn.model_selection import ShuffleSplit
from csrank.callbacks import DebugOutput, LRScheduler
from csrank.tuning import ParameterOptimizer
import logging
from csrank.util import configure_logging_numpy_keras, get_duration_seconds
import numpy as np
from keras import backend as K
import os
from skopt import load
from csrank.objectranking import RankNet
import logging

Using TensorFlow backend.


## Defining constants for the experiments

Configure logging, NumPy and Keras/TensorFlow. Define the parameters for the dataset generator and the cross-validation splits used by the optimizer.

In [2]:
# Both the log file and the optimizer checkpoint live under ./logs. Create the
# directory up front so the notebook survives "Restart & Run All" on a fresh
# checkout, in case the logging helper does not create it for an explicit path.
log_dir = os.path.join(os.getcwd(), 'logs')
os.makedirs(log_dir, exist_ok=True)

configure_logging_numpy_keras(log_path=os.path.join(log_dir, "test_models.log"))
logger = logging.getLogger('Experiment')

# Shape of the synthetic ranking problem.
n_features = 2
n_instances = 10000
n_objects = 5

# One shared random state seeds the splitter here and is reused by later cells.
random_state = check_random_state(42)
skf = ShuffleSplit(n_splits=2, test_size=0.5, random_state=random_state)

# Training epochs per fit, where the optimizer state is stored, and the number
# of optimizer iterations.
epochs = 5
optimizer_path = os.path.join(log_dir, "optimizer")
n_iter = 4

In [3]:
import re
def get_duration_microsecond(duration):
    """Convert a duration string such as ``"10h"`` into microseconds.

    Parameters
    ----------
    duration : str
        A non-negative integer followed by a single unit letter
        (case-insensitive): ``d`` (days), ``h`` (hours), ``m`` (minutes)
        or ``s`` (seconds). Surrounding whitespace and whitespace between
        the number and the unit are ignored.

    Returns
    -------
    float
        The duration expressed in microseconds.

    Raises
    ------
    ValueError
        If ``duration`` is not of the form ``<integer><unit>`` or the unit
        is not one of the supported letters.
    """
    # Parse number and unit in one anchored match. Splitting the string on the
    # re-stringified integer breaks on zero-padded input such as "00h".
    parsed = re.fullmatch(r'\s*(\d+)\s*([A-Za-z])\s*', duration)
    if parsed is None:
        raise ValueError(
            "Invalid duration {!r}: expected an integer followed by one of "
            "d, h, m or s (e.g. '10h')".format(duration))

    amount = int(parsed.group(1))
    unit = parsed.group(2).upper()

    # Microseconds per supported unit.
    options = {"D": 24 * 60 * 60 * 1e6, "H": 60 * 60 * 1e6, "M": 60 * 1e6, "S": 1e6}
    if unit not in options:
        raise ValueError(
            "Unknown duration unit {!r} in {!r}: expected one of d, h, m or s".format(
                parsed.group(2), duration))
    return options[unit] * amount

Define the parameters used to initialize and fit the ranker.

In [4]:
# Constructor arguments for the ranker; the feature count is passed under both
# keyword names.
ranker_params = dict(
    n_objects=n_objects,
    n_features=n_features,
    n_object_features=n_features,
)
ranker = RankNet(**ranker_params)

# Callbacks passed along with every fit call.
debugOutput, lrScheduler = DebugOutput(), LRScheduler()

fit_params = dict(epochs=epochs, callbacks=[debugOutput, lrScheduler])

Define the parameters used to initialize and fit the parameter optimizer.

In [5]:
# Keyword arguments for ParameterOptimizer.fit.
optimizer_fit_params = dict(
    n_iter=n_iter,
    cv_iter=skf,
    total_duration=get_duration_seconds("10h"),
)

# Search ranges for the ranker's hyperparameters (key order kept as-is).
ranker_tunables = {
    'n_hidden': (2, 20),
    'n_units': (20, 40),
    'learning_rate': (1e-5, 1e-2, 'log-uniform'),
    'reg_strength': (1e-10, 1e-1, 'log-uniform'),
    'batch_size': (64, 1024),
}

# Search ranges for the learning-rate scheduler callback.
# NOTE(review): epochs_drop is searched in 300-600 while `epochs` is set to 5
# in the configuration cell — confirm these ranges are intended for this demo.
lrScheduler_tunables = {
    'epochs_drop': (300, 600),
    'drop': (1e-2, 1e-1, 'log-uniform'),
}

# Each tunable object is mapped to the ranges searched for it.
tunable_parameter_ranges = {
    ranker: ranker_tunables,
    lrScheduler: lrScheduler_tunables,
}

# Keyword arguments for the ParameterOptimizer constructor.
optimizer_params = dict(
    learner=ranker,
    optimizer_path=optimizer_path,
    tunable_parameter_ranges=tunable_parameter_ranges,
    fit_params=fit_params,
    random_state=random_state,
)

Generate the medoid training and test datasets with the parameters defined above.

In [6]:
# Settings for the synthetic "medoid" dataset; the training and test sets use
# the same number of instances.
medoids_params = dict(
    dataset_type="medoid",
    n_test_instances=n_instances,
    n_train_instances=n_instances,
    n_features=n_features,
    n_objects=n_objects,
    random_state=random_state,
)
dr = SyntheticDatasetGenerator(**medoids_params)

# A single train/test split: (X, Y) for fitting, (X_test, Y_test) held out.
X, Y, X_test, Y_test = dr.get_single_train_test_split()

Fitting the optimizer

In [7]:
# Build the optimizer from optimizer_params, then run the search on the
# training split (X, Y) with the settings in optimizer_fit_params.
# NOTE(review): optimizer_fit_params sets total_duration from "10h" —
# presumably an upper bound on the search runtime; confirm before running.
optimizer_model = ParameterOptimizer(**optimizer_params)
optimizer_model.fit(X, Y, **optimizer_fit_params)

Evaluate the ranker with the best parameters found by the optimizer on the test dataset.
Predict the scores for the best model.

In [8]:
# Score the held-out test data with the fitted optimizer; the bare name on the
# last line makes the notebook display the resulting array.
predicted = optimizer_model.predict_scores(X_test)
predicted

array([[0.06708024, 0.64009005, 0.05916843, 0.41978246, 0.5680258 ],
       [0.61247456, 0.49110284, 0.51891345, 0.3321164 , 0.4347266 ],
       [0.55813664, 0.3096677 , 0.6031822 , 0.5784402 , 0.56980395],
       ...,
       [0.6519454 , 0.48743787, 0.6692742 , 0.49136415, 0.6077139 ],
       [0.07445394, 0.6299911 , 0.02071192, 0.35829696, 0.4195008 ],
       [0.5752736 , 0.06263375, 0.57080674, 0.28884593, 0.49449039]],
      dtype=float32)