# Parameter Optimizer 

In [1]:
import logging
import os
import re
import warnings

# Silence library warnings before the heavy third-party imports below run.
warnings.filterwarnings('ignore')

import numpy as np
from keras import backend as K
from sklearn.model_selection import ShuffleSplit
from sklearn.utils import check_random_state
from skopt import load

from csrank.callbacks import DebugOutput, LRScheduler
from csrank.dataset_reader import SyntheticDatasetGenerator
from csrank.objectranking import RankNet
from csrank.tuning import ParameterOptimizer
from csrank.util import configure_logging_numpy_keras

Using TensorFlow backend.


## Load the saved optimizer and resume after the iterations that are already done

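`get_optimizer` (next cell) loads the optimizer stored at `optimizer_path` and subtracts the iterations it has already finished from `n_iter`. The function that converts a duration string into the equivalent time in microseconds, `get_duration_microsecond`, is defined in a later cell.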

In [2]:
def get_optimizer(logger, optimizer_path, n_iter):
    """Load a previously stored optimizer and compute how many iterations remain.

    Parameters
    ----------
    logger : logging.Logger
        Logger used to report progress and load failures.
    optimizer_path : str
        Path passed to ``load`` to read the stored optimizer.
    n_iter : int
        Total number of iterations requested for the experiment.

    Returns
    -------
    optimizer : object or None
        The loaded optimizer, or ``None`` when loading failed or the stored
        optimizer has not recorded any finished iteration (empty ``yi``).
    n_iter : int
        Iterations still to run. When a stored optimizer is reused this is
        ``n_iter`` minus its finished iterations, clamped at zero; otherwise
        the value is returned unchanged.
    """
    logger.info('Retrieving model stored at: {}'.format(optimizer_path))
    try:
        # NOTE(review): skopt's ``load`` is pickle-based; only point it at
        # files produced by a trusted run.
        optimizer = load(optimizer_path)
        logger.info('Loading model stored at: {}'.format(optimizer_path))
    except FileNotFoundError:
        logger.error('No such file or directory: {}'.format(optimizer_path))
        optimizer = None
    except (KeyError, ValueError):
        # Both failure modes are reported identically, so they share a handler.
        logger.error('Cannot open the file {}'.format(optimizer_path))
        optimizer = None

    if optimizer is None:
        return None, n_iter

    # ``yi`` holds one entry per finished iteration.
    finished_iterations = len(optimizer.yi)
    if finished_iterations == 0:
        logger.info('Optimizer did not finish any iterations so setting optimizer to null')
        return None, n_iter

    # Never report a negative number of remaining iterations.
    n_iter = max(n_iter - finished_iterations, 0)
    logger.info('Iterations already done: {} and running iterations {}'.format(finished_iterations, n_iter))
    return optimizer, n_iter

## Defining constants for the experiments

Configure logging together with Keras and TensorFlow, define the parameters for the dataset reader, and define the cross-validation splits for the optimizer.

In [3]:
# Directory that holds both the experiment log file and the stored optimizer.
logs_dir = os.path.join(os.getcwd(), 'logs')
# Create it up front so a fresh checkout can write there.
# NOTE(review): the logging helper may already do this for custom paths - confirm.
os.makedirs(logs_dir, exist_ok=True)
logger = configure_logging_numpy_keras(log_path=os.path.join(logs_dir, "test_models.log"), name='Experiment')

# Dataset dimensions.
n_features = 2
n_instances = 10000
n_objects = 5

# One RandomState instance seeded with 42; it is handed to the splitter here
# and reused by the optimizer and dataset-generator parameters in later cells.
random_state = check_random_state(42)
skf = ShuffleSplit(n_splits=2, test_size=0.5, random_state=random_state)

# Training length per fit and total iteration budget of the optimizer.
epochs = 5
optimizer_path = os.path.join(logs_dir, "optimizer")
n_iter = 4
# Reuse a stored optimizer if one exists; n_iter becomes the iterations still to run.
optimizer, n_iter = get_optimizer(logger, optimizer_path, n_iter)

In [4]:
def get_duration_microsecond(duration):
    """Convert a duration string such as ``"10h"`` into microseconds.

    Parameters
    ----------
    duration : str
        A non-negative whole number followed by a unit letter: ``d`` (days),
        ``h`` (hours) or ``m`` (minutes). The unit is case-insensitive and
        whitespace around the number and the unit is ignored.

    Returns
    -------
    float
        The duration expressed in microseconds.

    Raises
    ------
    ValueError
        If ``duration`` is not of the form ``<integer><d|h|m>``.
    """
    microseconds_per_unit = {"D": 24 * 60 * 60 * 1e6, "H": 60 * 60 * 1e6, "M": 60 * 1e6}
    # Parse number and unit in one anchored match instead of splitting the
    # string on the number's text, which misparsed inputs such as "00h".
    match = re.fullmatch(r'\s*(\d+)\s*([A-Za-z]+)\s*', duration)
    unit = match.group(2).upper() if match else None
    if unit not in microseconds_per_unit:
        raise ValueError(
            "Invalid duration {!r}: expected a whole number followed by "
            "'d', 'h' or 'm'".format(duration))
    amount = int(match.group(1))
    return microseconds_per_unit[unit] * amount

Define the parameters used to initialize and fit the ranker.

In [5]:
# Constructor arguments for the ranker; both feature-count keys are set to n_features.
ranker_params = dict(
    n_objects=n_objects,
    n_features=n_features,
    n_object_features=n_features,
)
ranker = RankNet(**ranker_params)

# Callbacks passed to every fit. The scheduler's initial learning rate is read
# from the learning rate currently set on the ranker's optimizer.
debugOutput = DebugOutput()
initial_learning_rate = K.get_value(ranker.optimizer.lr)
lrScheduler = LRScheduler(initial_lr=initial_learning_rate)

fit_params = dict(
    epochs=epochs,
    callbacks=[debugOutput, lrScheduler],
)

Define the parameters used to initialize and fit the parameter optimizer.

In [6]:
# Keyword arguments for the optimizer's fit call: remaining iterations, the CV
# splitter, the (possibly reloaded) optimizer and a 10-hour budget in microseconds.
optimizer_fit_params = dict(
    n_iter=n_iter,
    cv_iter=skf,
    optimizer=optimizer,
    total_duration=get_duration_microsecond("10h"),
)

# Search ranges for the ranker's hyperparameters. Three-element tuples carry a
# 'log-uniform' marker (presumably skopt-style dimension specs - confirm).
ranker_tunables = {
    'n_hidden': (2, 20),
    'n_units': (20, 40),
    'learning_rate': (1e-5, 1e-2, 'log-uniform'),
    'reg_strength': (1e-10, 1e-1, 'log-uniform'),
    'batch_size': (64, 1024),
}

# Search ranges for the learning-rate scheduler callback.
# NOTE(review): epochs is set to 5 in the configuration cell, so an epochs_drop
# between 300 and 600 presumably never triggers within one fit - confirm.
lrScheduler_tunables = {
    'epochs_drop': (300, 600),
    'drop': (1e-2, 1e-1, 'log-uniform'),
}

# Each tunable object is mapped to the ranges of its own parameters.
tunable_parameter_ranges = {ranker: ranker_tunables, lrScheduler: lrScheduler_tunables}

# Constructor arguments for ParameterOptimizer.
optimizer_params = dict(
    ranker=ranker,
    tunable_parameter_ranges=tunable_parameter_ranges,
    fit_params=fit_params,
    random_state=random_state,
    optimizer_path=optimizer_path,
)

Generate the medoid train and test datasets with the parameters defined above.

In [7]:
# Settings for the synthetic "medoid" dataset; the train and test sets are
# given the same number of instances.
medoids_params = dict(
    dataset_type="medoid",
    n_test_instances=n_instances,
    n_train_instances=n_instances,
    n_features=n_features,
    n_objects=n_objects,
    random_state=random_state,
)
dr = SyntheticDatasetGenerator(**medoids_params)

# A single split, unpacked as train inputs/targets and test inputs/targets.
X, Y, X_test, Y_test = dr.get_single_train_test_split()

Fitting the optimizer

In [8]:
# Build the parameter optimizer from optimizer_params, then fit it on the
# training split (X, Y) using the settings collected in optimizer_fit_params.
optimizer_model = ParameterOptimizer(**optimizer_params)
optimizer_model.fit(X, Y, **optimizer_fit_params)

Evaluate the ranker with the best parameters found by the optimizer on the test dataset.
First, predict the scores for the test instances with the best model.

In [9]:
# Score the test instances with the fitted optimizer model.
predicted = optimizer_model.predict_scores(X_test)
# Bare expression so the notebook displays the score array.
predicted

array([[0.34003744, 0.40530878, 0.31550464, 0.63641626, 0.63163394],
       [0.40763062, 0.48692006, 0.355434  , 0.33623344, 0.4165258 ],
       [0.49906975, 0.3731012 , 0.430352  , 0.5236497 , 0.4388348 ],
       ...,
       [0.50546443, 0.534902  , 0.4652732 , 0.4417104 , 0.4568018 ],
       [0.38909936, 0.51279926, 0.34461373, 0.51644784, 0.6094    ],
       [0.4558252 , 0.31391865, 0.62172145, 0.6636104 , 0.5680445 ]],
      dtype=float32)