# Parameter Optimizer 

In [1]:
import warnings
warnings.filterwarnings('ignore')
from csrank.dataset_reader import SyntheticDatasetGenerator
from sklearn.utils import check_random_state
from sklearn.model_selection import ShuffleSplit
from csrank.fate_ranking import FATEObjectRanker
from csrank.callbacks import DebugOutput
from csrank.tuning import ParameterOptimizer
import logging
from csrank.util import configure_logging_numpy_keras
import numpy as np
from keras import backend as K
import os
from skopt import load

Using TensorFlow backend.


## Load the saved optimizer and resume after the iterations that are already done

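`get_optimizer` loads a previously saved optimizer, if one exists, and subtracts the iterations it has already completed from the requested number of iterations. (The function that converts a duration given as a string into the required time in microseconds is defined in a later cell.)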

In [2]:
def get_optimizer(logger, optimizer_path, n_iter):
    """Load a previously saved optimizer and work out how many iterations remain.

    Parameters
    ----------
    logger : logging.Logger
        Logger used to report progress and load failures.
    optimizer_path : str
        Path of the optimizer dump to read with ``load``.
    n_iter : int
        Total number of iterations requested for the experiment.

    Returns
    -------
    optimizer : object or None
        The loaded optimizer, or ``None`` when nothing usable could be loaded
        or the loaded optimizer has not recorded any finished iteration.
    n_iter : int
        Iterations still to run: the requested number minus the finished
        ones, never negative. Unchanged when ``optimizer`` is ``None``.
    """
    logger.info('Retrieving model stored at: {}'.format(optimizer_path))
    try:
        optimizer = load(optimizer_path)
        logger.info('Loading model stored at: {}'.format(optimizer_path))
    except FileNotFoundError:
        logger.error('No such file or directory: {}'.format(optimizer_path))
        optimizer = None
    except (KeyError, ValueError, EOFError):
        # EOFError is what unpickling reports for an empty or truncated dump
        # (e.g. a previous run interrupted while writing); treat it like any
        # other unreadable file and start from scratch instead of crashing.
        logger.error('Cannot open the file {}'.format(optimizer_path))
        optimizer = None

    if optimizer is None:
        return None, n_iter

    # `yi` holds one entry per evaluated point, so its length is the number
    # of iterations that were completed before the dump was written.
    finished_iterations = len(optimizer.yi)
    if finished_iterations == 0:
        logger.info('Optimizer did not finish any iterations so setting optimizer to null')
        return None, n_iter

    # Never request a negative number of iterations.
    n_iter = max(n_iter - finished_iterations, 0)
    logger.info('Iterations already done: {} and running iterations {}'.format(finished_iterations, n_iter))
    return optimizer, n_iter

## Defining constants for the experiments

Configure logging for NumPy, Keras and TensorFlow, define the parameters for the dataset reader, and define the cross-validation splits for the optimizer.

In [3]:
# Logging goes to ./logs/test_models.log under the current working directory.
logger = configure_logging_numpy_keras(
    log_path=os.path.join(os.getcwd(), 'logs', "test_models.log"),
    name='Experiment',
)

# Size of the synthetic dataset.
n_features, n_instances, n_objects = 2, 10000, 5

# One seeded RandomState; it is handed to the splitter here and reused by later cells.
random_state = check_random_state(42)
skf = ShuffleSplit(n_splits=2, test_size=0.5, random_state=random_state)

# Training length for the ranker.
epochs = 100

# Resume from ./logs/optimizer when it exists; n_iter then becomes the
# number of iterations still to run out of the 5 requested.
optimizer_path = os.path.join(os.getcwd(), 'logs', "optimizer")
n_iter = 5
optimizer, n_iter = get_optimizer(logger, optimizer_path, n_iter)

In [4]:
import re
def get_duration_microsecond(duration):
    """Convert a duration string such as ``"10h"`` into microseconds.

    Parameters
    ----------
    duration : str
        A non-negative integer followed by a single unit letter:
        ``d`` (days), ``h`` (hours), ``m`` (minutes) or ``s`` (seconds).
        The unit is case-insensitive, and whitespace around the value or
        between the number and the unit is ignored.

    Returns
    -------
    float
        The duration expressed in microseconds.

    Raises
    ------
    ValueError
        If ``duration`` is not of the form ``<integer><unit>``.
    """
    microseconds_per_unit = {
        "D": 24 * 60 * 60 * 1e6,
        "H": 60 * 60 * 1e6,
        "M": 60 * 1e6,
        "S": 1e6,
    }
    # Parse number and unit in one anchored match so that inputs like
    # "10 h" work and compound or garbage inputs ("10h30m", "h") are
    # rejected with a clear message instead of a bare KeyError/IndexError.
    match = re.match(r'^\s*(\d+)\s*([dhms])\s*$', duration, re.IGNORECASE)
    if match is None:
        raise ValueError(
            "Invalid duration {!r}: expected an integer followed by one of "
            "'d', 'h', 'm' or 's', e.g. '10h'".format(duration))
    amount = int(match.group(1))
    unit = match.group(2).upper()
    return microseconds_per_unit[unit] * amount

Define the parameters used to initialize and to fit the ranker.

In [5]:
# Constructor arguments for the ranker; the same feature count is passed
# under both 'n_features' and 'n_object_features'.
ranker_params = dict(
    n_objects=n_objects,
    n_features=n_features,
    n_object_features=n_features,
)

# Arguments forwarded to the ranker's fit call.
fit_params = dict(
    epochs=epochs,
    log_callbacks=[DebugOutput()],
)

Define the parameters used to initialize and to fit the parameter optimizer.

In [6]:
# Arguments for ParameterOptimizer.fit: remaining iterations, the CV
# splitter, the (possibly resumed) optimizer and a 10 hour budget
# expressed in microseconds.
optimizer_fit_params = dict(
    n_iter=n_iter,
    cv_iter=skf,
    optimizer=optimizer,
    total_duration=get_duration_microsecond("10h"),
)

# Arguments for constructing the ParameterOptimizer itself.
optimizer_params = dict(
    ranker_class=FATEObjectRanker,
    fit_params=fit_params,
    ranker_params=ranker_params,
    random_state=random_state,
    optimizer_path=optimizer_path,
)

Generate the medoid train and test datasets with the parameters defined above.

In [7]:
# Settings for the synthetic "medoid" dataset; train and test sets get the
# same number of instances.
medoids_params = dict(
    dataset_type="medoid",
    n_test_instances=n_instances,
    n_train_instances=n_instances,
    n_features=n_features,
    n_objects=n_objects,
    random_state=random_state,
)
dr = SyntheticDatasetGenerator(**medoids_params)

# A single train/test split: training inputs and targets, then test inputs and targets.
X, Y, X_test, Y_test = dr.get_single_train_test_split()

Fitting the optimizer

In [8]:
# Build the parameter optimizer from the settings assembled above, then run
# the search on the training data with the fit-time arguments
# (n_iter, cv_iter, optimizer, total_duration).
optimizer_model = ParameterOptimizer(**optimizer_params)
optimizer_model.fit(X, Y, **optimizer_fit_params)

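If the optimizer holds no fitted model, rebuild the ranker with the best parameters found by the optimizer and refit it on the training data, so that it can be evaluated on the test dataset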

In [11]:
# Only needed when fitting left no model on the optimizer wrapper.
if optimizer_model.model is None:
    # Create a fresh ranker with the same constructor arguments the optimizer uses.
    ranker_cls = optimizer_model._ranker_class
    best_ranker = ranker_cls(random_state=optimizer_model.random_state,
                             **optimizer_model._ranker_params)
    optimizer_model.model = best_ranker

    # Pick the evaluated point with the smallest recorded objective value.
    search_state = optimizer_model.optimizer
    best_point = search_state.Xi[np.argmin(search_state.yi)]

    # Apply that point to the ranker, log its attributes, and train it on
    # the training data.
    best_ranker.set_tunable_parameters(best_point)
    logger.info(best_ranker.__dict__)
    best_ranker.fit(X, Y, **optimizer_model._fit_params)

Predict Scores for the best model

In [13]:
# Score the test inputs with the optimizer's model; the bare name on the
# last line makes the notebook display the resulting array.
predicted = optimizer_model.predict_scores(X_test)
predicted

array([[-0.60119748, -3.00899553, -1.15418839, -1.89158905,  3.21277809],
       [-2.29909468, -2.76785064, -3.73014832, -0.86105549, -4.87870646],
       [-1.33258545, -3.09852505, -3.92024136, -0.89974248, -3.95369244],
       ..., 
       [-3.43628454, -0.66553152, -2.91205454, -1.99111378, -1.79274499],
       [ 2.20938802,  1.50373054, -0.01428777, -1.63791025,  3.09338284],
       [-2.47337866, -4.68289852, -0.53654063, -1.74233592, -6.30659676]], dtype=float32)