# Parameter Optimizer 

In [1]:
import warnings
warnings.filterwarnings('ignore')
from csrank.dataset_reader import SyntheticDatasetGenerator
from sklearn.utils import check_random_state
from sklearn.model_selection import ShuffleSplit
from csrank.callbacks import DebugOutput, LRScheduler
from csrank.tuning import ParameterOptimizer
import logging
from csrank.util import configure_logging_numpy_keras, get_duration_seconds
import numpy as np
from keras import backend as K
import os
from skopt import load
from csrank.objectranking import RankNet

Using TensorFlow backend.


## Defining constants for the experiments

Configure Keras and TensorFlow, define the parameters for the dataset reader, and define the splits used by the optimizer.

In [2]:
# Logging goes to <cwd>/logs/test_models.log under the logger name 'Experiment'.
logger = configure_logging_numpy_keras(
    log_path=os.path.join(os.getcwd(), "logs", "test_models.log"),
    name="Experiment",
)

# Shape of the synthetic dataset.
n_features, n_instances, n_objects = 2, 10000, 5

# One seeded random state is shared by the splitter and the cells that follow.
random_state = check_random_state(42)
# Two shuffled splits, each holding out half of the data.
skf = ShuffleSplit(n_splits=2, test_size=0.5, random_state=random_state)

# Training epochs per fit and number of optimizer iterations.
epochs = 5
n_iter = 4

# Location passed to the optimizer as 'optimizer_path'.
optimizer_path = os.path.join(os.getcwd(), "logs", "optimizer")

In [3]:
import re


def get_duration_microsecond(duration):
    """Convert a duration string such as ``"10h"`` into microseconds.

    Parameters
    ----------
    duration : str
        A non-negative integer followed by a unit: ``d`` (days), ``h``
        (hours) or ``m`` (minutes). The unit is case-insensitive, and
        whitespace around the number and the unit is ignored.

    Returns
    -------
    float
        The duration expressed in microseconds.

    Raises
    ------
    ValueError
        If ``duration`` is not of the form ``<integer><unit>`` with one of
        the supported units (this includes signed values and any text
        before the number).
    """
    microseconds_per_unit = {"D": 24 * 60 * 60 * 1e6, "H": 60 * 60 * 1e6, "M": 60 * 1e6}

    # Parse number and unit with one anchored pattern instead of splitting the
    # string on the number, which broke on whitespace and accepted junk prefixes.
    parsed = re.fullmatch(r'\s*(\d+)\s*([A-Za-z]+)\s*', duration)
    unit = parsed.group(2).upper() if parsed else None
    if unit not in microseconds_per_unit:
        raise ValueError(
            "Invalid duration {!r}: expected an integer followed by one of "
            "'d', 'h' or 'm' (e.g. '10h')".format(duration)
        )
    return microseconds_per_unit[unit] * int(parsed.group(1))

Define the parameters for initializing and fitting the ranker.

In [4]:
# Constructor arguments for RankNet; 'n_object_features' reuses n_features.
ranker_params = dict(
    n_objects=n_objects,
    n_features=n_features,
    n_object_features=n_features,
)
ranker = RankNet(**ranker_params)

# Callback instances handed to the learner through fit_params.
debugOutput = DebugOutput()
lrScheduler = LRScheduler()

fit_params = dict(epochs=epochs, callbacks=[debugOutput, lrScheduler])

Define the parameters for initializing and fitting the parameter optimizer.

In [5]:
# Keyword arguments for ParameterOptimizer.fit: iteration count, CV splitter
# and the overall time budget returned by get_duration_seconds("10h").
optimizer_fit_params = dict(
    n_iter=n_iter,
    cv_iter=skf,
    total_duration=get_duration_seconds("10h"),
)

# Search ranges for the ranker, given as (low, high) or (low, high, prior).
# NOTE(review): presumably interpreted as scikit-optimize dimensions - confirm
# against ParameterOptimizer.
ranker_tunables = {
    'n_hidden': (2, 20),
    'n_units': (20, 40),
    'learning_rate': (1e-5, 1e-2, 'log-uniform'),
    'reg_strength': (1e-10, 1e-1, 'log-uniform'),
    'batch_size': (64, 1024),
}

# Search ranges for the LRScheduler callback.
lrScheduler_tunables = {
    'epochs_drop': (300, 600),
    'drop': (1e-2, 1e-1, 'log-uniform'),
}

# Each tunable object (the ranker and the callback) is the key for its ranges.
tunable_parameter_ranges = {
    ranker: ranker_tunables,
    lrScheduler: lrScheduler_tunables,
}

# Constructor arguments for ParameterOptimizer.
optimizer_params = dict(
    learner=ranker,
    optimizer_path=optimizer_path,
    tunable_parameter_ranges=tunable_parameter_ranges,
    fit_params=fit_params,
    random_state=random_state,
)

NameError: name 'get_duration_second' is not defined

Generate the medoid train and test datasets with the parameters defined above.

In [8]:
# Arguments for the synthetic generator: "medoid" dataset type, equally sized
# train and test sets, and the shared seeded random state.
medoids_params = dict(
    dataset_type="medoid",
    n_test_instances=n_instances,
    n_train_instances=n_instances,
    n_features=n_features,
    n_objects=n_objects,
    random_state=random_state,
)
dr = SyntheticDatasetGenerator(**medoids_params)

# Single train/test split, unpacked as train features/labels then test features/labels.
X, Y, X_test, Y_test = dr.get_single_train_test_split()

In [13]:
import json

# Add a nested dict to check that nested structures survive serialization.
# NOTE: this mutates medoids_params in place, as the original cell did.
medoids_params['a_d'] = {'a': 2, 'b': 3, 'D': 4}

# The 'random_state' entry is a RandomState object, which json.dumps cannot
# serialize (it raises TypeError). Serialize a filtered copy so the cell works
# on a fresh kernel without deleting the entry from medoids_params.
serializable_params = {key: value for key, value in medoids_params.items()
                       if key != 'random_state'}
o = json.dumps(serializable_params)
o

'{"dataset_type": "medoid", "n_test_instances": 10000, "n_train_instances": 10000, "n_features": 2, "n_objects": 5, "a_d": {"a": 2, "b": 3, "D": 4}}'

In [25]:
# Nested, JSON-friendly description of fit parameters: an epoch count plus
# per-callback settings keyed by callback name.
d = {
    "epochs": 5,
    "callbacks": {
        "debugoutput": {"delta": 200},
        "lrScheduler": {"epochs_drop": 200, "drop": 0.1},
    },
}

# Round-trip through JSON and display the "callbacks" entry.
json.loads(json.dumps(d))["callbacks"]

{'debugoutput': {'delta': 200},
 'lrScheduler': {'drop': 0.1, 'epochs_drop': 200}}

Fitting the optimizer

In [10]:
# Build the optimizer from optimizer_params, then fit it on the training data
# (X, Y) with the iteration count, CV splitter and time budget collected in
# optimizer_fit_params.
optimizer_model = ParameterOptimizer(**optimizer_params)
optimizer_model.fit(X, Y, **optimizer_fit_params)

TypeError: __init__() missing 1 required positional argument: 'learner'

Evaluate the ranker with the best parameters found by the optimizer on the test dataset.
Predict scores for the best model.

In [9]:
# Score the test instances with the fitted optimizer_model; the bare
# expression on the last line displays the resulting array.
predicted = optimizer_model.predict_scores(X_test)
predicted

array([[0.34003744, 0.40530878, 0.31550464, 0.63641626, 0.63163394],
       [0.40763062, 0.48692006, 0.355434  , 0.33623344, 0.4165258 ],
       [0.49906975, 0.3731012 , 0.430352  , 0.5236497 , 0.4388348 ],
       ...,
       [0.50546443, 0.534902  , 0.4652732 , 0.4417104 , 0.4568018 ],
       [0.38909936, 0.51279926, 0.34461373, 0.51644784, 0.6094    ],
       [0.4558252 , 0.31391865, 0.62172145, 0.6636104 , 0.5680445 ]],
      dtype=float32)