Cleaned up generalization and parameter optimizer notebooks
prithagupta committed Feb 15, 2018
1 parent 4c572cb commit 61f7b90
Showing 8 changed files with 1,439 additions and 146 deletions.
csrank/tuning.py (2 changes: 1 addition & 1 deletion)
@@ -179,13 +179,13 @@ def splitter_dict(itr_dict):
             ))
         if "use_early_stopping" in self._ranker_params:
             self._ranker_class._use_early_stopping = self._ranker_params["use_early_stopping"]
-        param_ranges = self._ranker_class.set_tunable_parameter_ranges(parameters_ranges)

         if (optimizer is not None):
             opt = optimizer
             self.logger.debug('Setting the provided optimizer')
             self.log_best_params(opt)
         else:
+            param_ranges = self._ranker_class.set_tunable_parameter_ranges(parameters_ranges)
             transformed = []
             for param in param_ranges:
                 transformed.append(check_dimension(param))
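
The relocation above matters because `set_tunable_parameter_ranges` only needs to run when a fresh optimizer is being built; a pre-supplied optimizer already carries its search space, so in that branch only the best parameters are logged. In the else branch, `check_dimension` presumably normalizes each tuple-style range into a search-space dimension before the loop builds the optimizer. A minimal sketch of that kind of construction using skopt's dimension classes; the parameter names and bounds are illustrative placeholders, not csrank's actual tunables:

```python
# Illustrative only: tuple-style ranges expressed as explicit skopt
# dimensions. Names and bounds are placeholders, not csrank's tunables.
from skopt.space import Integer, Real

param_ranges = [
    Real(1e-5, 1e-2, prior='log-uniform', name='learning_rate'),
    Integer(16, 512, name='batch_size'),
]
for dim in param_ranges:
    print(dim.name, dim.bounds)  # e.g. learning_rate (1e-05, 0.01)
```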
experiments/experiment_cv.py (30 changes: 3 additions & 27 deletions)
@@ -40,7 +40,7 @@
     get_loss_for_array)
 from experiments.util import get_ranker_and_dataset_functions, get_ranker_parameters, ERROR_OUTPUT_STRING, \
     lp_metric_dict, get_duration_microsecond, get_applicable_ranker_dataset, get_dataset_str, \
-    log_test_train_data
+    log_test_train_data, get_optimizer

 DIR_PATH = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))

@@ -97,32 +97,8 @@
     optimizer_path = os.path.join(DIR_PATH, OPTIMIZER_FOLDER,
                                   (FILE_FORMAT).format(dataset_str, ranker_name, cluster_index))
     create_dir_recursively(optimizer_path, True)
-    logger.info('Retrieving model stored at: {}'.format(optimizer_path))
-    try:
-        optimizer = load(optimizer_path)
-        logger.info('Loading model stored at: {}'.format(optimizer_path))
-
-    except KeyError:
-        logger.error('Cannot open the file {}'.format(optimizer_path))
-        optimizer = None
-
-    except ValueError:
-        logger.error('Cannot open the file {}'.format(optimizer_path))
-        optimizer = None
-    except FileNotFoundError:
-        logger.error('No such file or directory: {}'.format(optimizer_path))
-        optimizer = None
-    if optimizer is not None:
-        finished_iterations = np.array(optimizer.yi).shape[0]
-        if finished_iterations == 0:
-            optimizer = None
-            logger.info('Optimizer did not finish any iterations so setting optimizer to null')
-        else:
-            n_iter = n_iter - finished_iterations
-            if n_iter < 0:
-                n_iter = 0
-            logger.info(
-                'Iterations already done: {} and running iterations {}'.format(finished_iterations, n_iter))
-
+    optimizer, n_iter = get_optimizer(logger, optimizer_path, n_iter)

     optimizer_fit_params = {'n_iter': n_iter, 'cv_iter': inner_cv, 'optimizer': optimizer,
                             "parameters_ranges": parameter_ranges, 'acq_func': 'EIps'}
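
A note on the fit parameters just above: 'EIps' is skopt's expected-improvement-per-second acquisition function, which weighs the predicted gain of a candidate point against how long its evaluation is expected to take. A self-contained sketch of that acquisition in a plain skopt loop, with a stand-in quadratic objective in place of the real cross-validated ranking loss:

```python
# Standalone sketch: gp_minimize with the 'EIps' acquisition, which skopt
# supports by timing each objective evaluation internally. The quadratic
# objective is a placeholder for the real cross-validated loss.
from skopt import gp_minimize

result = gp_minimize(
    func=lambda x: (x[0] - 0.3) ** 2,  # stand-in objective
    dimensions=[(0.0, 1.0)],
    acq_func='EIps',                   # expected improvement per second
    n_calls=12,
    random_state=0,
)
print(result.x, result.fun)
```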
experiments/experiment_script.py (30 changes: 3 additions & 27 deletions)
@@ -28,14 +28,14 @@
 import pandas as pd
 from docopt import docopt
 from sklearn.model_selection import ShuffleSplit
-from skopt import load

 from csrank.tuning import ParameterOptimizer
 from csrank.util import create_dir_recursively, configure_logging_numpy_keras, \
     duration_tillnow, microsec_to_time, get_mean_loss_for_dictionary, \
     get_loss_for_array
 from experiments.util import get_ranker_and_dataset_functions, get_ranker_parameters, ERROR_OUTPUT_STRING, \
-    lp_metric_dict, get_duration_microsecond, get_applicable_ranker_dataset, get_dataset_str, log_test_train_data
+    lp_metric_dict, get_duration_microsecond, get_applicable_ranker_dataset, get_dataset_str, log_test_train_data, \
+    get_optimizer

 DIR_PATH = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))

@@ -96,31 +96,7 @@
     create_dir_recursively(optimizer_path, True)
     create_dir_recursively(pred_file, is_file_path=True)

-    logger.info('Retrieving model stored at: {}'.format(optimizer_path))
-    try:
-        optimizer = load(optimizer_path)
-        logger.info('Loading model stored at: {}'.format(optimizer_path))
-
-    except KeyError:
-        logger.error('Cannot open the file {}'.format(optimizer_path))
-        optimizer = None
-
-    except ValueError:
-        logger.error('Cannot open the file {}'.format(optimizer_path))
-        optimizer = None
-    except FileNotFoundError:
-        logger.error('No such file or directory: {}'.format(optimizer_path))
-        optimizer = None
-    if optimizer is not None:
-        finished_iterations = np.array(optimizer.yi).shape[0]
-        if finished_iterations == 0:
-            optimizer = None
-            logger.info('Optimizer did not finish any iterations so setting optimizer to null')
-        else:
-            n_iter = n_iter - finished_iterations
-            if n_iter < 0:
-                n_iter = 0
-            logger.info('Iterations already done: {} and running iterations {}'.format(finished_iterations, n_iter))
+    optimizer, n_iter = get_optimizer(logger, optimizer_path, n_iter)
     if not (n_iter == 0 and os.path.isfile(pred_file)):
         optimizer_fit_params = {'n_iter': n_iter, 'cv_iter': cv, 'optimizer': optimizer,
                                 "parameters_ranges": parameter_ranges, 'acq_func': 'EIps'}
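
The `if not (n_iter == 0 and os.path.isfile(pred_file))` guard makes re-submitted jobs effectively idempotent: when the optimization budget is spent and predictions already exist on disk, there is nothing left to do. A small isolated sketch of the same guard; the path is a placeholder:

```python
# Sketch of the early-exit pattern the guard implements: a re-run with no
# remaining budget and existing results becomes a no-op.
import os

n_iter = 0
pred_file = 'predictions/example_ranker_0.csv'  # placeholder path

if not (n_iter == 0 and os.path.isfile(pred_file)):
    print('fitting optimizer and writing predictions')
else:
    print('budget exhausted and predictions present; skipping')
```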
experiments/util.py (31 changes: 31 additions & 0 deletions)
@@ -1,9 +1,11 @@
 import re
 from collections import OrderedDict

+import numpy as np
 from keras.losses import categorical_crossentropy
 from keras.metrics import categorical_accuracy
 from keras.optimizers import SGD
+from skopt import load

 from csrank.callbacks import DebugOutput, LRScheduler
 from csrank.constants import OBJECT_RANKING, LABEL_RANKING, DYAD_RANKING, DISCRETE_CHOICE, BATCH_SIZE, LEARNING_RATE, \
@@ -185,3 +187,32 @@ def log_test_train_data(X_train, X_test, logger):
     n_instances, n_objects, n_features = X_train.shape
     logger.info("Train Set instances {} objects {} features {}".format(n_instances, n_objects, n_features))
     return n_features, n_objects
+
+
+def get_optimizer(logger, optimizer_path, n_iter):
+    logger.info('Retrieving model stored at: {}'.format(optimizer_path))
+    try:
+        optimizer = load(optimizer_path)
+        logger.info('Loading model stored at: {}'.format(optimizer_path))
+
+    except KeyError:
+        logger.error('Cannot open the file {}'.format(optimizer_path))
+        optimizer = None
+
+    except ValueError:
+        logger.error('Cannot open the file {}'.format(optimizer_path))
+        optimizer = None
+    except FileNotFoundError:
+        logger.error('No such file or directory: {}'.format(optimizer_path))
+        optimizer = None
+    if optimizer is not None:
+        finished_iterations = np.array(optimizer.yi).shape[0]
+        if finished_iterations == 0:
+            optimizer = None
+            logger.info('Optimizer did not finish any iterations so setting optimizer to null')
+        else:
+            n_iter = n_iter - finished_iterations
+            if n_iter < 0:
+                n_iter = 0
+            logger.info('Iterations already done: {} and running iterations {}'.format(finished_iterations, n_iter))
+    return optimizer, n_iter
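
For context on what `get_optimizer` loads: it expects a checkpointed skopt optimizer at `optimizer_path`, and skopt's `optimizer.yi` holds one observed objective value per completed iteration, which is why its length counts finished iterations. A minimal checkpoint-and-resume sketch, assuming the file stores a skopt `Optimizer` object; the path and objective are placeholders:

```python
# Minimal checkpoint/resume sketch with skopt, mirroring the pattern
# get_optimizer supports. The objective and file path are placeholders.
from skopt import Optimizer, dump, load

checkpoint = 'optimizer_example.pkl'  # placeholder path

opt = Optimizer(dimensions=[(0.0, 1.0)])
for _ in range(5):
    x = opt.ask()
    opt.tell(x, (x[0] - 0.3) ** 2)  # stand-in objective
    dump(opt, checkpoint)           # checkpoint after every evaluation

resumed = load(checkpoint)
finished = len(resumed.yi)          # == 5, as counted in get_optimizer
print('finished iterations:', finished)
```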
