In [1]:
import inspect
import os

import numpy as np
import pandas as pd
from docopt import docopt
from csrank.fate_ranking import FATEObjectRanker
from csrank.objectranking.feta_ranker import FETANetwork
from csrank.callbacks import DebugOutput
from csrank.metrics import zero_one_rank_loss_for_scores
from csrank.util import rename_file_if_exist, configure_logging_numpy_keras, get_tensor_value
from csrank.dataset_reader import SyntheticDatasetGenerator

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)


Defining the Constants

In [2]:
# Key under which every result record stores the name of the evaluated model;
# later used as the index column when the results table is assembled.
MODEL = 'aModel'
# Log template, filled positionally with (metric name, metric value, number of objects).
ERROR_OUTPUT_STRING = "Out of sample error {} : {} for n_objects {}"

Generate the synthetic Medoid dataset for a given number of objects.

In [3]:
def generate_dataset(n_objects=5, random_state=42, n_features=2, n_train_instances=10000,
                     n_test_instances=100000, dataset_type="medoid"):
    """Generate a single train/test split of a synthetic dataset.

    All arguments are forwarded as keyword arguments to
    ``SyntheticDatasetGenerator``. The defaults reproduce the configuration
    that used to be hard-coded here, so existing calls behave as before.

    Parameters
    ----------
    n_objects : int
        Number of objects per instance (ranking size).
    random_state : int
        Seed forwarded to the generator.
    n_features : int
        Number of features per object.
    n_train_instances : int
        Number of training instances to generate.
    n_test_instances : int
        Number of test instances to generate.
    dataset_type : str
        Dataset type name understood by the generator.

    Returns
    -------
    The value of ``generator.get_single_train_test_split()``; the notebook
    unpacks it as ``(X_train, Y_train, X_test, Y_test)``.
    """
    generator = SyntheticDatasetGenerator(
        n_features=n_features,
        n_objects=n_objects,
        n_train_instances=n_train_instances,
        n_test_instances=n_test_instances,
        dataset_type=dataset_type,
        random_state=random_state,
    )
    return generator.get_single_train_test_split()

Fit the given ranker, then predict on rankings of different sizes and compute the zero-one rank loss for each size.

In [4]:
def get_evaluation_result(gor, X_train, Y_train, epochs, base_seed=None, object_counts=None, log=None):
    """Fit ``gor`` and measure its zero-one rank loss on test sets of several ranking sizes.

    Parameters
    ----------
    gor
        Ranker exposing ``fit(X, Y, log_callbacks=..., verbose=..., epochs=...)``
        and ``predict_scores(X, batch_size=...)``.
    X_train, Y_train
        Training data passed unchanged to ``gor.fit``.
    epochs : int
        Number of training epochs passed to ``gor.fit``.
    base_seed : int, optional
        Seed from which the per-size test-set seeds are derived. When omitted,
        the notebook-level global ``seed`` is used (the original behaviour).
    object_counts : iterable of int, optional
        Ranking sizes to evaluate. Defaults to ``np.arange(3, 15)``.
    log : logger, optional
        Object with an ``info`` method. When omitted, the notebook-level
        global ``logger`` is used (the original behaviour).

    Returns
    -------
    dict
        Maps each ranking size to the value returned by
        ``get_tensor_value(zero_one_rank_loss_for_scores(Y_test, scores))``.
    """
    # Resolve the notebook globals *before* the expensive fit, so a missing
    # `seed` / `logger` fails immediately instead of after training.
    if base_seed is None:
        base_seed = seed
    if log is None:
        log = logger
    if object_counts is None:
        object_counts = np.arange(3, 15)

    gor.fit(X_train, Y_train, log_callbacks=[DebugOutput(delta=10)], verbose=False, epochs=epochs)

    eval_results = {}
    for n_objects in object_counts:
        # The base seed is drawn from [0, 2**32); adding the offset can leave
        # that range, which NumPy's RandomState rejects as a seed. Wrap around.
        # NOTE(review): assumes the generator seeds a 32-bit RandomState - confirm.
        test_seed = int((base_seed + n_objects * 5) % 2 ** 32)
        _, _, X_test, Y_test = generate_dataset(n_objects=n_objects, random_state=test_seed)
        # Score the whole test set in a single batch.
        y_pred_scores = gor.predict_scores(X_test, batch_size=X_test.shape[0])
        metric_loss = get_tensor_value(zero_one_rank_loss_for_scores(Y_test, y_pred_scores))
        log.info(ERROR_OUTPUT_STRING.format("zero_one_rank_loss", str(np.mean(metric_loss)), n_objects))
        eval_results[n_objects] = metric_loss
    return eval_results

Initialize the log file path and the dataframe path, and draw the random seed used for the rest of the notebook.

In [5]:
# Number of objects per ranking used for training; also part of the output file names.
n_objects = 5

# Anchor the output files at the working directory. Inspecting the current
# frame inside a notebook names the cell's pseudo-file rather than the
# notebook, which can resolve to a temporary kernel directory instead of the
# notebook's working directory.
dirname = os.getcwd()
log_dir = os.path.join(dirname, "logs")
# Create the output directory up front so that logging and the CSV export do
# not depend on it already existing.
os.makedirs(log_dir, exist_ok=True)
log_path = os.path.join(log_dir, "generalizing_mean_{}.log".format(n_objects))
df_path = os.path.join(log_dir, "generalizing_mean_{}.csv".format(n_objects))

# Derive one reproducible 32-bit seed from a fixed RandomState.
random_state = np.random.RandomState(seed=42)
# Request int64 explicitly: the bound 2**32 does not fit a 32-bit default
# integer, so the implicit dtype makes this call platform-dependent.
seed = int(random_state.randint(2 ** 32, dtype=np.int64))

Initialize TensorFlow and Keras with the seed, set up logging to the log file, generate the training data, and define the parameters shared by both rankers.

In [6]:
# Collects one result record (a dict) per evaluated model; filled by the cells below.
rows_list = list()
logger = configure_logging_numpy_keras(log_path=log_path, seed=seed)

# Only the training half of the split is used here; the test half is discarded.
X_train, Y_train, _, _ = generate_dataset(random_state=seed, n_objects=n_objects)
# The training array is unpacked as three dimensions; the object and feature
# counts are taken from the data itself.
n_instances, n_objects, n_features = X_train.shape

# Number of training epochs used for every ranker.
epochs = 50
# Constructor arguments shared by all rankers evaluated below.
params = dict(
    n_objects=n_objects,
    n_features=n_features,
    n_object_features=n_features,
    use_early_stopping=True,
    metrics=[zero_one_rank_loss_for_scores],
)

Evaluate the FETANetwork with best parameters and check the generalization

In [None]:
# FETA: train with a fixed hyper-parameter point and record the loss per ranking size.
logger.info("############################# With Best Parameters FETA ##############################")
# Values are applied positionally by set_tunable_parameters.
# NOTE(review): the meaning of each position is defined by the ranker, not visible here.
best_point = [1, 16, 4.2054947998521569e-05, 2.6263496065703243e-10, 777]
gor = FETANetwork(**params)
gor.set_tunable_parameter_ranges(dict())
gor.set_tunable_parameters(best_point)
result = get_evaluation_result(gor, X_train, Y_train, epochs)
# Tag the record with the model name so it can serve as the table index later.
result.update({MODEL: "FETARanker"})
rows_list.append(result)

Evaluate the FATEObjectRanker with best parameters and check the generalization

In [None]:
# NOTE(review): smooth_rank_loss is not referenced in the visible cells; kept so the
# notebook namespace is unchanged.
from csrank.losses import smooth_rank_loss

# FATE: train with a fixed hyper-parameter point and record the loss per ranking size.
logger.info("############################# With Best Parameters FATE ##############################")
# Values are applied positionally by set_tunable_parameters.
# NOTE(review): the meaning of each position is defined by the ranker, not visible here.
best_point = [1003, 0.0002908115170179143, 16, 132, 6, 247, 3.4195015492773324e-05]
gor = FATEObjectRanker(**params)
gor.set_tunable_parameter_ranges(dict())
gor.set_tunable_parameters(best_point)
result = get_evaluation_result(gor, X_train, Y_train, epochs)
# Tag the record with the model name so it can serve as the table index later.
result.update({MODEL: "FATERanker"})
rows_list.append(result)

In [None]:
# Build the results table: one row per evaluated model, one column per key of
# the result records (the ranking sizes plus the model-name column).
df = pd.DataFrame(rows_list)
df

In [None]:
# Reshape the per-model result rows into the final table: one row per ranking
# size (ascending) and one column per model.
MODEL = "aModel"


def _size_sort_key(col):
    """Order ranking-size labels numerically; non-numeric labels sort after them."""
    try:
        return (0, int(col), "")
    except (TypeError, ValueError):
        return (1, 0, str(col))


# This cell rebinds `df`, so guard on the model column: re-running the cell
# after the pivot is then a no-op instead of a KeyError.
if MODEL in df.columns:
    # Model name first, followed by a stray 'Unnamed: 0' index column if the
    # frame was read back from a CSV file.
    leading = [c for c in (MODEL, 'Unnamed: 0') if c in df.columns]
    leading_names = set(leading)
    # Sort the size columns explicitly. Rotating the column list by a fixed
    # offset only yields ascending sizes for one specific column layout.
    size_cols = sorted((c for c in df.columns if c not in leading_names), key=_size_sort_key)
    cols = leading + size_cols
    df = df[cols].set_index(MODEL).T

In [None]:
# Persist the final table to the CSV path defined earlier in the notebook.
df.to_csv(df_path)

In [None]:
# Display the final table.
df