In [1]:
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from csrank import *
from csrank.util import setup_logging
from csrank.tensorflow_util import configure_numpy_keras
import os
import logging
from csrank.callbacks import DebugOutput

Using TensorFlow backend.


In [2]:
# Configure numpy/keras with a fixed seed before anything else runs.
configure_numpy_keras(seed=42)

# Write the experiment log next to wherever the notebook is executed.
log_file = os.path.join(os.getcwd(), "letor.log")
setup_logging(log_path=log_file)
logger = logging.getLogger('Experiment')

# LETOR 2007 choice data with n_objects objects per instance; one train/test split.
n_objects = 5
letor = LetorRankingChoiceDatasetReader(year=2007, n_objects=n_objects)
X_train, Y_train, X_test, Y_test = letor.get_single_train_test_split()

In [3]:
# Display the train and test feature-array shapes as a pair.
(X_train.shape, X_test.shape)

((14076, 5, 46), (3472, 5, 46))

In [4]:
from csrank.metrics_np import *
from csrank.tensorflow_util import configure_numpy_keras, get_mean_loss_for_dictionary, get_loss_for_array
def eval_l(learner):
    """Log out-of-sample choice metrics for a fitted learner.

    Scores the notebook-level ``X_test`` with ``learner.predict_scores``,
    turns the scores into choices with ``learner.predict_for_scores`` and
    writes one log line per metric through the notebook-level ``logger``.
    Metrics listed in ``metrics_on_predictions`` are computed on the derived
    choices; the remaining ones are computed on the raw scores.

    Parameters
    ----------
    learner : object
        Fitted model exposing ``predict_scores(X)`` and
        ``predict_for_scores(scores)``.

    Returns
    -------
    None
        Results are only written to the log.
    """
    choice_metrics = {'F1Score': f1_measure, 'Precision': precision, 'Recall': recall,
                      'Subset01loss': subset_01_loss, 'HammingLoss': hamming, 'Informedness': instance_informedness,
                      "AucScore": auc_score, "AveragePrecisionScore": average_precision}
    ERROR_OUTPUT_STRING = 'Out of sample error %s : %0.4f'
    metrics_on_predictions = [f1_measure, precision, recall, subset_01_loss, hamming, instance_informedness]

    # Use the argument that was passed in. The previous body read the
    # notebook-global `leaner`, so the parameter was silently ignored.
    s_pred = learner.predict_scores(X_test)
    y_pred = learner.predict_for_scores(s_pred)

    # Y_test does not change inside the loop, so pick the aggregation helper once.
    if isinstance(Y_test, dict):
        compute_loss = get_mean_loss_for_dictionary
    else:
        compute_loss = get_loss_for_array

    for name, evaluation_metric in choice_metrics.items():
        predictions = s_pred
        if evaluation_metric in metrics_on_predictions:
            logger.info("Metric on predictions")
            predictions = y_pred
        metric_loss = compute_loss(evaluation_metric, Y_test, predictions)
        logger.info(ERROR_OUTPUT_STRING % (name, metric_loss))

In [5]:
# Fit a PairwiseSVMChoiceFunction on the training split, then log its test metrics.
# The last axis of X_train gives the number of features per object.
svm_kwargs = {'n_object_features': X_train.shape[-1]}
leaner = PairwiseSVMChoiceFunction(**svm_kwargs)
leaner.fit(X_train, Y_train)
eval_l(leaner)

In [6]:
# Fit a RankNetChoiceFunction on the training split, then log its test metrics.
ranknet_kwargs = {'n_object_features': X_train.shape[-1]}
leaner = RankNetChoiceFunction(**ranknet_kwargs)
leaner.fit(X_train, Y_train)
eval_l(leaner)

In [21]:
# Fit a FETAChoiceFunction (with the zeroth-order model enabled) and log its test metrics.
leaner = FETAChoiceFunction(
    n_objects=n_objects,
    n_object_features=X_train.shape[-1],
    add_zeroth_order_model=True,
    epochs=100,
)
leaner.fit(X_train, Y_train)
eval_l(leaner)

In [8]:
# Constructor keyword arguments for FATEChoiceFunction.
params = {
    'n_hidden_set_units': 8,
    'n_hidden_set_layers': 1,
    'n_hidden_joint_units': 64,
    'n_hidden_joint_layers': 3,
    'reg_strength': 4.247757035900819e-05,
    'learning_rate': 0.003563146920998131,
    'batch_size': 89,
}
leaner = FATEChoiceFunction(
    n_object_features=X_train.shape[-1],
    **params
)
# Train for 10 epochs, then log the test metrics.
leaner.fit(X_train, Y_train, epochs=10)
eval_l(leaner)

In [23]:
import pymc3 as pm

# Keyword arguments for pm.callbacks.CheckParametersConvergence.
params = {"diff": "absolute", "tolerance": 0.01, "every": 50}

# Keyword arguments passed to the learner's fit() call in the next cell.
fit_params = {
    "sampler": "vi",
    "sample_params": {"tune": 2, "draws": 2, "chains": 2, "njobs": 4},
    "vi_params": {
        "n": 20000,
        "method": "advi",
        "callbacks": [pm.callbacks.CheckParametersConvergence(**params)],
    },
    "draws": 500,
    "random_seed": 42,
}

# Constructor keyword arguments; object and feature counts come from X_train's last two axes.
learner_params = {
    "n_objects": X_train.shape[-2],
    "n_object_features": X_train.shape[-1],
    'loss_function': 'categorical_crossentropy',
    'regularization': 'l2',
    'n_nests': 5,
}

In [24]:
# Build the GeneralizedLinearModel from learner_params, fit it on the training
# split with the fit_params settings, then log its test metrics via eval_l.
leaner = GeneralizedLinearModel(**learner_params)
leaner.fit(X_train, Y_train, **fit_params)
eval_l(leaner)

Only 2 samples in chain.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 4 jobs)
NUTS: [intercept, weights, weights_sd, weights_mu]
Sampling 2 chains: 100%|██████████| 8/8 [00:00<00:00, 20.47draws/s]
The chain contains only diverging samples. The model is probably misspecified.
Average Loss = 19.252:  31%|███▏      | 6295/20000 [00:43<01:34, 145.64it/s]
Convergence achieved at 6300
Interrupted at 6,299 [31%]: Average Loss = 55.613
