In [3]:
import os
import numpy as np
from csrank.fate_ranking import FATERankingCore
from csrank.objectranking.rank_net import RankNet
from csrank.losses import smooth_rank_loss
from keras.optimizers import SGD
from csrank.util import *
from csrank.constants import OBJECT_RANKING
from experiments.util import lp_metric_dict
from csrank.dataset_reader import DepthDatasetReader
import time
from csrank.callbacks import LRScheduler, DebugOutput, EarlyStoppingWithWeights
from keras.callbacks import History
from collections import OrderedDict
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# Notebook-wide plot styling: enable seaborn's color codes, then apply the
# dark palette (matplotlib style) and the dark seaborn style on top.
sns.set(color_codes=True)
plt.style.use("seaborn-dark-palette")
sns.set_style("dark")

# Test it with the general ranker

In [4]:
# Template used when logging each out-of-sample metric: (metric name, value).
ERROR_OUTPUT_STRING = 'Out of sample error %s : %0.4f'

# Log to <current working directory>/logs/logs.log.
log_file = os.path.join(os.getcwd(), "logs", "logs.log")
logger = configure_logging_numpy_keras(name="GeneralRankerTest", log_path=log_file)

# Dataset variant handed to the reader; also written to the log.
dtype = "basicSaxena"
logger.info("DatasetType {}".format(dtype))

In [5]:
# Build the depth dataset reader for the dataset type selected in `dtype`.
# NOTE(review): the recorded run of this cell ended in FileNotFoundError for
# 'saxena_basic61x55.dat' — presumably the data file has to be present locally
# before this works; confirm where the reader expects it.
dataset_reader = DepthDatasetReader(dataset_type=dtype)

FileNotFoundError: [Errno 2] No such file or directory: '/home/prithag/git/cs-ranking/csrank/datasets/depth_data/saxena_basic61x55.dat'

In [4]:
# Metrics evaluated on the test predictions, keyed by the name used in the log
# output. Built from a list of pairs so the order is fixed by construction
# rather than by the iteration order of an intermediate dict literal.
ranking_metrics = OrderedDict([
    ('KendallsTau', kendalls_mean_np),
    ('SpearmanCorrelation', spearman_mean_np),
    ('ZeroOneRankLoss', zero_one_rank_loss_for_scores_np),
    ('ZeroOneRankLossTies', zero_one_rank_loss_for_scores_ties_np),
    ("ZeroOneAccuracy", zero_one_accuracy_np),
])

In [5]:
# Show the configured (name, metric function) pairs.
ranking_metrics.items()

odict_items([('KendallsTau', <function kendalls_mean_np at 0x7f32fbe82378>), ('SpearmanCorrelation', <function spearman_mean_np at 0x7f32fbe822f0>), ('ZeroOneRankLoss', <function zero_one_rank_loss_for_scores_np at 0x7f32fbe82510>), ('ZeroOneRankLossTies', <function zero_one_rank_loss_for_scores_ties_np at 0x7f32fbe82488>), ('ZeroOneAccuracy', <function zero_one_accuracy_np at 0x7f32fbe82400>)])

In [6]:
# Draw one train/test split from the dataset reader.
X_train, Y_train, X_test, Y_test = dataset_reader.get_single_train_test_split()

In [7]:
# Shape of the training features; the recorded run shows (132022, 5, 9).
# Axis 2 is passed to the rankers below as the feature count; the other axes
# are presumably (instances, objects per ranking) — TODO confirm.
X_train.shape

(132022, 5, 9)

In [8]:
# Sizes passed to GeneralObjectRanker in the cell below.
n_hidden_set_layers = n_hidden_joint_layers = 10
n_hidden_set_units = n_hidden_joint_units = 512

# Sizes passed to RankNet further down.
n_hidden = 5
n_units = 50

# Not referenced by any of the cells visible in this notebook.
rf = 3.2548513387780192e-05

# Callbacks handed to the fit calls through `log_callbacks`.
lr = LRScheduler()
do = DebugOutput(delta=10)
hi = History()
# NOTE(review): presumably the callback handling looks up `__name__` on each
# callback and History does not provide one — confirm.
hi.__name__ = "History"

In [None]:
# Train the general object ranker with the smooth rank loss and time the fit.
# NOTE(review): this cell has no execution count, and GeneralObjectRanker is not
# imported in the visible cells — confirm where it comes from before running.
time_before = time.time()
gor = GeneralObjectRanker(
    n_object_features=X_train.shape[2],
    n_hidden_set_units=n_hidden_set_units,
    n_hidden_set_layers=n_hidden_set_layers,
    n_hidden_joint_units=n_hidden_joint_units,
    n_hidden_joint_layers=n_hidden_joint_layers,
    batch_size=1024,
    optimizer=SGD(lr=1e-4, momentum=0.9, nesterov=True),
    loss_function=smooth_rank_loss,
)
gor.fit(
    X_train,
    Y_train,
    verbose=False,
    epochs=50,
    validation_split=0.1,
    log_callbacks=[lr, do, hi],
)
time_after = time.time()

In [9]:
# Train RankNet on the training split and time the fit.
# The callbacks lr / do / hi are the shared instances created above, so their
# state after this cell reflects this fit.
time_before = time.time()
rn = RankNet(
    n_features=X_train.shape[2],
    n_hidden=n_hidden,
    n_units=n_units,
    batch_size=2048,
    optimizer=SGD(lr=1e-4, momentum=0.9, nesterov=True),
)
rn.fit(
    X_train,
    Y_train,
    verbose=False,
    epochs=10,
    validation_split=0.1,
    log_callbacks=[lr, do, hi],
)
time_after = time.time()

In [10]:
# Per-epoch "binary_accuracy" values collected by the History callback `hi`
# (ten entries in the recorded run, matching epochs=10 of the RankNet fit).
hi.history["binary_accuracy"]

[0.60402643330388361,
 0.68875136972055595,
 0.57266381529084687,
 0.62460717826299572,
 0.71015352660891196,
 0.72908555645097362,
 0.74928925986581318,
 0.75560723044104539,
 0.76233927342537489,
 0.76602047807438944]

In [11]:
# Epoch counter held by the DebugOutput callback (10 in the recorded run).
do.epoch

10

In [12]:
#rn.scoring_model = rn._create_scoring_model()
# Score the test set with RankNet; batch_size is set to X_test.shape[0], i.e.
# the whole test array is passed as one batch.
y_pred_scores = rn.predict_scores(X_test, batch_size=X_test.shape[0])

In [17]:
# Imported in this cell because the notebook's only other h5py import sits in a
# later cell, which would leave this one failing on a top-to-bottom run.
import h5py

# Reload the scores stored under the "scores" dataset of the HDF5 file.
pred_file = os.path.join(os.getcwd(), "logs", "dataset_type_{}_depth_ranknet.h5".format(dtype))
# The context manager closes the file even if reading the dataset raises.
with h5py.File(pred_file, 'r') as f:
    y_pred_scores = np.array(f['scores'])


In [18]:
from bordarank.util import get_loss_for_array

def eval_predictions(Y_test, y_pred_scores, ranking_metrics):
    """Log the out-of-sample value of every metric in ``ranking_metrics``.

    Parameters
    ----------
    Y_test : dict or other
        Ground truth. A dict is evaluated through ``get_mean_loss_for_dictionary``;
        anything else goes through ``get_loss_for_array``.
    y_pred_scores
        Predictions handed to the metric helpers alongside ``Y_test``.
    ranking_metrics : mapping
        Maps a display name to a metric function; iterated with ``.items()``.

    Returns
    -------
    None
        Results are only written to the module-level ``logger`` using
        ``ERROR_OUTPUT_STRING``.
    """
    targets_are_dict = isinstance(Y_test, dict)
    for name, evaluation_metric in ranking_metrics.items():
        metric_loss = (
            get_mean_loss_for_dictionary(logger, evaluation_metric, Y_test, y_pred_scores)
            if targets_are_dict
            else get_loss_for_array(evaluation_metric, Y_test, y_pred_scores)
        )
        logger.info(ERROR_OUTPUT_STRING % (name, metric_loss))
# Log every configured metric for the current `y_pred_scores`.
# NOTE(review): the recorded run of this cell ended in MemoryError.
eval_predictions(Y_test, y_pred_scores, ranking_metrics)

MemoryError: 

In [14]:
import h5py

# Persist the current prediction scores as the "scores" dataset of an HDF5 file
# under <cwd>/logs; mode 'w' creates the file or truncates an existing one.
pred_file = os.path.join(os.getcwd(), "logs", "dataset_type_{}_depth_ranknet.h5".format(dtype))
# The context manager closes (and flushes) the file even if the write raises.
with h5py.File(pred_file, 'w') as f:
    f.create_dataset('scores', data=y_pred_scores)

# Longer training run of the general object ranker, timed.
# X_train is a single 3-D array in this notebook (the recorded X_train.shape is
# (132022, 5, 9)), so the feature count is X_train.shape[2]; X_train[2].shape
# has only two entries and indexing it with [2] raises IndexError. For the same
# reason the fit uses the whole arrays rather than X_train[5] / Y_train[5],
# which would select a single element along the first axis.
# NOTE(review): GeneralObjectRanker is not imported in the visible cells —
# confirm where it comes from before running.
time_before = time.time()
gor = GeneralObjectRanker(n_object_features=X_train.shape[2],
                          n_hidden_set_units=512,
                          n_hidden_set_layers=10,
                          n_hidden_joint_units=512,
                          n_hidden_joint_layers=10,
                          batch_size=512,
                          optimizer=SGD(lr=5e-3, momentum=0.9, nesterov=True))
gor.fit(X_train, Y_train, verbose=True, epochs=1000)
time_after=time.time()

# Score the test set with the fitted general object ranker; this replaces the
# previous contents of `y_pred_scores`.
y_pred_scores = gor.predict_scores(X_test)

# eval_predictions takes (Y_test, y_pred_scores, ranking_metrics); the ground
# truth must be passed first, otherwise the call raises TypeError for a missing
# positional argument.
eval_predictions(Y_test, y_pred_scores, ranking_metrics)

# Duration of the most recent timed fit in minutes (time.time() returns seconds).
(time_after - time_before)/60 

# Plot one line per metric from the losses returned by get_losses_for_dictionary.
# NOTE(review): `keys` and `values` are not defined in the visible cells, so the
# dict(zip(...)) line will raise NameError unless they come from elsewhere;
# left unchanged because the intended source of the pairs is not visible here.
# NOTE(review): get_losses_for_dictionary presumably expects dict-shaped
# Y_test / y_pred_scores — confirm against the data produced above.
m = ["o", "^", "x"]
c = ["b", "k", "r"]
fig =  plt.figure()
for i, (name, evaluation_metric) in enumerate(ranking_metrics.items()):
    logger.info("Name {}".format(name))
    losses, total_instances = get_losses_for_dictionary(logger, evaluation_metric, Y_test, y_pred_scores)
    losses = dict(zip(keys, values))
    x = np.array(list(losses.keys()))
    y = np.array(list(losses.values()))
    # There are more metrics than colors/markers, so indexing with the bare
    # counter runs off the end of the lists. Colors wrap around; the marker is
    # shifted by one on every wrap so each metric keeps a distinct
    # color/marker pair (the first len(c) metrics look exactly as before).
    color = c[i % len(c)]
    marker = m[(i + i // len(c)) % len(m)]
    plt.plot(x, y, label=name, color=color, marker=marker)
plt.title("GeneralRankerPerformance")
plt.legend(loc="best");
plt.show()