Merge 95476c8 into 229d5dd

kiudee · Jan 14, 2020 · b6e76d5 · b6e76d5
2 parents 229d5dd + 95476c8
commit b6e76d5
Show file tree

Hide file tree

Showing 6 changed files with 427 additions and 10 deletions.
diff --git a/csrank/dataset_reader/labelranking/survey_dataset_reader.py b/csrank/dataset_reader/labelranking/survey_dataset_reader.py
@@ -3,13 +3,15 @@
 import numpy as np
 import pandas as pd
 from sklearn.model_selection import ShuffleSplit
-from sklearn.preprocessing import Imputer, StandardScaler
+from sklearn.preprocessing import StandardScaler
 from sklearn.utils import check_random_state
-
+#
 from csrank.constants import LABEL_RANKING
 from csrank.numpy_util import ranking_ordering_conversion
 from ..dataset_reader import DatasetReader
 
+from sklearn.impute import SimpleImputer
+
 
 class SurveyDatasetReader(DatasetReader):
     def __init__(self, random_state=None, **kwargs):
@@ -28,7 +30,7 @@ def __load_dataset__(self):
             context_feature = [float(i) if i != '.' else np.NAN for i in row[13:33]]
             features.append(context_feature)
         X = np.array(features)
-        X = Imputer().fit_transform(X)
+        X = SimpleImputer().fit_transform(X)
         X = np.array([np.log(np.array(X[:, i]) + 1) for i in range(len(features[0]))])
         X = np.array(X.T)
         self.X = StandardScaler().fit_transform(X)

diff --git a/csrank/experiments/constants.py b/csrank/experiments/constants.py
@@ -27,6 +27,7 @@
 FATELINEAR_RANKER = "fatelinear_ranker"
 FETALINEAR_RANKER = "fetalinear_ranker"
 RANDOM_RANKER = "random_ranker"
+LAMBDAMART = "lambdamart"
 
 FETA_CHOICE = 'feta_choice'
 FETALINEAR_CHOICE = "fetalinear_choice"
@@ -59,4 +60,4 @@
 DCMS = [FETA_DC, FATE_DC, RANKNET_DC, MNL, NLM, GEV, PCL, MLM, RANKSVM_DC, FATELINEAR_DC, FETALINEAR_DC, RANDOM_DC]
 CHOICE_FUNCTIONS = [FETA_CHOICE, FATE_CHOICE, RANKNET_CHOICE, RANKSVM_CHOICE, GLM_CHOICE, RANDOM_CHOICE,
                     FATELINEAR_CHOICE, FETALINEAR_CHOICE]
-OBJECT_RANKERS = [FATE_RANKER, FETA_RANKER, FATELINEAR_RANKER, FETALINEAR_RANKER, RANKSVM, ERR, RANKNET, LISTNET, RANDOM_RANKER]
+OBJECT_RANKERS = [FATE_RANKER, FETA_RANKER, FATELINEAR_RANKER, FETALINEAR_RANKER, RANKSVM, ERR, RANKNET, LISTNET, RANDOM_RANKER, LAMBDAMART]
diff --git a/csrank/metrics.py b/csrank/metrics.py
@@ -47,14 +47,13 @@
 import numpy as np
 import tensorflow as tf
 from keras import backend as K
+import math
 
 from csrank.tensorflow_util import scores_to_rankings, get_instances_objects, tensorify
 
-__all__ = ['zero_one_rank_loss', 'zero_one_rank_loss_for_scores',
-           'zero_one_rank_loss_for_scores_ties',
-           'make_ndcg_at_k_loss', 'kendalls_tau_for_scores',
-           'spearman_correlation_for_scores', "zero_one_accuracy",
-           "zero_one_accuracy_for_scores", "topk_categorical_accuracy"]
+__all__ = ['zero_one_rank_loss', 'zero_one_rank_loss_for_scores', 'zero_one_rank_loss_for_scores_ties',
+           'make_ndcg_at_k_loss', 'kendalls_tau_for_scores', 'spearman_correlation_for_scores', "zero_one_accuracy",
+           "zero_one_accuracy_for_scores", "topk_categorical_accuracy", "point_dcg", "dcg", "ndcg"]
 
 
 def zero_one_rank_loss(y_true, y_pred):
@@ -331,3 +330,38 @@ def err(y_true, y_pred, utility_function=None, probability_mapping=None):
     results = tf.reduce_sum(discounted_document_values, axis=1)
 
     return K.mean(results)
+
+def point_dcg(args):
+    """
+        Discounted gain of a single ranked item.
+
+        ``args`` is a ``(pos, label)`` pair: the position of the item in the ranking (0 for the top item, where
+        the discount ``log2(pos + 2)`` equals 1) and its relevance label. The gain is ``2 ** label - 1``.
+    """
+    position, relevance = args
+    gain = 2 ** relevance - 1
+    discount = math.log(position + 2, 2)
+    return gain / discount
+
+def dcg(preds):
+    """
+        Discounted cumulative gain of a ranked list of relevance labels.
+
+        Pairs every label in ``preds`` with its position in the list and adds up the ``point_dcg`` of each pair.
+    """
+    return sum(point_dcg(pair) for pair in enumerate(preds))
+
+def ndcg(preds, k=10):
+    """
+        Normalized discounted cumulative gain at cut-off ``k``.
+
+        Parameters
+        ----------
+        preds : sequence of relevance labels, listed in the order in which the items were ranked
+        k : number of top positions taken into account (expected to be positive)
+
+        Returns
+        -------
+        DCG of the first ``k`` labels divided by the DCG of the ``k`` largest labels in descending order,
+        or 1 if that best achievable DCG is 0 (e.g. all labels are 0 or ``preds`` is empty)
+    """
+    predicted_top = preds[:k]
+
+    # The normalizer must use the same cut-off as the prediction: the k largest labels, best first.
+    # (Previously a hard-coded 10 was used here, and short lists were not truncated to k at all.)
+    ideal_top = np.sort(preds)[::-1][:k]
+
+    ideal_dcg = dcg(ideal_top)
+    if ideal_dcg == 0:
+        return 1
+
+    return dcg(predicted_top) / ideal_dcg
diff --git a/csrank/objectranking/__init__.py b/csrank/objectranking/__init__.py
@@ -4,6 +4,7 @@
 from .fatelinear_object_ranker import FATELinearObjectRanker
 from .feta_object_ranker import FETAObjectRanker
 from .fetalinear_object_ranker import FETALinearObjectRanker
+from .lambdamart import LambdaMART
 from .list_net import ListNet
 from .rank_net import RankNet
 from .rank_svm import RankSVM