
Commit

Merge branch 'master' into prithag
prithagupta committed Feb 15, 2018
2 parents 0ed1b13 + 9d1cfff commit c9246af
Showing 3 changed files with 338 additions and 6 deletions.
17 changes: 15 additions & 2 deletions README.md
@@ -6,6 +6,17 @@ Introduction
-------------
CS-Rank is a Python package for context-sensitive ranking algorithms.

We implement the following new object ranking architectures:

* FATE (First aggregate then evaluate)
* FETA (First evaluate then aggregate)

In addition, we offer these benchmark algorithms:

* Expected Rank Regression
* RankNet
* RankSVM

Check out our [interactive notebooks](https://mybinder.org/v2/gh/kiudee/cs-ranking/master?filepath=notebooks) to quickly find out what our package can do.


@@ -15,13 +26,15 @@ As a simple "Hello World!"-example we will try to learn the Medoid problem:
```python
import csrank as cs
from csrank import SyntheticDatasetGenerator
gen = SyntheticDatasetGenerator(dataset_type='medoid')
gen = SyntheticDatasetGenerator(dataset_type='medoid',
n_objects=5,
n_features=2)
X_train, Y_train, X_test, Y_test = gen.get_single_train_test_split()
```
All our learning algorithms are implemented using the scikit-learn estimator API.
Fitting our FATE-Network algorithm is as simple as calling the `fit` method:
```python
fate = cs.FATEObjectRanker()
fate = cs.FATEObjectRanker(n_object_features=2)
fate.fit(X_train, Y_train)
```
Predictions can then be obtained using:
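A minimal sketch of the prediction step, assuming the scikit-learn-style `predict` method described above:
```python
# Hedged sketch: predicted rankings for the held-out test split,
# assuming the scikit-learn-style predict() interface.
Y_pred = fate.predict(X_test)
```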
36 changes: 32 additions & 4 deletions csrank/fate_ranking.py
@@ -23,7 +23,7 @@
from csrank.dyadranking.contextual_ranking import ContextualRanker
from csrank.labelranking.label_ranker import LabelRanker
from csrank.layers import DeepSet
from csrank.losses import hinged_rank_loss
from csrank.losses import hinged_rank_loss, smooth_rank_loss
from csrank.metrics import zero_one_rank_loss_for_scores_ties, \
zero_one_rank_loss_for_scores
from csrank.objectranking.object_ranker import ObjectRanker
@@ -594,9 +594,37 @@ def set_tunable_parameter_ranges(cls, param_ranges_dict):

class FATEObjectRanker(FATEObjectRankingCore, ObjectRanker):

def __init__(self, loss_function=hinged_rank_loss, metrics=None,
def __init__(self, n_object_features,
n_hidden_set_layers=2,
n_hidden_set_units=32,
loss_function=smooth_rank_loss,
metrics=None,
**kwargs):
FATEObjectRankingCore.__init__(self, **kwargs)
""" Create a FATE-network architecture for object ranking.
Training complexity is quadratic in the number of objects and
prediction complexity is only linear.
Parameters
----------
n_object_features : int
Dimensionality of the feature space of each object
n_hidden_set_layers : int
Number of hidden layers for the context representation
n_hidden_set_units : int
Number of hidden units in each layer of the context representation
loss_function : function
Differentiable loss function for the score vector
metrics : list
List of evaluation metrics (can be non-differentiable)
**kwargs
Keyword arguments for the hidden units
"""
FATEObjectRankingCore.__init__(self,
n_object_features=n_object_features,
n_hidden_set_layers=n_hidden_set_layers,
n_hidden_set_units=n_hidden_set_units,
**kwargs)
self.loss_function = loss_function
self.logger = logging.getLogger(GENERAL_OBJECT_RANKER)
if metrics is None:
@@ -647,7 +675,7 @@ def predict(self, X, **kwargs):
class FATELabelRanker(FATERankingCore, LabelRanker):
def __init__(self, loss_function=hinged_rank_loss, metrics=None,
**kwargs):
FATEObjectRankingCore.__init__(self, label_ranker=True, **kwargs)
super().__init__(label_ranker=True, **kwargs)
self.loss_function = loss_function
self.logger = logging.getLogger(GENERAL_LABEL_RANKER)
if metrics is None:
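A hedged usage sketch of the new `FATEObjectRanker` constructor, built only from the parameters documented in the docstring above; the concrete values are illustrative, not defaults taken from the source:
```python
from csrank.fate_ranking import FATEObjectRanker
from csrank.metrics import zero_one_rank_loss_for_scores

# Illustrative values; only the parameter names come from the docstring above.
ranker = FATEObjectRanker(n_object_features=2,      # dimensionality of each object's features
                          n_hidden_set_layers=2,    # hidden layers of the context representation
                          n_hidden_set_units=32,    # hidden units per set layer
                          metrics=[zero_one_rank_loss_for_scores])
ranker.fit(X_train, Y_train)  # X_train, Y_train as in the README example above
```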
291 changes: 291 additions & 0 deletions notebooks/GeneralizationOfExperiments.ipynb
@@ -0,0 +1,291 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/prithag/anaconda3/lib/python3.6/site-packages/h5py/__init__.py:34: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
" from ._conv import register_converters as _register_converters\n",
"Using TensorFlow backend.\n",
"/home/prithag/anaconda3/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: compiletime version 3.5 of module 'tensorflow.python.framework.fast_tensor_util' does not match runtime version 3.6\n",
" return f(*args, **kwds)\n"
]
}
],
"source": [
"import inspect\n",
"import os\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"from docopt import docopt\n",
"from csrank.fate_ranking import FATEObjectRanker\n",
"from csrank.objectranking.feta_ranker import FETANetwork\n",
"from csrank.callbacks import DebugOutput\n",
"from csrank.metrics import zero_one_rank_loss_for_scores\n",
"from csrank.util import rename_file_if_exist, configure_logging_numpy_keras, get_tensor_value\n",
"from csrank.dataset_reader import SyntheticDatasetGenerator"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Defining the Constants"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"MODEL = \"aModel\"\n",
"ERROR_OUTPUT_STRING = 'Out of sample error {} : {} for n_objects {}'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Generate the Medoid sythentic dataset for defined number of objects"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def generate_dataset(n_objects=5, random_state=42):\n",
" parameters = {\"n_features\": 2, \"n_objects\": n_objects, \"n_train_instances\": 10000, \"n_test_instances\": 100000,\n",
" \"dataset_type\": \"medoid\",\"random_state\":random_state}\n",
" generator = SyntheticDatasetGenerator(**parameters)\n",
" return generator.get_single_train_test_split()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Fit the given ranker and predict on rankings with different sizes and check the zero one rank loss for them"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def get_evaluation_result(gor, X_train, Y_train, epochs):\n",
" gor.fit(X_train, Y_train, log_callbacks=[DebugOutput(delta=10)], verbose=False, epochs=epochs)\n",
" eval_results = {}\n",
" for n_objects in np.arange(3, 15):\n",
" _, _, X_test, Y_test = generate_dataset(n_objects=n_objects, random_state=seed + n_objects * 5)\n",
" y_pred_scores = gor.predict_scores(X_test, batch_size=X_test.shape[0])\n",
" metric_loss = get_tensor_value(zero_one_rank_loss_for_scores(Y_test, y_pred_scores))\n",
" logger.info(ERROR_OUTPUT_STRING.format(\"zero_one_rank_loss\", str(np.mean(metric_loss)), n_objects))\n",
" eval_results[n_objects] = metric_loss\n",
" return eval_results"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Initialize the log file path and the dataframe path."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"n_objects = 5\n",
"dirname = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))\n",
"log_path = os.path.join(dirname, \"logs\", \"generalizing_mean_{}.log\".format(n_objects))\n",
"df_path = os.path.join(dirname, \"logs\", \"generalizing_mean_{}.csv\".format(n_objects))\n",
"random_state = np.random.RandomState(seed=42)\n",
"seed = random_state.randint(2 ** 32)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Initialize tensorflow and keras with the seed and initialize the log file path"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"rows_list = []\n",
"logger = configure_logging_numpy_keras(seed=seed, log_path=log_path)\n",
"\n",
"X_train, Y_train, _, _ = generate_dataset(n_objects=n_objects, random_state=seed)\n",
"n_instances, n_objects, n_features = X_train.shape\n",
"\n",
"epochs = 50\n",
"params = {\"n_objects\": n_objects, \n",
" \"n_features\": n_features, \n",
" \"n_object_features\": n_features, \n",
" \"use_early_stopping\": True, \n",
" \"metrics\":[zero_one_rank_loss_for_scores]}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Evaluate the FETANetwork with best parameters and check the generalization"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"logger.info(\"############################# With Best Parameters FETA ##############################\")\n",
"best_point = [1, 16, 4.2054947998521569e-05, 2.6263496065703243e-10, 777]\n",
"gor = FETANetwork(**params)\n",
"gor.set_tunable_parameter_ranges({})\n",
"gor.set_tunable_parameters(best_point)\n",
"result = get_evaluation_result(gor, X_train, Y_train, epochs)\n",
"result[MODEL] = \"FETARanker\"\n",
"rows_list.append(result)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Evaluate the FATEObjectRanker with best parameters and check the generalization"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from csrank.losses import smooth_rank_loss\n",
"logger.info(\"############################# With Best Parameters FATE ##############################\")\n",
"best_point = [1003, 0.0002908115170179143, 16, 132, 6, 247, 3.4195015492773324e-05]\n",
"gor = FATEObjectRanker(**params)\n",
"gor.set_tunable_parameter_ranges({})\n",
"gor.set_tunable_parameters(best_point)\n",
"result = get_evaluation_result(gor, X_train, Y_train, epochs)\n",
"result[MODEL] = \"FATERanker\"\n",
"rows_list.append(result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df = pd.DataFrame(rows_list)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"cols = list(df.columns.values)\n",
"cols = cols[-7:] + cols[:-7]\n",
"MODEL = \"aModel\"\n",
"for x in ['Unnamed: 0', 'aModel']:\n",
" if x in cols:\n",
" cols.remove(x)\n",
" cols.insert(0, x)\n",
"df = df[cols]\n",
"#del df['Unnamed: 0']\n",
"df = df.set_index(MODEL).T"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df.to_csv(df_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
