-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
338 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,291 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"/home/prithag/anaconda3/lib/python3.6/site-packages/h5py/__init__.py:34: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", | ||
" from ._conv import register_converters as _register_converters\n", | ||
"Using TensorFlow backend.\n", | ||
"/home/prithag/anaconda3/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: compiletime version 3.5 of module 'tensorflow.python.framework.fast_tensor_util' does not match runtime version 3.6\n", | ||
" return f(*args, **kwds)\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"import inspect\n", | ||
"import os\n", | ||
"\n", | ||
"import numpy as np\n", | ||
"import pandas as pd\n", | ||
"from docopt import docopt\n", | ||
"from csrank.fate_ranking import FATEObjectRanker\n", | ||
"from csrank.objectranking.feta_ranker import FETANetwork\n", | ||
"from csrank.callbacks import DebugOutput\n", | ||
"from csrank.metrics import zero_one_rank_loss_for_scores\n", | ||
"from csrank.util import rename_file_if_exist, configure_logging_numpy_keras, get_tensor_value\n", | ||
"from csrank.dataset_reader import SyntheticDatasetGenerator" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Defining the Constants" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"MODEL = \"aModel\"\n", | ||
"ERROR_OUTPUT_STRING = 'Out of sample error {} : {} for n_objects {}'" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Generate the medoid synthetic dataset for a given number of objects" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"def generate_dataset(n_objects=5, random_state=42):\n", | ||
" parameters = {\"n_features\": 2, \"n_objects\": n_objects, \"n_train_instances\": 10000, \"n_test_instances\": 100000,\n", | ||
" \"dataset_type\": \"medoid\",\"random_state\":random_state}\n", | ||
" generator = SyntheticDatasetGenerator(**parameters)\n", | ||
" return generator.get_single_train_test_split()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Fit the given ranker, predict scores for rankings of different sizes, and compute the zero-one rank loss for each size" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"def get_evaluation_result(gor, X_train, Y_train, epochs):\n", | ||
" gor.fit(X_train, Y_train, log_callbacks=[DebugOutput(delta=10)], verbose=False, epochs=epochs)\n", | ||
" eval_results = {}\n", | ||
" for n_objects in np.arange(3, 15):\n", | ||
" _, _, X_test, Y_test = generate_dataset(n_objects=n_objects, random_state=seed + n_objects * 5)\n", | ||
" y_pred_scores = gor.predict_scores(X_test, batch_size=X_test.shape[0])\n", | ||
" metric_loss = get_tensor_value(zero_one_rank_loss_for_scores(Y_test, y_pred_scores))\n", | ||
" logger.info(ERROR_OUTPUT_STRING.format(\"zero_one_rank_loss\", str(np.mean(metric_loss)), n_objects))\n", | ||
" eval_results[n_objects] = metric_loss\n", | ||
" return eval_results" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Initialize the log file path, the dataframe path, and the random seed." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"n_objects = 5\n", | ||
"dirname = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))\n", | ||
"log_path = os.path.join(dirname, \"logs\", \"generalizing_mean_{}.log\".format(n_objects))\n", | ||
"df_path = os.path.join(dirname, \"logs\", \"generalizing_mean_{}.csv\".format(n_objects))\n", | ||
"random_state = np.random.RandomState(seed=42)\n", | ||
"seed = random_state.randint(2 ** 32)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Configure logging and initialize tensorflow and keras with the seed, then generate the training data and define the ranker parameters" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"rows_list = []\n", | ||
"logger = configure_logging_numpy_keras(seed=seed, log_path=log_path)\n", | ||
"\n", | ||
"X_train, Y_train, _, _ = generate_dataset(n_objects=n_objects, random_state=seed)\n", | ||
"n_instances, n_objects, n_features = X_train.shape\n", | ||
"\n", | ||
"epochs = 50\n", | ||
"params = {\"n_objects\": n_objects, \n", | ||
" \"n_features\": n_features, \n", | ||
" \"n_object_features\": n_features, \n", | ||
" \"use_early_stopping\": True, \n", | ||
" \"metrics\":[zero_one_rank_loss_for_scores]}" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Evaluate the FETANetwork with best parameters and check the generalization" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"logger.info(\"############################# With Best Parameters FETA ##############################\")\n", | ||
"best_point = [1, 16, 4.2054947998521569e-05, 2.6263496065703243e-10, 777]\n", | ||
"gor = FETANetwork(**params)\n", | ||
"gor.set_tunable_parameter_ranges({})\n", | ||
"gor.set_tunable_parameters(best_point)\n", | ||
"result = get_evaluation_result(gor, X_train, Y_train, epochs)\n", | ||
"result[MODEL] = \"FETARanker\"\n", | ||
"rows_list.append(result)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Evaluate the FATEObjectRanker with best parameters and check the generalization" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"from csrank.losses import smooth_rank_loss\n", | ||
"logger.info(\"############################# With Best Parameters FATE ##############################\")\n", | ||
"best_point = [1003, 0.0002908115170179143, 16, 132, 6, 247, 3.4195015492773324e-05]\n", | ||
"gor = FATEObjectRanker(**params)\n", | ||
"gor.set_tunable_parameter_ranges({})\n", | ||
"gor.set_tunable_parameters(best_point)\n", | ||
"result = get_evaluation_result(gor, X_train, Y_train, epochs)\n", | ||
"result[MODEL] = \"FATERanker\"\n", | ||
"rows_list.append(result)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"df = pd.DataFrame(rows_list)\n", | ||
"df" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"cols = list(df.columns.values)\n", | ||
"cols = cols[-7:] + cols[:-7]\n", | ||
"MODEL = \"aModel\"\n", | ||
"for x in ['Unnamed: 0', 'aModel']:\n", | ||
" if x in cols:\n", | ||
" cols.remove(x)\n", | ||
" cols.insert(0, x)\n", | ||
"df = df[cols]\n", | ||
"#del df['Unnamed: 0']\n", | ||
"df = df.set_index(MODEL).T" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"df.to_csv(df_path)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"df" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |