[WIP] Restructuring the examples section #785

Merged
merged 13 commits into from Sep 30, 2019
4 changes: 4 additions & 0 deletions examples/20_basic/README.txt
@@ -0,0 +1,4 @@
Introductory Examples
=====================

Introductory examples for using the OpenML Python connector.
@@ -1,8 +1,8 @@
"""
Setup
=====

An example of how to set up OpenML-Python, followed by a simple example.
"""
############################################################################
# OpenML is an online collaboration platform for machine learning which allows
29 changes: 29 additions & 0 deletions examples/20_basic/simple_datasets_tutorial.py
@@ -0,0 +1,29 @@
"""
========
Datasets
========

A basic tutorial on how to list and download datasets.
"""
############################################################################
import openml

############################################################################
# List datasets
# =============

datasets_df = openml.datasets.list_datasets(output_format='dataframe')
print(datasets_df.head(n=10))
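The listing dataframe can then be filtered with ordinary pandas operations. A minimal sketch on a toy frame standing in for the real listing (the column names `did`, `name` and `NumberOfInstances` mirror the listing output but are assumptions here, and the rows are made up):

```python
import pandas as pd

# Toy stand-in for the dataframe returned by openml.datasets.list_datasets.
toy_listing = pd.DataFrame(
    {
        'did': [2, 3, 5],
        'name': ['anneal', 'kr-vs-kp', 'arrhythmia'],
        'NumberOfInstances': [898, 3196, 452],
    }
)

# Keep only datasets with at least 500 instances, exactly as one would
# filter the real listing before picking IDs to download.
large = toy_listing[toy_listing['NumberOfInstances'] >= 500]
print(large['name'].tolist())
```

The surviving `did` values are what you would pass on to `openml.datasets.get_dataset`.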

############################################################################
# Download a dataset
# ==================

first_dataset_id = int(datasets_df['did'].iloc[0])
dataset = openml.datasets.get_dataset(first_dataset_id)

# Print a summary
print("This is dataset '%s', the target feature is '%s'" %
(dataset.name, dataset.default_target_attribute))
print("URL: %s" % dataset.url)
print(dataset.description[:500])
48 changes: 48 additions & 0 deletions examples/20_basic/simple_flows_and_runs_tutorial.py
@@ -0,0 +1,48 @@
"""
Flows and Runs
==============

A simple tutorial on how to train/run a model and how to upload the results.
"""

import openml
from sklearn import ensemble, neighbors

############################################################################
# Train a machine learning model
# ==============================
#
# .. warning:: This example uploads data. For that reason, this example
# connects to the test server at test.openml.org. This prevents the main
# server from being crowded with example datasets, tasks, runs, and so on.

openml.config.start_using_configuration_for_example()

# NOTE: We are using dataset 20 from the test server: https://test.openml.org/d/20
dataset = openml.datasets.get_dataset(20)
X, y, categorical_indicator, attribute_names = dataset.get_data(
dataset_format='array',
target=dataset.default_target_attribute
)
clf = neighbors.KNeighborsClassifier(n_neighbors=3)
clf.fit(X, y)
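Before involving the server at all, a fitted classifier can be sanity-checked locally. A hedged sketch on synthetic data (the sample counts and split size are arbitrary choices, not anything prescribed by OpenML):

```python
from sklearn import neighbors
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Synthetic stand-in for an OpenML dataset: 200 samples, two classes.
X_toy, y_toy = make_classification(n_samples=200, n_features=5, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(
    X_toy, y_toy, test_size=0.25, random_state=0
)

clf_local = neighbors.KNeighborsClassifier(n_neighbors=3)
clf_local.fit(X_train, y_train)

# Accuracy on the held-out quarter of the data.
score = clf_local.score(X_test, y_test)
print("held-out accuracy: %.2f" % score)
```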

############################################################################
# Running a model on a task
# =========================

task = openml.tasks.get_task(119)
clf = ensemble.RandomForestClassifier()
run = openml.runs.run_model_on_task(clf, task)
print(run)

############################################################################
# Publishing the run
# ==================

myrun = run.publish()
print("Run was uploaded to http://test.openml.org/r/" + str(myrun.run_id))
print("The flow can be found at http://test.openml.org/f/" + str(myrun.flow_id))

############################################################################
openml.config.stop_using_configuration_for_example()
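The paired start/stop calls above are a classic begin/end pattern, so a context manager guarantees the stop call even when the body raises. A sketch under the assumption that wrapping is desirable; `FakeConfig` is purely illustrative and not part of openml:

```python
import contextlib


# Illustrative stand-in for a configuration object with begin/end switches.
class FakeConfig:
    def __init__(self):
        self.using_example_server = False

    def start_using_configuration_for_example(self):
        self.using_example_server = True

    def stop_using_configuration_for_example(self):
        self.using_example_server = False


@contextlib.contextmanager
def example_configuration(config):
    config.start_using_configuration_for_example()
    try:
        yield config
    finally:
        # The stop call runs even if the body raises.
        config.stop_using_configuration_for_example()


fake_config = FakeConfig()
with example_configuration(fake_config):
    assert fake_config.using_example_server
print(fake_config.using_example_server)  # prints False
```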
7 changes: 7 additions & 0 deletions examples/20_basic/simple_studies_tutorial.py
@@ -0,0 +1,7 @@
"""
=======
Studies
=======

This is only a placeholder so far.
"""
4 changes: 4 additions & 0 deletions examples/30_extended/README.txt
@@ -0,0 +1,4 @@
In-Depth Examples
=================

Extended examples for using the OpenML Python connector.
@@ -14,6 +14,7 @@
# **********
#
# * List datasets
#
# * Use the output_format parameter to select the output type
# * Default gives 'dict' (other option: 'dataframe')
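Conceptually, the 'dataframe' output is just the 'dict' listing tabulated row by row. A toy sketch of that correspondence (the IDs and keys here are illustrative, not real listing output):

```python
import pandas as pd

# A 'dict'-style listing: one entry per dataset ID.
listing_as_dict = {
    2: {'did': 2, 'name': 'anneal', 'status': 'active'},
    3: {'did': 3, 'name': 'kr-vs-kp', 'status': 'active'},
}

# The 'dataframe' output corresponds to tabulating those entries,
# with one row per dataset.
listing_as_df = pd.DataFrame.from_dict(listing_as_dict, orient='index')
print(listing_as_df)
```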

@@ -132,7 +132,7 @@
# The run may be stored offline, and the flow will be stored along with it:
run.to_filesystem(directory='myrun')

# They may be loaded and uploaded at a later time
run = openml.runs.OpenMLRun.from_filesystem(directory='myrun')
run.publish()

@@ -13,15 +13,14 @@
# Tasks are identified by IDs and can be accessed in two different ways:
#
# 1. In a list providing basic information on all tasks available on OpenML.
#    This function will not download the actual tasks, but will instead download
#    meta data that can be used to filter the tasks and retrieve a set of IDs.
#    We can filter this list; for example, we can list only tasks having a
#    special tag or only tasks for a specific target such as
#    *supervised classification*.
# 2. A single task by its ID. It contains all meta information, the target
#    metric, the splits and an iterator which can be used to access the
#    splits in a useful manner.

############################################################################
# Listing tasks
@@ -148,20 +147,19 @@
#
# You can also create new tasks. Take the following into account:
#
# * You can only create tasks on *active* datasets
# * For now, only the following tasks are supported: classification, regression,
#   clustering, and learning curve analysis.
# * For now, tasks can only be created on a single dataset.
# * The exact same task must not already exist.
#
# Creating a task requires the following input:
#
# * task_type_id: The task type ID (see below). Required.
# * dataset_id: The dataset ID. Required.
# * target_name: The name of the attribute you aim to predict. Optional.
# * estimation_procedure_id: The ID of the estimation procedure used to create train-test
#   splits. Optional.
# * evaluation_measure: The name of the evaluation measure. Optional.
# * Any additional inputs for specific tasks
#
#
@@ -178,7 +176,7 @@
#
# Let's create a classification task on a dataset. In this example we will do this on the
# Iris dataset (ID=128 (on test server)). We'll use 10-fold cross-validation (ID=1),
# and *predictive accuracy* as the predefined measure (this can also be left open).
# If a task with these parameters exists, we will get an appropriate exception.
# If such a task doesn't exist, a task will be created and the corresponding task_id
# will be returned.
@@ -212,8 +210,7 @@


############################################################################
# * `Complete list of task types <https://www.openml.org/search?type=task_type>`_.
# * `Complete list of model estimation procedures <https://www.openml.org/search?q=%2520measure_type%3Aestimation_procedure&type=measure>`_.
# * `Complete list of evaluation measures <https://www.openml.org/search?q=measure_type%3Aevaluation_measure&type=measure>`_.
#
18 changes: 18 additions & 0 deletions examples/40_paper/2015_neurips_feurer_example.py
@@ -0,0 +1,18 @@
"""
Feurer et al. (2015)
====================

A tutorial on how to get the datasets used in the paper introducing *Auto-sklearn* by Feurer et al.

Auto-sklearn website: https://automl.github.io/auto-sklearn/master/

Publication
~~~~~~~~~~~

| Efficient and Robust Automated Machine Learning
| Matthias Feurer, Aaron Klein, Katharina Eggensperger, Jost Springenberg, Manuel Blum and Frank Hutter
| In *Advances in Neural Information Processing Systems 28*, 2015
| Available at http://papers.nips.cc/paper/5872-efficient-and-robust-automated-machine-learning.pdf

This is currently a placeholder.
"""
17 changes: 17 additions & 0 deletions examples/40_paper/2018_ida_strang_example.py
@@ -0,0 +1,17 @@
"""
Strang et al. (2018)
====================

A tutorial on how to reproduce the analysis conducted for *Don't Rule Out Simple Models
Prematurely: A Large Scale Benchmark Comparing Linear and Non-linear Classifiers in OpenML*.

Publication
~~~~~~~~~~~

| Don't Rule Out Simple Models Prematurely: A Large Scale Benchmark Comparing Linear and Non-linear Classifiers in OpenML
| Benjamin Strang, Peter van der Putten, Jan van Rijn and Frank Hutter
| In *Advances in Intelligent Data Analysis XVII: 17th International Symposium*, 2018
| Available at https://link.springer.com/chapter/10.1007%2F978-3-030-01768-2_25

This is currently a placeholder.
"""
16 changes: 16 additions & 0 deletions examples/40_paper/2018_kdd_rijn_example.py
@@ -0,0 +1,16 @@
"""
van Rijn and Hutter (2018)
==========================

A tutorial on how to reproduce the paper *Hyperparameter Importance Across Datasets*.

Publication
~~~~~~~~~~~

| Hyperparameter Importance Across Datasets
| Jan van Rijn and Frank Hutter
| In *Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining*, 2018
| Available at https://dl.acm.org/citation.cfm?id=3220058

This is currently a placeholder.
"""
17 changes: 17 additions & 0 deletions examples/40_paper/2018_neurips_fusi_example.py
@@ -0,0 +1,17 @@
"""
Fusi et al. (2018)
==================

A tutorial on how to get the datasets used in the paper introducing *Probabilistic Matrix
Factorization for Automated Machine Learning* by Fusi et al.

Publication
~~~~~~~~~~~

| Probabilistic Matrix Factorization for Automated Machine Learning
| Nicolo Fusi, Rishit Sheth and Melih Elibol
| In *Advances in Neural Information Processing Systems 31*, 2018
| Available at http://papers.nips.cc/paper/7595-probabilistic-matrix-factorization-for-automated-machine-learning.pdf

This is currently a placeholder.
"""
17 changes: 17 additions & 0 deletions examples/40_paper/2018_neurips_perrone_example.py
@@ -0,0 +1,17 @@
"""
Perrone et al. (2018)
=====================

A tutorial on how to build a surrogate model based on OpenML data as done for *Scalable
Hyperparameter Transfer Learning* by Perrone et al..

Publication
~~~~~~~~~~~

| Scalable Hyperparameter Transfer Learning
| Valerio Perrone, Rodolphe Jenatton, Matthias Seeger and Cedric Archambeau
| In *Advances in Neural Information Processing Systems 31*, 2018
| Available at http://papers.nips.cc/paper/7917-scalable-hyperparameter-transfer-learning.pdf

This is currently a placeholder.
"""
5 changes: 5 additions & 0 deletions examples/40_paper/README.txt
@@ -0,0 +1,5 @@
Usage in research papers
========================

These examples demonstrate how OpenML-Python can be used for research by re-implementing
analyses from recent publications.
7 changes: 3 additions & 4 deletions examples/README.txt
@@ -1,4 +1,3 @@
========
Examples
========
4 changes: 0 additions & 4 deletions examples/sklearn/README.txt

This file was deleted.

40 changes: 0 additions & 40 deletions examples/sklearn/openml_run_example.py

This file was deleted.