diff --git a/README.md b/README.md
index b8171abd..35fe8de4 100644
--- a/README.md
+++ b/README.md
@@ -191,9 +191,9 @@ predictions = pipeline.predict(X_test)
## Step-by-step walk-through
-A step-by-step walk-through is available on our interactive notebook hosted on [Google Colab](https://colab.research.google.com/drive/1Idzht9dNoB85pjc9gOL24t9ksrXZEA-9?usp=sharing).
+A step-by-step walk-through is available in our documentation hosted on [Read the Docs](https://hiclass.readthedocs.io/en/latest/index.html).
-This will guide you through the process of installing hiclass with conda, training and predicting a small dataset.
+This will guide you through installing hiclass within a virtual environment, training and predicting models, persisting them to disk, and much more.
## API documentation
diff --git a/docs/examples/README.rst b/docs/examples/README.rst
new file mode 100644
index 00000000..90ce2230
--- /dev/null
+++ b/docs/examples/README.rst
@@ -0,0 +1,4 @@
+Gallery of Examples
+===================
+
+These examples illustrate the main features of HiClass.
\ No newline at end of file
diff --git a/docs/source/algorithms/selecting_training_policy.rst b/docs/examples/plot_binary_policies.py
similarity index 56%
rename from docs/source/algorithms/selecting_training_policy.rst
rename to docs/examples/plot_binary_policies.py
index d17a2732..e8f06c88 100644
--- a/docs/source/algorithms/selecting_training_policy.rst
+++ b/docs/examples/plot_binary_policies.py
@@ -1,4 +1,7 @@
-Selecting a training policy
+# -*- coding: utf-8 -*-
+"""
+===========================
+Binary Training Policies
===========================
The siblings policy is used by default on the local classifier per node, but the remaining ones can be selected with the parameter :literal:`binary_policy`, for example:
@@ -40,3 +43,36 @@
rf = RandomForestClassifier()
classifier = LocalClassifierPerNode(local_classifier=rf, binary_policy="exclusive_siblings")
+
+In the code below, the inclusive policy is selected.
+However, the code can be easily updated by replacing lines 20-21 with the examples shown in the tabs above.
+
+.. seealso::
+
+ Mathematical definitions of the different policies are given in :ref:`Training Policies`.
+"""
+from sklearn.ensemble import RandomForestClassifier
+
+from hiclass import LocalClassifierPerNode
+
+# Define data
+X_train = [[1], [2], [3], [4]]
+X_test = [[4], [3], [2], [1]]
+Y_train = [
+ ["Animal", "Mammal", "Sheep"],
+ ["Animal", "Mammal", "Cow"],
+ ["Animal", "Reptile", "Snake"],
+ ["Animal", "Reptile", "Lizard"],
+]
+
+# Use random forest classifiers for every node
+# and the inclusive policy to select training examples for the binary classifiers.
+rf = RandomForestClassifier()
+classifier = LocalClassifierPerNode(local_classifier=rf, binary_policy="inclusive")
+
+# Train local classifier per node
+classifier.fit(X_train, Y_train)
+
+# Predict
+predictions = classifier.predict(X_test)
+print(predictions)
diff --git a/docs/examples/plot_hello_hiclass.py b/docs/examples/plot_hello_hiclass.py
new file mode 100644
index 00000000..1dbbe0e9
--- /dev/null
+++ b/docs/examples/plot_hello_hiclass.py
@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+"""
+=====================
+Hello HiClass
+=====================
+
+A minimalist example showing how to use HiClass to train and predict.
+"""
+from sklearn.ensemble import RandomForestClassifier
+
+from hiclass import LocalClassifierPerNode
+
+# Define data
+X_train = [[1], [2], [3], [4]]
+X_test = [[4], [3], [2], [1]]
+Y_train = [
+ ["Animal", "Mammal", "Sheep"],
+ ["Animal", "Mammal", "Cow"],
+ ["Animal", "Reptile", "Snake"],
+ ["Animal", "Reptile", "Lizard"],
+]
+
+# Use random forest classifiers for every node
+rf = RandomForestClassifier()
+classifier = LocalClassifierPerNode(local_classifier=rf)
+
+# Train local classifier per node
+classifier.fit(X_train, Y_train)
+
+# Predict
+predictions = classifier.predict(X_test)
+print(predictions)
diff --git a/docs/examples/plot_model_persistence.py b/docs/examples/plot_model_persistence.py
new file mode 100644
index 00000000..57cc4a02
--- /dev/null
+++ b/docs/examples/plot_model_persistence.py
@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+"""
+=====================
+Model Persistence
+=====================
+
+HiClass is fully compatible with pickle, which can be used to store trained machine learning models on disk.
+In this example, we demonstrate how to save a trained classifier to disk with pickle and load it back for prediction.
+"""
+import pickle
+
+from sklearn.linear_model import LogisticRegression
+
+from hiclass import LocalClassifierPerLevel
+
+# Define data
+X_train = [[1, 2], [3, 4], [5, 6], [7, 8]]
+X_test = [[7, 8], [5, 6], [3, 4], [1, 2]]
+Y_train = [
+ ["Animal", "Mammal", "Sheep"],
+ ["Animal", "Mammal", "Cow"],
+ ["Animal", "Reptile", "Snake"],
+ ["Animal", "Reptile", "Lizard"],
+]
+
+# Use Logistic Regression classifiers for every level in the hierarchy
+lr = LogisticRegression()
+classifier = LocalClassifierPerLevel(local_classifier=lr)
+
+# Train local classifier per level
+classifier.fit(X_train, Y_train)
+
+# Save the model to disk
+filename = "trained_model.sav"
+with open(filename, "wb") as file:
+    pickle.dump(classifier, file)
+
+# Some time in the future...
+
+# Load the model from disk
+with open(filename, "rb") as file:
+    loaded_model = pickle.load(file)
+
+# Predict
+predictions = loaded_model.predict(X_test)
+print(predictions)
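+
+# As an aside, scikit-learn's documentation also suggests joblib for model
+# persistence. Assuming the same approach carries over to HiClass estimators
+# (they follow the scikit-learn API), a minimal sketch would be:
+import joblib
+
+joblib.dump(classifier, "trained_model.joblib")
+joblib_model = joblib.load("trained_model.joblib")
+print(joblib_model.predict(X_test))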
diff --git a/docs/examples/plot_parallel_training.py b/docs/examples/plot_parallel_training.py
new file mode 100644
index 00000000..e90712af
--- /dev/null
+++ b/docs/examples/plot_parallel_training.py
@@ -0,0 +1,76 @@
+# -*- coding: utf-8 -*-
+"""
+=====================
+Parallel Training
+=====================
+
+Larger datasets require more time for training.
+While HiClass models are trained on a single core by default,
+it is possible to train each local classifier in parallel by leveraging the library Ray [1]_.
+In this example, we demonstrate how to train a hierarchical classifier in parallel,
+using all the cores available, on a mock dataset from Kaggle [2]_.
+
+.. [1] https://www.ray.io/
+.. [2] https://www.kaggle.com/datasets/kashnitsky/hierarchical-text-classification
+"""
+import sys
+from os import cpu_count
+
+import pandas as pd
+import requests
+from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
+from sklearn.linear_model import LogisticRegression
+from sklearn.pipeline import Pipeline
+
+from hiclass import LocalClassifierPerParentNode
+
+
+def download(url: str, path: str) -> None:
+ """
+ Download a file from the internet.
+
+ Parameters
+ ----------
+ url : str
+ The address of the file to be downloaded.
+ path : str
+ The path to store the downloaded file.
+ """
+ response = requests.get(url)
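+ # Assumption added for robustness: fail loudly on HTTP errors
+ # instead of silently writing an error page to disk
+ response.raise_for_status()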
+ with open(path, "wb") as file:
+ file.write(response.content)
+
+
+# Download training data
+training_data_url = "https://zenodo.org/record/6657410/files/train_40k.csv?download=1"
+training_data_path = "train_40k.csv"
+download(training_data_url, training_data_path)
+
+# Load training data into pandas dataframe
+training_data = pd.read_csv(training_data_path).fillna(" ")
+
+# We will use logistic regression classifiers for every parent node
+lr = LogisticRegression(max_iter=1000)
+
+pipeline = Pipeline(
+ [
+ ("count", CountVectorizer()),
+ ("tfidf", TfidfTransformer()),
+ (
+ "lcppn",
+ LocalClassifierPerParentNode(local_classifier=lr, n_jobs=cpu_count()),
+ ),
+ ]
+)
+
+# Select training data
+X_train = training_data["Title"]
+Y_train = training_data[["Cat1", "Cat2", "Cat3"]]
+
+# Fixes the error AttributeError: '_LoggingTee' object has no attribute 'fileno',
+# which only happens when building the documentation.
+# Hence, you don't actually need this line for your own code to work.
+sys.stdout.fileno = lambda: False
+
+# Now, let's train the local classifier per parent node
+pipeline.fit(X_train, Y_train)
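+
+# Once training finishes, the pipeline can be used like any other estimator.
+# As an illustration, we predict labels for the first few training documents
+# (in practice a held-out test set would be used):
+predictions = pipeline.predict(X_train[:5])
+print(predictions)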
diff --git a/docs/examples/plot_pipeline.py b/docs/examples/plot_pipeline.py
new file mode 100644
index 00000000..8e53e089
--- /dev/null
+++ b/docs/examples/plot_pipeline.py
@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+"""
+=====================
+Building Pipelines
+=====================
+
+HiClass can be used in scikit-learn pipelines, and it fully supports sparse matrices as input.
+This example demonstrates the use of both of these features.
+"""
+from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
+from sklearn.linear_model import LogisticRegression
+from sklearn.pipeline import Pipeline
+
+from hiclass import LocalClassifierPerParentNode
+
+# Define data
+X_train = [
+ "Struggling to repay loan",
+ "Unable to get annual report",
+]
+X_test = [
+ "Unable to get annual report",
+ "Struggling to repay loan",
+]
+Y_train = [["Loan", "Student loan"], ["Credit reporting", "Reports"]]
+
+# We will use logistic regression classifiers for every parent node
+lr = LogisticRegression()
+
+# Let's build a pipeline using CountVectorizer and TfidfTransformer
+# to extract features as sparse matrices
+pipeline = Pipeline(
+ [
+ ("count", CountVectorizer()),
+ ("tfidf", TfidfTransformer()),
+ ("lcppn", LocalClassifierPerParentNode(local_classifier=lr)),
+ ]
+)
+
+# Now, let's train a local classifier per parent node
+pipeline.fit(X_train, Y_train)
+
+# Finally, let's predict using the pipeline
+predictions = pipeline.predict(X_test)
+print(predictions)
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 07afe325..f85830b7 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -2,4 +2,7 @@
sphinx==5.0.0
sphinx_rtd_theme==1.0.0
readthedocs-sphinx-search==0.1.2
-sphinx_code_tabs==0.5.3
\ No newline at end of file
+sphinx_code_tabs==0.5.3
+sphinx-gallery==0.10.1
+matplotlib==3.5.2
+pandas==1.4.2
diff --git a/docs/source/algorithms/local_classifier_per_node.rst b/docs/source/algorithms/local_classifier_per_node.rst
index 14cee312..9fbc905d 100644
--- a/docs/source/algorithms/local_classifier_per_node.rst
+++ b/docs/source/algorithms/local_classifier_per_node.rst
@@ -14,6 +14,5 @@ One of the most popular approaches in the literature, the local classifier per n
:hidden:
training_policies
- selecting_training_policy
Each binary classifier is trained in parallel using the library `Ray `_. In order to avoid inconsistencies, prediction is performed in a top-down manner. For example, given a hypothetical test example, the local classifier per node firstly queries the binary classifiers at nodes "Reptile" and "Mammal". Let's suppose that in this case the probability of the test example belonging to class "Reptile" is 0.8, while the probability of belonging to class "Mammal" is 0.5, then class "Reptile" is picked. At the next level, only the classifiers at nodes "Snake" and "Lizard" are queried, and again the one with the highest probability is selected.
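+
+As an illustration, the snippet below sketches a single top-down step using the hypothetical probabilities from the example above (a simplified sketch, not the library's internal implementation):
+
+.. code-block:: python
+
+    # Probabilities assigned by the binary classifiers at the first level
+    probabilities = {"Reptile": 0.8, "Mammal": 0.5}
+
+    # The class with the highest probability is selected
+    selected = max(probabilities, key=probabilities.get)
+    print(selected)  # Reptile, so only its children are queried next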
diff --git a/docs/source/algorithms/metrics.rst b/docs/source/algorithms/metrics.rst
index da1f99c1..b609eb9d 100644
--- a/docs/source/algorithms/metrics.rst
+++ b/docs/source/algorithms/metrics.rst
@@ -1,6 +1,6 @@
.. _metrics-overview:
-Hierarchical Metrics
+Metrics
====================
According to [1]_, the use of flat classification metrics might not be adequate to give enough insight of which algorithm is better at classifying hierarchical data. Hence, in HiClass we implemented the metrics of hierarchical precision (hP), hierarchical recall (hR) and hierarchical F-score (hF), which are extensions of the renowned metrics of precision, recall and F-score, but tailored to the hierarchical classification scenario. These hierarchical counterparts were initially proposed by [2]_, and are defined as follows:
diff --git a/docs/source/algorithms/training_policies.rst b/docs/source/algorithms/training_policies.rst
index 1e40d94b..7eecb9bd 100644
--- a/docs/source/algorithms/training_policies.rst
+++ b/docs/source/algorithms/training_policies.rst
@@ -48,6 +48,10 @@ Using as example the class "Wolf" from the hierarchy represented in the image be
**Exclusive siblings** Wolf Cat
====================== ====================== ===============================================
+.. seealso::
+
+ For code examples showing how to select these different policies, see :ref:`Binary Training Policies`.
+
.. [1] Silla, C. N., & Freitas, A. A. (2011). A survey of hierarchical classification across different application domains. Data Mining and Knowledge Discovery, 22(1), 31-72.
.. [2] Eisner, R., Poulin, B., Szafron, D., Lu, P., & Greiner, R. (2005, November). Improving protein function prediction using the hierarchical structure of the gene ontology. In 2005 IEEE symposium on computational intelligence in bioinformatics and computational biology (pp. 1-10). IEEE.
diff --git a/docs/source/api/index.rst b/docs/source/api/index.rst
index 2d710f36..379729a5 100644
--- a/docs/source/api/index.rst
+++ b/docs/source/api/index.rst
@@ -1,7 +1,7 @@
.. _code:
-API reference documentation
-===========================
+API reference
+=============
The documentation lists all available functions for each of the implemented classes. This includes inherited functions.
Therefore, not everything that is listed under a class's documentation is necessarily implemented by said class.
This is done in order to provide a complete list of the callable functions for each of the classes.
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 809e8dab..b39f35cd 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -36,6 +36,7 @@
'sphinx.ext.napoleon',
'sphinx.ext.autosectionlabel',
'sphinx_code_tabs',
+ 'sphinx_gallery.gen_gallery',
]
# Add any paths that contain templates here, relative to this directory.
@@ -72,4 +73,9 @@
html_theme_options = {}
if not use_rtd_scheme:
- html_theme_options["sidebar_width"] = "230px"
\ No newline at end of file
+ html_theme_options["sidebar_width"] = "230px"
+
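+# sphinx-gallery configuration: 'examples_dirs' points to the example scripts
+# (relative to this file), and 'gallery_dirs' is where the generated gallery
+# pages are written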
+sphinx_gallery_conf = {
+ 'examples_dirs': '../examples',
+ 'gallery_dirs': 'auto_examples',
+}
\ No newline at end of file
diff --git a/docs/source/get_started/full_example.rst b/docs/source/get_started/full_example.rst
index acf630cf..9e43c05d 100644
--- a/docs/source/get_started/full_example.rst
+++ b/docs/source/get_started/full_example.rst
@@ -9,7 +9,7 @@ It is now time to stitch the code together. Here is the full example:
from hiclass import LocalClassifierPerNode
from sklearn.ensemble import RandomForestClassifier
- # define data
+ # Define data
X_train = [[1], [2], [3], [4]]
X_test = [[4], [3], [2], [1]]
Y_train = [
@@ -45,4 +45,4 @@ The array below should be printed on the terminal:
['Animal' 'Mammal' 'Cow']
['Animal' 'Mammal' 'Sheep']]
-There is more to HiClass than what is shown in this "Hello World" example, such as training with missing data points, storing trained models and computation of hierarchical metrics. These concepts are covered in the next tutorial.
\ No newline at end of file
+There is more to HiClass than what is shown in this "Hello World" example, such as training with missing leaf nodes, storing trained models, and computing hierarchical metrics. These concepts are covered in the :ref:`Gallery of Examples`.
\ No newline at end of file
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 85ad184b..443dd6a2 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -18,14 +18,14 @@ Welcome to hiclass' documentation!
:target: https://codecov.io/gh/mirand863/hiclass
:alt: codecov
+.. image:: https://static.pepy.tech/personalized-badge/hiclass?period=total&units=international_system&left_color=grey&right_color=brightgreen&left_text=pypi
+ :target: https://pypi.org/project/hiclass/
+ :alt: Downloads pypi
+
.. image:: https://img.shields.io/conda/dn/conda-forge/hiclass?label=conda
:target: https://anaconda.org/conda-forge/hiclass
:alt: Downloads Conda
-.. image:: https://img.shields.io/pypi/dm/hiclass?label=pypi
- :target: https://pypi.org/project/hiclass/
- :alt: Downloads pypi
-
.. image:: https://img.shields.io/badge/License-BSD_3--Clause-blue.svg
:target: https://opensource.org/licenses/BSD-3-Clause
:alt: License
@@ -35,6 +35,7 @@ Welcome to hiclass' documentation!
introduction/index
get_started/index
+ auto_examples/index
algorithms/index
.. toctree::
diff --git a/docs/source/introduction/learn.rst b/docs/source/introduction/learn.rst
index b8edf4d6..af64a081 100644
--- a/docs/source/introduction/learn.rst
+++ b/docs/source/introduction/learn.rst
@@ -6,6 +6,6 @@ In the next few chapters, you will learn how to :ref:`Install HiClass` and set u
Once you are set up, we suggest working through our examples, including:
- A typical :ref:`A "Hello World" example`, for an entry-level description of the main concepts.
-- A more detailed `tutorial `_ to give you hands-on experience.
+- Further examples in our :ref:`Gallery of Examples`, to give you hands-on experience.
-We also recommend the :ref:`API reference documentation` for additional information.
+We also recommend the sections :ref:`Algorithms Overview` and :ref:`API reference` for additional information.
diff --git a/hiclass/LocalClassifierPerLevel.py b/hiclass/LocalClassifierPerLevel.py
index 6c896afd..7bc6a239 100644
--- a/hiclass/LocalClassifierPerLevel.py
+++ b/hiclass/LocalClassifierPerLevel.py
@@ -139,7 +139,7 @@ def predict(self, X):
for level, classifier in enumerate(self.local_classifiers_):
self.logger_.info(f"Predicting level {level}")
if level == 0:
- y[:, level] = classifier.predict(X)
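+ # flatten, since some local classifiers may return predictions as a column vector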
+ y[:, level] = classifier.predict(X).flatten()
else:
all_probabilities = classifier.predict_proba(X)
successors = np.array(