diff --git a/docs/tutorials/basics.ipynb b/docs/tutorials/basics.ipynb index 8c91df01..22bbb0f0 100644 --- a/docs/tutorials/basics.ipynb +++ b/docs/tutorials/basics.ipynb @@ -63,7 +63,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "\u001b[32m2023-11-27 14:54:08.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpolaris.hub.client\u001b[0m:\u001b[36mlogin\u001b[0m:\u001b[36m262\u001b[0m - \u001b[1mYou are already logged in to the Polaris Hub as cwognum (cas@valencediscovery.com). Set `overwrite=True` to force re-authentication.\u001b[0m\n" + "\u001b[32m2024-02-18 12:35:01.048\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpolaris.hub.client\u001b[0m:\u001b[36mlogin\u001b[0m:\u001b[36m262\u001b[0m - \u001b[1mYou are already logged in to the Polaris Hub as cwognum (cas@valencelabs.com). Set `overwrite=True` to force re-authentication.\u001b[0m\n" ] } ], @@ -99,8 +99,8 @@ "metadata": {}, "outputs": [], "source": [ - "dataset = po.load_dataset(\"polaris/hello_world_dataset\")\n", - "benchmark = po.load_benchmark(\"polaris/hello_world_benchmark\")" + "dataset = po.load_dataset(\"polaris/hello-world\")\n", + "benchmark = po.load_benchmark(\"polaris/hello-world-benchmark\")" ] }, { @@ -231,10 +231,422 @@ "id": "748dd278-0fd0-4c5b-ac6a-8d974143c3b9", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Failed to find the pandas get_adjustment() function to patch\n", + "Failed to patch pandas - PandasTools will have limited functionality\n" + ] + }, { "data": { "text/html": [ - "
RandomForestRegressor(max_depth=2, random_state=0)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
RandomForestRegressor(max_depth=2, random_state=0)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "RandomForestRegressor(max_depth=2, random_state=0)" @@ -249,12 +661,15 @@ "import datamol as dm\n", "from sklearn.ensemble import RandomForestRegressor\n", "\n", - "# Convert smiles to ECFP fingerprints\n", - "train_fps = [dm.to_fp(smi) for smi in train.inputs]\n", + "# Load the benchmark (automatically loads the underlying dataset as well)\n", + "benchmark = po.load_benchmark(\"polaris/hello_world_benchmark\")\n", + "\n", + "# Get the split and convert SMILES to ECFP fingerprints by specifying an featurize function.\n", + "train, test = benchmark.get_train_test_split(featurization_fn=dm.to_fp)\n", "\n", "# Define a model and train\n", "model = RandomForestRegressor(max_depth=2, random_state=0)\n", - "model.fit(train_fps, train.targets)" + "model.fit(train.X, train.y)" ] }, { @@ -272,8 +687,7 @@ "metadata": {}, "outputs": [], "source": [ - "test_fps = [dm.to_fp(smi) for smi in test.inputs]\n", - "predictions = model.predict(test_fps)" + "predictions = model.predict(test.X)" ] }, { @@ -285,7 +699,7 @@ { "data": { "text/html": [ - "
name: None
description: ""
tags: []
user_attributes: {}
owner: None
benchmark_name: hello_world_benchmark
benchmark_owner:
    slug: polaris
    external_id: org_2WG9hRFgKNIRtGw4orsMPcr1F4S
    type: organization
github_url: None
paper_url: None
contributors: None
artifact_id: None
benchmark_artifact_id: polaris/hello-world-benchmark
results:
    Test set | Target label | Metric              | Score
    test     | SOL          | mean_squared_error  | 2.6875139821
    test     | SOL          | mean_absolute_error | 1.2735690161
" + "
name: None
description: ""
tags: []
user_attributes: {}
owner: None
benchmark_name: hello-world-benchmark
benchmark_owner:
    slug: polaris
    external_id: org_2WG9hRFgKNIRtGw4orsMPcr1F4S
    type: organization
github_url: None
paper_url: None
contributors: None
artifact_id: None
benchmark_artifact_id: polaris/hello-world-benchmark
results:
    Test set | Target label | Metric              | Score
    test     | SOL          | mean_squared_error  | 2.6875139821
    test     | SOL          | mean_absolute_error | 1.2735690161
" ], "text/plain": [ "{\n", @@ -294,7 +708,7 @@ " \"tags\": [],\n", " \"user_attributes\": {},\n", " \"owner\": null,\n", - " \"benchmark_name\": \"hello_world_benchmark\",\n", + " \"benchmark_name\": \"hello-world-benchmark\",\n", " \"benchmark_owner\": {\n", " \"slug\": \"polaris\",\n", " \"external_id\": \"org_2WG9hRFgKNIRtGw4orsMPcr1F4S\",\n", @@ -349,7 +763,7 @@ "source": [ "results.name = f\"hello-world-result\"\n", "results.github_url = \"https://github.com/polaris-hub/polaris-hub\"\n", - "results.paper_url = \"https://polaris-hub.vercel.app\"\n", + "results.paper_url = \"https://polarishub.io/\"\n", "results.description = \"Hello, World!\"" ] }, @@ -373,11 +787,11 @@ "name": "stderr", "output_type": "stream", "text": [ - "/home/cas/micromamba/envs/polaris/lib/python3.12/site-packages/pydantic/main.py:308: UserWarning: Pydantic serializer warnings:\n", + "/Users/cas.wognum/micromamba/envs/polaris/lib/python3.12/site-packages/pydantic/main.py:314: UserWarning: Pydantic serializer warnings:\n", " Expected `url` but got `str` - serialized value may not be as expected\n", " Expected `url` but got `str` - serialized value may not be as expected\n", " return self.__pydantic_serializer__.to_python(\n", - "\u001b[32m2023-11-27 14:54:46.649\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mpolaris.hub.client\u001b[0m:\u001b[36mupload_results\u001b[0m:\u001b[36m428\u001b[0m - \u001b[32m\u001b[1mYour result has been successfully uploaded to the Hub. View it here: https://polarishub.io/benchmarks/polaris/hello_world_benchmark/ns4JrC3hQNK9M1hbVPchy\u001b[0m\n" + "\u001b[32m2024-02-18 12:35:09.465\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mpolaris.hub.client\u001b[0m:\u001b[36mupload_results\u001b[0m:\u001b[36m431\u001b[0m - \u001b[32m\u001b[1mYour result has been successfully uploaded to the Hub. View it here: https://polarishub.io/benchmarks/polaris/hello-world-benchmark/l3uWzFBEyaD09Sa4Aik21\u001b[0m\n" ] } ], @@ -415,7 +829,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.0" + "version": "3.12.2" } }, "nbformat": 4, diff --git a/polaris/benchmark/_base.py b/polaris/benchmark/_base.py index 1b9e8e93..895cf25e 100644 --- a/polaris/benchmark/_base.py +++ b/polaris/benchmark/_base.py @@ -1,7 +1,7 @@ import json import os from hashlib import md5 -from typing import Any, Optional, Union +from typing import Any, Callable, Optional, Union import fsspec import numpy as np @@ -353,7 +353,10 @@ def task_type(self) -> TaskType: return v.value def get_train_test_split( - self, input_format: DataFormat = "dict", target_format: DataFormat = "dict" + self, + input_format: DataFormat = "dict", + target_format: DataFormat = "dict", + featurization_fn: Optional[Callable] = None, ) -> tuple[Subset, Union["Subset", dict[str, Subset]]]: """Construct the train and test sets, given the split in the benchmark specification. @@ -365,6 +368,8 @@ def get_train_test_split( input_format: How the input data is returned from the `Subset` object. target_format: How the target data is returned from the `Subset` object. This will only affect the train set. + featurization_fn: A function to apply to the input data. If a multi-input benchmark, this function + expects an input in the format specified by the `input_format` parameter. Returns: A tuple with the train `Subset` and test `Subset` objects. 
@@ -381,6 +386,7 @@ def _get_subset(indices, hide_targets): target_cols=self.target_cols, target_format=target_format, hide_targets=hide_targets, + featurization_fn=featurization_fn, ) train = _get_subset(self.split[0], hide_targets=False) @@ -388,6 +394,7 @@ def _get_subset(indices, hide_targets): test = {k: _get_subset(v, hide_targets=True) for k, v in self.split[1].items()} else: test = _get_subset(self.split[1], hide_targets=True) + return train, test def evaluate(self, y_pred: PredictionsType) -> BenchmarkResults: @@ -406,8 +413,10 @@ def evaluate(self, y_pred: PredictionsType) -> BenchmarkResults: 5. There can be metrics which measure across tasks. Args: - y_pred: The predictions for the test set, as NumPy arrays. If there are multiple test sets, - this should be a dictionary with the test set names as keys. + y_pred: The predictions for the test set, as NumPy arrays. + If there are multiple targets, the predictions should be wrapped in a dictionary with the target labels as keys. + If there are multiple test sets, the predictions should be further wrapped in a dictionary + with the test subset labels as keys. Returns: A `BenchmarkResults` object. This object can be directly submitted to the Polaris Hub. @@ -416,7 +425,7 @@ def evaluate(self, y_pred: PredictionsType) -> BenchmarkResults: # Instead of having the user pass the ground truth, we extract it from the benchmark spec ourselves. # This simplifies the API, but also was added to make accidental access to the test set targets less likely. # See also the `hide_targets` parameter in the `Subset` class. - test = self.get_train_test_split()[1] + test = self.get_train_test_split(target_format="dict")[1] if not isinstance(test, dict): test = {"test": test} diff --git a/polaris/dataset/_subset.py b/polaris/dataset/_subset.py index e1d4b9ae..04f5df95 100644 --- a/polaris/dataset/_subset.py +++ b/polaris/dataset/_subset.py @@ -1,9 +1,8 @@ -from typing import List, Literal, Optional, Sequence, Union +from typing import Callable, List, Literal, Optional, Sequence, Union import numpy as np from polaris.dataset import Dataset -from polaris.utils.context import tmp_attribute_change from polaris.utils.errors import TestAccessError from polaris.utils.types import DataFormat, DatapointType @@ -11,6 +10,20 @@ class Subset: """The `Subset` class provides easy access to a single partition of a split dataset. + Info: No need to create this class manually + You should not have to create this class manually. In most use-cases, you can create a `Subset` through the + `get_train_test_split` method of a `BenchmarkSpecification` object. + + Tip: Featurize your inputs + Not all datasets are already featurized. For example, a small-molecule task might simply provide the SMILES string. + To easily featurize the inputs, you can pass or set a transformation function. For example: + + ```python + import datamol as dm + + benchmark.get_train_test_split(..., featurization_fn=dm.to_fp) + ``` + This should be the starting point for any framework-specific (e.g. PyTorch, Tensorflow) data-loader implementation. How the data is loaded in Polaris can be non-trivial, so this class is provided to abstract away the details. To easily build framework-specific data-loaders, a `Subset` supports various styles of accessing the data: @@ -45,8 +58,6 @@ class Subset: TestAccessError: When trying to access the targets of the test set (specified by the `hide_targets` attribute). 
""" - _SUPPORTED_FORMATS = ["dict", "tuple"] - def __init__( self, dataset: Dataset, @@ -54,26 +65,19 @@ def __init__( input_cols: Union[List[str], str], target_cols: Union[List[str], str], input_format: DataFormat = "dict", - target_format: DataFormat = "tuple", + target_format: DataFormat = "dict", + featurization_fn: Optional[Callable] = None, hide_targets: bool = False, ): self.dataset = dataset self.indices = indices self.target_cols = target_cols if isinstance(target_cols, list) else [target_cols] self.input_cols = input_cols if isinstance(input_cols, list) else [input_cols] - - # Validate the output format - if input_format not in self._SUPPORTED_FORMATS: - raise ValueError( - f"Unsupported output format {input_format}. Choose from {self._SUPPORTED_FORMATS}" - ) - if target_format not in self._SUPPORTED_FORMATS: - raise ValueError( - f"Unsupported output format {target_format}. Choose from {self._SUPPORTED_FORMATS}" - ) self._input_format = input_format self._target_format = target_format + self._featurization_fn = featurization_fn + # For the iterator implementation self._pointer = 0 @@ -90,43 +94,73 @@ def is_multi_input(self): @property def inputs(self): - """ - Scikit-learn style access to the inputs. - If the dataset is multi-input, this will return a dict of arrays. - """ + """Alias for `self.as_array("x")`""" + return self.as_array("x") + + @property + def X(self): + """Alias for `self.as_array("x")`""" return self.as_array("x") @property def targets(self): - """ - Scikit-learn style access to the targets. - If the dataset is multi-target, this will return a dict of arrays. - """ + """Alias for `self.as_array("y")`""" + return self.as_array("y") + + @property + def y(self): + """Alias for `self.as_array("y")`""" return self.as_array("y") @staticmethod - def _convert(data: dict, order: List[str], fmt: str): - """Converts from the default dict format to the specified format""" + def _format(data: dict, order: List[str], fmt: str): + """ + Converts the internally used dict format to the user-specified format. + If the user-specified format is a tuple, it orders the column according to the specified order. + """ if len(data) == 1: data = list(data.values())[0] elif fmt == "tuple": data = tuple(data[k] for k in order) return data - def _extract( + def _get_single( self, - data: DatapointType, - data_type: Union[Literal["x"], Literal["y"], Literal["xy"]], - key: Optional[str] = None, + row: str | int, + cols: List[str], + featurization_fn: Optional[Callable], + format: DataFormat, ): - """Helper function to extract data from the return format of this class""" - if self._hide_targets: - return data - x, y = data - ret = x if data_type == "x" else y - if not isinstance(ret, dict) or key is None: - return ret - return ret[key] + """ + Loads a subset of the variables for a single data-point from the datasets. + The dataset stores datapoint in a row-wise manner, so this method is used to access a single row. + + Args: + row: The row index of the datapoint. + cols: The columns (i.e. variables) to load for that data point. + featurization_fn: The transformation function to apply to the data-point. + format: The format to return the data-point in. 
+        """
+        # Load the data-point
+        # Also handles loading data stored in external files for pointer columns
+        ret = {col: self.dataset.get_data(row, col) for col in cols}
+
+        # Format
+        ret = self._format(ret, cols, format)
+
+        # Featurize
+        if featurization_fn is not None:
+            ret = featurization_fn(ret)
+
+        return ret
+
+    def _get_single_input(self, row: str | int):
+        """Get a single input for a specific data-point, given the benchmark specification."""
+        return self._get_single(row, self.input_cols, self._featurization_fn, self._input_format)
+
+    def _get_single_output(self, row: str | int):
+        """Get a single output for a specific data-point, given the benchmark specification."""
+        return self._get_single(row, self.target_cols, None, self._target_format)
 
     def as_array(self, data_type: Union[Literal["x"], Literal["y"], Literal["xy"]]):
         """
@@ -138,21 +172,30 @@ def as_array(self, data_type: Union[Literal["x"], Literal["y"], Literal["xy"]]):
             return self.as_array("x"), self.as_array("y")
 
         if data_type == "y" and self._hide_targets:
-            raise TestAccessError("Within Polaris, you should not need to access the targets of the test set")
-
-        if not self.is_multi_task:
-            return np.array([self._extract(ret, data_type) for ret in self])
-
-        out = {}
-        columns = self.input_cols if data_type == "x" else self.target_cols
-
-        # Temporarily change the target format for easier conversion
-        with tmp_attribute_change(self, "_target_format", "dict"):
-            with tmp_attribute_change(self, "_input_format", "dict"):
-                for k in columns:
-                    out[k] = np.array([self._extract(ret, data_type, k) for ret in self])
-
-        return self._convert(out, self.target_cols, self._target_format)
+            raise TestAccessError("Within Polaris you should not need to access the targets of the test set")
+
+        if data_type == "x":
+            ret = [self._get_single_input(self.dataset.table.iloc[idx].name) for idx in self.indices]
+        else:
+            ret = [self._get_single_output(self.dataset.table.iloc[idx].name) for idx in self.indices]
+
+        if not ((self.is_multi_input and data_type == "x") or (self.is_multi_task and data_type == "y")):
+            # For a single-input (x) or single-task (y) subset, we can create the array directly.
+            # With a single-task or single-input data point, this will be a 1D array.
+            # With a multi-task or multi-input data point, this will be a 2D array.
+            return np.array(ret)
+
+        # If the return format is a dict, we want to convert
+        # from an array of dicts to a dict of arrays.
+        if data_type == "y" and self._target_format == "dict":
+            ret = {k: np.array([v[k] for v in ret]) for k in self.target_cols}
+        elif data_type == "x" and self._input_format == "dict":
+            ret = {k: np.array([v[k] for v in ret]) for k in self.input_cols}
+        else:
+            # The format is a tuple, so we have a list of tuples and convert it to an array
+            ret = np.array(ret)
+
+        return ret
 
     def __len__(self):
         return len(self.indices)
@@ -175,8 +218,7 @@ def __getitem__(self, item) -> DatapointType:
         row = self.dataset.table.iloc[idx]
 
         # Load the input modalities
-        ins = {col: self.dataset.get_data(row.name, col) for col in self.input_cols}
-        ins = self._convert(ins, self.input_cols, self._input_format)
+        ins = self._get_single_input(row.name)
 
         if self._hide_targets:
             # If we are not allowed to access the targets, we return the inputs only.
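Reviewer note (not part of the diff): the array-of-dicts to dict-of-arrays transposition above is the crux of the reworked `as_array`. A self-contained sketch of the two multi-task target layouts, using made-up target columns (`LogD`/`SOL` are illustrative):

```python
import numpy as np

target_cols = ["LogD", "SOL"]  # hypothetical target columns of a multi-task benchmark

# With target_format="dict", each data point loads as a dict of target values ...
per_datapoint = [
    {"LogD": 1.2, "SOL": -3.4},
    {"LogD": 0.7, "SOL": -2.9},
]

# ... which `as_array("y")` transposes into a dict of 1D arrays, one per task.
as_dict = {k: np.array([v[k] for v in per_datapoint]) for k in target_cols}
assert as_dict["SOL"].shape == (2,)

# With target_format="tuple", each data point is a tuple ordered by `target_cols`,
# and stacking the tuples yields a single (n_datapoints, n_tasks) array.
as_tuple = np.array([tuple(v[k] for k in target_cols) for v in per_datapoint])
assert as_tuple.shape == (2, 2)
```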
@@ -184,9 +226,7 @@ def __getitem__(self, item) -> DatapointType:
             return ins
 
         # Retrieve the targets
-        outs = {col: self.dataset.get_data(row.name, col) for col in self.target_cols}
-        outs = self._convert(outs, self.target_cols, self._target_format)
-
+        outs = self._get_single_output(row.name)
         return ins, outs
 
     def __iter__(self):
diff --git a/polaris/utils/types.py b/polaris/utils/types.py
index 402b98e9..8291f8b3 100644
--- a/polaris/utils/types.py
+++ b/polaris/utils/types.py
@@ -124,9 +124,9 @@ class License(BaseModel):
     Else it is required to manually specify this.
     """
 
-    SPDX_LICENSE_DATA_PATH: ClassVar[
-        str
-    ] = "https://raw.githubusercontent.com/spdx/license-list-data/main/json/licenses.json"
+    SPDX_LICENSE_DATA_PATH: ClassVar[str] = (
+        "https://raw.githubusercontent.com/spdx/license-list-data/main/json/licenses.json"
+    )
 
     id: str
     reference: Optional[HttpUrlString] = None
diff --git a/pyproject.toml b/pyproject.toml
index b7594baa..b21af303 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -107,8 +107,7 @@ lint.ignore = [
 line-length = 110
 target-version = "py310"
 
-[tool.ruff.lint.per-file-ignores]
-"__init__.py" = [
+lint.per-file-ignores."__init__.py" = [
     "F401", # imported but unused
     "E402", # Module level import not at top of file
 ]
diff --git a/tests/conftest.py b/tests/conftest.py
index f5b0077b..cd71fcf2 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -74,7 +74,7 @@ def test_zarr_archive_single_array(tmp_path):
     return _get_zarr_archive(tmp_path, datapoint_per_array=False)
 
 
-@pytest.fixture(scope="module")
+@pytest.fixture(scope="function")
 def test_single_task_benchmark(test_dataset):
     train_indices = list(range(90))
     test_indices = list(range(90, 100))
@@ -96,7 +96,7 @@ def test_single_task_benchmark(test_dataset):
     )
 
 
-@pytest.fixture(scope="module")
+@pytest.fixture(scope="function")
 def test_single_task_benchmark_clf(test_dataset):
     train_indices = list(range(90))
     test_indices = list(range(90, 100))
@@ -111,7 +111,7 @@ def test_single_task_benchmark_clf(test_dataset):
     )
 
 
-@pytest.fixture(scope="module")
+@pytest.fixture(scope="function")
 def test_single_task_benchmark_multiple_test_sets(test_dataset):
     train_indices = list(range(90))
     test_indices = {"test_1": list(range(90, 95)), "test_2": list(range(95, 100))}
@@ -133,7 +133,7 @@ def test_single_task_benchmark_multiple_test_sets(test_dataset):
     )
 
 
-@pytest.fixture(scope="module")
+@pytest.fixture(scope="function")
 def test_multi_task_benchmark(test_dataset):
     # For the sake of simplicity, just use a small set of indices
     train_indices = list(range(90))
@@ -157,7 +157,7 @@ def test_multi_task_benchmark(test_dataset):
     )
 
 
-@pytest.fixture(scope="module")
+@pytest.fixture(scope="function")
 def test_multi_task_benchmark_clf(test_dataset):
     # For the sake of simplicity, just use a small set of indices
     train_indices = list(range(90))
diff --git a/tests/test_subset.py b/tests/test_subset.py
index c06d1168..c6e1c4c5 100644
--- a/tests/test_subset.py
+++ b/tests/test_subset.py
@@ -1,3 +1,5 @@
+import datamol as dm
+import numpy as np
 import pytest
 
 from polaris.dataset import Subset
@@ -24,6 +26,8 @@ def 
test_consistency_across_access_methods(test_dataset): # Property assert (task.inputs == expected_smiles).all() assert (task.targets == expected_targets).all() + assert (task.X == expected_smiles).all() + assert (task.y == expected_targets).all() def test_access_to_test_set(test_single_task_benchmark): @@ -47,3 +51,76 @@ def test_access_to_test_set(test_single_task_benchmark): # For the train set it should work assert all(isinstance(y, float) for x, y in train) assert all(isinstance(train[i][1], float) for i in range(len(train))) + + +def test_input_featurization(test_single_task_benchmark): + # Without a transformation, we expect a SMILES string + train, test = test_single_task_benchmark.get_train_test_split() + test_single_task_benchmark._n_splits_since_evaluate = 0 # Manually reset for sake of test + + x, y = train[0] + assert isinstance(x, str) + + x = test[0] + assert isinstance(x, str) + + train, test = test_single_task_benchmark.get_train_test_split(featurization_fn=dm.to_fp) + + # For all different flavours of accessing the data + # Make sure the input is now featurized + x, y = train[0] + assert isinstance(x, np.ndarray) + + x = test[0] + assert isinstance(x, np.ndarray) + + x, y = next(train) + assert isinstance(x, np.ndarray) + + x = next(test) + assert isinstance(x, np.ndarray) + + x = train.X[0] + assert isinstance(x, np.ndarray) + + x = test.X[0] + assert isinstance(x, np.ndarray) + + +@pytest.mark.parametrize("fmt", ["dict", "tuple"]) +def test_different_subset_formats_single_task(test_single_task_benchmark, fmt): + train, _ = test_single_task_benchmark.get_train_test_split(target_format=fmt) + assert isinstance(train.y, np.ndarray) + assert train.y.shape == (len(train),) + assert isinstance(train[0][1], float) + assert isinstance(next(train)[1], float) + + +def test_different_subset_formats_multi_task_dict(test_multi_task_benchmark): + train, _ = test_multi_task_benchmark.get_train_test_split(target_format="dict") + assert isinstance(train.y, dict) + assert all(c in test_multi_task_benchmark.target_cols for c in train.y) + assert all(isinstance(v, np.ndarray) and v.shape == (len(train),) for v in train.y.values()) + assert isinstance(train[0][1], dict) + assert isinstance(next(train)[1], dict) + + +def test_different_subset_formats_multi_task_tuple(test_multi_task_benchmark): + train, _ = test_multi_task_benchmark.get_train_test_split(target_format="tuple") + assert isinstance(train.y, np.ndarray) + assert train.y.shape == (len(train), len(train.target_cols)) + assert isinstance(train[0][1], tuple) + assert isinstance(next(train)[1], tuple) + + +def test_consistency_between_different_formats(test_multi_task_benchmark): + train_tup, _ = test_multi_task_benchmark.get_train_test_split(target_format="tuple") + train_dict, _ = test_multi_task_benchmark.get_train_test_split(target_format="dict") + + t = train_tup[0][1] + d = train_dict[0][1] + + assert len(d) == len(t) + for k, v in d.items(): + idx = test_multi_task_benchmark.target_cols.index(k) + assert t[idx] == v
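Reviewer note (not part of the diff): the updated `evaluate` docstring in `polaris/benchmark/_base.py` defines a nesting convention for predictions. A sketch of the accepted shapes with placeholder arrays; the `test_1`/`test_2` labels follow the fixtures above, `SOL` is the tutorial's target, and `LogD` is an illustrative second target:

```python
import numpy as np

# Single target, single test set: a plain array of predictions.
y_pred = np.zeros(10)

# Multiple targets: wrap the per-target arrays in a dict keyed by target label.
y_pred_multi_target = {"SOL": np.zeros(10), "LogD": np.zeros(10)}

# Multiple test sets: nest the per-target dicts under the test-set labels,
# matching the "test_1"/"test_2" fixtures in tests/conftest.py.
y_pred_multi_test = {
    "test_1": {"SOL": np.zeros(5), "LogD": np.zeros(5)},
    "test_2": {"SOL": np.zeros(5), "LogD": np.zeros(5)},
}

# Any of these shapes can then be scored against the benchmark:
# results = benchmark.evaluate(y_pred_multi_test)  # -> BenchmarkResults
```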