plot_digits_pipe: unroll GridSearchCV into explicit per-parameter pipeline nodes

yuanchi2807 · yuanchi2807 · commit f0b6dd74849b · 2021-06-01T09:26:48.000-04:00
diff --git a/notebooks/plot_digits_pipe.ipynb b/notebooks/plot_digits_pipe.ipynb
@@ -153,27 +153,68 @@
     "from sklearn.pipeline import Pipeline\n",
     "from sklearn.model_selection import GridSearchCV\n",
     "\n",
-    "\n",
     "# Define a pipeline to search for the best combination of PCA truncation\n",
     "# and classifier regularization.\n",
-    "pca = PCA()\n",
+    "pca_5 = PCA(n_components=5)\n",
+    "pca_15 = PCA(n_components=15)\n",
+    "pca_30 = PCA(n_components=30)\n",
+    "pca_45 = PCA(n_components=45)\n",
+    "pca_64 = PCA(n_components=64)\n",
+    "\n",
     "# set the tolerance to a large value to make the example faster\n",
-    "logistic = LogisticRegression(max_iter=10000, tol=0.1)\n",
+    "logistic_1 = LogisticRegression(max_iter=10000, tol=0.1, C=1.00000000e-04)\n",
+    "logistic_2 = LogisticRegression(max_iter=10000, tol=0.1, C=4.64158883e-02)\n",
+    "logistic_3 = LogisticRegression(max_iter=10000, tol=0.1, C=2.15443469e+01)\n",
+    "logistic_4 = LogisticRegression(max_iter=10000, tol=0.1, C=1.00000000e+04)\n",
     "\n",
     "## initialize codeflare pipeline by first creating the nodes\n",
     "pipeline = dm.Pipeline()\n",
-    "node_pca = dm.EstimatorNode('pca', pca)\n",
-    "node_logistic = dm.EstimatorNode('logistic', logistic)\n",
+    "node_pca_5 = dm.EstimatorNode('pca_5', pca_5)\n",
+    "node_pca_15 = dm.EstimatorNode('pca_15', pca_15)\n",
+    "node_pca_30 = dm.EstimatorNode('pca_30', pca_30)\n",
+    "node_pca_45 = dm.EstimatorNode('pca_45', pca_45)\n",
+    "node_pca_64 = dm.EstimatorNode('pca_64', pca_64)\n",
+    "\n",
+    "node_logistic_1 = dm.EstimatorNode('logistic_1', logistic_1)\n",
+    "node_logistic_2 = dm.EstimatorNode('logistic_2', logistic_2)\n",
+    "node_logistic_3 = dm.EstimatorNode('logistic_3', logistic_3)\n",
+    "node_logistic_4 = dm.EstimatorNode('logistic_4', logistic_4)\n",
     "\n",
     "## codeflare nodes are then connected by edges\n",
-    "pipeline.add_edge(node_pca, node_logistic)\n",
+    "pipeline.add_edge(node_pca_5, node_logistic_1)\n",
+    "pipeline.add_edge(node_pca_15, node_logistic_1)\n",
+    "pipeline.add_edge(node_pca_30, node_logistic_1)\n",
+    "pipeline.add_edge(node_pca_45, node_logistic_1)\n",
+    "pipeline.add_edge(node_pca_64, node_logistic_1)\n",
+    "\n",
+    "pipeline.add_edge(node_pca_5, node_logistic_2)\n",
+    "pipeline.add_edge(node_pca_15, node_logistic_2)\n",
+    "pipeline.add_edge(node_pca_30, node_logistic_2)\n",
+    "pipeline.add_edge(node_pca_45, node_logistic_2)\n",
+    "pipeline.add_edge(node_pca_64, node_logistic_2)\n",
+    "\n",
+    "pipeline.add_edge(node_pca_5, node_logistic_3)\n",
+    "pipeline.add_edge(node_pca_15, node_logistic_3)\n",
+    "pipeline.add_edge(node_pca_30, node_logistic_3)\n",
+    "pipeline.add_edge(node_pca_45, node_logistic_3)\n",
+    "pipeline.add_edge(node_pca_64, node_logistic_3)\n",
+    "\n",
+    "pipeline.add_edge(node_pca_5, node_logistic_4)\n",
+    "pipeline.add_edge(node_pca_15, node_logistic_4)\n",
+    "pipeline.add_edge(node_pca_30, node_logistic_4)\n",
+    "pipeline.add_edge(node_pca_45, node_logistic_4)\n",
+    "pipeline.add_edge(node_pca_64, node_logistic_4)\n",
     "\n",
     "X_digits, y_digits = datasets.load_digits(return_X_y=True)\n",
     "\n",
     "# execute FIT\n",
     "pipeline_input = dm.PipelineInput()\n",
     "xy = dm.Xy(X_digits, y_digits)\n",
-    "pipeline_input.add_xy_arg(node_pca, xy)\n",
+    "pipeline_input.add_xy_arg(node_pca_5, xy)\n",
+    "pipeline_input.add_xy_arg(node_pca_15, xy)\n",
+    "pipeline_input.add_xy_arg(node_pca_30, xy)\n",
+    "pipeline_input.add_xy_arg(node_pca_45, xy)\n",
+    "pipeline_input.add_xy_arg(node_pca_64, xy)\n",
     "\n",
     "# Parameters of pipelines can be set using ‘__’ separated parameter names:\n",
     "param_grid = {\n",
@@ -215,6 +256,33 @@
     "plt.tight_layout()\n",
     "plt.show()"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([1.00000000e-04, 4.64158883e-02, 2.15443469e+01, 1.00000000e+04])"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "np.logspace(-4, 4, 4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {