Finding the number of clusters example renamed

mcasl · Oct 23, 2018 · ade85d0 · ade85d0
1 parent e62daeb
commit ade85d0
Show file tree

Hide file tree

Showing 5 changed files with 103 additions and 6 deletions.
diff --git a/doc/conf.py b/doc/conf.py
@@ -93,9 +93,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = '0.1'
+version = '0.2'
 # The full version, including alpha/beta/rc tags.
-release = '0.1.0'
+release = '0.2.0'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

diff --git a/examples/example_Finding_Number_of_clusters.py b/examples/plot_Finding_Number_of_clusters.py
diff --git a/pipegraph/base.py b/pipegraph/base.py
@@ -207,7 +207,7 @@ def _fit_single(self, step_name):
         try:
             self._steps_dict[step_name].fit(**fit_inputs)
         except ValueError:
-            print("ERROR: _fit.fit call ValueError!")
+            print("ERROR: step.fit call ValueError!")
 
         predict_inputs = self._fetch_signature_values(graph_data=self._fit_data,
                                                       step_name=step_name,
@@ -389,9 +389,14 @@ def score(self, X, y=None, sample_weight=None):
         final_step_name, final_step = self.steps[-1]
 
         predict_inputs = self._fetch_signature_values(graph_data=self._predict_data,
-                                                      step_name=final_step_name,
-                                                      method='predict')
+                                                  step_name=final_step_name,
+                                                  method='predict')
         Xt = predict_inputs['X']
+
+        if y is None:
+            node_and_outer_variable_tuple = self.predict_connections[final_step_name]['y']
+            y = self._predict_data.get(node_and_outer_variable_tuple, None)
+
         return final_step.score(Xt, y, **score_params)
 
     @property
@@ -426,6 +431,7 @@ def _fetch_signature_values(self, graph_data, step_name, method):
             step_name:
 
         Returns:
+        :rtype: dict
 
         """
         connections = self.fit_connections if graph_data is self._fit_data else self.predict_connections

diff --git a/pipegraph/tests/test_examples.py b/pipegraph/tests/test_examples.py
@@ -0,0 +1,92 @@
+# -*- coding: utf-8 -*-
+# The MIT License (MIT)
+#
+# Copyright (c) 2018 Laura Fernandez Robles,
+#                    Hector Alaiz Moreton,
+#                    Jaime Cifuentes-Rodriguez,
+#                    Javier Alfonso-Cendón,
+#                    Camino Fernández-Llamas,
+#                    Manuel Castejón-Limas
+#
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+import logging
+import unittest
+
+import numpy as np
+import pandas as pd
+from pandas.util.testing import assert_frame_equal
+from sklearn import datasets
+from sklearn.cluster import KMeans
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+
+from sklearn.model_selection import GridSearchCV
+
+from pipegraph.base import (PipeGraph,
+                            )
+
+
+from pipegraph.demo_blocks import (CustomCombination,
+                                   )
+
+logging.basicConfig(level=logging.NOTSET)
+logger = logging.getLogger(__name__)
+
+
+class TestExampleKmeansLDA(unittest.TestCase):
+    """Score a KMeans -> LDA PipeGraph without passing ``y`` explicitly."""
+
+    def setUp(self):
+        """Create seeded (reproducible) blob data and the two-step graph."""
+        self.X, self.y = datasets.make_blobs(n_samples=10000, n_features=5,
+                                             centers=10, random_state=0)
+        steps = [('clustering', KMeans(n_clusters=10, random_state=0)),
+                 ('classification', LinearDiscriminantAnalysis())]
+
+        pgraph = PipeGraph(steps=steps)
+        pgraph.inject(sink='clustering', sink_var='X', source='_External', source_var='X')
+        pgraph.inject(sink='classification', sink_var='X', source='_External', source_var='X')
+        # The classifier's y is wired to the clustering step's 'predict' output.
+        pgraph.inject(sink='classification', sink_var='y', source='clustering', source_var='predict')
+        self.pgraph = pgraph
+
+    def test_kmeans_plus_lda(self):
+        """score(X, y=None) equals the classifier scored on the stored cluster output."""
+        pgraph, X = self.pgraph, self.X
+        pgraph.fit(X)
+        result = pgraph.score(X, y=None)
+        labels = pgraph._predict_data[('clustering', 'predict')]
+        expected = pgraph.named_steps['classification'].score(X, labels)
+        self.assertEqual(result, expected)
+
+    def test_gridsearch(self):
+        """The same equality holds for the refitted best estimator of a grid search."""
+        pgraph, X = self.pgraph, self.X
+        gs = GridSearchCV(pgraph, param_grid=dict(clustering__n_clusters=[2, 30]), cv=5, refit=True)
+        gs.fit(X)
+        result = gs.score(X, y=None)
+        model = gs.best_estimator_
+        labels = model._predict_data[('clustering', 'predict')]
+        expected = model.named_steps['classification'].score(X, labels)
+        self.assertEqual(result, expected)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/setup.py b/setup.py
@@ -93,7 +93,6 @@
 
         # Specify the Python versions you support here. In particular, ensure
         # that you indicate whether you support Python 2, Python 3 or both.
-        'Programming Language :: Python :: 3.5',
         'Programming Language :: Python :: 3.6',
     ],