Skip to content

Commit

Permalink
Finding the number of clusters example renamed
Browse files Browse the repository at this point in the history
  • Loading branch information
mcasl committed Oct 23, 2018
1 parent e62daeb commit ade85d0
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 6 deletions.
4 changes: 2 additions & 2 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@
# built documents.
#
# The short X.Y version.
version = '0.1'
version = '0.2'
# The full version, including alpha/beta/rc tags.
release = '0.1.0'
release = '0.2.0'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
File renamed without changes.
12 changes: 9 additions & 3 deletions pipegraph/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def _fit_single(self, step_name):
try:
self._steps_dict[step_name].fit(**fit_inputs)
except ValueError:
print("ERROR: _fit.fit call ValueError!")
print("ERROR: step.fit call ValueError!")

predict_inputs = self._fetch_signature_values(graph_data=self._fit_data,
step_name=step_name,
Expand Down Expand Up @@ -389,9 +389,14 @@ def score(self, X, y=None, sample_weight=None):
final_step_name, final_step = self.steps[-1]

predict_inputs = self._fetch_signature_values(graph_data=self._predict_data,
step_name=final_step_name,
method='predict')
step_name=final_step_name,
method='predict')
Xt = predict_inputs['X']

if y is None:
node_and_outer_variable_tuple = self.predict_connections[final_step_name]['y']
y = self._predict_data.get(node_and_outer_variable_tuple, None)

return final_step.score(Xt, y, **score_params)

@property
Expand Down Expand Up @@ -426,6 +431,7 @@ def _fetch_signature_values(self, graph_data, step_name, method):
step_name:
Returns:
:rtype: dict
"""
connections = self.fit_connections if graph_data is self._fit_data else self.predict_connections
Expand Down
92 changes: 92 additions & 0 deletions pipegraph/tests/test_examples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# -*- coding: utf-8 -*-
# The MIT License (MIT)
#
# Copyright (c) 2018 Laura Fernandez Robles,
# Hector Alaiz Moreton,
# Jaime Cifuentes-Rodriguez,
# Javier Alfonso-Cendón,
# Camino Fernández-Llamas,
# Manuel Castejón-Limas
#
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import logging
import unittest

import numpy as np
import pandas as pd
from pandas.util.testing import assert_frame_equal
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

from sklearn.model_selection import GridSearchCV

from pipegraph.base import (PipeGraph,
)


from pipegraph.demo_blocks import (CustomCombination,
)

# Configure the root logger with level NOTSET so that records are not
# filtered out by level while the tests run.
logging.basicConfig(level=logging.NOTSET)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)


class TestExampleKmeansLDA(unittest.TestCase):
    """Score a PipeGraph in which KMeans labels feed an LDA classifier.

    The graph has two steps: ``clustering`` (KMeans) and ``classification``
    (LinearDiscriminantAnalysis). Both receive the external ``X``; the
    classifier's ``y`` is wired to the ``predict`` output of the clustering
    step, so the graph is fitted and scored without an external ``y``.
    """

    def setUp(self):
        """Create the blob dataset and the clustering -> classification graph."""
        # Fixed seeds make the fixture reproducible from run to run.
        X, y = datasets.make_blobs(n_samples=10000, n_features=5, centers=10,
                                   random_state=0)
        self.X, self.y = X, y
        clustering = KMeans(n_clusters=10, random_state=0)
        classification = LinearDiscriminantAnalysis()

        steps = [('clustering', clustering),
                 ('classification', classification),
                 ]

        pgraph = PipeGraph(steps=steps)
        pgraph.inject(sink='clustering', sink_var='X', source='_External', source_var='X')
        pgraph.inject(sink='classification', sink_var='X', source='_External', source_var='X')
        pgraph.inject(sink='classification', sink_var='y', source='clustering', source_var='predict')
        self.pgraph = pgraph

    @staticmethod
    def _classification_score(model, X):
        """Score the classification step of ``model`` directly.

        The reference ``y`` is whatever the graph stored under the
        ``('clustering', 'predict')`` key of ``model._predict_data``.
        """
        cluster_labels = model._predict_data[('clustering', 'predict')]
        return model.named_steps['classification'].score(X, cluster_labels)

    def test_kmeans_plus_lda(self):
        """``PipeGraph.score`` with ``y=None`` equals scoring the final step directly."""
        pgraph, X = self.pgraph, self.X
        pgraph.fit(X)
        result = pgraph.score(X, y=None)
        expected = self._classification_score(pgraph, X)
        self.assertEqual(result, expected)

    def test_gridsearch(self):
        """``GridSearchCV.score`` matches the best estimator's final-step score."""
        pgraph, X = self.pgraph, self.X
        gs = GridSearchCV(pgraph, param_grid=dict(clustering__n_clusters=[2, 30]), cv=5, refit=True)
        gs.fit(X)
        result = gs.score(X, y=None)
        model = gs.best_estimator_
        expected = self._classification_score(model, X)
        self.assertEqual(result, expected)


# Run the test suite when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@

# Specify the Python versions you support here. In particular, ensure
# that you indicate whether you support Python 2, Python 3 or both.
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
],

Expand Down

0 comments on commit ade85d0

Please sign in to comment.