Skip to content

Commit 22747f8

Browse files
Kun-Lung Wu authored and GitHub Enterprise committed
Merge pull request #58 from codeflare/tests-rework
All unit tests passed
2 parents 0cc3494 + cf94b0a commit 22747f8

File tree

4 files changed

+121
-21
lines changed

4 files changed

+121
-21
lines changed

codeflare/pipelines/tests/test_Datamodel.py

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,32 +7,29 @@
77
from sklearn.pipeline import Pipeline
88
from sklearn.preprocessing import StandardScaler, MinMaxScaler
99
from sklearn.tree import DecisionTreeClassifier
10+
import sklearn.base as base
1011
import codeflare.pipelines.Datamodel as dm
1112
import codeflare.pipelines.Runtime as rt
1213
from codeflare.pipelines.Datamodel import Xy
1314
from codeflare.pipelines.Runtime import ExecutionType
1415

15-
1616
class FeatureUnion(dm.AndEstimator):
1717
def __init__(self):
1818
pass
19-
20-
def fit_transform(self, xy_list: list):
21-
return self.transform(xy_list)
22-
2319
def get_estimator_type(self):
2420
return 'transform'
25-
21+
def clone(self):
22+
return base.clone(self)
23+
def fit_transform(self, xy_list):
24+
return self.transform(xy_list)
2625
def transform(self, xy_list):
2726
X_list = []
28-
y_list = []
29-
27+
y_vec = None
3028
for xy in xy_list:
3129
X_list.append(xy.get_x())
32-
X_concat = np.concatenate(X_list, axis=0)
33-
34-
return Xy(X_concat, None)
35-
30+
y_vec = xy.get_y()
31+
X_concat = np.concatenate(X_list, axis=1)
32+
return Xy(X_concat, y_vec)
3633

3734
class MultibranchTestCase(unittest.TestCase):
3835

codeflare/pipelines/tests/test_and.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,26 +2,30 @@
22
import ray
33
import pandas as pd
44
import numpy as np
5+
import sklearn.base as base
56
from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler, RobustScaler
67
import codeflare.pipelines.Datamodel as dm
78
import codeflare.pipelines.Runtime as rt
89
from codeflare.pipelines.Datamodel import Xy
910
from codeflare.pipelines.Datamodel import XYRef
1011
from codeflare.pipelines.Runtime import ExecutionType
1112

12-
class FeatureUnion(dm.AndTransform):
13+
class FeatureUnion(dm.AndEstimator):
1314
def __init__(self):
1415
pass
15-
16+
def get_estimator_type(self):
17+
return 'transform'
18+
def clone(self):
19+
return base.clone(self)
20+
def fit_transform(self, xy_list):
21+
return self.transform(xy_list)
1622
def transform(self, xy_list):
1723
X_list = []
1824
y_vec = None
19-
2025
for xy in xy_list:
2126
X_list.append(xy.get_x())
2227
y_vec = xy.get_y()
2328
X_concat = np.concatenate(X_list, axis=1)
24-
2529
return Xy(X_concat, y_vec)
2630

2731
def test_two_tier_and():

codeflare/pipelines/tests/test_multibranch.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,26 +8,30 @@
88
from sklearn.preprocessing import StandardScaler, MinMaxScaler
99
from sklearn.tree import DecisionTreeClassifier
1010
from sklearn.linear_model import LogisticRegression
11+
import sklearn.base as base
1112
import codeflare.pipelines.Datamodel as dm
1213
import codeflare.pipelines.Runtime as rt
1314
from codeflare.pipelines.Datamodel import Xy
1415
from codeflare.pipelines.Datamodel import XYRef
1516
from codeflare.pipelines.Runtime import ExecutionType
1617

17-
class FeatureUnion(dm.AndTransform):
18+
class FeatureUnion(dm.AndEstimator):
1819
def __init__(self):
1920
pass
20-
21+
def get_estimator_type(self):
22+
return 'transform'
23+
def clone(self):
24+
return base.clone(self)
25+
def fit_transform(self, xy_list):
26+
return self.transform(xy_list)
2127
def transform(self, xy_list):
2228
X_list = []
2329
y_vec = None
24-
2530
for xy in xy_list:
2631
X_list.append(xy.get_x())
2732
y_vec = xy.get_y()
2833
X_concat = np.concatenate(X_list, axis=1)
29-
30-
return Xy(X_concat, y_vec.values.ravel())
34+
return Xy(X_concat, y_vec)
3135

3236
def test_multibranch_1():
3337

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
import pytest
2+
import ray
3+
4+
# Taking an example from sklearn pipeline to assert that
5+
# the classification report from a prediction from sklearn pipeline is
6+
# the same as that from the converted codeflare pipeline
7+
8+
from sklearn import set_config
9+
set_config(display='diagram')
10+
from sklearn.datasets import make_classification
11+
from sklearn.model_selection import train_test_split
12+
from sklearn.feature_selection import SelectKBest, f_classif
13+
from sklearn.pipeline import make_pipeline
14+
from sklearn.svm import LinearSVC
15+
from sklearn.metrics import classification_report
16+
17+
import codeflare.pipelines.Datamodel as dm
18+
import codeflare.pipelines.Runtime as rt
19+
from codeflare.pipelines.Datamodel import Xy
20+
from codeflare.pipelines.Datamodel import XYRef
21+
from codeflare.pipelines.Runtime import ExecutionType
22+
23+
#
24+
# prediction from an sklearn pipeline
25+
#
26+
27+
def test_pipeline_predict():
28+
29+
ray.shutdown()
30+
ray.init()
31+
32+
#
33+
# prediction from an sklearn pipeline
34+
#
35+
X, y = make_classification(
36+
n_features=20, n_informative=3, n_redundant=0, n_classes=2,
37+
n_clusters_per_class=2, random_state=42)
38+
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
39+
40+
anova_filter = SelectKBest(f_classif, k=3)
41+
clf = LinearSVC()
42+
43+
anova_svm = make_pipeline(anova_filter, clf)
44+
anova_svm.fit(X_train, y_train)
45+
46+
y_pred = anova_svm.predict(X_test)
47+
48+
report_sklearn = classification_report(y_test, y_pred)
49+
print(report_sklearn)
50+
51+
#
52+
# constructing a codeflare pipeline
53+
#
54+
pipeline = dm.Pipeline()
55+
node_anova_filter = dm.EstimatorNode('anova_filter', anova_filter)
56+
node_clf = dm.EstimatorNode('clf', clf)
57+
pipeline.add_edge(node_anova_filter, node_clf)
58+
59+
pipeline_input = dm.PipelineInput()
60+
xy = dm.Xy(X_train, y_train)
61+
62+
pipeline_input.add_xy_arg(node_anova_filter, xy)
63+
64+
pipeline_output = rt.execute_pipeline(pipeline, ExecutionType.FIT, pipeline_input)
65+
66+
node_clf_output = pipeline_output.get_xyrefs(node_clf)
67+
68+
Xout = ray.get(node_clf_output[0].get_Xref())
69+
yout = ray.get(node_clf_output[0].get_yref())
70+
71+
selected_pipeline = rt.select_pipeline(pipeline_output, node_clf_output[0])
72+
73+
pipeline_input = dm.PipelineInput()
74+
pipeline_input.add_xy_arg(node_anova_filter, dm.Xy(X_test, y_test))
75+
76+
predict_output = rt.execute_pipeline(selected_pipeline, ExecutionType.PREDICT, pipeline_input)
77+
78+
predict_clf_output = predict_output.get_xyrefs(node_clf)
79+
80+
#y_pred = ray.get(predict_clf_output[0].get_yref())
81+
y_pred = ray.get(predict_clf_output[0].get_Xref())
82+
83+
84+
report_codeflare = classification_report(y_test, y_pred)
85+
86+
print(report_codeflare)
87+
88+
assert(report_sklearn == report_codeflare)
89+
90+
ray.shutdown()
91+
92+
93+
if __name__ == "__main__":
94+
sys.exit(pytest.main(["-v", __file__]))
95+

0 commit comments

Comments
 (0)