In [None]:
# Ask IPython to render matplotlib figures inline in the notebook output.
# NOTE(review): none of the visible cells draws a figure -- confirm this is still needed.
%matplotlib inline


# Pipeline ANOVA SVM

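This example shows how feature selection can be easily integrated into
a machine learning pipeline: an ANOVA F-test filter (`SelectKBest`) keeps the
most informative features before a linear SVM is fitted on them.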

We also show how to inspect individual steps of the fitted pipeline.


In [9]:
# Reference implementation: ANOVA feature selection followed by a linear SVM,
# chained with a plain scikit-learn pipeline.
from sklearn import set_config
from sklearn.datasets import make_classification
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC

# Ask scikit-learn to display estimators as diagrams in the notebook.
set_config(display='diagram')

# Synthetic binary problem: 20 features, of which only 3 are informative.
X, y = make_classification(
    n_features=20, n_informative=3, n_redundant=0, n_classes=2,
    n_clusters_per_class=2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Keep the 3 features with the highest ANOVA F-score, then fit a linear SVM
# on the reduced feature set.
anova_filter = SelectKBest(f_classif, k=3)
clf = LinearSVC()
anova_svm = make_pipeline(anova_filter, clf)
anova_svm.fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred = anova_svm.predict(X_test)
print(classification_report(y_test, y_pred))

# Introspection, part 1: coefficients of the last step (the classifier), one
# per selected feature. Printed explicitly because a bare expression in the
# middle of a cell is evaluated and then silently discarded.
print(anova_svm[-1].coef_)

# Introspection, part 2: map those coefficients back to the original
# 20-feature space; features dropped by the filter get a coefficient of 0.
anova_svm[:-1].inverse_transform(anova_svm[-1].coef_)


Automatically created module for IPython interactive environment
              precision    recall  f1-score   support

           0       0.92      0.80      0.86        15
           1       0.75      0.90      0.82        10

    accuracy                           0.84        25
   macro avg       0.84      0.85      0.84        25
weighted avg       0.85      0.84      0.84        25



array([[0.        , 0.        , 0.75791043, 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.27158921,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.26109702]])

In [11]:
# Same ANOVA + linear SVM workflow as the plain scikit-learn cell, but expressed
# as a CodeFlare pipeline graph and executed on Ray.
#
# NOTE(review): the recorded output of this cell ends in
# RayTaskError(ValueError) raised inside execute_or_node_remote
# ("'object_refs' must either be an object ref or a list of object refs").
# Which call below triggers it cannot be determined from the notebook alone --
# confirm against the installed codeflare Runtime before relying on this cell.
import ray
import codeflare.pipelines.Datamodel as dm
import codeflare.pipelines.Runtime as rt
from codeflare.pipelines.Datamodel import Xy
from codeflare.pipelines.Datamodel import XYRef
from codeflare.pipelines.Runtime import ExecutionType

# Stop any Ray instance this kernel is already attached to, then start a new
# one, so the cell can be re-run without a "Ray already initialized" state.
ray.shutdown()
ray.init()

from sklearn import set_config
set_config(display='diagram')
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Same synthetic data and split (same seeds) as the scikit-learn cell.
X, y = make_classification(
    n_features=20, n_informative=3, n_redundant=0, n_classes=2,
    n_clusters_per_class=2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC

anova_filter = SelectKBest(f_classif, k=3)
clf = LinearSVC()

# Build the pipeline graph: two estimator nodes joined by a single edge,
# anova_filter -> clf.
pipeline = dm.Pipeline()
node_anova_filter = dm.EstimatorNode('anova_filter', anova_filter)
node_clf = dm.EstimatorNode('clf', clf)
pipeline.add_edge(node_anova_filter, node_clf)

# Attach the training data to the first node of the graph.
pipeline_input = dm.PipelineInput()
xy = dm.Xy(X_train, y_train)

pipeline_input.add_xy_arg(node_anova_filter, xy)

# Run the graph in FIT mode.
pipeline_output = rt.execute_pipeline(pipeline, ExecutionType.FIT, pipeline_input)

# Outputs recorded for the classifier node; element 0 is used throughout.
node_clf_output = pipeline_output.get_xyrefs(node_clf)

# Resolve the X / y references of that output. Xout and yout are not used
# again in this cell.
Xout = ray.get(node_clf_output[0].get_Xref())
yout = ray.get(node_clf_output[0].get_yref())

# presumably returns the fitted pipeline that produced this output -- TODO confirm
selected_pipeline = rt.select_pipeline(pipeline_output, node_clf_output[0])

# Fresh input object carrying the held-out data (rebinds pipeline_input).
pipeline_input = dm.PipelineInput()
pipeline_input.add_xy_arg(node_anova_filter, dm.Xy(X_test, y_test))

# Run the selected pipeline in PREDICT mode on the test split.
predict_output = rt.execute_pipeline(selected_pipeline, ExecutionType.PREDICT, pipeline_input)

predict_clf_output = predict_output.get_xyrefs(node_clf)
# NOTE(review): assumes the predictions are exposed through the y reference
# of the classifier output -- confirm against codeflare's PREDICT semantics.
y_pred = ray.get(predict_clf_output[0].get_yref())

from sklearn.metrics import classification_report

# Same evaluation as the scikit-learn cell, on the CodeFlare predictions.
print(classification_report(y_test, y_pred))

# TODO: the introspection steps of the scikit-learn cell (classifier coef_ and

# inverse_transform through the filter) have no CodeFlare counterpart here yet.


2021-06-02 09:20:27,751	INFO services.py:1267 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8266[39m[22m


RayTaskError(ValueError): [36mray::execute_or_node_remote()[39m (pid=29747, ip=192.168.1.5)
  File "python/ray/_raylet.pyx", line 505, in ray._raylet.execute_task
  File "/opt/anaconda3/lib/python3.8/site-packages/codeflare_pipelines-1.0.0-py3.8.egg/codeflare/pipelines/Runtime.py", line 23, in execute_or_node_remote
  File "/opt/anaconda3/lib/python3.8/site-packages/ray/_private/client_mode_hook.py", line 47, in wrapper
    return func(*args, **kwargs)
ValueError: 'object_refs' must either be an object ref or a list of object refs.