## 1. Defining Pipeline Steps

In [3]:
from sklearn.feature_selection import VarianceThreshold


# Feature-selection options as (name, transformer) pairs. The name is the
# label shown in the "Selector" column of the plan table.
selectors = [
    ('var_threshold', VarianceThreshold()),
]

In [4]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler


# Scaling options as (name, transformer) pairs. Each name is the label
# shown in the "Scaler" column of the plan table.
scalers = [
    ('minmax', MinMaxScaler()),
    ('standard', StandardScaler()),
]

In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC


# Candidate classifiers as (name, estimator) pairs. The names double as the
# keys of `param_grid` and as the labels in the "Classifier" plan column.
classifiers = [
    # The solver is pinned to liblinear because the grid search tries both
    # 'l1' and 'l2' penalties; scikit-learn's current default solver (lbfgs)
    # rejects 'l1', whereas liblinear supports both.
    ('LR', LogisticRegression(solver='liblinear')),
    ('SVC', SVC()),
]

In [6]:
from sklearn.model_selection import StratifiedKFold


# Two 5-fold stratified, shuffled splitters that differ only in their seed:
# seed 0 is passed to Pipeliner as grid_cv, seed 1 as eval_cv.
# NOTE(review): the recorded results table lists three eval scores per row,
# which does not match n_splits=5 here -- the saved outputs may predate this
# setting; re-run the notebook to confirm.
grid_cv, eval_cv = (
    StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
    for seed in (0, 1)
)

In [7]:
from reskit.core import Pipeliner


# Pipeline stages in order, each given as (stage name, list of options).
# The stage names are the column headers of the plan table.
# NOTE(review): `datasets` is not defined in the cells shown here --
# presumably it comes from an earlier cell; confirm before running.
steps = [
    ('Data', datasets),
    ('Selector', selectors),
    ('Scaler', scalers),
    ('Classifier', classifiers),
]

## 2. Defining Grid Search Parameters

In [8]:


# Hyper-parameter grids keyed by classifier name ('LR' and 'SVC', matching
# the names used in `classifiers`); each value maps a parameter name to the
# list of candidate values to try.
param_grid = dict(
    LR=dict(penalty=['l1', 'l2']),
    SVC=dict(kernel=['linear', 'poly', 'rbf', 'sigmoid']),
)

# Build the Pipeliner from the stage definitions, the two CV splitters and
# the per-classifier parameter grids.
pipe = Pipeliner(
    steps,
    eval_cv=eval_cv,
    grid_cv=grid_cv,
    param_grid=param_grid,
)

# Show the plan: one row per combination of stage options.
pipe.plan_table

Unnamed: 0,Data,Selector,Scaler,Classifier
0,dataset1,var_threshold,minmax,LR
1,dataset1,var_threshold,minmax,SVC
2,dataset1,var_threshold,standard,LR
3,dataset1,var_threshold,standard,SVC
4,dataset2,var_threshold,minmax,LR
5,dataset2,var_threshold,minmax,SVC
6,dataset2,var_threshold,standard,LR
7,dataset2,var_threshold,standard,SVC


## 3. Launching Experiment

In [9]:
# Run every row of the plan and display the results table.
# NOTE(review): 'path/to/directory' looks like a placeholder -- point it at
# the real data location before running.
pipe.get_results(
    data='path/to/directory',
    caching_steps=['Data'],
    scoring=['roc_auc'],
)

Removed previous results file -- results.csv.
Line: 1/8
Line: 2/8
Line: 3/8
Line: 4/8
Line: 5/8
Line: 6/8
Line: 7/8
Line: 8/8


Unnamed: 0,Data,Selector,Scaler,Classifier,grid_roc_auc_mean,grid_roc_auc_std,grid_roc_auc_best_params,eval_roc_auc_mean,eval_roc_auc_std,eval_roc_auc_scores
0,dataset1,var_threshold,minmax,LR,0.850809,0.0518758,{'penalty': 'l2'},0.831009,0.109763,[ 0.77508651 0.73356401 0.984375 ]
1,dataset1,var_threshold,minmax,SVC,0.883676,0.0242608,{'kernel': 'linear'},0.853594,0.0623489,[ 0.816609 0.80276817 0.94140625]
2,dataset1,var_threshold,standard,LR,0.896176,0.0496912,{'penalty': 'l2'},0.889499,0.0419183,[ 0.87889273 0.84429066 0.9453125 ]
3,dataset1,var_threshold,standard,SVC,0.929191,0.0343201,{'kernel': 'rbf'},0.87231,0.0325624,[ 0.85467128 0.84429066 0.91796875]
4,dataset2,var_threshold,minmax,LR,0.873676,0.0397707,{'penalty': 'l2'},0.872644,0.0861623,[ 0.75432526 0.90657439 0.95703125]
5,dataset2,var_threshold,minmax,SVC,0.934853,0.0289309,{'kernel': 'linear'},0.919226,0.0441214,[ 0.8615917 0.92733564 0.96875 ]
6,dataset2,var_threshold,standard,LR,0.926176,0.0233515,{'penalty': 'l1'},0.939726,0.0387793,[ 0.89965398 0.92733564 0.9921875 ]
7,dataset2,var_threshold,standard,SVC,0.92875,0.0309362,{'kernel': 'poly'},0.902371,0.0640724,[ 0.82352941 0.90311419 0.98046875]


In [None]:
pipe.