In [1]:
from julearn import run_cross_validation
from seaborn import load_dataset
from sklearn.model_selection import KFold, LeaveOneOut
# Fetch seaborn's "iris" example dataset; it is the input frame for every
# run_cross_validation call further down.
df_iris = load_dataset(name='iris')
# Preview the first five rows as the cell output.
df_iris.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [2]:
# Multi-class problem: predict `species` from three measurements while
# passing `petal_width` as a confound.
run_cross_validation(
    data=df_iris,
    X=['sepal_length', 'sepal_width', 'petal_length'],
    y='species',
    confounds=['petal_width'],
    model='svm',
    problem_type='multiclass_classification',
)

array([0.96666667, 0.93333333, 0.96666667, 0.96666667, 1.        ,
       0.93333333, 0.96666667, 1.        , 0.96666667, 0.93333333,
       0.93333333, 1.        , 0.9       , 1.        , 0.96666667,
       0.93333333, 1.        , 0.96666667, 0.96666667, 0.96666667,
       0.96666667, 0.96666667, 0.93333333, 1.        , 0.93333333])

In [3]:
# Binary problem: only the first 100 rows of the frame are used
# (presumably leaving two species -- verify against the dataset ordering).
run_cross_validation(
    data=df_iris.iloc[:100, :],
    X=['sepal_length', 'petal_length'],
    y='species',
    confounds=['petal_width', 'sepal_width'],
    model='svm',
    problem_type='binary_classification',  # specified here
)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1.])

# Adding transformers to the Pipeline:
* Transform X/features
    * Add a list of valid transformer names or Transformers to the preprocess_X argument

* Transform y/target
    * Currently, only a single transformer or valid transformer name is supported
    * Set `preprocess_y` to that transformer or transformer name

* Transform confounds
    * Same as for features. However, make sure to only use transformers that return the same features
      instead of creating new ones (e.g. components).

In [4]:
# Add preprocessing steps: PCA on the features and z-scoring on the confounds.
# `df_iris` is passed unsliced here, so every species in the frame is a
# target class; the problem type is therefore multiclass, not binary.
run_cross_validation(
    X=['sepal_length', 'petal_length'],
    y='species',
    confounds=['petal_width', 'sepal_width'],
    data=df_iris,
    model='svm',
    preprocess_X=['pca'],
    preprocess_confounds=['zscore'],
    problem_type='multiclass_classification',
)

array([1.        , 0.93333333, 0.9       , 0.93333333, 0.96666667,
       0.93333333, 1.        , 0.96666667, 0.96666667, 0.9       ,
       0.93333333, 0.96666667, 0.93333333, 0.9       , 1.        ,
       0.96666667, 0.96666667, 0.93333333, 0.96666667, 0.96666667,
       1.        , 0.9       , 0.9       , 0.93333333, 1.        ])

* If you want to use your own hyperparameters, you can specify them as follows:
    - For the model, use `model_name__parameter` (e.g. `svm__C`), as in sklearn
    - For transformers, additionally prefix the name with `features__`, `confounds__` or `target__` for
    the feature transformers, confound transformers or target transformer, respectively
    (e.g. `features__zscore__with_mean`)

In [5]:
# Candidate values per hyperparameter. Keys are `<step>__<parameter>` for the
# model and `<features|confounds>__<step>__<parameter>` for transformers.
hyper_parameters = {
    'svm__C': [0.1, 0.5],
    'features__zscore__with_mean': [False],
    'confounds__zscore__with_mean': [True, False],
}

# Same multi-class setup as before, now with the hyperparameter candidates
# handed to run_cross_validation.
run_cross_validation(
    data=df_iris,
    X=['sepal_length', 'sepal_width', 'petal_length'],
    y='species',
    confounds=['petal_width'],
    model='svm',
    problem_type='multiclass_classification',
    hyperparameters=hyper_parameters,
)

array([0.9       , 0.96666667, 0.96666667, 0.96666667, 0.93333333,
       0.93333333, 0.96666667, 0.96666667, 0.93333333, 0.93333333,
       0.9       , 0.96666667, 0.93333333, 0.93333333, 1.        ,
       0.86666667, 1.        , 1.        , 0.96666667, 0.93333333,
       0.93333333, 0.96666667, 1.        , 0.93333333, 0.93333333])

# Using different CV methods

* By default, the inner and outer CV use repeated KFold CV
    * Provide a string to change n_repeats and n_folds like this:
        'repeat:5_nfolds:5' for 5 repeats and 5 folds  
        'repeat:20_nfolds:10' would mean 20 repeats and 10 folds 
        You can use 'same' for the inner CV to let it be the same kind as the outer CV
* You can also provide any valid sklearn splitting method:
    * This includes just providing an int for the number of folds
    * Or providing splitter objects like sklearn.model_selection.LeaveOneOut()

In [6]:
# Outer CV given as a string (2 repeats, 3 folds -> 6 scores); the inner
# model-selection CV is set to 'same' so it mirrors the outer one.
run_cross_validation(
    data=df_iris,
    X=['sepal_length', 'sepal_width', 'petal_length'],
    y='species',
    confounds=['petal_width'],
    model='svm',
    preprocess_X=['remove_confound', 'zscore'],
    problem_type='multiclass_classification',
    cv_evaluation='repeats:2_nfolds:3',
    cv_model_selection='same',
)

array([0.72, 0.66, 0.56, 0.72, 0.54, 0.56])

In [7]:
# Pass sklearn splitter objects directly: leave-one-out for the outer
# evaluation (one score per held-out sample) and a 2-fold KFold for the inner
# model selection. LeaveOneOut and KFold are imported in the notebook's first
# cell together with the other imports.
run_cross_validation(
    data=df_iris,
    X=['sepal_length', 'sepal_width', 'petal_length'],
    y='species',
    confounds=['petal_width'],
    model='svm',
    preprocess_X=['remove_confound', 'zscore'],
    problem_type='multiclass_classification',
    cv_evaluation=LeaveOneOut(),
    cv_model_selection=KFold(n_splits=2),
)

array([1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 1., 1., 0., 1., 1., 1., 1.,
       1., 1., 1., 0., 1., 0., 1., 0., 0., 1., 1., 1., 1., 0., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 0., 1., 0., 1., 1., 1., 1., 1., 1., 0.,
       0., 1., 1., 1., 1., 0., 1., 1., 0., 1., 0., 1., 1., 0., 1., 0., 1.,
       1., 1., 0., 0., 1., 0., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 0.,
       0., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 0.,
       0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 1., 1., 1., 0., 1., 0.,
       0., 0., 1., 1., 0., 0., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0.,
       1., 0., 0., 0., 1., 1., 0., 0., 1., 1., 0., 0., 1., 0.])