# Testing notebook for scikit-BOLD code
This notebook offers some code to test the functionality of the scikit-BOLD repository.

In [1]:
import os
import pickle
import h5py
import glob
import numpy as np
import shutil
import pandas as pd
from os.path import join as opj
from sklearn.preprocessing import LabelEncoder
from transformers import ClusterThreshold
from data2mvpa import glm2mvpa, load_mvp_object

from sklearn.grid_search import GridSearchCV
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.cross_validation import StratifiedKFold
from sklearn.cross_validation import train_test_split
from sklearn.utils.estimator_checks import check_estimator

In [None]:
from transformers import ClusterThreshold
from transformers import AnovaCutoff
from transformers import AverageRegionTransformer

### Converting first-level GLM results to a scikit-learn-compatible format

In [None]:
# Collect every first-level FEAT directory (one level below each subject folder).
# NOTE(review): the absolute paths in this cell are machine-specific; adjust
# them before running elsewhere.
test_dirs = glob.glob('/home/lukas/DecodingEmotions/*/*.feat')

# First pass: delete any existing 'mvp_data' directory that sits next to a
# .feat directory (presumably stale output of an earlier glm2mvpa run -- confirm).
# This is deliberately a separate pass from the conversion below: several
# .feat directories can share one parent, so deleting inside the conversion
# loop could remove output that was just written.
for test_dir in test_dirs:
    mvp_data_dir = os.path.join(os.path.dirname(test_dir), 'mvp_data')
    if os.path.exists(mvp_data_dir):
        shutil.rmtree(mvp_data_dir)

# Mask image handed to glm2mvpa for every directory.
mask = '/home/lukas/Dropbox/PhD_projects/DynamicAffect_Multiscale/ROIs/GrayMatter.nii.gz'

# Second pass: convert each first-level directory with glm2mvpa.
for test_dir in test_dirs:
    glm2mvpa(test_dir, mask=mask, beta2tstat=False)

### Testing pipelines
First, we'll load in the data.

In [7]:
# Load the 'merged' mvp object from a single subject's mvp_data directory.
mvp_dir = '/home/lukas/DecodingEmotions/HWW_002/mvp_data'
mvp = load_mvp_object(mvp_dir, identifier='merged')

# Turn the class labels into integer codes and attach them to the object as y.
label_encoder = LabelEncoder()
mvp.y = label_encoder.fit_transform(mvp.class_labels)

# Hold out 10% of the samples for testing; the fixed random_state keeps the
# split identical across runs.
split = train_test_split(mvp.X, mvp.y, test_size=0.1, random_state=42)
X_train, X_test, y_train, y_test = split

#### Actual testing

**TODO:**
- Apply a `VarianceThreshold` step before `f_classif`, so that zero-variance features are removed before the F-test is computed.

In [None]:
# Two-stage pipeline: ClusterThreshold first, then a support vector classifier.
# The cutoff / min_cluster_size / C / kernel values set here are only starting
# values; each of them is also listed in the search grid below.
cluster = ClusterThreshold(
    mvp.mask_shape,
    mvp.mask_index,
    cutoff=1,
    min_cluster_size=20
)
clf = SVC(C=1, kernel='linear')
pipeline = Pipeline(steps=[('clustering', cluster), ('classifier', clf)])

# Search grid, keyed as '<step name>__<parameter>'.
# 7 cutoffs x 10 cluster sizes x 6 C values x 2 kernels = 840 combinations.
parameters = {
    'clustering__cutoff': [1, 1.25, 1.5, 1.75, 2, 2.25, 2.5],
    'clustering__min_cluster_size': list(range(10, 101, 10)),
    'classifier__C': [0.001, 0.01, 0.1, 1, 10, 100],
    'classifier__kernel': ['linear', 'rbf'],
}

# Exhaustive search over the grid on the training split only; n_jobs=-1 asks
# for all available cores.
cv = GridSearchCV(pipeline, param_grid=parameters, n_jobs=-1)
cv.fit(X_train, y_train)

In [None]:
# Show the parameter combination the grid search selected as best.
best_params = cv.best_params_
print(best_params)