# How to use parallel computation to speed up cross-validated model fitting

In [1]:
from pathlib import Path
import os
import pandas as pd
import numpy as np
from time import time
from itertools import product
from tqdm import tqdm
from copy import deepcopy

os.chdir("/work")
from modules.decoder import RidgeRegression
from modules.model_selection import RidgeSigTest
from modules.metrics import get_scorer

## Make synthetic data

In [5]:
# Dimensions of the synthetic dataset.
# n_neurons is the number of columns of X; time_bins_train is the number of rows of X and y.
n_neurons = 30
time_bins_train = 1000
# NOTE(review): time_bins_test and n_positions are not referenced by any cell shown here.
time_bins_test = 80
n_positions = 4

# Seed NumPy's global RNG so the two draws below are reproducible.
np.random.seed(0)
# Features: uniform samples in [0, 1), shape (time_bins_train, n_neurons).
X = np.random.rand(time_bins_train,n_neurons)
# Target: one column drawn uniformly from [-40, 40), generated independently of X.
y = np.random.uniform(low=-40, high=40, size=(time_bins_train,1))

## Train without parallelism (sequential baseline)

In [7]:
# Scoring callable looked up by name; train() invokes it as scorer(targets, predictions).
scorer = get_scorer("mean_square_error")
def split(X, n_split):
    """Yield expanding-window ``(train_indexes, test_indexes)`` ranges over ``X``.

    The rows are viewed as ``n_split`` consecutive chunks of
    ``len(X) // n_split`` rows each (any remainder at the end is ignored).
    For every chunk the test set is the single last row of that chunk and the
    training set is every row before it, so the training window grows from
    fold to fold and never contains rows that come after the test row.

    NOTE(review): each test fold holds exactly one row, and when
    ``n_split == len(X)`` the first training range is empty. Confirm that
    this is the intended evaluation scheme.

    Parameters
    ----------
    X : sized
        Any object supporting ``len()``; only its length is used.
    n_split : int
        Number of folds, with ``1 <= n_split <= len(X)``.

    Yields
    ------
    tuple of (range, range)
        ``(train_indexes, test_indexes)`` for one fold.

    Raises
    ------
    ValueError
        If ``n_split`` is outside ``1..len(X)``. Because this is a generator,
        the error is raised when iteration starts, not at call time.
    """
    n_samples = len(X)
    # Without this guard a fold size of 0 produces test index -1, which
    # silently wraps around to the last row when used for indexing.
    if not 1 <= n_split <= n_samples:
        raise ValueError(
            f"n_split must be between 1 and len(X)={n_samples}, got {n_split}"
        )
    fold_size = n_samples // n_split
    for id_fold in range(n_split):
        # Last row of the current chunk is the held-out sample.
        test_index = (id_fold + 1) * fold_size - 1
        yield range(test_index), range(test_index, test_index + 1)
def train(train_indexes, test_indexes, param):
    """Fit one ridge model on a single fold and gather its scores and statistics.

    Reads the notebook-level ``X``, ``y`` and ``scorer``.

    Parameters
    ----------
    train_indexes, test_indexes
        Row selectors applied to both ``X`` and ``y``.
    param
        Hyper-parameter forwarded to ``RidgeRegression.fit`` and echoed back
        in the result under ``"hyper_param"``.

    Returns
    -------
    dict
        Train/test scores, the fitted coefficients, the hyper-parameter and
        the attributes read from ``RidgeSigTest`` (RSS, F statistic and its
        p-value, per-coefficient statistics and p-values).
    """
    X_fit, y_fit = X[train_indexes], y[train_indexes]
    X_eval, y_eval = X[test_indexes], y[test_indexes]

    model = RidgeRegression()
    model.fit(X_fit, y_fit, param)
    # The return value is ignored; the test prediction is read back from
    # ``model.prediction`` below.
    model.predict(X_eval)
    significance = RidgeSigTest(model)

    # Training prediction is computed by hand as X_fit · coefficients; the
    # einsum signature requires ``fitted_param`` to be one-dimensional.
    fit_prediction = np.einsum("ij,j->i", X_fit, model.fitted_param)
    fit_score = scorer(y_fit, fit_prediction)
    eval_score = scorer(y_eval, model.prediction)

    return {
        "train_scores": fit_score,
        "test_scores": eval_score,
        "fitted_param": model.fitted_param,
        "hyper_param": param,
        "RSS": significance.RSS,
        "F_stat": significance.f_stat,
        "F_p_value": significance.f_p_value,
        "coeff_stats": significance.t_stat_list,
        "coeff_p_values": significance.t_p_value_list,
    }

In [8]:
# Sequential baseline: fit every (hyper-parameter, fold) combination one after
# another in the current process.
candidate_params = np.arange(6)
# itertools.product materialises its inputs anyway, so building the fold list
# up front costs nothing and lets tqdm know the total number of fits.
cv_folds = list(split(X, 5))
# Keep the per-fit result dicts so they can be compared with the parallel run.
sequential_results = []
for param, (train_indexes, test_indexes) in tqdm(
    product(candidate_params, cv_folds),
    total=len(candidate_params) * len(cv_folds),
):
    sequential_results.append(train(train_indexes, test_indexes, param))

30it [00:06,  4.63it/s]


## Train in parallel with joblib

In [9]:
from joblib import Parallel, delayed

In [16]:
%%time
# Same grid as the sequential run: 6 candidate hyper-parameters x 5 folds.
candidate_params = np.arange(6)
# n_jobs=-1 asks joblib to use every available CPU.
parallel = Parallel(n_jobs=-1)
# One delayed train() call per (hyper-parameter, fold) pair; `out` receives
# the result dicts in submission order.
out = parallel(
    delayed(train)(fit_rows, eval_rows, ridge_param)
    for ridge_param, (fit_rows, eval_rows) in product(candidate_params, split(X, 5))
)

CPU times: user 332 ms, sys: 906 ms, total: 1.24 s
Wall time: 3 s


## Call `evaluate_candidates` (with parallelism) from `SearchCV`

In [14]:
from modules.model_selection import SearchCV

In [17]:
%%time
# Six candidate hyper-parameter values: 0, 1, ..., 5.
candidate_params = np.arange(6)
# SearchCV is given an estimator instance, the scorer name, the candidates and 5.
# NOTE(review): the trailing 5 is presumably the number of folds — confirm against SearchCV.
search = SearchCV(RidgeRegression(), "mean_square_error", candidate_params, 5)
# Evaluate every candidate on (X, y); how the work is distributed is decided inside SearchCV.
search.evaluate_candidates(X, y)

30it [00:06,  4.71it/s]

CPU times: user 26 s, sys: 25.5 s, total: 51.5 s
Wall time: 6.37 s



