# Simulation effects of confound on SNR 
Univariate vs. multivariate

In [8]:
import sys
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import permutation_test_score
import statsmodels.api as sm
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
sys.path.append('..')
from generate_data import generate_data

In [48]:
data_args = dict(n_samp=240, k_feat=5, c_type='categorical', corr_cy=0.65,
                 signal_r2=0.01, confound_r2=0.1, verbose=False)
X, y, c = generate_data(**data_args)

In [55]:
# univar
for i in range(X.shape[1]):
    pred = sm.add_constant(y)
    # pred = np.hstack((pred, c[:, np.newaxis]))
    this_data = X[:, i]
    model = sm.OLS(this_data, pred)
    results_without_c = model.fit()
    
    pred = np.hstack((pred, c[:, np.newaxis]))
    model = sm.OLS(this_data, pred)
    results_with_c = model.fit()
    print((np.abs(results_with_c.tvalues[0]) - np.abs(results_without_c.tvalues[0])) / np.abs(results_with_c.tvalues[0] * 100))
    

0.00390157216891
-0.0179760046255
-0.00907022346178
-0.0236921170981
-0.0126143947041


In [50]:
# Multivar
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', SVC(kernel='linear', C=0.01))
])

score, perms, pval = permutation_test_score(estimator=pipe, X=X, y=y, cv=10,
                                            n_permutations=1000, n_jobs=-1, random_state=0,
                                            verbose=1, scoring=None)

[Parallel(n_jobs=-1)]: Done  36 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:    2.4s finished
