In [2]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

In [3]:
import os
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from scipy import stats, linalg
from sklearn import preprocessing, decomposition, linear_model, metrics 
from sklearn.utils import shuffle
import warnings

In [4]:
# set fontsizes for matplotlib plots
baseline_fontsize = 12
SMALL_SIZE = 8 + baseline_fontsize
MEDIUM_SIZE = 10 + baseline_fontsize
BIGGER_SIZE = 12 + baseline_fontsize

plt.rc('font', size=SMALL_SIZE)          # controls default text sizes
plt.rc('axes', titlesize=SMALL_SIZE)     # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE)    # fontsize of the x and y labels
plt.rc('xtick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('legend', fontsize=SMALL_SIZE)    # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title

# Load Data

In [5]:
hcp_z = np.load('hcpya_z.npy')
hcp_ct = np.load('hcpya_ct.npy')
hcp_g = np.load('hcpya_g.npy')

In [6]:
abcd_z = np.load('abcd_z.npy')
abcd_ct = np.load('abcd_ct.npy')
abcd_g = np.load('abcd_g.npy')

In [7]:
print(hcp_z.shape)
print(hcp_ct.shape)
print(hcp_g.shape)

(946, 187)
(946, 151)
(946,)


In [8]:
print(abcd_z.shape)
print(abcd_ct.shape)
print(abcd_g.shape)

(4717, 187)
(4717, 151)
(4717,)


# HCP Create Train/Test Splits

In [9]:
# generate train/test splits
np.random.seed(42)
n_train = int(0.8 * hcp_z.shape[0])

train_idxs = np.random.choice(range(hcp_z.shape[0]), size=n_train, replace=False)
test_idxs = np.array([x for x in range(hcp_z.shape[0]) if x not in train_idxs])

In [10]:
train_data_z = hcp_z[train_idxs, :]
test_data_z = hcp_z[test_idxs, :]

train_data_ct = hcp_ct[train_idxs, :]
test_data_ct = hcp_ct[test_idxs, :]

train_phen = hcp_g[train_idxs]
test_phen = hcp_g[test_idxs]

In [11]:
# mean center train/test data (using train means)
train_mu_centered_z = (train_data_z - train_data_z.mean(axis=0))
test_mu_centered_z = (test_data_z - train_data_z.mean(axis=0))

train_mu_centered_ct = (train_data_ct - train_data_ct.mean(axis=0))
test_mu_centered_ct = (test_data_ct - train_data_ct.mean(axis=0))

# HCP Principal Component Regression (BBS)

In [12]:
pca_model_z = decomposition.PCA(n_components=75).fit(train_data_z)
pca_model_ct = decomposition.PCA(n_components=75).fit(train_data_ct)
# from pca documentation, "the input data is centered but not scaled for each feature before applying the SVD"

In [13]:
train_transformed_z = pca_model_z.transform(train_data_z)
test_transformed_z = pca_model_z.transform(test_data_z)
train_transformed_ct = pca_model_ct.transform(train_data_ct)
test_transformed_ct = pca_model_ct.transform(test_data_ct)

## HCP Fit Linear Regression Model 

In [14]:
# fast OLS using matrix math
# we will check that this matches sklearn results later

# fit ols model on dimension reduced train data
train_features_z = np.hstack([np.ones((train_transformed_z.shape[0], 1)), 
                            train_transformed_z])
train_features_inv_z = linalg.pinv2(train_features_z)
train_betas_z = np.dot(train_features_inv_z, train_phen)
train_pred_phen_z = np.dot(train_features_z, train_betas_z)

# fit ols model on dimension reduced test data
test_features_z = np.hstack([np.ones((test_transformed_z.shape[0], 1)), 
                           test_transformed_z])
test_pred_phen_z = np.dot(test_features_z, train_betas_z)

In [15]:
# fast OLS using matrix math
# we will check that this matches sklearn results later

# fit ols model on dimension reduced train data
train_features_ct = np.hstack([np.ones((train_transformed_ct.shape[0], 1)), 
                            train_transformed_ct])
train_features_inv_ct = linalg.pinv2(train_features_ct)
train_betas_ct = np.dot(train_features_inv_ct, train_phen)
train_pred_phen_ct = np.dot(train_features_ct, train_betas_ct)

# fit ols model on dimension reduced test data
test_features_ct = np.hstack([np.ones((test_transformed_ct.shape[0], 1)), 
                           test_transformed_ct])
test_pred_phen_ct = np.dot(test_features_ct, train_betas_ct)

In [17]:
# OLS using sklearn
lr_model_z = linear_model.LinearRegression(fit_intercept=True)
lr_model_z.fit(train_transformed_z, train_phen)
train_pred_phen_lr_model_z = lr_model_z.predict(train_transformed_z)
test_pred_phen_lr_model_z = lr_model_z.predict(test_transformed_z)

In [18]:
# OLS using sklearn
lr_model_ct = linear_model.LinearRegression(fit_intercept=True)
lr_model_ct.fit(train_transformed_ct, train_phen)
train_pred_phen_lr_model_ct = lr_model_ct.predict(train_transformed_ct)
test_pred_phen_lr_model_ct = lr_model_ct.predict(test_transformed_ct)

In [19]:
# ensure matrix math predictions and sklearn predictions are accurate to 5 decimals
assert np.allclose(np.round(train_pred_phen_z - train_pred_phen_lr_model_z, 5), 0), 'Failed'
assert np.allclose(np.round(test_pred_phen_z - test_pred_phen_lr_model_z, 5), 0), 'Failed'
print('Passed')

Passed


In [20]:
# ensure matrix math predictions and sklearn predictions are accurate to 5 decimals
assert np.allclose(np.round(train_pred_phen_ct - train_pred_phen_lr_model_ct, 5), 0), 'Failed'
assert np.allclose(np.round(test_pred_phen_ct - test_pred_phen_lr_model_ct, 5), 0), 'Failed'
print('Passed')

Passed


## HCP Accuracy of Predictions

In [21]:
train_r2_z = metrics.r2_score(train_phen, train_pred_phen_lr_model_z)
test_r2_z = metrics.r2_score(test_phen, test_pred_phen_lr_model_z)
train_mae_z = metrics.mean_absolute_error(train_phen, train_pred_phen_lr_model_z)
test_mae_z = metrics.mean_absolute_error(test_phen, test_pred_phen_lr_model_z)
train_mse_z = metrics.mean_squared_error(train_phen, train_pred_phen_lr_model_z)
test_mse_z = metrics.mean_squared_error(test_phen, test_pred_phen_lr_model_z)
print(f'Deviation model Train R^2: {train_r2_z:.3f}')
print(f'Deviation model Test R^2: {test_r2_z:.3f}')
print(f'Deviation model Train MAE: {train_mae_z:.3f}')
print(f'Deviation model Test MAE: {test_mae_z:.3f}')
print(f'Deviation model Train MSE: {train_mse_z:.3f}')
print(f'Deviation model Test MSE: {test_mse_z:.3f}')

Deviation model Train R^2: 0.255
Deviation model Test R^2: 0.032
Deviation model Train MAE: 0.581
Deviation model Test MAE: 0.686
Deviation model Train MSE: 0.532
Deviation model Test MSE: 0.741


In [22]:
train_r2_ct = metrics.r2_score(train_phen, train_pred_phen_lr_model_ct)
test_r2_ct = metrics.r2_score(test_phen, test_pred_phen_lr_model_ct)
train_mae_ct = metrics.mean_absolute_error(train_phen, train_pred_phen_lr_model_ct)
test_mae_ct = metrics.mean_absolute_error(test_phen, test_pred_phen_lr_model_ct)
train_mse_ct = metrics.mean_squared_error(train_phen, train_pred_phen_lr_model_ct)
test_mse_ct = metrics.mean_squared_error(test_phen, test_pred_phen_lr_model_ct)
print(f'Cortical thickness model Train R^2: {train_r2_ct:.3f}')
print(f'Cortical thickness model Test R^2: {test_r2_ct:.3f}')
print(f'Cortical thickness model Train MAE: {train_mae_ct:.3f}')
print(f'Cortical thickness model Test MAE: {test_mae_ct:.3f}')
print(f'Cortical thickness model Train MSE: {train_mse_ct:.3f}')
print(f'Cortical thickness model Test MSE: {test_mse_ct:.3f}')

Cortical thickness model Train R^2: 0.185
Cortical thickness model Test R^2: -0.085
Cortical thickness model Train MAE: 0.611
Cortical thickness model Test MAE: 0.724
Cortical thickness model Train MSE: 0.582
Cortical thickness model Test MSE: 0.830


In [23]:
diff_test_r = test_r2_z - test_r2_ct
diff_test_mae = test_mae_ct - test_mae_z
diff_test_mse = test_mse_ct - test_mse_z

In [63]:
print(f'Cortical thickness - Deviation Test R^2: {diff_test_r:.3f}')
print(f' Cortical thickness - Deviation Test MAE: {diff_test_mae:.3f}')
print(f'Cortical thickness - Deviation Test MSE: {diff_test_mse:.3f}')

Cortical thickness - Deviation Test R^2: 0.116
 Cortical thickness - Deviation Test MAE: 0.037
Cortical thickness - Deviation Test MSE: 0.049


# Connectome Predictive Modelling 

In [25]:
# correlation train_brain with train_phenotype
train_z_pheno_corr_p = [stats.pearsonr(train_data_z[:, i], train_phen) for i in range(train_data_z.shape[1])]  # train_pheno_corr_p: (259200, )
# there are some nan correlations if brain data is poorly cropped (ie: some columns are always 0)

In [26]:
# correlation train_brain with train_phenotype
train_ct_pheno_corr_p = [stats.pearsonr(train_data_ct[:, i], train_phen) for i in range(train_data_ct.shape[1])]  # train_pheno_corr_p: (259200, )
# there are some nan correlations if brain data is poorly cropped (ie: some columns are always 0)

In [27]:
# split into positive and negative correlations 
# and keep edges with p values below threshold
pval_threshold = 0.01

train_z_corrs = np.array([x[0] for x in train_z_pheno_corr_p])
train_z_pvals = np.array([x[1] for x in train_z_pheno_corr_p])

keep_edges_pos_z = (train_z_corrs > 0) & (train_z_pvals < pval_threshold)
keep_edges_neg_z = (train_z_corrs < 0) & (train_z_pvals < pval_threshold)

train_ct_corrs = np.array([x[0] for x in train_ct_pheno_corr_p])
train_ct_pvals = np.array([x[1] for x in train_ct_pheno_corr_p])

keep_edges_pos_ct = (train_ct_corrs > 0) & (train_ct_pvals < pval_threshold)
keep_edges_neg_ct = (train_ct_corrs < 0) & (train_ct_pvals < pval_threshold)

In [28]:
print(f'number of positive Z features kept = {np.sum(keep_edges_pos_z)}')
print(f'number of negative Z features kept = {np.sum(keep_edges_neg_z)}')
print(f'number of positive CT features kept = {np.sum(keep_edges_pos_ct)}')
print(f'number of negative CT features kept = {np.sum(keep_edges_neg_ct)}')

number of positive Z features kept = 37
number of negative Z features kept = 2
number of positive CT features kept = 15
number of negative CT features kept = 1


In [29]:
train_pos_edges_sum_z = train_data_z[:, keep_edges_pos_z].sum(1)
train_neg_edges_sum_z = train_data_z[:, keep_edges_neg_z].sum(1)

In [30]:
train_pos_edges_sum_ct = train_data_ct[:, keep_edges_pos_ct].sum(1)
train_neg_edges_sum_ct = train_data_ct[:, keep_edges_neg_ct].sum(1)

In [32]:
fit_pos_z = linear_model.LinearRegression(fit_intercept=True).fit(train_pos_edges_sum_z.reshape(-1, 1), train_phen)
fit_neg_z = linear_model.LinearRegression(fit_intercept=True).fit(train_neg_edges_sum_z.reshape(-1, 1), train_phen)

In [33]:
fit_pos_ct = linear_model.LinearRegression(fit_intercept=True).fit(train_pos_edges_sum_ct.reshape(-1, 1), train_phen)
fit_neg_ct = linear_model.LinearRegression(fit_intercept=True).fit(train_neg_edges_sum_ct.reshape(-1, 1), train_phen)

In [34]:
pos_error_z = metrics.mean_absolute_error(train_phen, fit_pos_z.predict(train_pos_edges_sum_z.reshape(-1, 1)))
neg_error_z = metrics.mean_absolute_error(train_phen, fit_neg_z.predict(train_neg_edges_sum_z.reshape(-1, 1)))
pos_error_ct = metrics.mean_absolute_error(train_phen, fit_pos_ct.predict(train_pos_edges_sum_ct.reshape(-1, 1)))
neg_error_ct = metrics.mean_absolute_error(train_phen, fit_neg_ct.predict(train_neg_edges_sum_ct.reshape(-1, 1)))

print(f'Training Error (Positive Z Features Model) = {pos_error_z:.3f}')
print(f'Training Error (Negative Z Features Model) = {neg_error_z:.3f}')
print(f'Training Error (Positive CT Features Model) = {pos_error_ct:.3f}')
print(f'Training Error (Negative CT Features Model) = {neg_error_ct:.3f}')

Training Error (Positive Z Features Model) = 0.631
Training Error (Negative Z Features Model) = 0.666
Training Error (Positive CT Features Model) = 0.662
Training Error (Negative CT Features Model) = 0.665


In [35]:
# combine positive/negative edges in one linear regression model
fit_pos_neg_z = linear_model.LinearRegression(fit_intercept=True).fit(np.stack((train_pos_edges_sum_z, train_neg_edges_sum_z)).T, train_phen)

In [36]:
# combine positive/negative edges in one linear regression model
fit_pos_neg_ct = linear_model.LinearRegression(fit_intercept=True).fit(np.stack((train_pos_edges_sum_ct, train_neg_edges_sum_ct)).T, train_phen)

In [37]:
pos_neg_error_z = metrics.mean_absolute_error(train_phen, fit_pos_neg_z.predict(np.stack((train_pos_edges_sum_z, train_neg_edges_sum_z)).T))
pos_neg_error_ct = metrics.mean_absolute_error(train_phen, fit_pos_neg_ct.predict(np.stack((train_pos_edges_sum_ct, train_neg_edges_sum_ct)).T))

print(f'Training Error (Positive/Negative Z Features Model) = {pos_neg_error_z:.3f}')
print(f'Training Error (Positive/Negative CT Features Model) = {pos_neg_error_ct:.3f}')

Training Error (Positive/Negative Z Features Model) = 0.620
Training Error (Positive/Negative CT Features Model) = 0.642


In [38]:
# evaluate out of sample performance 
test_pos_edges_sum_z = test_data_z[:, keep_edges_pos_z].sum(1)
test_neg_edges_sum_z = test_data_z[:, keep_edges_neg_z].sum(1)

pos_test_error_z = metrics.mean_squared_error(test_phen, fit_pos_z.predict(test_pos_edges_sum_z.reshape(-1, 1)))
neg_test_error_z = metrics.mean_squared_error(test_phen, fit_neg_z.predict(test_neg_edges_sum_z.reshape(-1, 1)))
pos_neg_test_error_z = metrics.mean_squared_error(test_phen, fit_pos_neg_z.predict(np.stack((test_pos_edges_sum_z, test_neg_edges_sum_z)).T))

test_pos_edges_sum_ct = test_data_ct[:, keep_edges_pos_ct].sum(1)
test_neg_edges_sum_ct = test_data_ct[:, keep_edges_neg_ct].sum(1)

pos_test_error_ct = metrics.mean_squared_error(test_phen, fit_pos_ct.predict(test_pos_edges_sum_ct.reshape(-1, 1)))
neg_test_error_ct = metrics.mean_squared_error(test_phen, fit_neg_ct.predict(test_neg_edges_sum_ct.reshape(-1, 1)))
pos_neg_test_error_ct = metrics.mean_squared_error(test_phen, fit_pos_neg_ct.predict(np.stack((test_pos_edges_sum_ct, test_neg_edges_sum_ct)).T))

print(f'Testing Error (Positive Z Features Model) = {pos_test_error_z:.3f}')
print(f'Testing Error (Negative Z Features Model) = {neg_test_error_z:.3f}')
print(f'Testing Error (Positive/Negative Z Features Model) = {pos_neg_test_error_z:.3f}')
print(f'Testing Error (Positive CT Features Model) = {pos_test_error_ct:.3f}')
print(f'Testing Error (Negative CT Features Model) = {neg_test_error_ct:.3f}')
print(f'Testing Error (Positive/Negative CT Features Model) = {pos_neg_test_error_ct:.3f}')

Testing Error (Positive Z Features Model) = 0.737
Testing Error (Negative Z Features Model) = 0.752
Testing Error (Positive/Negative Z Features Model) = 0.714
Testing Error (Positive CT Features Model) = 0.798
Testing Error (Negative CT Features Model) = 0.753
Testing Error (Positive/Negative CT Features Model) = 0.777


In [39]:
diff_test_mse = pos_neg_test_error_ct - pos_neg_test_error_z

In [64]:
print(f'Cortical thickness - Deviation Test MSE: {diff_test_mse:.3f}')

Cortical thickness - Deviation Test MSE: 0.049


# Lasso (Linear Regression + L1 Regularization)

In [None]:
# LassoCV uses coordinate descent to select hyperparameter alpha 
alpha_grid = np.array([10**a for a in np.arange(-3, 3, 0.25)])
lassoCV_model_z = linear_model.LassoCV(cv=5, n_alphas=len(alpha_grid), alphas=alpha_grid, fit_intercept=True, random_state=42, verbose=True, n_jobs=5).fit(train_data_z, train_phen)

In [124]:
# LassoCV uses coordinate descent to select hyperparameter alpha 
alpha_grid = np.array([10**a for a in np.arange(-3, 3, 0.25)])
lassoCV_model_ct = linear_model.LassoCV(cv=5, n_alphas=len(alpha_grid), alphas=alpha_grid, fit_intercept=True, random_state=42, verbose=True, n_jobs=5).fit(train_data_ct, train_phen)

In [43]:
# based on cv results above, set alpha=100
lasso_model_z = linear_model.Lasso(alpha=lassoCV_model_z.alpha_, fit_intercept=True).fit(train_data_z, train_phen)

In [44]:
# based on cv results above, set alpha=100
lasso_model_ct = linear_model.Lasso(alpha=lassoCV_model_ct.alpha_, fit_intercept=True).fit(train_data_ct, train_phen)

In [45]:
train_preds_lasso_model_z = lasso_model_z.predict(train_data_z)
test_preds_lasso_model_z = lasso_model_z.predict(test_data_z)

train_mse_z = metrics.mean_squared_error(train_phen, train_preds_lasso_model_z)
test_mse_z = metrics.mean_squared_error(test_phen, test_preds_lasso_model_z)

train_preds_lasso_model_ct = lasso_model_ct.predict(train_data_ct)
test_preds_lasso_model_ct = lasso_model_ct.predict(test_data_ct)

train_mse_ct = metrics.mean_squared_error(train_phen, train_preds_lasso_model_ct)
test_mse_ct = metrics.mean_squared_error(test_phen, test_preds_lasso_model_ct)

print(f'Train MSE Z model: {train_mse_z:.3f}')
print(f'Test MSE Z model: {test_mse_z:.3f}')
print(f'Train MSE CT model: {train_mse_ct:.3f}')
print(f'Test MSE CT model: {test_mse_ct:.3f}')

Train MSE Z model: 0.623
Test MSE Z model: 0.707
Train MSE CT model: 0.673
Test MSE CT model: 0.759


In [46]:
diff_test_mse = test_mse_ct - test_mse_z

In [47]:
print(f'Deviation model - Cortical thickness model Test MSE: {diff_test_mse:.3f}')

Deviation model - Cortical thickness model Test MSE: 0.051


# Ridge (Linear Regression + L2 Regularization)

In [48]:
# RidgeCV uses generalized cross validation to select hyperparameter alpha 
with warnings.catch_warnings():
    # ignore matrix decomposition errors
    warnings.simplefilter("ignore")
    ridgeCV_model_z = linear_model.RidgeCV(alphas=(0.1, 1.0, 10.0), fit_intercept=True, cv=5).fit(train_data_z, train_phen)

In [49]:
# RidgeCV uses generalized cross validation to select hyperparameter alpha 
with warnings.catch_warnings():
    # ignore matrix decomposition errors
    warnings.simplefilter("ignore")
    ridgeCV_model_ct = linear_model.RidgeCV(alphas=(0.1, 1.0, 10.0), fit_intercept=True, cv=5).fit(train_data_ct, train_phen)

In [50]:
ridge_alpha_z = ridgeCV_model_z.alpha_
print(f'CV Selected Alpha Z model = {ridge_alpha_z:.3f}')

CV Selected Alpha Z model = 10.000


In [51]:
ridge_alpha_ct = ridgeCV_model_ct.alpha_
print(f'CV Selected Alpha CT model = {ridge_alpha_ct:.3f}')

CV Selected Alpha CT model = 10.000


In [52]:
ridge_model_z = linear_model.Ridge(alpha=ridge_alpha_z, fit_intercept=True).fit(train_data_z, train_phen)

In [53]:
ridge_model_ct = linear_model.Ridge(alpha=ridge_alpha_ct, fit_intercept=True).fit(train_data_ct, train_phen)

In [54]:
train_preds_ridge_model_z = ridge_model_z.predict(train_data_z)
test_preds_ridge_model_z = ridge_model_z.predict(test_data_z)

train_mse_z = metrics.mean_squared_error(train_phen, train_preds_ridge_model_z)
test_mse_z = metrics.mean_squared_error(test_phen, test_preds_ridge_model_z)

train_preds_ridge_model_ct = ridge_model_ct.predict(train_data_ct)
test_preds_ridge_model_ct = ridge_model_ct.predict(test_data_ct)

train_mse_ct = metrics.mean_squared_error(train_phen, train_preds_ridge_model_ct)
test_mse_ct = metrics.mean_squared_error(test_phen, test_preds_ridge_model_ct)

print(f'Train MSE Z model: {train_mse_z:.3f}')
print(f'Test MSE Z model: {test_mse_z:.3f}')
print(f'Train MSE CT model: {train_mse_ct:.3f}')
print(f'Test MSE CT model: {test_mse_ct:.3f}')

Train MSE Z model: 0.436
Test MSE Z model: 0.870
Train MSE CT model: 0.566
Test MSE CT model: 0.756


In [55]:
diff_test_mse = test_mse_ct - test_mse_z

In [56]:
print(f'Deviation model - Cortical thickness model Test MSE: {diff_test_mse:.3f}')

Deviation model - Cortical thickness model Test MSE: -0.114


# Elastic Net (Linear Regression + L1/L2 Regularization)

In [None]:
# RidgeCV uses generalized cross validation to select hyperparameter alpha 
elasticnetCV_model_z = linear_model.ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, .99, 1], cv=5, n_alphas=len(alpha_grid), alphas=alpha_grid, random_state=42, verbose=True, n_jobs=5).fit(train_data_z, train_phen)

In [None]:
# RidgeCV uses generalized cross validation to select hyperparameter alpha 
elasticnetCV_model_ct = linear_model.ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, .99, 1], cv=5, n_alphas=len(alpha_grid), alphas=alpha_grid, random_state=42, verbose=True, n_jobs=5).fit(train_data_ct, train_phen)

In [59]:
print(f'CV selected alpha Z model {elasticnetCV_model_z.alpha_:.3f}')
print(f'Elastic net L1 ratio Z model {elasticnetCV_model_z.l1_ratio_:.3f}')
print(f'CV selected alpha CT model {elasticnetCV_model_ct.alpha_:.3f}')
print(f'Elastic net L1 ratio CT model {elasticnetCV_model_ct.l1_ratio_:.3f}')

CV selected alpha Z model 0.056
Elastic net L1 ratio Z model 0.700
CV selected alpha CT model 0.032
Elastic net L1 ratio CT model 0.100


In [60]:
elasticnet_model_z = linear_model.ElasticNet(alpha=elasticnetCV_model_z.alpha_, l1_ratio=elasticnetCV_model_z.l1_ratio_, fit_intercept=True, random_state=42).fit(train_data_z, train_phen)

train_preds_en_model_z = elasticnet_model_z.predict(train_data_z)
test_preds_en_model_z = elasticnet_model_z.predict(test_data_z)

train_mse_z = metrics.mean_squared_error(train_phen, train_preds_en_model_z)
test_mse_z = metrics.mean_squared_error(test_phen, test_preds_en_model_z)

elasticnet_model_ct = linear_model.ElasticNet(alpha=elasticnetCV_model_ct.alpha_, l1_ratio=elasticnetCV_model_ct.l1_ratio_, fit_intercept=True, random_state=42).fit(train_data_ct, train_phen)

train_preds_en_model_ct = elasticnet_model_ct.predict(train_data_ct)
test_preds_en_model_ct = elasticnet_model_ct.predict(test_data_ct)

train_mse_ct = metrics.mean_squared_error(train_phen, train_preds_en_model_ct)
test_mse_ct = metrics.mean_squared_error(test_phen, test_preds_en_model_ct)

print(f'Train MSE Z model: {train_mse_z:.3f}')
print(f'Test MSE Z model: {test_mse_z:.3f}')
print(f'Train MSE CT model: {train_mse_ct:.3f}')
print(f'Test MSE CT model: {test_mse_ct:.3f}')

Train MSE Z model: 0.605
Test MSE Z model: 0.697
Train MSE CT model: 0.640
Test MSE CT model: 0.746


In [61]:
diff_test_mse = test_mse_ct - test_mse_z

In [62]:
print(f'Deviation model - Cortical thickness model Test MSE: {diff_test_mse:.3f}')

Deviation model - Cortical thickness model Test MSE: 0.049


# ABCD Create Train/Test Splits

In [65]:
# generate train/test splits
np.random.seed(42)
n_train = int(0.8 * abcd_z.shape[0])

train_idxs = np.random.choice(range(abcd_z.shape[0]), size=n_train, replace=False)
test_idxs = np.array([x for x in range(abcd_z.shape[0]) if x not in train_idxs])

In [66]:
train_data_z = abcd_z[train_idxs, :]
test_data_z = abcd_z[test_idxs, :]

train_data_ct = abcd_ct[train_idxs, :]
test_data_ct = abcd_ct[test_idxs, :]

train_phen = abcd_g[train_idxs]
test_phen = abcd_g[test_idxs]

In [67]:
# mean center train/test data (using train means)
train_mu_centered_z = (train_data_z - train_data_z.mean(axis=0))
test_mu_centered_z = (test_data_z - train_data_z.mean(axis=0))

train_mu_centered_ct = (train_data_ct - train_data_ct.mean(axis=0))
test_mu_centered_ct = (test_data_ct - train_data_ct.mean(axis=0))

# ABCD Principal Component Regression (BBS)

In [68]:
pca_model_z = decomposition.PCA(n_components=75).fit(train_data_z)
# from pca documentation, "the input data is centered but not scaled for each feature before applying the SVD"

In [69]:
pca_model_ct = decomposition.PCA(n_components=75).fit(train_data_ct)
# from pca documentation, "the input data is centered but not scaled for each feature before applying the SVD"

In [70]:
train_transformed_z = pca_model_z.transform(train_data_z)
test_transformed_z = pca_model_z.transform(test_data_z)

In [71]:
train_transformed_ct = pca_model_ct.transform(train_data_ct)
test_transformed_ct = pca_model_ct.transform(test_data_ct)

## ABCD Fit Linear Regression Model 

In [72]:
# fast OLS using matrix math
# we will check that this matches sklearn results later

# fit ols model on dimension reduced train data
train_features_z = np.hstack([np.ones((train_transformed_z.shape[0], 1)), 
                            train_transformed_z])
train_features_inv_z = linalg.pinv2(train_features_z)
train_betas_z = np.dot(train_features_inv_z, train_phen)
train_pred_phen_z = np.dot(train_features_z, train_betas_z)

# fit ols model on dimension reduced test data
test_features_z = np.hstack([np.ones((test_transformed_z.shape[0], 1)), 
                           test_transformed_z])
test_pred_phen_z = np.dot(test_features_z, train_betas_z)

In [73]:
# fast OLS using matrix math
# we will check that this matches sklearn results later

# fit ols model on dimension reduced train data
train_features_ct = np.hstack([np.ones((train_transformed_ct.shape[0], 1)), 
                            train_transformed_ct])
train_features_inv_ct = linalg.pinv2(train_features_ct)
train_betas_ct = np.dot(train_features_inv_ct, train_phen)
train_pred_phen_ct = np.dot(train_features_ct, train_betas_ct)

# fit ols model on dimension reduced test data
test_features_ct = np.hstack([np.ones((test_transformed_ct.shape[0], 1)), 
                           test_transformed_ct])
test_pred_phen_ct = np.dot(test_features_ct, train_betas_ct)

In [75]:
# OLS using sklearn

lr_model_z = linear_model.LinearRegression(fit_intercept=True)
lr_model_z.fit(train_transformed_z, train_phen)
train_pred_phen_lr_model_z = lr_model_z.predict(train_transformed_z)
test_pred_phen_lr_model_z = lr_model_z.predict(test_transformed_z)

In [76]:
# OLS using sklearn

lr_model_ct = linear_model.LinearRegression(fit_intercept=True)
lr_model_ct.fit(train_transformed_ct, train_phen)
train_pred_phen_lr_model_ct = lr_model_ct.predict(train_transformed_ct)
test_pred_phen_lr_model_ct = lr_model_ct.predict(test_transformed_ct)

In [77]:
# ensure matrix math predictions and sklearn predictions are accurate to 5 decimals
assert np.allclose(np.round(train_pred_phen_z - train_pred_phen_lr_model_z, 5), 0), 'Failed'
assert np.allclose(np.round(test_pred_phen_z - test_pred_phen_lr_model_z, 5), 0), 'Failed'
print('Passed')

Passed


In [78]:
# ensure matrix math predictions and sklearn predictions are accurate to 5 decimals
assert np.allclose(np.round(train_pred_phen_ct - train_pred_phen_lr_model_ct, 5), 0), 'Failed'
assert np.allclose(np.round(test_pred_phen_ct - test_pred_phen_lr_model_ct, 5), 0), 'Failed'
print('Passed')

Passed


## ABCD Accuracy of Predictions

In [79]:
train_r2_z = metrics.r2_score(train_phen, train_pred_phen_lr_model_z)
test_r2_z = metrics.r2_score(test_phen, test_pred_phen_lr_model_z)
train_mae_z = metrics.mean_absolute_error(train_phen, train_pred_phen_lr_model_z)
test_mae_z = metrics.mean_absolute_error(test_phen, test_pred_phen_lr_model_z)
train_mse_z = metrics.mean_squared_error(train_phen, train_pred_phen_lr_model_z)
test_mse_z = metrics.mean_squared_error(test_phen, test_pred_phen_lr_model_z)
print(f'Deviation model Train R^2: {train_r2_z:.3f}')
print(f'Deviation model Test R^2: {test_r2_z:.3f}')
print(f'Deviation model Train MAE: {train_mae_z:.3f}')
print(f'Deviation model Test MAE: {test_mae_z:.3f}')
print(f'Deviation model Train MSE: {train_mse_z:.3f}')
print(f'Deviation model Test MSE: {test_mse_z:.3f}')

Deviation model Train R^2: 0.121
Deviation model Test R^2: 0.056
Deviation model Train MAE: 0.687
Deviation model Test MAE: 0.692
Deviation model Train MSE: 0.762
Deviation model Test MSE: 0.784


In [80]:
train_r2_ct = metrics.r2_score(train_phen, train_pred_phen_lr_model_ct)
test_r2_ct = metrics.r2_score(test_phen, test_pred_phen_lr_model_ct)
train_mae_ct = metrics.mean_absolute_error(train_phen, train_pred_phen_lr_model_ct)
test_mae_ct = metrics.mean_absolute_error(test_phen, test_pred_phen_lr_model_ct)
train_mse_ct = metrics.mean_squared_error(train_phen, train_pred_phen_lr_model_ct)
test_mse_ct = metrics.mean_squared_error(test_phen, test_pred_phen_lr_model_ct)
print(f'Cortical thickness model Train R^2: {train_r2_ct:.3f}')
print(f'Cortical thickness model Test R^2: {test_r2_ct:.3f}')
print(f'Cortical thickness model Train MAE: {train_mae_ct:.3f}')
print(f'Cortical thickness model Test MAE: {test_mae_ct:.3f}')
print(f'Cortical thickness model Train MSE: {train_mse_ct:.3f}')
print(f'Cortical thickness model Test MSE: {test_mse_ct:.3f}')

Cortical thickness model Train R^2: 0.104
Cortical thickness model Test R^2: 0.039
Cortical thickness model Train MAE: 0.693
Cortical thickness model Test MAE: 0.696
Cortical thickness model Train MSE: 0.776
Cortical thickness model Test MSE: 0.797


In [81]:
diff_test_r = test_r2_z - test_r2_ct
diff_test_mae = test_mae_ct - test_mae_z
diff_test_mse = test_mse_ct - test_mse_z

In [82]:
print(f'Cortical thickness - Deviation Test R^2: {diff_test_r:.3f}')
print(f'Cortical thickness - Deviation Test MAE: {diff_test_mae:.3f}')
print(f'Cortical thickness - Deviation Test MSE: {diff_test_mse:.3f}')

Cortical thickness - Deviation Test R^2: 0.016
Cortical thickness - Deviation Test MAE: 0.004
Cortical thickness - Deviation Test MSE: 0.014


# Connectome Predictive Modelling 

In [83]:
# correlation train_brain with train_phenotype
train_z_pheno_corr_p = [stats.pearsonr(train_data_z[:, i], train_phen) for i in range(train_data_z.shape[1])]  # train_pheno_corr_p: (259200, )
# there are some nan correlations if brain data is poorly cropped (ie: some columns are always 0)

In [84]:
# correlation train_brain with train_phenotype
train_ct_pheno_corr_p = [stats.pearsonr(train_data_ct[:, i], train_phen) for i in range(train_data_ct.shape[1])]  # train_pheno_corr_p: (259200, )
# there are some nan correlations if brain data is poorly cropped (ie: some columns are always 0)

In [85]:
# split into positive and negative correlations 
# and keep edges with p values below threshold
pval_threshold = 0.01

train_z_corrs = np.array([x[0] for x in train_z_pheno_corr_p])
train_z_pvals = np.array([x[1] for x in train_z_pheno_corr_p])

keep_edges_pos_z = (train_z_corrs > 0) & (train_z_pvals < pval_threshold)
keep_edges_neg_z = (train_z_corrs < 0) & (train_z_pvals < pval_threshold)

train_ct_corrs = np.array([x[0] for x in train_ct_pheno_corr_p])
train_ct_pvals = np.array([x[1] for x in train_ct_pheno_corr_p])

keep_edges_pos_ct = (train_ct_corrs > 0) & (train_ct_pvals < pval_threshold)
keep_edges_neg_ct = (train_ct_corrs < 0) & (train_ct_pvals < pval_threshold)

In [86]:
print(f'number of positive Z features kept = {np.sum(keep_edges_pos_z)}')
print(f'number of negative Z features kept = {np.sum(keep_edges_neg_z)}')
print(f'number of positive CT features kept = {np.sum(keep_edges_pos_ct)}')
print(f'number of negative CT features kept = {np.sum(keep_edges_neg_ct)}')

number of positive Z features kept = 70
number of negative Z features kept = 8
number of positive CT features kept = 46
number of negative CT features kept = 10


In [87]:
train_pos_edges_sum_z = train_data_z[:, keep_edges_pos_z].sum(1)
train_neg_edges_sum_z = train_data_z[:, keep_edges_neg_z].sum(1)

In [88]:
train_pos_edges_sum_ct = train_data_ct[:, keep_edges_pos_ct].sum(1)
train_neg_edges_sum_ct = train_data_ct[:, keep_edges_neg_ct].sum(1)

In [90]:
fit_pos_z = linear_model.LinearRegression(fit_intercept=True).fit(train_pos_edges_sum_z.reshape(-1, 1), train_phen)
fit_neg_z = linear_model.LinearRegression(fit_intercept=True).fit(train_neg_edges_sum_z.reshape(-1, 1), train_phen)

In [91]:
fit_pos_ct = linear_model.LinearRegression(fit_intercept=True).fit(train_pos_edges_sum_ct.reshape(-1, 1), train_phen)
fit_neg_ct = linear_model.LinearRegression(fit_intercept=True).fit(train_neg_edges_sum_ct.reshape(-1, 1), train_phen)

In [92]:
pos_error_z = metrics.mean_absolute_error(train_phen, fit_pos_z.predict(train_pos_edges_sum_z.reshape(-1, 1)))
neg_error_z = metrics.mean_absolute_error(train_phen, fit_neg_z.predict(train_neg_edges_sum_z.reshape(-1, 1)))
pos_error_ct = metrics.mean_absolute_error(train_phen, fit_pos_ct.predict(train_pos_edges_sum_ct.reshape(-1, 1)))
neg_error_ct = metrics.mean_absolute_error(train_phen, fit_neg_ct.predict(train_neg_edges_sum_ct.reshape(-1, 1)))

print(f'Training Error (Positive Z Features Model) = {pos_error_z:.3f}')
print(f'Training Error (Negative Z Features Model) = {neg_error_z:.3f}')
print(f'Training Error (Positive CT Features Model) = {pos_error_ct:.3f}')
print(f'Training Error (Negative CT Features Model) = {neg_error_ct:.3f}')

Training Error (Positive Z Features Model) = 0.714
Training Error (Negative Z Features Model) = 0.729
Training Error (Positive CT Features Model) = 0.729
Training Error (Negative CT Features Model) = 0.727


In [93]:
# combine positive/negative edges in one linear regression model
fit_pos_neg_z = linear_model.LinearRegression(fit_intercept=True).fit(np.stack((train_pos_edges_sum_z, train_neg_edges_sum_z)).T, train_phen)

In [94]:
# combine positive/negative edges in one linear regression model
fit_pos_neg_ct = linear_model.LinearRegression(fit_intercept=True).fit(np.stack((train_pos_edges_sum_ct, train_neg_edges_sum_ct)).T, train_phen)

In [95]:
pos_neg_error_z = metrics.mean_absolute_error(train_phen, fit_pos_neg_z.predict(np.stack((train_pos_edges_sum_z, train_neg_edges_sum_z)).T))
pos_neg_error_ct = metrics.mean_absolute_error(train_phen, fit_pos_neg_ct.predict(np.stack((train_pos_edges_sum_ct, train_neg_edges_sum_ct)).T))

print(f'Training Error (Positive/Negative Z Features Model) = {pos_neg_error_z:.3f}')
print(f'Training Error (Positive/Negative CT Features Model) = {pos_neg_error_ct:.3f}')

Training Error (Positive/Negative Z Features Model) = 0.704
Training Error (Positive/Negative CT Features Model) = 0.716


In [96]:
# evaluate out of sample performance 
test_pos_edges_sum_z = test_data_z[:, keep_edges_pos_z].sum(1)
test_neg_edges_sum_z = test_data_z[:, keep_edges_neg_z].sum(1)

pos_test_error_z = metrics.mean_squared_error(test_phen, fit_pos_z.predict(test_pos_edges_sum_z.reshape(-1, 1)))
neg_test_error_z = metrics.mean_squared_error(test_phen, fit_neg_z.predict(test_neg_edges_sum_z.reshape(-1, 1)))
pos_neg_test_error_z = metrics.mean_squared_error(test_phen, fit_pos_neg_z.predict(np.stack((test_pos_edges_sum_z, test_neg_edges_sum_z)).T))

test_pos_edges_sum_ct = test_data_ct[:, keep_edges_pos_ct].sum(1)
test_neg_edges_sum_ct = test_data_ct[:, keep_edges_neg_ct].sum(1)

pos_test_error_ct = metrics.mean_squared_error(test_phen, fit_pos_ct.predict(test_pos_edges_sum_ct.reshape(-1, 1)))
neg_test_error_ct = metrics.mean_squared_error(test_phen, fit_neg_ct.predict(test_neg_edges_sum_ct.reshape(-1, 1)))
pos_neg_test_error_ct = metrics.mean_squared_error(test_phen, fit_pos_neg_ct.predict(np.stack((test_pos_edges_sum_ct, test_neg_edges_sum_ct)).T))

print(f'Testing Error (Positive Z Features Model) = {pos_test_error_z:.3f}')
print(f'Testing Error (Negative Z Features Model) = {neg_test_error_z:.3f}')
print(f'Testing Error (Positive/Negative Z Features Model) = {pos_neg_test_error_z:.3f}')
print(f'Testing Error (Positive CT Features Model) = {pos_test_error_ct:.3f}')
print(f'Testing Error (Negative CT Features Model) = {neg_test_error_ct:.3f}')
print(f'Testing Error (Positive/Negative CT Features Model) = {pos_neg_test_error_ct:.3f}')

Testing Error (Positive Z Features Model) = 0.802
Testing Error (Negative Z Features Model) = 0.830
Testing Error (Positive/Negative Z Features Model) = 0.797
Testing Error (Positive CT Features Model) = 0.823
Testing Error (Negative CT Features Model) = 0.822
Testing Error (Positive/Negative CT Features Model) = 0.806


In [97]:
diff_test_mse = pos_neg_test_error_ct - pos_neg_test_error_z

In [98]:
print(f'Cortical thickness - Deviation Test MSE: {diff_test_mse:.3f}')

Cortical thickness - Deviation Test MSE: 0.009


# Lasso (Linear Regression + L1 Regularization)

In [None]:
# LassoCV uses coordinate descent to select hyperparameter alpha 
alpha_grid = np.array([10**a for a in np.arange(-3, 3, 0.25)])
lassoCV_model_z = linear_model.LassoCV(cv=5, n_alphas=len(alpha_grid), alphas=alpha_grid, fit_intercept=True, random_state=42, verbose=True, n_jobs=5).fit(train_data_z, train_phen)

In [None]:
# LassoCV uses coordinate descent to select hyperparameter alpha 
alpha_grid = np.array([10**a for a in np.arange(-3, 3, 0.25)])
lassoCV_model_ct = linear_model.LassoCV(cv=5, n_alphas=len(alpha_grid), alphas=alpha_grid, fit_intercept=True, random_state=42, verbose=True, n_jobs=5).fit(train_data_ct, train_phen)

In [104]:
# based on cv results above, set alpha=100
lasso_model_z = linear_model.Lasso(alpha=lassoCV_model_z.alpha_, fit_intercept=True).fit(train_data_z, train_phen)

In [105]:
# based on cv results above, set alpha=100
lasso_model_ct = linear_model.Lasso(alpha=lassoCV_model_ct.alpha_, fit_intercept=True).fit(train_data_ct, train_phen)

In [106]:
train_preds_lasso_model_z = lasso_model_z.predict(train_data_z)
test_preds_lasso_model_z = lasso_model_z.predict(test_data_z)

train_mse_z = metrics.mean_squared_error(train_phen, train_preds_lasso_model_z)
test_mse_z = metrics.mean_squared_error(test_phen, test_preds_lasso_model_z)

train_preds_lasso_model_ct = lasso_model_ct.predict(train_data_ct)
test_preds_lasso_model_ct = lasso_model_ct.predict(test_data_ct)

train_mse_ct = metrics.mean_squared_error(train_phen, train_preds_lasso_model_ct)
test_mse_ct = metrics.mean_squared_error(test_phen, test_preds_lasso_model_ct)

print(f'Train MSE Z model: {train_mse_z:.3f}')
print(f'Test MSE Z model: {test_mse_z:.3f}')
print(f'Train MSE CT model: {train_mse_ct:.3f}')
print(f'Test MSE CT model: {test_mse_ct:.3f}')

Train MSE Z model: 0.753
Test MSE Z model: 0.766
Train MSE CT model: 0.773
Test MSE CT model: 0.779


In [107]:
diff_test_mse = test_mse_ct - test_mse_z

In [108]:
print(f'Cortical thickness - Deviation Test MSE: {diff_test_mse:.3f}')

Cortical thickness - Deviation Test MSE: 0.014


# Ridge (Linear Regression + L2 Regularization)

In [109]:
# RidgeCV uses generalized cross validation to select hyperparameter alpha 
with warnings.catch_warnings():
    # ignore matrix decomposition errors
    warnings.simplefilter("ignore")
    ridgeCV_model_z = linear_model.RidgeCV(alphas=(0.1, 1.0, 10.0), fit_intercept=True, cv=5).fit(train_data_z, train_phen)

In [110]:
# RidgeCV uses generalized cross validation to select hyperparameter alpha 
with warnings.catch_warnings():
    # ignore matrix decomposition errors
    warnings.simplefilter("ignore")
    ridgeCV_model_ct = linear_model.RidgeCV(alphas=(0.1, 1.0, 10.0), fit_intercept=True, cv=5).fit(train_data_ct, train_phen)

In [111]:
ridge_alpha_z = ridgeCV_model_z.alpha_
print(f'CV Selected Alpha Z model = {ridge_alpha_z:.3f}')

CV Selected Alpha Z model = 1.000


In [112]:
ridge_alpha_ct = ridgeCV_model_ct.alpha_
print(f'CV Selected Alpha CT model = {ridge_alpha_ct:.3f}')

CV Selected Alpha CT model = 10.000


In [113]:
ridge_model_z = linear_model.Ridge(alpha=ridge_alpha_z, fit_intercept=True).fit(train_data_z, train_phen)

In [114]:
ridge_model_ct = linear_model.Ridge(alpha=ridge_alpha_ct, fit_intercept=True).fit(train_data_ct, train_phen)

In [115]:
train_preds_ridge_model_z = ridge_model_z.predict(train_data_z)
test_preds_ridge_model_z = ridge_model_z.predict(test_data_z)

train_mse_z = metrics.mean_squared_error(train_phen, train_preds_ridge_model_z)
test_mse_z = metrics.mean_squared_error(test_phen, test_preds_ridge_model_z)

train_preds_ridge_model_ct = ridge_model_ct.predict(train_data_ct)
test_preds_ridge_model_ct = ridge_model_ct.predict(test_data_ct)

train_mse_ct = metrics.mean_squared_error(train_phen, train_preds_ridge_model_ct)
test_mse_ct = metrics.mean_squared_error(test_phen, test_preds_ridge_model_ct)

print(f'Train MSE Z model: {train_mse_z:.3f}')
print(f'Test MSE Z model: {test_mse_z:.3f}')
print(f'Train MSE CT model: {train_mse_ct:.3f}')
print(f'Test MSE CT model: {test_mse_ct:.3f}')

Train MSE Z model: 0.717
Test MSE Z model: 0.801
Train MSE CT model: 0.756
Test MSE CT model: 0.788


In [116]:
diff_test_mse = test_mse_ct - test_mse_z

In [117]:
print(f'Cortical thickness - Deviation Test MSE: {diff_test_mse:.3f}')

Cortical thickness - Deviation Test MSE: -0.012


# Elastic Net (Linear Regression + L1/L2 Regularization)

In [None]:
# RidgeCV uses generalized cross validation to select hyperparameter alpha 
elasticnetCV_model_z = linear_model.ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, .99, 1], cv=5, n_alphas=len(alpha_grid), alphas=alpha_grid, random_state=42, verbose=True, n_jobs=5).fit(train_data_z, train_phen)

In [None]:
# RidgeCV uses generalized cross validation to select hyperparameter alpha 
elasticnetCV_model_ct = linear_model.ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, .99, 1], cv=5, n_alphas=len(alpha_grid), alphas=alpha_grid, random_state=42, verbose=True, n_jobs=5).fit(train_data_ct, train_phen)

In [120]:
print(f'CV selected alpha Z model {elasticnetCV_model_z.alpha_:.3f}')
print(f'Elastic net L1 ratio Z model {elasticnetCV_model_z.l1_ratio_:.3f}')
print(f'CV selected alpha CT model {elasticnetCV_model_ct.alpha_:.3f}')
print(f'Elastic net L1 ratio CT model {elasticnetCV_model_ct.l1_ratio_:.3f}')

CV selected alpha Z model 0.100
Elastic net L1 ratio Z model 0.100
CV selected alpha CT model 0.010
Elastic net L1 ratio CT model 0.100


In [121]:
elasticnet_model_z = linear_model.ElasticNet(alpha=elasticnetCV_model_z.alpha_, l1_ratio=elasticnetCV_model_z.l1_ratio_, fit_intercept=True, random_state=42).fit(train_data_z, train_phen)

train_preds_en_model_z = elasticnet_model_z.predict(train_data_z)
test_preds_en_model_z = elasticnet_model_z.predict(test_data_z)

train_mse_z = metrics.mean_squared_error(train_phen, train_preds_en_model_z)
test_mse_z = metrics.mean_squared_error(test_phen, test_preds_en_model_z)

elasticnet_model_ct = linear_model.ElasticNet(alpha=elasticnetCV_model_ct.alpha_, l1_ratio=elasticnetCV_model_ct.l1_ratio_, fit_intercept=True, random_state=42).fit(train_data_ct, train_phen)

train_preds_en_model_ct = elasticnet_model_ct.predict(train_data_ct)
test_preds_en_model_ct = elasticnet_model_ct.predict(test_data_ct)

train_mse_ct = metrics.mean_squared_error(train_phen, train_preds_en_model_ct)
test_mse_ct = metrics.mean_squared_error(test_phen, test_preds_en_model_ct)

print(f'Train MSE Z model: {train_mse_z:.3f}')
print(f'Test MSE Z model: {test_mse_z:.3f}')
print(f'Train MSE CT model: {train_mse_ct:.3f}')
print(f'Test MSE CT model: {test_mse_ct:.3f}')

Train MSE Z model: 0.758
Test MSE Z model: 0.766
Train MSE CT model: 0.775
Test MSE CT model: 0.779


In [122]:
diff_test_mse = test_mse_ct - test_mse_z

In [123]:
print(f'Cortical thickness - Deviation Test MSE: {diff_test_mse:.3f}')

Cortical thickness - Deviation Test MSE: 0.014
