Skip to content

Commit

Permalink
Merge pull request #395 from pykale/add_main_multisite_neuroimg_adapt
Browse files Browse the repository at this point in the history
Create main.py, cross_validation for multisite_neuroimg_adapt example
  • Loading branch information
haipinglu committed Oct 4, 2023
2 parents 0139b14 + d47c5d1 commit cc561ef
Show file tree
Hide file tree
Showing 7 changed files with 254 additions and 82 deletions.
8 changes: 8 additions & 0 deletions docs/source/kale.evaluate.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@ Evaluate
Submodules
----------

kale.evaluate.cross\_validation module
--------------------------------------

.. automodule:: kale.evaluate.cross_validation
:members:
:undoc-members:
:show-inheritance:

kale.evaluate.metrics module
----------------------------

Expand Down
12 changes: 4 additions & 8 deletions examples/multisite_neuroimg_adapt/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,10 @@ autism detection.

### References

[1] Cameron Craddock, Yassine Benhajali, Carlton Chu, Francois Chouinard, Alan Evans, András Jakab, Budhachandra Singh
Khundrakpam, John David Lewis, Qingyang Li, Michael Milham, Chaogan Yan, Pierre Bellec (2013). The Neuro Bureau
Preprocessing Initiative: open sharing of preprocessed neuroimaging data and derivatives. In *Neuroinformatics 2013*,
Stockholm, Sweden.
[1] Craddock C., Benhajali Y., Chu C., Chouinard F., Evans A., Jakab A., Khundrakpam BS., Lewis JD., Li Q., Milham M., Yan C. and Bellec P. (2013). [The Neuro Bureau Preprocessing Initiative: Open Sharing of Preprocessed Neuroimaging Data and Derivatives](https://doi.org/10.3389/conf.fninf.2013.09.00041). Frontiers in Neuroinformatics, 7.

[2] Abraham, A., Pedregosa, F., Eickenberg, M., Gervais, P., Mueller, A., Kossaifi, J., ... & Varoquaux, G. (2014).
Machine learning for neuroimaging with scikit-learn. *Frontiers in neuroinformatics*, 14.
[2] Abraham A., Pedregosa F., Eickenberg M., Gervais P., Mueller A., Kossaifi J., Gramfort A., Thirion B. and Varoquaux G. (2014). [Machine Learning for Neuroimaging with scikit-learn](https://doi.org/10.3389/fninf.2014.00014). Frontiers in Neuroinformatics, 8.

[3] Zhou, S., Li, W., Cox, C.R., & Lu, H. (2020). [Side Information Dependence as a Regularizer for Analyzing Human Brain Conditions across Cognitive Experiments](https://ojs.aaai.org//index.php/AAAI/article/view/6179). in *AAAI 2020*, New York, USA.
[3] Zhou S., Li W., Cox C. and Lu H. (2020). [Side Information Dependence as a Regularizer for Analyzing Human Brain Conditions across Cognitive Experiments](https://doi.org/10.1609/aaai.v34i04.6179). Proceedings of the AAAI Conference on Artificial Intelligence, 34(04), 6957-6964.

[4] Zhou, S. (2022). [Interpretable Domain-Aware Learning for Neuroimage Classification](https://etheses.whiterose.ac.uk/31044/1/PhD_thesis_ShuoZhou_170272834.pdf) (Doctoral dissertation, University of Sheffield).
[4] Zhou S. (2022). [Interpretable Domain-Aware Learning for Neuroimage Classification](https://etheses.whiterose.ac.uk/31044/1/PhD_thesis_ShuoZhou_170272834.pdf) (Doctoral Dissertation, University of Sheffield).
5 changes: 5 additions & 0 deletions examples/multisite_neuroimg_adapt/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@
# options: {rois_aal, rois_cc200, rois_cc400, rois_dosenbach160, rois_ez, rois_ho, rois_tt}
_C.DATASET.SITE_IDS = None # list of site ids to use, if None, use all sites
_C.DATASET.TARGET = "NYU" # target site ids, e.g. "UM_1", "UCLA_1", "USM"
# ---------------------------------------------------------------------------- #
# Solver
# ---------------------------------------------------------------------------- #
_C.SOLVER = CfgNode()
_C.SOLVER.SEED = 2023
# ---------------------------------------------------------------------------- #
# Machine learning pipeline
# ---------------------------------------------------------------------------- #
Expand Down
97 changes: 97 additions & 0 deletions examples/multisite_neuroimg_adapt/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
"""
Autism Detection: Domain Adaptation for Multi-Site Neuroimaging Data Analysis
References:
[1] Craddock C., Benhajali Y., Chu C., Chouinard F., Evans A., Jakab A., Khundrakpam BS., Lewis JD., Li Q., Milham M., Yan C. and Bellec P. (2013). The Neuro Bureau Preprocessing Initiative: Open Sharing of Preprocessed Neuroimaging Data and Derivatives. Frontiers in Neuroinformatics, 7. https://doi.org/10.3389/conf.fninf.2013.09.00041
[2] Abraham A., Pedregosa F., Eickenberg M., Gervais P., Mueller A., Kossaifi J., Gramfort A., Thirion B. and Varoquaux G. (2014). Machine Learning for Neuroimaging with scikit-learn. Frontiers in Neuroinformatics, 8. https://doi.org/10.3389/fninf.2014.00014
[3] Zhou S., Li W., Cox C. and Lu H. (2020). Side Information Dependence as a Regularizer for Analyzing Human Brain Conditions across Cognitive Experiments. Proceedings of the AAAI Conference on Artificial Intelligence, 34(04), 6957-6964. https://doi.org/10.1609/aaai.v34i04.6179
[4] Zhou S. (2022). Interpretable Domain-Aware Learning for Neuroimage Classification (Doctoral Dissertation, University of Sheffield). https://etheses.whiterose.ac.uk/31044/1/PhD_thesis_ShuoZhou_170272834.pdf
"""
import argparse
import os

import numpy as np
import pandas as pd
from config import get_cfg_defaults
from nilearn.connectome import ConnectivityMeasure
from nilearn.datasets import fetch_abide_pcp
from sklearn.linear_model import RidgeClassifier

import kale.utils.seed as seed
from kale.evaluate import cross_validation
from kale.pipeline.multi_domain_adapter import CoIRLS


def arg_parse(argv=None):
    """Parse the command-line arguments of this example.

    Args:
        argv (list of str, optional): Argument strings to parse. Defaults to None, in which
            case argparse reads them from ``sys.argv[1:]``. Passing an explicit list makes the
            parser usable from notebooks and tests.

    Returns:
        argparse.Namespace: Parsed arguments; ``cfg`` holds the path to the config file.
    """
    parser = argparse.ArgumentParser(
        description="Autism Detection: Domain Adaptation for Multi-Site Neuroimaging Data Analysis"
    )
    parser.add_argument("--cfg", required=True, help="path to config file", type=str)
    return parser.parse_args(argv)


def main():
    """Run the multi-site autism detection example end to end.

    The function reads the config file given on the command line, fetches the ABIDE
    time series with nilearn, loads the time-series files that exist on disk, turns them
    into vectorized correlation features, and prints the results returned by
    ``cross_validation.leave_one_group_out`` for a ``RidgeClassifier`` baseline and for
    ``CoIRLS`` with ``use_domain_adaptation=True``.

    Raises:
        FileNotFoundError: If none of the expected time-series files exists on disk.
    """
    args = arg_parse()

    # ---- Set up configs ----
    cfg = get_cfg_defaults()
    cfg.merge_from_file(args.cfg)
    cfg.freeze()
    seed.set_seed(cfg.SOLVER.SEED)

    # ---- Fetch ABIDE fMRI timeseries ----
    # The return value is not used; the files are read back from disk below.
    fetch_abide_pcp(
        data_dir=cfg.DATASET.ROOT,
        pipeline=cfg.DATASET.PIPELINE,
        band_pass_filtering=True,
        global_signal_regression=False,
        derivatives=cfg.DATASET.ATLAS,
        quality_checked=False,
        SITE_ID=cfg.DATASET.SITE_IDS,
        verbose=1,
    )

    # ---- Read Phenotypic data ----
    abide_dir = os.path.join(cfg.DATASET.ROOT, "ABIDE_pcp")
    pheno_file = os.path.join(abide_dir, "Phenotypic_V1_0b_preprocessed1.csv")
    pheno_info = pd.read_csv(pheno_file, index_col=0)

    # ---- Read timeseries from files ----
    # NOTE(review): "filt_noglobal" presumably mirrors the band_pass_filtering=True /
    # global_signal_regression=False options passed above - confirm against nilearn if they change.
    data_dir = os.path.join(abide_dir, cfg.DATASET.PIPELINE, "filt_noglobal")
    use_idx = []
    time_series = []
    for idx, file_id in pheno_info["FILE_ID"].items():
        data_path = os.path.join(data_dir, "%s_%s.1D" % (file_id, cfg.DATASET.ATLAS))
        # Only rows whose time-series file is present on disk are kept.
        if os.path.exists(data_path):
            time_series.append(np.loadtxt(data_path, skiprows=0))
            use_idx.append(idx)

    if not time_series:
        # Fail here with an actionable message instead of passing an empty list downstream.
        raise FileNotFoundError(
            "No '*_%s.1D' time series files found in %s" % (cfg.DATASET.ATLAS, data_dir)
        )

    # ---- Use "DX_GROUP" (autism vs control) as labels, and "SITE_ID" as covariates ----
    pheno = pheno_info.loc[use_idx, ["SITE_ID", "DX_GROUP"]].reset_index(drop=True)
    labels = pheno["DX_GROUP"].values
    sites = pheno["SITE_ID"].values

    # ---- Extracting Brain Networks Features ----
    correlation_measure = ConnectivityMeasure(kind="correlation", vectorize=True)
    brain_networks = correlation_measure.fit_transform(time_series)

    # ---- Machine Learning for Multi-site Data ----
    print("Baseline")
    estimator = RidgeClassifier()
    results = cross_validation.leave_one_group_out(brain_networks, labels, sites, estimator)
    print(pd.DataFrame.from_dict(results))

    print("Domain Adaptation")
    estimator = CoIRLS(kernel=cfg.MODEL.KERNEL, lambda_=cfg.MODEL.LAMBDA_, alpha=cfg.MODEL.ALPHA)
    results = cross_validation.leave_one_group_out(
        brain_networks, labels, sites, estimator, use_domain_adaptation=True
    )
    print(pd.DataFrame.from_dict(results))


# Run the example only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
109 changes: 35 additions & 74 deletions examples/multisite_neuroimg_adapt/tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,18 @@
" - [Data loading](#Data-Preparation)\n",
" - [Construct brain networks](#Extracting-Brain-Networks-Features)\n",
"- Machine learning pipeline:\n",
" - [Baseline: Ridge classifier](#Baseline)\n",
" - [Baseline: Ridge classifier](#Baseline-Model)\n",
" - [Domain adaptation](#Domain-Adaptation)\n",
"\n",
"**Reference:**\n",
"\n",
"[1] Cameron Craddock, Yassine Benhajali, Carlton Chu, Francois Chouinard, Alan Evans, Andr\u00e1s Jakab, Budhachandra Singh Khundrakpam, John David Lewis, Qingyang Li, Michael Milham, Chaogan Yan, Pierre Bellec (2013). The Neuro Bureau Preprocessing Initiative: open sharing of preprocessed neuroimaging data and derivatives. In *Neuroinformatics 2013*, Stockholm, Sweden.\n",
"[1] Craddock C., Benhajali Y., Chu C., Chouinard F., Evans A., Jakab A., Khundrakpam BS., Lewis JD., Li Q., Milham M., Yan C. and Bellec P. (2013). [The Neuro Bureau Preprocessing Initiative: Open Sharing of Preprocessed Neuroimaging Data and Derivatives](https://doi.org/10.3389/conf.fninf.2013.09.00041). Frontiers in Neuroinformatics, 7.\n",
"\n",
"[2] Abraham, A., Pedregosa, F., Eickenberg, M., Gervais, P., Mueller, A., Kossaifi, J., ... & Varoquaux, G. (2014). Machine learning for neuroimaging with scikit-learn. *Frontiers in neuroinformatics*, 14.\n",
"[2] Abraham A., Pedregosa F., Eickenberg M., Gervais P., Mueller A., Kossaifi J., Gramfort A., Thirion B. and Varoquaux G. (2014). [Machine Learning for Neuroimaging with scikit-learn](https://doi.org/10.3389/fninf.2014.00014). Frontiers in Neuroinformatics, 8.\n",
"\n",
"[3] Zhou, S., Li, W., Cox, C.R., & Lu, H. (2020). [Side Information Dependence as a Regularizer for Analyzing Human Brain Conditions across Cognitive Experiments](https://ojs.aaai.org//index.php/AAAI/article/view/6179). in *AAAI 2020*, New York, USA. \n",
"[3] Zhou S., Li W., Cox C. and Lu H. (2020). [Side Information Dependence as a Regularizer for Analyzing Human Brain Conditions across Cognitive Experiments](https://doi.org/10.1609/aaai.v34i04.6179). Proceedings of the AAAI Conference on Artificial Intelligence, 34(04), 6957-6964.\n",
"\n",
"[4] Zhou, S. (2022). [Interpretable Domain-Aware Learning for Neuroimage Classification](https://etheses.whiterose.ac.uk/31044/1/PhD_thesis_ShuoZhou_170272834.pdf) (Doctoral dissertation, University of Sheffield)."
"[4] Zhou S. (2022). [Interpretable Domain-Aware Learning for Neuroimage Classification](https://etheses.whiterose.ac.uk/31044/1/PhD_thesis_ShuoZhou_170272834.pdf) (Doctoral Dissertation, University of Sheffield)."
],
"cell_type": "markdown"
},
Expand All @@ -50,13 +50,21 @@
" !pip uninstall --yes imgaug && pip uninstall --yes albumentations && pip install git+https://github.com/aleju/imgaug.git\n",
" !git clone https://github.com/pykale/pykale.git\n",
" %cd pykale\n",
" !pip install .[image,example] \n",
" !pip install .[image,example]\n",
" %cd examples/multisite_neuroimg_adapt\n",
"else:\n",
" print('Not running on CoLab')"
],
"cell_type": "code",
"outputs": [],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Not running on CoLab\n"
]
}
],
"execution_count": null
},
{
Expand All @@ -70,18 +78,16 @@
"metadata": {},
"source": [
"import os\n",
"\n",
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"from nilearn.datasets import fetch_abide_pcp\n",
"import pandas as pd\n",
"from config import get_cfg_defaults\n",
"from nilearn.connectome import ConnectivityMeasure\n",
"from nilearn.datasets import fetch_abide_pcp\n",
"from sklearn.linear_model import RidgeClassifier\n",
"\n",
"import sys\n",
"\n",
"from kale.utils.download import download_file_by_url\n",
"from kale.interpret import visualize"
"import kale.utils.seed as seed\n",
"from kale.evaluate import cross_validation\n",
"from kale.pipeline.multi_domain_adapter import CoIRLS"
],
"cell_type": "code",
"outputs": [],
Expand All @@ -90,11 +96,12 @@
{
"metadata": {},
"source": [
"cfg_path = \"configs/tutorial.yaml\" # Path to `.yaml` config file\n",
"\n",
"# Path to `.yaml` config file\n",
"cfg_path = \"configs/tutorial.yaml\" \n",
"cfg = get_cfg_defaults()\n",
"cfg.merge_from_file(cfg_path)\n",
"cfg.freeze()\n",
"seed.set_seed(cfg.SOLVER.SEED)\n",
"print(cfg)"
],
"cell_type": "code",
Expand All @@ -117,10 +124,10 @@
"pipeline = cfg.DATASET.PIPELINE # fmri pre-processing pipeline\n",
"atlas = cfg.DATASET.ATLAS\n",
"site_ids = cfg.DATASET.SITE_IDS\n",
"abide = fetch_abide_pcp(data_dir=root_dir, pipeline=pipeline, \n",
" band_pass_filtering=True, global_signal_regression=False, \n",
"abide = fetch_abide_pcp(data_dir=root_dir, pipeline=pipeline,\n",
" band_pass_filtering=True, global_signal_regression=False,\n",
" derivatives=atlas, quality_checked=False,\n",
" SITE_ID=site_ids, \n",
" SITE_ID=site_ids,\n",
" verbose=0)"
],
"cell_type": "code",
Expand Down Expand Up @@ -210,8 +217,6 @@
{
"metadata": {},
"source": [
"from nilearn.connectome import ConnectivityMeasure\n",
"\n",
"correlation_measure = ConnectivityMeasure(kind='correlation', vectorize=True)\n",
"brain_networks = correlation_measure.fit_transform(time_series)"
],
Expand All @@ -228,50 +233,6 @@
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": [
"from sklearn.metrics import accuracy_score\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"import torch\n",
"\n",
"def cross_validation(x, y, covariates, estimator, domain_adaptation=False):\n",
" results = {\"Target\": [], \"Num_samples\": [], \"Accuracy\": []}\n",
" unique_covariates = np.unique(covariates)\n",
" n_covariates = len(unique_covariates)\n",
" enc = OneHotEncoder(handle_unknown=\"ignore\")\n",
" covariate_mat = enc.fit_transform(covariates.reshape(-1, 1)).toarray()\n",
" \n",
" for tgt in unique_covariates:\n",
" idx_tgt = np.where(covariates == tgt)\n",
" idx_src = np.where(covariates != tgt)\n",
" x_tgt = brain_networks[idx_tgt]\n",
" x_src = brain_networks[idx_src]\n",
" y_tgt = y[idx_tgt]\n",
" y_src = y[idx_src] \n",
" \n",
" if domain_adaptation:\n",
" estimator.fit(np.concatenate((x_src, x_tgt)), y_src, \n",
" np.concatenate((covariate_mat[idx_src], covariate_mat[idx_tgt])))\n",
" else: \n",
" estimator.fit(x_src, y_src)\n",
" y_pred = estimator.predict(x_tgt)\n",
" results[\"Accuracy\"].append(accuracy_score(y_tgt, y_pred))\n",
" results[\"Target\"].append(tgt)\n",
" results[\"Num_samples\"].append(x_tgt.shape[0])\n",
" \n",
" mean_acc = sum([results[\"Num_samples\"][i] * results[\"Accuracy\"][i] for i in range(n_covariates)])\n",
" mean_acc /= x.shape[0]\n",
" results[\"Target\"].append(\"Average\")\n",
" results[\"Num_samples\"].append(x.shape[0])\n",
" results[\"Accuracy\"].append(mean_acc)\n",
" \n",
" return pd.DataFrame(results)"
],
"cell_type": "code",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": [
Expand All @@ -282,10 +243,10 @@
{
"metadata": {},
"source": [
"from sklearn.linear_model import RidgeClassifier\n",
"\n",
"estimator = RidgeClassifier()\n",
"res_df = cross_validation(brain_networks, pheno[\"DX_GROUP\"].values, pheno[\"SITE_ID\"].values, estimator)"
"results = cross_validation.leave_one_group_out(\n",
" brain_networks, pheno[\"DX_GROUP\"].values, pheno[\"SITE_ID\"].values, estimator\n",
")"
],
"cell_type": "code",
"outputs": [],
Expand All @@ -294,7 +255,7 @@
{
"metadata": {},
"source": [
"res_df"
"pd.DataFrame.from_dict(results)"
],
"cell_type": "code",
"outputs": [],
Expand All @@ -310,10 +271,10 @@
{
"metadata": {},
"source": [
"from kale.pipeline.multi_domain_adapter import CoIRLS\n",
"estimator = CoIRLS(kernel=cfg.MODEL.KERNEL, lambda_=cfg.MODEL.LAMBDA_, alpha=cfg.MODEL.ALPHA)\n",
"res_df = cross_validation(brain_networks, pheno[\"DX_GROUP\"].values, pheno[\"SITE_ID\"].values, \n",
" estimator, domain_adaptation=True)"
"results = cross_validation.leave_one_group_out(\n",
" brain_networks, pheno[\"DX_GROUP\"].values, pheno[\"SITE_ID\"].values, estimator, use_domain_adaptation=True\n",
")"
],
"cell_type": "code",
"outputs": [],
Expand All @@ -322,7 +283,7 @@
{
"metadata": {},
"source": [
"res_df"
"pd.DataFrame.from_dict(results)"
],
"cell_type": "code",
"outputs": [],
Expand Down

0 comments on commit cc561ef

Please sign in to comment.