The fMRI time series are taken from https://paris-saclay-cds.github.io/autism_challenge/.

Check out their GitHub repository at: https://github.com/ramp-kits/autism/.

If you run this notebook on Binder, the data have already been downloaded automatically for you.

In [None]:
import numpy as np
import pandas as pd
import os
import pathlib

# Fetch the dataset

Below are some general instructions on how to fetch the dataset.

1. `cd /opt/Temp/`
2. `git clone https://github.com/ramp-kits/autism.git`
3. `cd autism/data/fmri/`
4. `wget -c https://zenodo.org/record/3625740/files/msdl.zip`
5. `unzip msdl.zip`

# Parse dataset 

In [None]:
# Directory the notebook is executed from.
curr_dir = pathlib.Path(".")
# Absolute path of the ``autism`` directory located inside ``curr_dir``.
rsfmri_basedir = curr_dir.joinpath("autism").resolve()

The following code is heavily based on the code provided by the competition's organizers. 

In [None]:
def parse_dataset(basedir=None):
    """Assemble the feature table and the ASD labels of the training subjects.

    Reads the CSV tables stored under ``<basedir>/data``, joins them
    column-wise on their index (first CSV column), keeps only the subjects
    listed in ``train.csv`` and splits off the ``asd`` column as the target.

    Parameters
    ----------
    basedir : str or path-like, optional
        Directory that contains the ``data`` folder. When omitted, the
        module-level ``rsfmri_basedir`` is used.

    Returns
    -------
    X : pandas.DataFrame
        One row per training subject. Columns coming from
        ``participants.csv``, ``anatomy.csv`` and ``fmri_filename.csv`` are
        prefixed with ``participants_``, ``anatomy_`` and ``fmri_``; the QC
        ``select`` columns are renamed to ``anatomy_select`` and
        ``fmri_select``. The target column is not included.
    y : numpy.ndarray
        Values of the ``participants_asd`` column, in the same row order
        as ``X``.
    """
    data_dir = pathlib.Path(rsfmri_basedir if basedir is None else basedir) / 'data'

    def _read_table(filename, prefix=None):
        # Load one CSV indexed by its first column, optionally prefixing
        # every column name so the tables stay distinguishable after joining.
        table = pd.read_csv(data_dir / filename, index_col=0)
        if prefix is not None:
            table.columns = [prefix + col for col in table.columns]
        return table

    # train.csv has no header row; its first column lists the subjects to keep
    subject_id = pd.read_csv(data_dir / 'train.csv', header=None)

    df_participants = _read_table('participants.csv', prefix='participants_')

    # load the structural and functional MRI data
    df_anatomy = _read_table('anatomy.csv', prefix='anatomy_')
    df_fmri = _read_table('fmri_filename.csv', prefix='fmri_')

    # load the QC for structural and functional MRI data and rename the
    # shared "select" column so both QC flags keep distinct names
    df_anatomy_qc = _read_table('anatomy_qc.csv').rename(columns={"select": "anatomy_select"})
    df_fmri_qc = _read_table('fmri_qc.csv').rename(columns={"select": "fmri_select"})

    X = pd.concat([df_participants, df_anatomy, df_anatomy_qc, df_fmri, df_fmri_qc], axis=1)
    # label-based selection: rows come back in the order given by train.csv
    X = X.loc[subject_id[0]]

    y = X['participants_asd']
    X = X.drop('participants_asd', axis=1)

    return X, y.values

In [None]:
# data: feature table of the subjects listed in train.csv;
# labels: the matching values of the 'participants_asd' column.
data, labels = parse_dataset()

In [None]:
# Restrict the feature table to the columns whose name begins with 'fmri'.
fmri_columns = [name for name in data.columns if name.startswith('fmri')]
fmri_data = data[fmri_columns]

In [None]:
# Per-subject entries of the 'msdl' column of fmri_filename.csv
# (prefixed with 'fmri_' when the tables were joined).
fmri_msdl_filenames = fmri_data['fmri_msdl']

In [None]:
# Read every subject's CSV (no header row) into a 2-D array.
# ``rsfmri_basedir`` is a ``pathlib.Path``, so the file path has to be built
# with the ``/`` operator; ``Path + str`` raises ``TypeError``.
fmri_series = [pd.read_csv(rsfmri_basedir / subject_filename, header=None).values
               for subject_filename in fmri_msdl_filenames]

if len({series.shape for series in fmri_series}) <= 1:
    # All arrays share one shape (or there are none): a regular stacked array.
    fmri = np.array(fmri_series)
else:
    # Shapes differ between subjects (presumably different numbers of time
    # points -- confirm against the data). ``np.array`` raises ``ValueError``
    # on such ragged input with NumPy >= 1.24, so fill a 1-D object array
    # that holds one 2-D array per subject instead.
    fmri = np.empty(len(fmri_series), dtype=object)
    for position, series in enumerate(fmri_series):
        fmri[position] = series

In [None]:
# Keep the columns whose name begins with 'anatomy', then discard the
# 'anatomy_select' QC column.
anatomy_columns = [name for name in data.columns if name.startswith('anatomy')]
anatomy = data[anatomy_columns].drop(columns='anatomy_select')

# Dump arrays

In [None]:
# ``np.save`` does not create missing directories, so make sure the output
# folder exists before writing into it.
os.makedirs('data', exist_ok=True)

# NOTE: if ``fmri`` is an object array, ``np.save`` stores it via pickle and
# it must be read back with ``np.load(..., allow_pickle=True)``.
np.save('data/fmri_autism_ts.npy', fmri)
# ``anatomy`` is a DataFrame; ``np.save`` converts it to a plain array, so
# the index and column names are not stored.
np.save('data/fmri_autism_anatomy.npy', anatomy)
np.save('data/fmri_autism_labels.npy', labels)