In [2]:
# Import necessary libraries

import xml.etree.ElementTree as ET # Library for XML preprocessing 
import os # ???
import requests # ???
from nilearn import image # Part of Nilearn library; used for neuroimaging image manipulation and analysis
from sklearn.preprocessing import StandardScaler # Part of scikit-learn library; provides tools for data preprocessing
from sklearn.decomposition import PCA # Part of scikit-learn library; provides tools for diensionality reduction
from sklearn.svm import SVC # ???
from sklearn.model_selection import train_test_split # ???
from sklearn.metrics import accuracy_score # ???
import pywt # Library used for wavelet transform; used for time-frequency analysis on EEG data
import numpy as np # Library for handling complex arrays & mathematical operations
import shutil # ???

In [3]:
# DATA COLLECTION (Getting Catalog_XML file of fMRI dataset)

# Specify the path to the XML file
xml_file_path = "ABIDE1_Dataset.xml"   # ABIDE I (fMRI) Dataset

# Read the XML data from the file
with open(xml_file_path, "r") as xml_file:
    # Read the XML content, and remove any leading non-XML characters
    xml_data = xml_file.read().lstrip()

# Parse the XML data
root = ET.fromstring(xml_data)

In [4]:
# DATA PREPROCESSING (function for fMRI data preprocessing)

def preprocess_fmri_data(fmri_data_path):
    # Implement preprocessing steps (motion correction, slice timing correction, etc.)
    
    # Load the fMRI data
    fMRI_data = image.load_img(fmri_data_path)

    # Motion Correction - ???
    fMRI_data_corrected = image.smooth_ing(fMRI_data, fwhm=5)

    # Slice Timing Correction - ???
    # Assumes TR (Repetition Time) is known - ???
    TR = 0 # replace with the actual TR value
    fMRI_data_corrected = image.concat_imgs(
        image.slicetimer.slice_time_corrector(fMRI_data_corrected, TR=TR))

    # Spatial Smoothing - ???
    fMRI_data_smoothed = image.smooth_ing(fMRI_data_corrected, fwhm=8)

    # Intensity Normalization - ???
    scaler = StandardScaler()
    fMRI_data_normalized = scaler.fit_transform(fMRI_data_smoothed.get_fdata())

    # Any other preprocessing methods required ???

    return fMRI_data_normalized

In [5]:
# DATA PREPROCESSING (function for EEG data preprocessing)

def preprocess_eeg_data(eeg_data):
    # Implement preprocessing steps for EEG data - ???
    # Perform Time-Frequency Analysis for EEG (e.g., using wavelet transform) - ???
    # Assuming EEG_data is in the shape (channels, time points)
    # Adjust parameters as needed based on your data
    wavelet_transformed_data = pywt.cwt(eeg_data, scales=np.arange(1, 128), wavelet='cmor')

    return wavelet_transformed_data

In [6]:
# ?????

def load_your_ROI_masks():
    pass

In [7]:
# FEATURE EXTRACTION (function for selecting 'key features' from preprocessed data)
# Currently, only for fMRI data (dataset is only fMRI data)

def feature_extraction(fMRI_data_normalized): # modify to add EEG_data_normalized as argument
    # Assuming ROIs are defined for fMRI feature extraction - ???
    ROI_masks = load_your_ROI_masks()

    # Extracting mean fMRI signals from ROIs - ???
    fMRI_ROI_signals = []
    for mask in ROI_masks:
        fMRI_ROI_signals.append(image.mean_img(fMRI_data_normalized, mask=mask))
    
    # Feature Selection using PCA (for both fMRI and EEG) - ???
    # Assuming you want to reduce dimensionality to 10 components - ???
    pca = PCA(n_components=10)
    fMRI_features_selected = pca.fit_transform(fMRI_ROI_signals)
    # eeg_features_selected = pca.fit_transform(EEG_data_normalized)

    # Resulting selected features: fMRI_features_selected, eeg_features_selected
    return fMRI_features_selected #, eeg_features_selected

In [8]:
# TRAINING AND TESTING SVM MODEL

# General function for training/testing SVM model
def train_test_svm(X_train, X_test, y_train, y_test):
    # Initialize SVM model
    svm_model = SVC()

    # Train the SVM model
    svm_model.fit(X_train, y_train)

    # Test the SVM model
    predictions = svm_model.predict(X_test)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, predictions)

    return accuracy

# Function for training/testing fMRI data with SVM model
def train_test_fMRI_data(fMRI_features, labels):
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(fMRI_features, labels, test_size=0.1, random_state=42)

    # Train and test SVM model
    accuracy = train_test_svm(X_train, X_test, y_train, y_test)

    return accuracy

In [9]:
# DATA COLLECTION (extracting fMRI in batches) AND _____

# Directory to save downloaded fMRI datasets
download_dir = "downloaded_fmri_datasets"
os.makedirs(download_dir, exist_ok=True)

# Navigate to entrySet elements with the ID "RAW"
raw_entry_sets = root.findall('.//cat:entrySet[@ID="RAW"]', namespaces={'cat': 'http://nrg.wustl.edu/catalog'})

# Iterate through RAW entrySets to extract and download fMRI data
for raw_entry_set in raw_entry_sets:
    # Navigate to entries element under the RAW entrySet
    entries_element = raw_entry_set.find('.//cat:entries', namespaces={'cat': 'http://nrg.wustl.edu/catalog'})

    # Navigate to entry elements under the entries element
    entries = entries_element.findall('.//cat:entry', namespaces={'cat': 'http://nrg.wustl.edu/catalog'})

    # Extract subject name from entrySet
    subject_name = raw_entry_set.get('ID')

    # Labels for your SVM model (modify based on your actual labels)
    labels = np.random.randint(0, 2, len(entries))

    # Initialize arrays to store features and labels
    all_features = []
    all_labels = []

    # Iterate through entries and download fMRI data in batches
    for entry in entries:
        # Extract relevant information, such as URI and name
        entry_uri = entry.get('URI')
        entry_name = entry.get('name')

        # Construct a URL based on the extracted URI
        base_url = "https://www.nitrc.org/ir/data"  
        full_url = f"{base_url}{entry_uri}"

        # Download the fMRI files using requests library
        response = requests.get(full_url, stream=True)

        # Specify the directory to save the downloaded files
        base_dir = "/workspaces/ASD-Diagnostic_Research_2023-24/ABIDE I/"
        save_dir = os.path.join(base_dir, subject_name)
        os.makedirs(save_dir, exist_ok=True)

        # Save the downloaded fMRI files with the entry name
        file_path = os.path.join(download_dir, entry_name)
        with open(file_path, 'wb') as file:
            for chunk in response.iter_content(chunk_size=128):
                file.write(chunk)

        # Perform preprocessing on the downloaded fMRI data
        preprocess_fmri_data(file_path)

        # Load the preprocessed fMRI data (modify based on actual data loading) - ???
        preprocessed_fMRI_data = image.load_img(file_path)

        fMRI_features = feature_extraction(preprocessed_fMRI_data)
        
        # EEG data - gather, preprocess, extract features - ???
        # EEG_data = np.random.rand(EEG_data_shape)
        # preprocess_eeg_data(EEG_data)
        # preprocessed_EEG_data = image.load_img(EEG_data)
        # fMRI_features, EEG_features = feature_extraction(preprocessed_fMRI_data, preprocessed_EEG_data)

        # Append features and labels - ???
        all_features.append(fMRI_features)
        all_labels.append(labels)

        # Delete the downloaded and processsed data
        shutil.rmtree(save_dir)
    

    # Stack features & labels - ???
    all_features = np.vstack(all_features)
    all_labels = np.concatenate(all_labels)

    # Train and test SVM model
    accuracy = train_test_fMRI_data(all_features, all_labels)

FileNotFoundError: [Errno 2] No such file or directory: 'downloaded_fmri_datasets/ABIDE/Stanford_51176/Stanford_51176/anat/NIfTI'