In [9]:
# Import the script function that downloads preprocessed fMRI data
from download_abide_preprocessed_dataset import collect_and_download

In [10]:
# Download configuration (edit these values to change what is fetched)
desired_derivative = 'rois_cc200'   # derivative of interest (e.g. 'reho')
desired_pipeline = 'cpac'           # pipeline used to preprocess the data (e.g. 'cpac')
desired_strategy = 'filt_global'    # noise-removal strategy used during preprocessing

# Local destination folders, one per diagnostic group
download_asd_dir = 'abide_preprocessed_dataset/ASD'   # ASD participants
download_tdc_dir = 'abide_preprocessed_dataset/TDC'   # controls

# Diagnosis filter passed to each download; accepted values are 'asd', 'tdc', or 'both'
desired_diagnosis_asd = 'asd'
desired_diagnosis_tdc = 'tdc'

# Fetch the ASD participants first, then the controls
collect_and_download(
    desired_derivative,
    desired_pipeline,
    desired_strategy,
    download_asd_dir,
    desired_diagnosis_asd,
)
collect_and_download(
    desired_derivative,
    desired_pipeline,
    desired_strategy,
    download_tdc_dir,
    desired_diagnosis_tdc,
)

Could not find abide_preprocessed_dataset/ASD, creating now...
Retrieving: abide_preprocessed_dataset/ASD/Outputs/cpac/filt_global/rois_cc200/Pitt_0050004_rois_cc200.1D
0.245% percent complete
Retrieving: abide_preprocessed_dataset/ASD/Outputs/cpac/filt_global/rois_cc200/Pitt_0050005_rois_cc200.1D
0.490% percent complete
Retrieving: abide_preprocessed_dataset/ASD/Outputs/cpac/filt_global/rois_cc200/Pitt_0050006_rois_cc200.1D
0.735% percent complete
Retrieving: abide_preprocessed_dataset/ASD/Outputs/cpac/filt_global/rois_cc200/Pitt_0050007_rois_cc200.1D
0.980% percent complete
Retrieving: abide_preprocessed_dataset/ASD/Outputs/cpac/filt_global/rois_cc200/Pitt_0050008_rois_cc200.1D
1.225% percent complete
Retrieving: abide_preprocessed_dataset/ASD/Outputs/cpac/filt_global/rois_cc200/Pitt_0050009_rois_cc200.1D
1.471% percent complete
Retrieving: abide_preprocessed_dataset/ASD/Outputs/cpac/filt_global/rois_cc200/Pitt_0050010_rois_cc200.1D
1.716% percent complete
Retrieving: abide_preproces

In [3]:
# Organizing Features and Labels to input into ML algorithm

# Import the required modules
import os
import numpy as np

def gather_features(data_dir):
    """Load every ``.1D`` timeseries file found directly inside ``data_dir``.

    Files are visited in sorted filename order. ``os.listdir`` returns entries
    in arbitrary order, so without sorting the order of the returned list (and
    any seeded train/test split later applied to it) could change between
    machines or runs.

    Parameters
    ----------
    data_dir : str
        Directory to scan. Entries whose names do not end in ``.1D`` are
        ignored.

    Returns
    -------
    list of numpy.ndarray
        One array per ``.1D`` file, in sorted filename order, as produced by
        ``load_timeseries_data``.
    """
    features_list = []

    for file_name in sorted(os.listdir(data_dir)):
        # Skip anything that is not a timeseries file
        if not file_name.endswith('.1D'):
            continue

        subject_path = os.path.join(data_dir, file_name)
        timeseries_data = load_timeseries_data(subject_path)
        features_list.append(np.array(timeseries_data))

    return features_list

def load_timeseries_data(file_path):
    """Read the numeric text file at ``file_path`` into a NumPy array.

    The file is opened in text mode and parsed with ``np.loadtxt`` using its
    default settings.
    """
    with open(file_path, mode='r') as handle:
        return np.loadtxt(handle)

# Define a function for padding time series data
def pad_timeseries_to_length(timeseries, target_length):
    """Truncate or zero-pad ``timeseries`` along its first axis.

    Parameters
    ----------
    timeseries : array-like
        Sequence whose first axis is the one to resize. Any number of
        trailing dimensions is supported when padding.
    target_length : int
        Desired length of the first axis. Must be non-negative.

    Returns
    -------
    The first ``target_length`` entries of ``timeseries`` when it is already
    long enough (a plain slice of the input); otherwise a new NumPy array
    with rows of zeros appended so that its first axis has ``target_length``
    entries.

    Raises
    ------
    ValueError
        If ``target_length`` is negative. A negative value would otherwise
        silently slice entries off the end instead of resizing.
    """
    if target_length < 0:
        raise ValueError(f"target_length must be non-negative, got {target_length}")

    current_length = len(timeseries)
    if current_length >= target_length:
        return timeseries[:target_length]

    # Build the zero block from the trailing dimensions so 1-D and N-D inputs
    # are handled the same way as the 2-D case
    timeseries = np.asarray(timeseries)
    padding = np.zeros((target_length - current_length,) + timeseries.shape[1:])
    return np.concatenate([timeseries, padding], axis=0)

# ASD group: folder holding the downloaded preprocessed data, then features and labels
download_asd_dir = 'abide_preprocessed_dataset/ASD/Outputs/cpac/filt_global/rois_cc200'
features_asd = gather_features(download_asd_dir)
labels_asd = np.ones(len(features_asd))  # 1 marks ASD

# TDC group: folder holding the downloaded preprocessed data, then features and labels
download_tdc_dir = 'abide_preprocessed_dataset/TDC/Outputs/cpac/filt_global/rois_cc200'
features_tdc = gather_features(download_tdc_dir)
labels_tdc = np.zeros(len(features_tdc))  # 0 marks TDC

# Combine the two groups, ASD subjects first and TDC subjects after
all_features = features_asd + features_tdc

# Bring every timeseries to the longest length found in the combined list
max_length = max(map(len, all_features))
all_features_padded = [pad_timeseries_to_length(ts, max_length) for ts in all_features]

# Stack the padded timeseries into one array, then flatten each subject into a
# single row so the result is the 2D matrix scikit-learn estimators expect
all_features = np.array(all_features_padded).reshape((len(all_features_padded), -1))

# Labels follow the same ASD-then-TDC order as the feature rows
all_labels = np.concatenate([labels_asd, labels_tdc])

# Sanity check: features and labels must have matching row counts
print("Number of rows in all_features:", all_features.shape[0])
print("Number of rows in all_labels:", all_labels.shape[0])
print("Shape of all_features: ", all_features.shape)
print("Shape of all_labels: ", all_labels.shape)

Number of rows in all_features: 884
Number of rows in all_labels: 884
Shape of all_features:  (884, 63200)
Shape of all_labels:  (884,)


In [6]:
# Part of scikit-learn library; Support Vector Classification (SVC), a type of SVM used for classification tasks
from sklearn.svm import SVC 

# Function from scikit-learn; splits datasets into training and testing sets
from sklearn.model_selection import train_test_split 

# Function from scikit-learn; computes parameters relating to the model's performance
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# TRAINING AND TESTING SVM MODEL

# General function for training/testing SVM model
def train_test_svm(X_train, X_test, y_train, y_test):
    """Fit a default ``SVC`` on the training split and score it on the test split.

    Intended for binary classification: the confusion matrix is unpacked as
    ``tn, fp, fn, tp``, so the second class in sorted order is treated as the
    positive class.

    Parameters
    ----------
    X_train, X_test
        Feature matrices for the training and test splits.
    y_train, y_test
        Labels for the training and test splits.

    Returns
    -------
    tuple
        ``(accuracy, conf_matrix, sensitivity, specificity, precision,
        f1_score, class_report)``. A ratio whose denominator is zero (for
        example precision when nothing is predicted positive) is reported as
        ``0.0`` instead of NaN.
    """
    # Initialize and train the SVM model
    svm_model = SVC()
    svm_model.fit(X_train, y_train)

    # Test the SVM model
    y_predictions = svm_model.predict(X_test)

    accuracy = accuracy_score(y_test, y_predictions)

    # Pin the label order to the classes seen during training so the matrix
    # keeps its 2x2 shape even when the test labels and predictions happen to
    # contain a single class; otherwise the unpacking below would fail
    conf_matrix = confusion_matrix(y_test, y_predictions, labels=svm_model.classes_)
    tn, fp, fn, tp = conf_matrix.ravel()

    # Guard every ratio against an empty denominator
    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0.0  # recall
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0.0
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    if (precision + sensitivity) > 0:
        f1 = 2 * (precision * sensitivity) / (precision + sensitivity)
    else:
        f1 = 0.0

    class_report = classification_report(y_test, y_predictions)

    return accuracy, conf_matrix, sensitivity, specificity, precision, f1, class_report

# Function for training/testing fMRI data with SVM model
def train_test_fMRI_data(fMRI_features, labels):
    """Hold out 20% of the samples, train and test the SVM, and print its metrics.

    Nothing is returned; the results are written to standard output.
    """
    # Fixed seed keeps the 80/20 partition the same on every call
    train_X, test_X, train_y, test_y = train_test_split(
        fMRI_features, labels, test_size=0.2, random_state=42
    )

    # Metrics come back in the same order as the captions below
    metrics = train_test_svm(train_X, test_X, train_y, test_y)

    captions = (
        "Accuracy: ",
        "Confusion Matrix: \n",
        "Sensitivity: ",
        "Specificity: ",
        "Precision: ",
        "F1 Score: ",
        "Classification Report: \n",
    )
    for caption, value in zip(captions, metrics):
        print(caption, value)

In [7]:
# Split the flattened features and labels, fit the SVM, and print its evaluation metrics
train_test_fMRI_data(all_features, all_labels)

Accuracy:  0.519774011299435
Confusion Matrix: 
 [[91  0]
 [85  1]]
Sensitivity:  0.011627906976744186
Specificity:  1.0
Precision:  1.0
F1 Score:  0.022988505747126436
Classification Report: 
               precision    recall  f1-score   support

         0.0       0.52      1.00      0.68        91
         1.0       1.00      0.01      0.02        86

    accuracy                           0.52       177
   macro avg       0.76      0.51      0.35       177
weighted avg       0.75      0.52      0.36       177

