# Classification of low/high confidence in Iowa Gambling Task

In [1]:
import os
import pip
import sys
print(sys.executable)
#Check that we have something installed.
import nilearn
import nibabel as nib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
import glob
import seaborn as sns
from nilearn.glm.first_level import FirstLevelModel
from nilearn import plotting
import pickle
import os

/usr/bin/python


ModuleNotFoundError: No module named 'nilearn'

In [2]:
# Define the base data directory
data_dir = os.path.join(os.path.expanduser('/work/fMRI_data/BIDS_2024E'), 'derivatives')
print("Data directory:", data_dir)

# Dictionaries to store relevant files for each subject
anat_data = {}
func_data = {}

# Loop through all folders in the derivatives directory that start with "sub-XXXX" and are directories
for subject_folder in sorted(os.listdir(data_dir)):
    subject_path = os.path.join(data_dir, subject_folder)
    if subject_folder.startswith("sub-") and os.path.isdir(subject_path):  # Ensure it's a directory
        # Define the session folder path
        ses_dir = os.path.join(subject_path, 'ses-001')
        
        # Define paths for anat and func folders within each session
        anat_dir = os.path.join(ses_dir, 'anat')
        func_dir = os.path.join(ses_dir, 'func')
        
        # Store anat data paths if folder exists
        if os.path.exists(anat_dir):
            anat_data[subject_folder] = sorted(os.listdir(anat_dir))
        
        # Load functional data
        if os.path.exists(func_dir):
            # Initialize subject-specific dictionaries for func data
            func_data[subject_folder] = {}
            
            # Define paths for BOLD, brain mask, and confounds files
            bold_file = [f for f in os.listdir(func_dir) if 'desc-preproc_bold.nii.gz' in f]
            confounds_file = [f for f in os.listdir(func_dir) if 'desc-confounds_timeseries.tsv' in f]
            brain_mask_file = [f for f in os.listdir(func_dir) if f.endswith('desc-brain_mask.nii.gz')]  # Search for exact mask filename ending

            # Load confounds data if the file is present
            if confounds_file:
                confounds_path = os.path.join(func_dir, confounds_file[0])
                confounds_df = pd.read_csv(confounds_path, sep='\t')
                func_data[subject_folder]['confounds'] = confounds_df

            # Store BOLD and brain mask paths if files are present
            if bold_file:
                bold_path = os.path.join(func_dir, bold_file[0])
                func_data[subject_folder]['bold'] = bold_path
            
            if brain_mask_file:  # Check and assign brain mask if found
                brain_mask_path = os.path.join(func_dir, brain_mask_file[0])
                func_data[subject_folder]['brain_mask'] = brain_mask_path

# Print out loaded data structure for verification
print("Anatomical Data:", anat_data)
print("Functional Data:", func_data)


Data directory: /work/fMRI_data/BIDS_2024E/derivatives
Anatomical Data: {'sub-0136': ['sub-0136_ses-001_acq-T1sequence_run-1_desc-brain_mask.json', 'sub-0136_ses-001_acq-T1sequence_run-1_desc-brain_mask.nii.gz', 'sub-0136_ses-001_acq-T1sequence_run-1_desc-preproc_T1w.json', 'sub-0136_ses-001_acq-T1sequence_run-1_desc-preproc_T1w.nii.gz', 'sub-0136_ses-001_acq-T1sequence_run-1_dseg.nii.gz', 'sub-0136_ses-001_acq-T1sequence_run-1_from-MNI152NLin2009cAsym_to-T1w_mode-image_xfm.h5', 'sub-0136_ses-001_acq-T1sequence_run-1_from-T1w_to-MNI152NLin2009cAsym_mode-image_xfm.h5', 'sub-0136_ses-001_acq-T1sequence_run-1_label-CSF_probseg.nii.gz', 'sub-0136_ses-001_acq-T1sequence_run-1_label-GM_probseg.nii.gz', 'sub-0136_ses-001_acq-T1sequence_run-1_label-WM_probseg.nii.gz', 'sub-0136_ses-001_acq-T1sequence_run-1_space-MNI152NLin2009cAsym_desc-brain_mask.json', 'sub-0136_ses-001_acq-T1sequence_run-1_space-MNI152NLin2009cAsym_desc-brain_mask.nii.gz', 'sub-0136_ses-001_acq-T1sequence_run-1_space-MNI152

In [6]:
# Define the base directory for events data and load behavioral data
events_data_dir = os.path.expanduser('/work/fMRI_data/BIDS_2024E')
behavioral_data = pd.read_csv("/work/SabrinaSchrollZakiHansen#5217/ACN_F24/ACN_Portfolio_2/in/detailed_confidence_data.csv")

# Initialize a dictionary to store each subject's merged DataFrame
subject_events_data = {}

# Process each subject folder to collect and merge events with behavioral data
for subject_folder in sorted(os.listdir(events_data_dir)):
    if not subject_folder.startswith("sub-"):
        continue

    # Identify subject numeric ID
    subj_numeric_id = int(subject_folder.split('-')[1])
    
    # Define path to session's functional data folder
    ses_dir = os.path.join(events_data_dir, subject_folder, 'ses-001', 'func')
    if not os.path.isdir(ses_dir):
        continue

    # Initialize a list to gather DataFrames for each session
    subject_data_list = []
    
    # Process each event file in the session directory
    for event_file in filter(lambda f: f.endswith("events.tsv"), os.listdir(ses_dir)):
        event_path = os.path.join(ses_dir, event_file)
        events_df = pd.read_csv(event_path, sep='\t')

        # Filter to include only "decks" trial type
        events_df = events_df[events_df['trial_type'] == 'decks']

        # Extract run number and filter behavioral data accordingly
        run_number = int(event_file.split('_')[3].split('-')[1])
        behavioral_subset = behavioral_data[
            (behavioral_data['subject_id'] == subj_numeric_id) &
            (behavioral_data['session'] == run_number)
        ][['onset_decks', 'confidence']]
        
        # Merge events with behavioral data based on onset times
        merged_df = events_df.merge(
            behavioral_subset, how='left', left_on='onset', right_on='onset_decks'
        ).loc[:, ['onset', 'duration', 'confidence']]  # Retain only necessary columns
        
        # Map confidence to HC and LC, generate trial_type with subject ID
        def generate_trial_type(row, trial_idx):
            expression = "HighConf" if row['confidence'] == 2 else "LowConf"
            return f"tt_{str(trial_idx).zfill(2)}ID{subj_numeric_id}STIM{expression}"

        # Apply the function to generate the trial_type for each trial in the DataFrame
        merged_df['trial_type'] = [
            generate_trial_type(row, idx) for idx, row in merged_df.iterrows()
        ]
        
        # Append this session's DataFrame to the list
        subject_data_list.append(merged_df)
    
    # Concatenate all session DataFrames for the subject
    subject_events_data[subj_numeric_id] = pd.concat(subject_data_list, ignore_index=True)

# Now subject_events_data contains merged event and behavioral data with custom trial_type using subject ID


In [6]:
subject_events_data[136]

Unnamed: 0,onset,duration,confidence,trial_type
0,1.004390,5.0,2,tt_00ID136STIMHighConf
1,15.985176,5.0,2,tt_01ID136STIMHighConf
2,31.019258,5.0,2,tt_02ID136STIMHighConf
3,46.019909,5.0,2,tt_03ID136STIMHighConf
4,61.020791,5.0,1,tt_04ID136STIMLowConf
...,...,...,...,...
155,526.038804,5.0,2,tt_35ID136STIMHighConf
156,541.039531,5.0,2,tt_36ID136STIMHighConf
157,556.040207,5.0,1,tt_37ID136STIMLowConf
158,571.041003,5.0,2,tt_38ID136STIMHighConf


In [7]:
# Initialize dictionaries to store models and t-maps for each subject
flm_models = {}
t_maps = {}

# Directory to save the models and t-maps
output_dir = '/work/SabrinaSchrollZakiHansen#5217/ACN_F24'
os.makedirs(output_dir, exist_ok=True)

# Loop through each subject in func_data
for subject_id, files in func_data.items():
    # Check if necessary data files are available
    if 'bold' in files and 'confounds' in files and 'brain_mask' in files:
        
        # Set paths and load the data
        func = files['bold']
        func_mask = files['brain_mask']
        
        # Clean the confounds DataFrame by selecting relevant columns and handling NaNs
        confounds_cleaned = files['confounds'].loc[:, ['trans_x', 'trans_y', 'trans_z', 'rot_x', 'rot_y', 'rot_z']]
        confounds_cleaned = confounds_cleaned.fillna(method='ffill').fillna(method='bfill')
        
        # Access events data for this specific subject
        subj_numeric_id = int(subject_id.split('-')[1])
        events_df = subject_events_data[subj_numeric_id]
        
        # Initialize FirstLevelModel with subject-specific mask and parameters
        flm_face = FirstLevelModel(
            t_r=0.7,
            slice_time_ref=0.5,
            mask_img=func_mask,
            hrf_model='glover',
            drift_model='cosine',
            high_pass=0.01,
            smoothing_fwhm=None,
            minimize_memory=True,
            noise_model='ols'
        )
        
        # Fit the model
        flm_face.fit(func, events=events_df, confounds=confounds_cleaned)
        
        # Generate a t-map for the first trial
        contrast_name = flm_face.design_matrices_[0].columns[0]
        tmap = flm_face.compute_contrast(contrast_name, output_type='effect_size')
        
        # Store the model and tmap in dictionaries
        flm_models[subject_id] = flm_face
        t_maps[subject_id] = tmap
        
        # Save each flm model and tmap individually as files
        with open(os.path.join(output_dir, f'{subject_id}_flm_model.pkl'), 'wb') as model_file:
            pickle.dump(flm_face, model_file)
        
        tmap.to_filename(os.path.join(output_dir, f'{subject_id}_tmap.nii.gz'))

        print(f"Model and t-map for {subject_id} saved.")

# Save the dictionaries for future reference
with open(os.path.join(output_dir, 'all_flm_models.pkl'), 'wb') as f:
    pickle.dump(flm_models, f)

with open(os.path.join(output_dir, 'all_t_maps.pkl'), 'wb') as f:
    pickle.dump(t_maps, f)

print("All models and t-maps saved.")


  confounds_cleaned = confounds_cleaned.fillna(method='ffill').fillna(method='bfill')


KeyboardInterrupt: 

In [8]:
# Load all models
with open(os.path.join(output_dir, 'all_flm_models.pkl'), 'rb') as f:
    flm_models = pickle.load(f)

# Load all t-maps
with open(os.path.join(output_dir, 'all_t_maps.pkl'), 'rb') as f:
    t_maps = pickle.load(f)

In [42]:
flm_models["sub-0136"]

In [22]:
from nilearn.masking import apply_mask, unmask

R_face = []
for col in events_df_filt['trial_type']:
    img = flm_face.compute_contrast(col, output_type='effect_size')
    R_face.append(apply_mask(img, func_mask))
    
R_face = np.vstack(R_face)


ValueError: Given mask is not made of 2 values: [-465107.75938662 -453276.79413062 -375693.78344587 ...  260379.15969583
  317794.6654359   569603.21460985]. Cannot interpret as true or false.

In [18]:
# Load all t-maps
with open(('/work/SabrinaSchrollZakiHansen#5217/ACN_F24/ACN_Portfolio_2/out/all_masked_data.pkl'), 'rb') as f:
    R_face_load = pickle.load(f)

In [40]:
R_face_load["sub-0136"].shape

(79, 111452)

0      2
1      2
2      2
3      2
4      1
      ..
155    2
156    2
157    1
158    2
159    2
Name: confidence, Length: 160, dtype: int64

In [38]:
# Convert your data into numpy arrays for machine learning
X = np.array(R_face_load["sub-0136"])  # Features: brain activity patterns
y = np.array(subject_events_data[136]["confidence"])  # Labels: high confidence, low confidence

# Optionally, you might want to split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


ValueError: Found input variables with inconsistent numbers of samples: [79, 160]