In [3]:
import os
import nibabel as nib
import numpy as np
import torch
from nipype.interfaces import fsl
from tqdm import tqdm
import pandas as pd

def run_bet(in_file, out_file):
    bet = fsl.BET(in_file=in_file, out_file=out_file, frac=0.3)
    return bet.run()

def run_flirt(in_file, ref_file, out_file, out_matrix_file, dof=12):
    flirt = fsl.FLIRT()
    flirt.inputs.in_file = in_file
    flirt.inputs.reference = ref_file
    flirt.inputs.out_file = out_file
    flirt.inputs.out_matrix_file = out_matrix_file
    flirt.inputs.dof = dof
    flirt.inputs.cost = 'corratio'
    return flirt.run()

def preprocess_fmri_3d_pointing(func_file, standard_file, output_dir, subject_id, session_id):
    os.makedirs(output_dir, exist_ok=True)

    func_img = nib.load(func_file)
    func_data = func_img.get_fdata()
    num_volumes = func_data.shape[3]

    subject_output_dir = os.path.join(output_dir, 'img', f"sub-{subject_id}_ses-{session_id}")
    os.makedirs(subject_output_dir, exist_ok=True)

    for i in tqdm(range(num_volumes), desc=f"Processing volumes for subject {subject_id}"):
        volume = func_data[:,:,:,i]
        volume_img = nib.Nifti1Image(volume, func_img.affine)
        volume_file = os.path.join(output_dir, f"volume_{i}.nii.gz")
        nib.save(volume_img, volume_file)

        bet_output = os.path.join(output_dir, f"bet_volume_{i}.nii.gz")
        run_bet(volume_file, bet_output)

        mni_output = os.path.join(output_dir, f"mni_volume_{i}.nii.gz")
        mni_matrix = os.path.join(output_dir, f"mni_matrix_{i}.mat")
        run_flirt(bet_output, standard_file, mni_output, mni_matrix, dof=12)

        mni_volume = nib.load(mni_output).get_fdata()
        mni_volume_norm = (mni_volume - np.mean(mni_volume)) / np.std(mni_volume)

        # Save as .pt file
        torch.save(torch.from_numpy(mni_volume_norm).float(), 
                   os.path.join(subject_output_dir, f"frame_{i:04d}.pt"))

        for file in [volume_file, bet_output, mni_output, mni_matrix]:
            if os.path.exists(file):
                os.remove(file)

def process_all_subjects(base_dir, output_dir, standard_file):
    subjects_processed = []

    for subject_folder in os.listdir(base_dir):
        subject_id = subject_folder.split('_')[0]
        session_id = subject_folder.split('_')[2]
        
        func_file = os.path.join(base_dir, subject_folder, "func2", "NIFTI", f"sub-{subject_id}_ses-{session_id}_task-rest_run-02_bold.nii.gz")
        
        if os.path.exists(func_file):
            print(f"Processing subject {subject_id}, session {session_id}")
            preprocess_fmri_3d_pointing(func_file, standard_file, output_dir, subject_id, session_id)
            subjects_processed.append((subject_id, session_id))

    return subjects_processed

def create_metadata_file(subjects_processed, output_dir):
    metadata = pd.DataFrame(subjects_processed, columns=['subject_id', 'session_id'])
    metadata['target'] = 0  # 여기에 실제 타겟 값을 설정해야 합니다
    metadata_file = os.path.join(output_dir, 'metadata', 'metafile.csv')
    os.makedirs(os.path.join(output_dir, 'metadata'), exist_ok=True)
    metadata.to_csv(metadata_file, index=False)
    print(f"Metadata file created: {metadata_file}")

# 메인 실행 코드
base_dir = "/home/minyoungxi/MINYOUNGXI/XFL/Data/OASISSamples7.1"
output_dir = "/home/minyoungxi/MINYOUNGXI/XFL/Data/OASIS_MNI_to_TRs"
standard_file = os.path.join(os.environ['FSLDIR'], 'data', 'standard', 'MNI152_T1_1mm_brain.nii.gz')

subjects_processed = process_all_subjects(base_dir, output_dir, standard_file)
create_metadata_file(subjects_processed, output_dir)

print("Preprocessing and data structuring completed.")

Processing subject OAS30033, session d0133


Processing volumes for subject OAS30033: 100%|██████████| 164/164 [13:43<00:00,  5.02s/it]


Processing subject OAS30124, session d0046


Processing volumes for subject OAS30124: 100%|██████████| 164/164 [13:22<00:00,  4.89s/it]


Processing subject OAS30064, session d0687


Processing volumes for subject OAS30064: 100%|██████████| 164/164 [13:28<00:00,  4.93s/it]


Processing subject OAS30084, session d0470


Processing volumes for subject OAS30084: 100%|██████████| 164/164 [13:19<00:00,  4.88s/it]


Processing subject OAS30088, session d0093


Processing volumes for subject OAS30088: 100%|██████████| 164/164 [13:33<00:00,  4.96s/it]


Processing subject OAS30016, session d0021


Processing volumes for subject OAS30016: 100%|██████████| 164/164 [12:44<00:00,  4.66s/it]


Processing subject OAS30011, session d0055


Processing volumes for subject OAS30011: 100%|██████████| 164/164 [13:55<00:00,  5.09s/it]


Processing subject OAS30087, session d0260


Processing volumes for subject OAS30087: 100%|██████████| 164/164 [13:35<00:00,  4.97s/it]


Processing subject OAS30061, session d0035


Processing volumes for subject OAS30061: 100%|██████████| 164/164 [13:48<00:00,  5.05s/it]

Metadata file created: /home/minyoungxi/MINYOUNGXI/XFL/Data/OASIS_MNI_to_TRs/metadata/metafile.csv
Preprocessing and data structuring completed.





# Clinical data

In [4]:
def create_metadata_file(subjects_processed, output_dir, clinical_data_path):

    clinical_data = pd.read_csv(clinical_data_path)
    
    processed_metadata = pd.DataFrame(subjects_processed, columns=['subject_id', 'session_id'])
    
    merged_data = pd.merge(
        processed_metadata,
        clinical_data,
        left_on=['subject_id', 'session_id'],
        right_on=['OASISID', 'OASIS_session_label'],
        how='left'
    )
    
    merged_data['target'] = merged_data['CDRTOT']
    
    final_metadata = merged_data[['subject_id', 'session_id', 'target', 'age at visit', 'MMSE', 'dx1', 'CDRSUM']]
    
    final_metadata = final_metadata.fillna(-1)
    
    metadata_file = os.path.join(output_dir, 'metadata', 'metafile.csv')
    os.makedirs(os.path.join(output_dir, 'metadata'), exist_ok=True)
    final_metadata.to_csv(metadata_file, index=False)
    print(f"Metadata file created: {metadata_file}")
    
    print("\nCDR Distribution:")
    print(final_metadata['target'].value_counts(dropna=False))
    print("\nDiagnosis Distribution:")
    print(final_metadata['dx1'].value_counts(dropna=False))

clinical_data_path = "/home/minyoungxi/MINYOUNGXI/XFL/Data/Clinical_data/20240429_csv_OASIS3_all_files_XNAT_download_Suji_to_XFL_Share_240606/Suji-20240429_111322_OASIS3_Actions_Download Images_Whole_csv/OASIS3_data_files/UDSb4/csv/OASIS3_UDSb4_cdr.csv"  # Clinical 데이터 파일 경로
subjects_processed = process_all_subjects(base_dir, output_dir, standard_file)
create_metadata_file(subjects_processed, output_dir, clinical_data_path)

Processing subject OAS30033, session d0133


Processing volumes for subject OAS30033: 100%|██████████| 164/164 [13:29<00:00,  4.94s/it]


Processing subject OAS30124, session d0046


Processing volumes for subject OAS30124: 100%|██████████| 164/164 [13:08<00:00,  4.81s/it]


Processing subject OAS30064, session d0687


Processing volumes for subject OAS30064: 100%|██████████| 164/164 [13:32<00:00,  4.95s/it]


Processing subject OAS30084, session d0470


Processing volumes for subject OAS30084: 100%|██████████| 164/164 [13:33<00:00,  4.96s/it]


Processing subject OAS30088, session d0093


Processing volumes for subject OAS30088: 100%|██████████| 164/164 [13:56<00:00,  5.10s/it]


Processing subject OAS30016, session d0021


Processing volumes for subject OAS30016: 100%|██████████| 164/164 [13:05<00:00,  4.79s/it]


Processing subject OAS30011, session d0055


Processing volumes for subject OAS30011: 100%|██████████| 164/164 [8:58:29<00:00, 197.01s/it]    


Processing subject OAS30087, session d0260


Processing volumes for subject OAS30087: 100%|██████████| 164/164 [13:45<00:00,  5.03s/it]


Processing subject OAS30061, session d0035


Processing volumes for subject OAS30061: 100%|██████████| 164/164 [14:03<00:00,  5.14s/it]

Metadata file created: /home/minyoungxi/MINYOUNGXI/XFL/Data/OASIS_MNI_to_TRs/metadata/metafile.csv

CDR Distribution:
target
-1.0    9
Name: count, dtype: int64

Diagnosis Distribution:
dx1
-1    9
Name: count, dtype: int64



