# 1.) Imports

In [1]:
from google.colab import drive
import os
import pandas as pd
import nibabel as nib

# 2.) Download and Extract Data

In [2]:
# Mount Google Drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Define base path
base_path = '/content/drive/MyDrive/BrainAgeRegression/data'
os.makedirs(base_path, exist_ok=True)

# List of disc numbers to download and extract
disc_numbers = [11, 12]

# Base URL
base_url = 'https://download.nrg.wustl.edu/data/oasis_cross-sectional_disc'

for i in disc_numbers:
    filename = f'oasis_cross-sectional_disc{i}.tar.gz'
    url = f'{base_url}{i}.tar.gz'
    file_path = os.path.join(base_path, filename)

    # Download
    print(f'📥 Downloading disc {i}...')
    !wget -q -O "{file_path}" "{url}"
    print(f'✅ Downloaded {filename}')

    # Extract
    print(f'📂 Extracting {filename}...')
    !tar -xvzf "{file_path}" -C "{base_path}"
    print(f'✅ Extracted disc {i}\n')

📥 Downloading disc 11...
✅ Downloaded oasis_cross-sectional_disc11.tar.gz
📂 Extracting oasis_cross-sectional_disc11.tar.gz...
disc11/
disc11/OAS1_0383_MR1/
disc11/OAS1_0383_MR1/RAW/
disc11/OAS1_0383_MR1/RAW/OAS1_0383_MR1_mpr-4_anon.hdr
disc11/OAS1_0383_MR1/RAW/OAS1_0383_MR1_mpr-2_anon.hdr
disc11/OAS1_0383_MR1/RAW/OAS1_0383_MR1_mpr-1_anon.img
disc11/OAS1_0383_MR1/RAW/OAS1_0383_MR1_mpr-2_anon.img
disc11/OAS1_0383_MR1/RAW/OAS1_0383_MR1_mpr-3_anon.img
disc11/OAS1_0383_MR1/RAW/OAS1_0383_MR1_mpr-3_anon.hdr
disc11/OAS1_0383_MR1/RAW/OAS1_0383_MR1_mpr-1_anon.hdr
disc11/OAS1_0383_MR1/RAW/OAS1_0383_MR1_mpr-4_anon.img
disc11/OAS1_0383_MR1/RAW/OAS1_0383_MR1_mpr-1_anon_sag_66.gif
disc11/OAS1_0383_MR1/RAW/OAS1_0383_MR1_mpr-2_anon_sag_66.gif
disc11/OAS1_0383_MR1/RAW/OAS1_0383_MR1_mpr-3_anon_sag_66.gif
disc11/OAS1_0383_MR1/RAW/OAS1_0383_MR1_mpr-4_anon_sag_66.gif
disc11/OAS1_0383_MR1/PROCESSED/
disc11/OAS1_0383_MR1/PROCESSED/MPRAGE/
disc11/OAS1_0383_MR1/PROCESSED/MPRAGE/T88_111/
disc11/OAS1_0383_MR1/PRO

# 3.) Convert .hdr + .img into .nii.gz

In [4]:
import os
import nibabel as nib

base_path = '/content/drive/MyDrive/BrainAgeRegression/data'
nifti_path = os.path.join(base_path, 'nifti')
os.makedirs(nifti_path, exist_ok=True)

disc_folders = ['disc11', 'disc12']

for disc in disc_folders:
    disc_path = os.path.join(base_path, disc)
    for folder in os.listdir(disc_path):
        subj_path = os.path.join(disc_path, folder)
        if os.path.isdir(subj_path) and folder.startswith('OAS1_'):
            hdr_file = os.path.join(subj_path, 'PROCESSED', 'MPRAGE', 'T88_111',
                                    f'{folder}_mpr_n4_anon_111_t88_gfc.hdr')
            if os.path.exists(hdr_file):
                try:
                    img = nib.load(hdr_file)
                    out_file = os.path.join(nifti_path, f'{folder}.nii.gz')
                    nib.save(img, out_file)
                    print(f'✅ Converted {folder}')
                except Exception as e:
                    print(f'❌ Failed to convert {folder}: {e}')


✅ Converted OAS1_0383_MR1
✅ Converted OAS1_0385_MR1
✅ Converted OAS1_0386_MR1
✅ Converted OAS1_0387_MR1
✅ Converted OAS1_0388_MR1
✅ Converted OAS1_0389_MR1
✅ Converted OAS1_0390_MR1
✅ Converted OAS1_0392_MR1
✅ Converted OAS1_0394_MR1
✅ Converted OAS1_0395_MR1
✅ Converted OAS1_0395_MR2
✅ Converted OAS1_0396_MR1
✅ Converted OAS1_0397_MR1
✅ Converted OAS1_0398_MR1
✅ Converted OAS1_0399_MR1
✅ Converted OAS1_0400_MR1
✅ Converted OAS1_0402_MR1
✅ Converted OAS1_0403_MR1
✅ Converted OAS1_0404_MR1
✅ Converted OAS1_0405_MR1
✅ Converted OAS1_0406_MR1
✅ Converted OAS1_0407_MR1
✅ Converted OAS1_0408_MR1
✅ Converted OAS1_0411_MR1
✅ Converted OAS1_0413_MR1
✅ Converted OAS1_0415_MR1
✅ Converted OAS1_0416_MR1
✅ Converted OAS1_0417_MR1
✅ Converted OAS1_0418_MR1
✅ Converted OAS1_0419_MR1
✅ Converted OAS1_0420_MR1
✅ Converted OAS1_0421_MR1
✅ Converted OAS1_0422_MR1
✅ Converted OAS1_0424_MR1
✅ Converted OAS1_0425_MR1
✅ Converted OAS1_0426_MR1
✅ Converted OAS1_0428_MR1
✅ Converted OAS1_0429_MR1
✅ Converted 

In [5]:
base_path = '/content/drive/MyDrive/BrainAgeRegression/data'
nifti_path = os.path.join(base_path, 'nifti')

In [6]:
nifti_files = os.listdir(nifti_path)
print(f'🧠 Total NIfTI files: {len(nifti_files)}')


🧠 Total NIfTI files: 375


# 4.) Load our Cross Sectional CSV and Match Scans to Metadata

In [7]:
# Paths
nifti_dir = '/content/drive/MyDrive/BrainAgeRegression/data/nifti'
xlsx_path = '/content/drive/MyDrive/BrainAgeRegression/data/oasis_cross-sectional.xlsx'

# Load Excel metadata
df = pd.read_excel(xlsx_path)
df['ID'] = df['ID'].astype(str).str.strip()

# Extract subject ID from Excel (e.g., "OAS1_0001" from "OAS1_0001_MR1")
df['SubjectID'] = df['ID'].str.extract(r'(OAS1_\d{4})')

# Get subject IDs from NIfTI filenames
nifti_ids = {
    f.split('_MR')[0]
    for f in os.listdir(nifti_dir)
    if f.endswith('.nii.gz')
}

# Filter metadata to matched subjects
matched_df = df[df['SubjectID'].isin(nifti_ids)].copy()
print(f'✅ Matched {len(matched_df)} subjects')

# Optional: Save matched metadata
matched_csv_path = '/content/drive/MyDrive/BrainAgeRegression/matched_metadata.csv'
matched_df.to_csv(matched_csv_path, index=False)
print(f'💾 Saved matched metadata to {matched_csv_path}')


✅ Matched 375 subjects
💾 Saved matched metadata to /content/drive/MyDrive/BrainAgeRegression/matched_metadata.csv
