### **Install and Import Necessary Libraries**

In [1]:
# !pip install nibabel

In [2]:
import os
import re
import shutil
import tarfile
import nibabel as nib
import numpy as np
from PIL import Image
from tqdm import tqdm
import pandas as pd
from sklearn.model_selection import train_test_split

### **Extract TAR Files**

In [3]:
# Set paths
extracted = '../datasets/OASIS-2/extracted'
src_p1 = '../datasets/OASIS-2/downloads/OAS2_RAW_PART1.tar.gz'
src_p2 = '../datasets/OASIS-2/downloads/OAS2_RAW_PART2.tar.gz'
part1_path = os.path.join(extracted, 'OAS2_RAW_PART1')
part2_path = os.path.join(extracted, 'OAS2_RAW_PART2')
new_name = os.path.join(extracted, 'OAS2_RAW')

# Create the extraction directory if it doesn't exist
os.makedirs(extracted, exist_ok=True)

# Extract tarballs
for src in [src_p1, src_p2]:
    with tarfile.open(src, 'r:gz') as tar:
        tar.extractall(path=extracted)

# Move contents from OAS2_RAW_PART2 to OAS2_RAW_PART1
for item in os.listdir(part2_path):
    item_path = os.path.join(part2_path, item)
    destination_path = os.path.join(part1_path, item)
    shutil.move(item_path, destination_path)

# Rename OAS2_RAW_PART1 to OAS2_RAW
os.rename(part1_path, new_name)

# Delete OAS2_RAW_PART2
shutil.rmtree(part2_path)

print("Extraction completed successfully.")

  tar.extractall(path=extracted)


Extraction completed successfully.


### **Convert 3D MRI Data in NIfTI Format to 2D JPG**

In [4]:
def extract_and_save_views(src_dir, output_dir, slides, l_axial, r_axial, l_coronal, r_coronal, l_sagittal, r_sagittal):
    # Ensure the output directory exists
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Calculate total number of files to process (for the progress bar)
    total_files = 0
    for root, dirs, files in os.walk(src_dir):
        if 'RAW' in root:
            total_files += len([f for f in files if f.endswith(('.img', '.hdr'))])

    print(f"Total files to process: {total_files}")

    with tqdm(total=total_files, desc="Processing files", unit="file", ncols=100) as pbar:
        for root, dirs, files in os.walk(src_dir):
            parent_folder = os.path.basename(os.path.dirname(root))
            relative_path = os.path.relpath(root, src_dir)
            output_subfolder = os.path.join(output_dir, relative_path)

            if not os.path.exists(output_subfolder):
                os.makedirs(output_subfolder)

            for file_name in files:
                if file_name.endswith(('.img', '.hdr')):
                    nifti_file_path = os.path.join(root, file_name)

                    try:
                        img = nib.load(nifti_file_path)
                    except Exception as e:
                        print(f"Error loading {nifti_file_path}: {e}")
                        continue

                    img_data = img.get_fdata()

                    # Determine how many slices are available
                    if len(img_data.shape) == 3:
                        num_slices = img_data.shape[2]
                    elif len(img_data.shape) == 4:
                        img_data_avg = np.mean(img_data, axis=3)
                        num_slices = img_data_avg.shape[2]
                        img_data = img_data_avg
                    else:
                        print(f"Skipping unsupported image with shape: {img_data.shape}")
                        continue

                    # Sample slices
                    for view_name in ['axial', 'coronal', 'sagittal']:
                        if view_name == 'axial':
                            slices = [img_data[:, i, :] for i in range(0, img_data.shape[0], max(1, img_data.shape[0] // slides))]
                            l_skip = l_axial
                            r_skip = r_axial
                        elif view_name == 'coronal':
                            slices = [img_data[i, :, :] for i in range(0, img_data.shape[1], max(1, img_data.shape[1] // slides))]
                            l_skip = l_coronal
                            r_skip = r_coronal
                        elif view_name == 'sagittal':
                            slices = [img_data[:, :, i] for i in range(0, num_slices, max(1, num_slices // slides))]
                            l_skip = l_sagittal
                            r_skip = r_sagittal
                            
                        # Save each slice as an image
                        for i, img_slice in enumerate(slices[l_skip:-r_skip]):
                            if img_slice.size == 0:
                                continue

                            try:
                                max_value = np.max(img_slice)
                                img_2d_normalized = np.uint8(255 * (img_slice / max_value))
                                img_pil = Image.fromarray(img_2d_normalized)
                                
                                # Apply different rotations based on the view_name
                                if view_name == 'axial' or view_name == 'coronal':
                                    img_pil = img_pil.rotate(180, expand=True)
                                elif view_name == 'sagittal' :
                                    img_pil = img_pil.rotate(90, expand=True)
         
                                # Modify the output file name to include the parent folder name
                                output_file_name = f"{parent_folder}_{file_name.replace('.img', '').replace('.hdr', '').replace('.nifti', '')}_{view_name}_slice_{i}.jpg"
                                output_file_path = os.path.join(output_subfolder, view_name)
                                if not os.path.exists(output_file_path):
                                    os.makedirs(output_file_path)

                                # Save the image as JPG
                                img_pil.save(os.path.join(output_file_path, output_file_name))
                            except Exception as e:
                                print(f"Error processing view {view_name} for {nifti_file_path}: {e}")

                    pbar.update(1)

    print("All files processed successfully.")

src = '../datasets/OASIS-2/extracted/OAS2_RAW'
des = '../datasets/OASIS-2/extracted/OAS2_RAW_JPG'

extract_and_save_views(
    src_dir = src, 
    output_dir = des, 
    slides = 255,
    l_axial = 100,
    r_axial = 90,
    l_coronal = 90,
    r_coronal = 110,
    l_sagittal = 40,
    r_sagittal = 40
)

Total files to process: 2736


Processing files: 100%|███████████████████████████████████████| 2736/2736 [13:23<00:00,  3.40file/s]

All files processed successfully.





### **Organize Images into Class-Specific Folders**

In [5]:
df = pd.read_excel('../datasets/OASIS-2/downloads/OAS2_metadata.xlsx')
df

Unnamed: 0,Subject ID,MRI ID,Group,Visit,MR Delay,M/F,Hand,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,OAS2_0001,OAS2_0001_MR1,Nondemented,1,0,M,R,87,14,2.0,27.0,0.0,1986.550000,0.696106,0.883440
1,OAS2_0001,OAS2_0001_MR2,Nondemented,2,457,M,R,88,14,2.0,30.0,0.0,2004.479526,0.681062,0.875539
2,OAS2_0002,OAS2_0002_MR1,Demented,1,0,M,R,75,12,,23.0,0.5,1678.290000,0.736336,1.045710
3,OAS2_0002,OAS2_0002_MR2,Demented,2,560,M,R,76,12,,28.0,0.5,1737.620000,0.713402,1.010000
4,OAS2_0002,OAS2_0002_MR3,Demented,3,1895,M,R,80,12,,22.0,0.5,1697.911134,0.701236,1.033623
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
368,OAS2_0185,OAS2_0185_MR2,Demented,2,842,M,R,82,16,1.0,28.0,0.5,1692.880000,0.693926,1.036690
369,OAS2_0185,OAS2_0185_MR3,Demented,3,2297,M,R,86,16,1.0,26.0,0.5,1688.009649,0.675457,1.039686
370,OAS2_0186,OAS2_0186_MR1,Nondemented,1,0,F,R,61,13,2.0,30.0,0.0,1319.020000,0.801006,1.330540
371,OAS2_0186,OAS2_0186_MR2,Nondemented,2,763,F,R,63,13,2.0,30.0,0.0,1326.650000,0.795981,1.322890


In [6]:
def get_cdr(df, mri_id):
    
    # Find the row where the MRI ID matches
    result_row = df[df['MRI ID'] == mri_id]
    
    # Check if there's a matching row
    if not result_row.empty:
        # Return the CDR value from the row
        return result_row['CDR'].values[0]
    else:
        return "MRI ID not found in the CSV."

In [7]:
# Directory path
cls_dir = '../datasets/OASIS-2/views'

# List of classes
cls = ['nondemented', 'moderate_dementia']

# Create the main directory if it doesn't exist
if not os.path.exists(cls_dir):
    os.makedirs(cls_dir)

# Subfolder names (views)
views = ['axial', 'coronal', 'sagittal']

# Create subfolders for each view and the class folders inside them
for view in views:
    # Path to the view folder
    view_path = os.path.join(cls_dir, view)
    
    # Create the view folder if it doesn't exist
    if not os.path.exists(view_path):
        os.makedirs(view_path)
    
    # Create class folders inside each view folder
    for class_name in cls:
        class_path = os.path.join(view_path, class_name.strip())  # Strip any unwanted spaces
        if not os.path.exists(class_path):
            os.makedirs(class_path)  # Create the class folder if it doesn't exist

print("View and class subfolders created successfully.")

View and class subfolders created successfully.


In [8]:
src_dir = '../datasets/OASIS-2/extracted/OAS2_RAW_JPG'
des_dir = '../datasets/OASIS-2/views'

# List the immediate subdirectories (direct children) under OAS2_RAW_JPG
subfolders = [d for d in os.listdir(src_dir) if os.path.isdir(os.path.join(src_dir, d))]

def copy_images(src_axial, des_axial):

    # List all the files in the source 'axial' folder
    if os.path.exists(src_axial):
        src_files = [f for f in os.listdir(src_axial) if os.path.isfile(os.path.join(src_axial, f))]
        
        # Copy each file from the source to the destination
        for file_name in src_files:
            src_file = os.path.join(src_axial, file_name)
            des_file = os.path.join(des_axial, file_name)

            try:
                shutil.copy(src_file, des_file)
            except Exception as e:
                print(f"Error copying {file_name}: {e}")
    
views = ['axial', 'coronal', 'sagittal']

# Print the folder names
for folder in tqdm(subfolders, desc='Processing subfolders', unit='folder', ncols=100):
    cdr = get_cdr(df, folder)
    if cdr == 0.0:
        for view in views:
            src = os.path.join(src_dir, folder, 'RAW', view)
            des = os.path.join(des_dir, view, 'nondemented')
            copy_images(src, des)
    if cdr == 2.0:
        for view in views:
            src = os.path.join(src_dir, folder, 'RAW', view)
            des = os.path.join(des_dir, view, 'moderate_dementia')
            copy_images(src, des)

Processing subfolders: 100%|██████████████████████████████████| 373/373 [38:17<00:00,  6.16s/folder]


### **Split Data into Training and Testing Sets**

In [9]:
src_dir = '../datasets/OASIS-2/views'
des_dir = '../datasets/OASIS-2/preprocessed'

train_dir = os.path.join(des_dir, 'train')
test_dir = os.path.join(des_dir, 'test')

os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

def split_and_move_files(src, train_dest, test_dest, split_ratio=0.9):
    files = [f for f in os.listdir(src) if os.path.isfile(os.path.join(src, f))]
    train_files, test_files = train_test_split(files, train_size=split_ratio, random_state=42)
    for file in train_files:
        shutil.move(os.path.join(src, file), os.path.join(train_dest, file))
    for file in test_files:
        shutil.move(os.path.join(src, file), os.path.join(test_dest, file))

tasks = []
views = ['axial', 'coronal', 'sagittal']
classes = ['nondemented', 'moderate_dementia']
for view in views:
    for cls in classes:
        tasks.append((view, cls))

with tqdm(total=len(tasks), desc="Processing datasets", ncols=100) as pbar:
    for view, cls in tasks:
        class_src_dir = os.path.join(src_dir, view, cls)
        train_class_dir = os.path.join(train_dir, view, cls)
        test_class_dir = os.path.join(test_dir, view, cls)
        
        os.makedirs(train_class_dir, exist_ok=True)
        os.makedirs(test_class_dir, exist_ok=True)

        split_and_move_files(class_src_dir, train_class_dir, test_class_dir)

        pbar.update(1)

print("Train-test split completed!")

Processing datasets: 100%|████████████████████████████████████████████| 6/6 [01:27<00:00, 14.56s/it]

Train-test split completed!





### **Delete Unnecessary Files and Folders**

In [10]:
ext_dir = '../datasets/OASIS-2/extracted'
src_dir = '../datasets/OASIS-2/views'

for dir_path in [src_dir, ext_dir]:
    if os.path.exists(dir_path):
        shutil.rmtree(dir_path)