# Segmentation of Multiple Sclerosis Lesions with a Brain MRI Dataset

This Jupyter notebook is designed to explore and utilize a Brain MRI dataset of patients with Multiple Sclerosis (MS) for the purpose of predicting lesions. Multiple Sclerosis is a chronic illness characterized by the presence of lesions in the brain and spinal cord, leading to a wide range of neurological symptoms. Detecting and monitoring these lesions using MRI is a critical part of diagnosing and managing the disease.

The dataset used in this notebook was published on Mendeley Data by M Muslim (2022) and includes MRI scans along with consensus manual lesion segmentations. This makes it a suitable resource for training and evaluating AI models in medical imaging applications.


## Data Loading

In this section, we will download the Brain MRI dataset from the Mendeley Data repository, organize the data into training, testing, and validation sets, and prepare it for analysis.

The dataset consists of MRI scans from 60 patients, each stored in its own folder. We will:

1. Download the dataset, which is provided as a ZIP file.
2. Extract the contents of the ZIP file.
3. Randomly allocate 40 patient folders to a training set, 10 to a testing set, and the remaining 10 to a validation set, ensuring reproducibility by setting a random seed.


In [1]:
import os
import random
import shutil
import requests
from zipfile import ZipFile

# Number of patient folders assigned to the training and testing splits;
# whatever is left over after these two goes to validation.
N_TRAIN, N_TEST = 40, 10

# Where the archive comes from, the local name it is saved under, and the
# directory it is unpacked into.
dataset_url = "https://data.mendeley.com/public-files/datasets/8bctsm8jz7/files/9356efeb-dcd8-4213-a2d4-8febe9f1a5db/file_downloaded"
zip_filename = "Brain MRI Dataset of Multiple Sclerosis with Consensus Manual Lesion Segmentation and Patient Meta Information.zip"
extracted_folder = "brain_mri_dataset"

# Root of the organised data, with one sub-directory per split beneath it.
data_folder = "data"
train_folder, test_folder, validation_folder = (
    os.path.join(data_folder, split_name)
    for split_name in ("train", "test", "validation")
)

# Fix the seed of the global random generator used when shuffling patients.
random.seed(42)

# Function to download the dataset
def download_dataset(url, filename, chunk_size=1024 * 1024, timeout=60):
    """Download ``url`` to the local file ``filename``.

    The body is streamed to disk in chunks so the archive is never held in
    memory as a whole. Data is first written to ``<filename>.part`` and only
    renamed to ``filename`` once the transfer has finished, so an interrupted
    download never leaves a truncated file under the final name.

    Args:
        url: Address of the file to fetch.
        filename: Path the downloaded file is saved under.
        chunk_size: Number of bytes requested per chunk while streaming.
        timeout: Seconds to wait for the server (connect / read) before
            giving up.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
            Without this check the error page would be saved as the ZIP.
        requests.RequestException: On connection problems or timeouts.
    """
    partial_filename = f"{filename}.part"

    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()
        with open(partial_filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)

    # Publish the file under its final name only after a complete transfer.
    os.replace(partial_filename, filename)
    print(f"Downloaded {filename}")

# Function to extract the dataset
def extract_dataset(zip_filename, extract_to):
    """Unpack every member of a ZIP archive into a directory.

    Args:
        zip_filename: Path of the ZIP archive to read.
        extract_to: Directory the archive members are extracted into.
    """
    with ZipFile(zip_filename) as archive:
        archive.extractall(path=extract_to)
    print(f"Extracted {zip_filename} to {extract_to}")

# Function to organize patient data into train, test, and validation folders
def organize_data(extracted_folder, train_folder, test_folder, validation_folder,
                  n_train=None, n_test=None):
    """Split the extracted patient folders into train, test and validation sets.

    Every directory directly inside ``extracted_folder`` whose name starts
    with ``"Patient"`` is moved into exactly one of the three destination
    folders. The first ``n_train`` folders after shuffling go to training,
    the next ``n_test`` to testing, and all remaining ones to validation.

    Args:
        extracted_folder: Directory containing the extracted patient folders.
        train_folder: Destination directory for the training split.
        test_folder: Destination directory for the testing split.
        validation_folder: Destination directory for the validation split.
        n_train: Number of patients in the training split. Defaults to the
            module-level ``N_TRAIN``.
        n_test: Number of patients in the testing split. Defaults to the
            module-level ``N_TEST``.

    Note:
        The shuffle uses the global ``random`` generator, so the split is
        reproducible only if that generator is seeded before the call.
    """
    if n_train is None:
        n_train = N_TRAIN
    if n_test is None:
        n_test = N_TEST

    # os.listdir returns entries in arbitrary order, so sort before shuffling;
    # otherwise a fixed seed does not yield the same split on every machine.
    # Only directories are kept so stray files named "Patient..." stay put.
    patient_folders = sorted(
        entry for entry in os.listdir(extracted_folder)
        if entry.startswith("Patient")
        and os.path.isdir(os.path.join(extracted_folder, entry))
    )

    random.shuffle(patient_folders)

    splits = (
        (train_folder, patient_folders[:n_train]),
        (test_folder, patient_folders[n_train:n_train + n_test]),
        (validation_folder, patient_folders[n_train + n_test:]),
    )

    for destination, patients in splits:
        # makedirs also creates missing parent directories (e.g. the data
        # root), so no separate step is needed for them.
        os.makedirs(destination, exist_ok=True)
        for patient in patients:
            shutil.move(os.path.join(extracted_folder, patient), destination)

    print("Organized data into train, test, and validation folders.")

# Main execution
if __name__ == "__main__":
    # Step 1: fetch the ZIP archive from the dataset URL.
    download_dataset(url=dataset_url, filename=zip_filename)

    # Step 2: unpack the archive into the extraction directory.
    extract_dataset(zip_filename=zip_filename, extract_to=extracted_folder)

    # Step 3: distribute the patient folders over the three splits.
    organize_data(
        extracted_folder=extracted_folder,
        train_folder=train_folder,
        test_folder=test_folder,
        validation_folder=validation_folder,
    )


Downloaded Brain MRI Dataset of Multiple Sclerosis with Consensus Manual Lesion Segmentation and Patient Meta Information.zip
Extracted Brain MRI Dataset of Multiple Sclerosis with Consensus Manual Lesion Segmentation and Patient Meta Information.zip to brain_mri_dataset
Organized data into train, test, and validation folders.


## References

M Muslim, Ali (2022), “Brain MRI Dataset of Multiple Sclerosis with Consensus Manual Lesion Segmentation and Patient Meta Information”, Mendeley Data, V1, doi: 10.17632/8bctsm8jz7.1