# Classify Brain MRI Images (Multi-Class Classification)

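## Prerequisites
Install [kagglehub](https://pypi.org/project/kagglehub/) (`pip install kagglehub`); it is used below to download the dataset. The notebook cells also import `pandas`, `torch`, `cv2` (OpenCV), `numpy`, and `matplotlib`.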

## 1 Load Dataset


In [14]:
import kagglehub
import os
from glob import glob
import pandas as pd

# Download the dataset with kagglehub; the returned value is used as the
# dataset root, which contains one sub-folder per split.
data_dirname = kagglehub.dataset_download("masoudnickparvar/brain-tumor-mri-dataset")
train_dirname = os.path.join(data_dirname, 'Training')
test_dirname = os.path.join(data_dirname, 'Testing')
classes = ['glioma', 'meningioma', 'notumor', 'pituitary']

# --- Training split -------------------------------------------------------
# One glob per class sub-folder, in the order given by `classes`.
tr_gl_files, tr_me_files, tr_no_files, tr_pi_files = (
    glob(os.path.join(train_dirname, class_name, '*.jpg'))
    for class_name in classes
)
train_groups = (tr_gl_files, tr_me_files, tr_no_files, tr_pi_files)
# Flatten the per-class lists and repeat each class name once per file so
# that paths and labels stay aligned row by row.
train_files = [path for group in train_groups for path in group]
train_labels = [
    class_name
    for class_name, group in zip(classes, train_groups)
    for _ in group
]
train_dict = dict(path=train_files, label=train_labels)
df_train = pd.DataFrame(train_dict)
# Written without header or index: each row is just "path,label".
df_train.to_csv('annotation_train.csv', index=False, header=False)

# --- Testing split --------------------------------------------------------
te_gl_files, te_me_files, te_no_files, te_pi_files = (
    glob(os.path.join(test_dirname, class_name, '*.jpg'))
    for class_name in classes
)
test_groups = (te_gl_files, te_me_files, te_no_files, te_pi_files)
test_files = [path for group in test_groups for path in group]
test_labels = [
    class_name
    for class_name, group in zip(classes, test_groups)
    for _ in group
]
test_dict = dict(path=test_files, label=test_labels)
df_test = pd.DataFrame(test_dict)
df_test.to_csv('annotation_test.csv', index=False, header=False)

### 1.1 Create PyTorch Dataset

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import cv2 as cv
import numpy as np


# Create customized dataset
class TumorDataset(Dataset):
    """Brain-MRI dataset backed by a header-less ``path,label`` CSV file.

    Each item is a dict with two entries:

    * ``'image'``: the file read as grayscale with OpenCV and resized to
      ``image_size``.
    * ``'category'``: the integer position of the row's label in ``classes``.

    Args:
        annotations_file: CSV path (or file-like object) without a header
            row; column 0 holds the image path, column 1 the class name.
        classes: Ordered class names; a name's position is its class index.
        image_size: ``(width, height)`` handed to ``cv.resize``.
    """

    DEFAULT_CLASSES = ('glioma', 'meningioma', 'notumor', 'pituitary')

    def __init__(self, annotations_file, classes=DEFAULT_CLASSES,
                 image_size=(100, 100)):
        self.imgs_info = pd.read_csv(annotations_file, header=None)
        self.classes = tuple(classes)
        # Label name -> integer class index, built once for O(1) lookups.
        self.class_to_idx = {
            name: index for index, name in enumerate(self.classes)
        }
        self.image_size = tuple(image_size)

    def __len__(self):
        """Return the number of annotated images."""
        return len(self.imgs_info)

    def _category_of(self, idx):
        """Return the integer class index of the label stored in row ``idx``.

        Raises:
            ValueError: If the label is not one of ``self.classes``.
        """
        label = self.imgs_info.iloc[idx, 1]
        try:
            return self.class_to_idx[label]
        except KeyError:
            raise ValueError(
                f'Unknown label {label!r} in row {idx}; '
                f'expected one of {self.classes}'
            ) from None

    def __getitem__(self, idx):
        """Load and return the sample at position ``idx``.

        Returns:
            dict: ``{'image': resized grayscale image, 'category': int}``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
            ValueError: If the row's label is not a known class.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()
        img_path = self.imgs_info.iloc[idx, 0]
        image_raw = cv.imread(img_path, cv.IMREAD_GRAYSCALE)
        if image_raw is None:
            # cv.imread signals failure by returning None rather than
            # raising; fail here with the offending path instead of inside
            # cv.resize with an opaque assertion error.
            raise FileNotFoundError(f'Could not read image: {img_path}')
        image = cv.resize(image_raw, self.image_size)
        return {'image': image, 'category': self._category_of(idx)}

# Sanity-check the training set: load every sample and print every 100th one.
# (The class defined in this notebook is TumorDataset; the previous name,
# CatsDogsDataset, is not defined here and raised NameError.)
dataset_train = TumorDataset(annotations_file='annotation_train.csv')
for i, sample in enumerate(dataset_train):
    image = sample['image']
    category = sample['category']
    if i % 100 == 0:
        print(i, image.shape, category)
if len(dataset_train) > 0:
    # Also report the final sample; guarded because the loop variables are
    # never bound when the dataset is empty.
    print(i, image.shape, category)

dataset_test = TumorDataset(annotations_file='annotation_test.csv')

# Create shuffled data loaders
dataloader_train = DataLoader(dataset_train, batch_size=1000, shuffle=True)
dataloader_test = DataLoader(dataset_test, batch_size=1000, shuffle=True)

# Preview the first four images of one shuffled training batch.
samples = next(iter(dataloader_train))
fig, axs = plt.subplots(1, 4)
for i in range(4):
    image = samples['image'][i]
    category = samples['category'][i]
    axs[i].set_title(f'Sample #{i+1}: {category}')
    axs[i].axis('off')
    # Images are single-channel, so render them with a grayscale colormap
    # instead of matplotlib's default false-colour map.
    axs[i].imshow(image, cmap='gray')
# Layout only needs to be computed once, after all axes are populated.
plt.tight_layout()