# Diagnose Brain MRI Images

## Pre-requisites
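Install [kagglehub](https://pypi.org/project/kagglehub/) (`pip install kagglehub`), which is used below to download the dataset. The notebook also imports `pandas`, `numpy`, `torch`, and `torchvision`.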

## 1 Load Dataset
### 1.1 Download Data and Generate Annotation Files

In [1]:
import kagglehub
import os
from glob import glob
import pandas as pd

def _collect_split(split_dirname, class_names):
    """Gather the JPEG paths and matching labels for one dataset split.

    Args:
        split_dirname: Directory holding one sub-directory per class.
        class_names: Class sub-directory names, in the order they should
            appear in the annotation file.

    Returns:
        A ``(per_class_files, files, labels)`` tuple: the per-class path
        lists (same order as ``class_names``), the flattened path list, and
        one label per entry of ``files``.
    """
    # glob() yields matches in arbitrary order; sort them so the annotation
    # files come out identical from run to run.
    per_class_files = [
        sorted(glob(os.path.join(split_dirname, name, '*.jpg')))
        for name in class_names
    ]
    files = [path for group in per_class_files for path in group]
    labels = [
        name
        for name, group in zip(class_names, per_class_files)
        for _ in group
    ]
    return per_class_files, files, labels


# Download the dataset and get the local directory it was stored in.
data_dirname = kagglehub.dataset_download("masoudnickparvar/brain-tumor-mri-dataset")
train_dirname = os.path.join(data_dirname, 'Training')
test_dirname = os.path.join(data_dirname, 'Testing')
classes = ['glioma', 'meningioma', 'notumor', 'pituitary']

# Training split -> annotation_train.csv (rows of "path,label", no header row).
train_per_class, train_files, train_labels = _collect_split(train_dirname, classes)
tr_gl_files, tr_me_files, tr_no_files, tr_pi_files = train_per_class
train_dict = {'path': train_files, 'label': train_labels}
df_train = pd.DataFrame(train_dict)
df_train.to_csv('annotation_train.csv', header=False, index=False)

# Testing split -> annotation_test.csv (same layout as the training file).
test_per_class, test_files, test_labels = _collect_split(test_dirname, classes)
te_gl_files, te_me_files, te_no_files, te_pi_files = test_per_class
test_dict = {'path': test_files, 'label': test_labels}
df_test = pd.DataFrame(test_dict)
df_test.to_csv('annotation_test.csv', header=False, index=False)


  from .autonotebook import tqdm as notebook_tqdm




### 1.2 Create PyTorch Dataset

In [7]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import v2
from torchvision.io import read_image
# import cv2 as cv
import numpy as np

# Class names; these are the same strings written as labels in the annotation files.
classes = ('glioma', 'meningioma', 'notumor', 'pituitary')

# Preprocessing pipeline for decoded images.
# v2.ToTensor() is deprecated and leaves torch tensors (what read_image
# returns) untouched, so the data would still be uint8 when it reaches
# v2.Normalize, which only accepts floating-point images. ToImage + ToDtype
# with scale=True converts to float32 in [0, 1] first; Normalize with
# mean 0.5 / std 0.5 then maps that range to [-1, 1].
trans = v2.Compose(
    [
        v2.ToImage(),
        v2.ToDtype(torch.float32, scale=True),
        v2.Normalize((0.5,), (0.5,)),
    ]
)

class TumorDataset(Dataset):
    """Map-style dataset of MRI images listed in an annotation CSV.

    The CSV is read without a header row; column 0 holds the image path and
    column 1 holds the class label.
    """

    def __init__(self, annotations_file, transform=None, target_transform=None):
        """Load the annotation table.

        Args:
            annotations_file: Path to the header-less CSV of (path, label) rows.
            transform: Optional callable applied to each decoded image.
            target_transform: Optional callable applied to each label.
        """
        self.imgs_info = pd.read_csv(annotations_file, header=None)
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of rows in the annotation table."""
        return len(self.imgs_info)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair for row ``idx``.

        The image is decoded with ``read_image``. The label is the value
        stored in the CSV unless ``target_transform`` converts it.
        """
        img_path = self.imgs_info.iloc[idx, 0]
        label = self.imgs_info.iloc[idx, 1]
        image = read_image(img_path)
        # Compare against None explicitly so the decision depends only on
        # whether a transform was supplied, not on the callable's truthiness.
        if self.transform is not None:
            image = self.transform(image)
        if self.target_transform is not None:
            label = self.target_transform(label)
        return image, label
    
# Training dataset built from the annotation CSV. No transform is passed, so
# each item is the image exactly as read_image decodes it, paired with the
# label value stored in the CSV.
dataset_train = TumorDataset(annotations_file='annotation_train.csv')
# NOTE(review): the scratch code below predates the current __getitem__, which
# returns an (image, label) tuple; the dict-style sample['image'] /
# sample['category'] lookups would have to become tuple unpacking before this
# is re-enabled.
# for i, sample in enumerate(dataset_train):
#     image = sample['image']
#     label = sample['category']
#     if not i%100:  # true when i % 100 == 0, i.e. every 100th sample
#         print(i, image.shape, label)
# print(i, image.shape, label)
# dataset_test = TumorDataset(annotations_file='annotation_test.csv')

# dataloader_train = DataLoader(dataset_train, batch_size=64, shuffle=True)
# dataloader_test = DataLoader(dataset_test, batch_size=64, shuffle=True)

# for i, sample_batch in enumerate(dataloader_train):
#     print(f"Shape of X [N, H, W]: {sample_batch['image'].shape}")
#     print(f"Shape of y: {sample_batch['category'].shape}")
#     break



In [17]:
# Spot-check a single value of the first training image at index [0, 200, 200]
# (presumably channel 0, row 200, column 200 — confirm against read_image's layout).
dataset_train[0][0][0, 200, 200]

tensor(81, dtype=torch.uint8)