# Deep Learning Diagnosis of Brain MRI Images using PyTorch

## Create Dataset and Dataloader

In [24]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import cv2 as cv
import numpy as np

classes = ['glioma', 'meningioma', 'notumor', 'pituitary']

class TumorDataset(Dataset):
    """Dataset of grayscale MRI images listed in a header-less CSV file.

    Column 0 of the CSV holds the image path and column 1 holds the class
    name. Each sample is a dict with the image resized to 128x128 and
    divided by 255 (stored as ``float32``), plus the class encoded as its
    position in the module-level ``classes`` list.
    """

    def __init__(self, annotations_file):
        """Load the annotation table.

        Args:
            annotations_file: Path to a CSV file without a header row.
        """
        self.imgs_info = pd.read_csv(annotations_file, header=None)

    def __len__(self):
        """Return the number of rows in the annotation table."""
        return len(self.imgs_info)

    def __getitem__(self, idx):
        """Load and preprocess the sample at row ``idx``.

        Args:
            idx: Row index into the annotation table (an int or a tensor
                holding one).

        Returns:
            A dict ``{'image': ndarray, 'category': int}``.

        Raises:
            FileNotFoundError: If the image cannot be read from disk.
            ValueError: If the row's label is not one of ``classes``.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()
        img_path = self.imgs_info.iloc[idx, 0]
        label = self.imgs_info.iloc[idx, 1]

        # cv.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of inside cv.resize.
        img_raw = cv.imread(img_path, cv.IMREAD_GRAYSCALE)
        if img_raw is None:
            raise FileNotFoundError(
                f"Could not read image file: {img_path!r}"
            )
        img_resize = cv.resize(img_raw, (128, 128))
        image = np.float32(img_resize / 255)

        # Reject unknown labels instead of silently mapping them to the
        # last class, which would corrupt the training targets.
        try:
            category = classes.index(label)
        except ValueError:
            raise ValueError(
                f"Unknown class label {label!r} for image {img_path!r}; "
                f"expected one of {classes}"
            ) from None

        return {'image': image, 'category': category}
    
# Wrap the training and test annotation files in datasets.
dataset_train = TumorDataset('annotation_train.csv')
# Optional sanity check: print every 100th training sample, then the last one.
# for i, sample in enumerate(dataset_train):
#     image = sample['image']
#     label = sample['category']
#     if not i % 100:  # true when i is a multiple of 100
#         print(i, image.shape, label)
# print(i, image.shape, label)
dataset_test = TumorDataset('annotation_test.csv')

# Serve both datasets in shuffled batches of 256 samples.
dataloader_train = DataLoader(
    dataset=dataset_train,
    shuffle=True,
    batch_size=256,
)
dataloader_test = DataLoader(
    dataset=dataset_test,
    shuffle=True,
    batch_size=256,
)

In [25]:
# Draw one batch from the training DataLoader so it can be inspected.
samples = next(iter(dataloader_train))

In [26]:
# Shape of the batched images: batch size first, then the 128x128 image.
samples['image'].shape

torch.Size([256, 128, 128])