<a href="https://colab.research.google.com/github/mkpvasu/Brain-Tumor-Classification/blob/main/BT_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<b><center>DATASET PREPARATION</center></b>

The [dataset](https://figshare.com/articles/dataset/brain_tumor_dataset/1512427) used for our brain tumor classification model was published by Jun Cheng.

More information about the dataset:
<p align = 'justify'>It contains 3064 T1-weighted contrast-enhanced MRI images of patients with three kinds of brain tumor: meningioma (708 slices), glioma (1426 slices), and pituitary tumor (930 slices).</p>

---
The data is organized in MATLAB data format (.mat files). Each file stores a struct containing the following fields for an image:

* <b>cjdata.label</b> : 1 for meningioma, 2 for glioma, 3 for pituitary tumor
* <b>cjdata.PID</b> : patient ID
* <b>cjdata.image</b> : image data
* <b>cjdata.tumorBorder</b> : a vector storing the coordinates of discrete points on tumor border
* <b>cjdata.tumorMask</b> : a binary image with 1s indicating tumor region

<p align = 'justify'>We have also added one additional category containing MRI images of patients without tumors, so that the model can also predict whether a patient has a brain tumor at all and thereby cover a broader range of classifications.</p>

### DATASET PREPARATION

In [None]:
import os
import pickle
import random
import time
import zipfile

import cv2
import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
from torchvision.utils import make_grid

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive; every
# dataset path used in this notebook lives underneath that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!rm rf '/content/drive/MyDrive/Colab Notebooks/dataset'

rm: cannot remove 'rf': No such file or directory
rm: cannot remove '/content/drive/MyDrive/Colab Notebooks/dataset': No such file or directory


In [None]:
# Unpack the four dataset archives (set1.zip ... set4.zip) into the folders
# bd_set1 ... bd_set4 that sit next to them.
# One loop replaces four copy-pasted blocks. The canonical 'MyDrive' spelling of
# the mount is used for both the archive and the target folder, so the cell does
# not depend on a 'My Drive' alias being present.
for set_idx in range(1, 5):
  archive_path = '/content/drive/MyDrive/Colab Notebooks/braintumordata/set{}.zip'.format(set_idx)
  extract_dir = '/content/drive/MyDrive/Colab Notebooks/braintumordata/bd_set{}'.format(set_idx)
  with zipfile.ZipFile(archive_path) as zf:
    zf.extractall(extract_dir)

In [None]:
# Folder that collects the .mat files from all extracted sets.
# makedirs(exist_ok=True) does nothing when the folder already exists (so the
# cell is safe to re-run) and also creates missing parent folders.
os.makedirs('/content/drive/MyDrive/Colab Notebooks/braintumordata/data', exist_ok=True)

In [None]:
!mv '/content/drive/My Drive/Colab Notebooks/braintumordata/'bd_set1/*.mat '/content/drive/MyDrive/Colab Notebooks/braintumordata/data'
!mv '/content/drive/My Drive/Colab Notebooks/braintumordata/'bd_set2/*.mat '/content/drive/MyDrive/Colab Notebooks/braintumordata/data'
!mv '/content/drive/My Drive/Colab Notebooks/braintumordata/'bd_set3/*.mat '/content/drive/MyDrive/Colab Notebooks/braintumordata/data'
!mv '/content/drive/My Drive/Colab Notebooks/braintumordata/'bd_set4/*.mat '/content/drive/MyDrive/Colab Notebooks/braintumordata/data'

In [None]:
!rm -rf '/content/drive/My Drive/Colab Notebooks/braintumordata/bd_set1'
!rm -rf '/content/drive/My Drive/Colab Notebooks/braintumordata/bd_set2'
!rm -rf '/content/drive/My Drive/Colab Notebooks/braintumordata/bd_set3'
!rm -rf '/content/drive/My Drive/Colab Notebooks/braintumordata/bd_set4'

In [None]:
# Inspect a single sample (1.mat): list what the file contains, print a few
# facts about it, and display the image and its tumor mask.
with h5py.File('/content/drive/MyDrive/Colab Notebooks/braintumordata/data/1.mat', 'r') as f:
  # Top-level entries of the file, then every field stored under 'cjdata'.
  for item in f.items():
    print(item)
  for key, val in f['cjdata'].items():
    print(key, val)

  # [()] copies the full dataset into a NumPy array, so the values remain
  # usable after the file is closed (a bare dataset handle would not).
  img = f['cjdata']['image'][()]
  label = f['cjdata']['label'][0][0]
  tumorBorder = f['cjdata']['tumorBorder'][0]
  mask = f['cjdata']['tumorMask'][()]

print("Image shape: ", img.shape)
print("Label", label)
print("Coords: ", tumorBorder)
print("Mask shape: ", mask.shape)

# One explicit figure per picture instead of numbered pyplot-global figures.
fig, ax = plt.subplots()
ax.axis('off')
ax.set_title('Image')
ax.imshow(img, cmap='gray')

fig, ax = plt.subplots()
ax.axis('off')
ax.set_title('Tumor mask')
ax.imshow(mask, cmap='gray');

In [None]:
# Output folder for the exported images and the label pickle.
# makedirs(exist_ok=True) does nothing when the folder already exists (so the
# cell is safe to re-run) and also creates missing parent folders.
os.makedirs('/content/drive/MyDrive/Colab Notebooks/braintumordata/bt_data', exist_ok=True)

In [None]:
# Export every .mat sample as a grayscale JPEG (<n>.jpg) and collect its class
# label, so that labels[n - 1] belongs to <n>.jpg.
NUM_SAMPLES = 3064  # samples are stored as 1.mat ... 3064.mat

labels = []

for filename in range(1, NUM_SAMPLES + 1):
  with h5py.File('/content/drive/MyDrive/Colab Notebooks/braintumordata/data/{}.mat'.format(filename), 'r') as f:
    # Copy the pixel data out while the file is still open.
    # NOTE(review): arrays from MATLAB v7.3 files are usually read by h5py with
    # their axes reversed, so the exported picture may be transposed relative to
    # MATLAB's view - confirm whether orientation matters downstream.
    img = np.array(f['cjdata']['image'], dtype=np.float32)
    label = f['cjdata']['label'][0][0]

  labels.append(int(label))

  # imsave writes the array straight to disk and never touches a figure, so the
  # former plt.axis('off') call (which only spawned an empty figure) is gone.
  plt.imsave("/content/drive/MyDrive/Colab Notebooks/braintumordata/bt_data/{}.jpg".format(filename), img, cmap='gray')

# Report the number of samples actually processed, not the last loop value.
print("{} files successfully saved".format(len(labels)))

In [None]:
# Turn the collected labels into an int64 NumPy vector; the trailing
# expression displays its shape as the cell output.
labels = np.asarray(labels, dtype=np.int64)
labels.shape

(3064,)

In [None]:
# Persist the label vector as labels.pickle inside the bt_data folder.
# The with-block closes the file even if pickling raises.
with open('/content/drive/MyDrive/Colab Notebooks/braintumordata/bt_data/labels.pickle', 'wb') as p_out:
  pickle.dump(labels, p_out)

In [None]:
# Build the dataset as a list of [image, label] pairs and pickle it.
# Each image is read back from its exported JPEG as grayscale, expanded to three
# channels and resized to 512x512.
train_data = []

# One JPEG per label: <i>.jpg pairs with labels[i - 1].
for i in range(1, len(labels) + 1):
  img_path = "/content/drive/MyDrive/Colab Notebooks/braintumordata/bt_data/{}.jpg".format(i)
  img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
  # cv2.imread signals a missing/unreadable file by returning None instead of
  # raising; fail here with the offending path rather than inside cvtColor.
  if img is None:
    raise FileNotFoundError("Could not read image: {}".format(img_path))
  img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
  img = cv2.resize(img, (512,512))
  label = labels[i-1]
  train_data.append([img,label])

# The with-block closes the file even if pickling raises.
with open("/content/drive/MyDrive/Colab Notebooks/braintumordata/train_data.pickle","wb") as pickle_out:
  pickle.dump(train_data, pickle_out)