In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import torch
import albumentations as A
import cv2
import torch.nn as nn
import torch.optim as optim
import torchvision
from PIL import Image
from albumentations.pytorch import ToTensorV2
from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler
from torchvision.utils import make_grid
from torchvision import transforms as T
from torchvision import models
from torchmetrics import Accuracy
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from tqdm.notebook import tqdm
# Seed both PyTorch and NumPy with the same value so that random operations
# driven by their global generators repeat from run to run.
torch.manual_seed(27)
np.random.seed(27)

# Run on the GPU when CUDA is available (and log its properties), else on CPU.
if torch.cuda.is_available():
    device = 'cuda'
    print(torch.cuda.get_device_properties(device))
else:
    device = 'cpu'

In [2]:
# Dataset root, with one sub-folder per class ('yes' / 'no').
pth = '/kaggle/input/brain-tumor-detection/'
yes_pth, no_pth = (os.path.join(pth, class_dir) for class_dir in ('yes', 'no'))

# One label per directory entry: 1 for everything under 'yes', 0 under 'no'.
yes_labels = [1 for _ in os.listdir(yes_pth)]
no_labels = [0 for _ in os.listdir(no_pth)]

print(f'# Samples with Tumor : {len(os.listdir(yes_pth))}')
print(f'# Samples without Tumor : {len(os.listdir(no_pth))}')

In [3]:
# os.listdir() returns entries in arbitrary, filesystem-dependent order. Sorting
# pins the row order of `train_data`, so index-based splits made with a fixed
# seed select the same files on every run and every machine.
yes_imgs = [os.path.join(yes_pth, name) for name in sorted(os.listdir(yes_pth))]
no_imgs = [os.path.join(no_pth, name) for name in sorted(os.listdir(no_pth))]

# Positive-class paths first, then negative-class paths; the label list is
# concatenated in the same order so row i of each list describes the same file.
train_imgs = yes_imgs + no_imgs
train_labels = yes_labels + no_labels
assert len(train_imgs) == len(train_labels), (
    f'{len(train_imgs)} image paths but {len(train_labels)} labels; '
    'the class folders changed since the labels were built'
)
total_samples = len(train_labels)

# NOTE: np.array() on (str, int) pairs produces a 2-column *string* array, so
# the label column holds '0'/'1' and must be passed through int() when read.
train_data = np.array(list(zip(train_imgs, train_labels)))

In [4]:
# Split row indices (not the rows themselves) 90/10, keeping the label ratio
# the same in both parts via `stratify`.
all_indices = np.arange(total_samples)
train_idx, valid_idx = train_test_split(all_indices, test_size=.1, stratify=train_labels)

# Each sampler restricts a DataLoader to its own subset of indices.
train_sampler, valid_sampler = SubsetRandomSampler(train_idx), SubsetRandomSampler(valid_idx)

print(f'Train Samples : {len(train_idx)}')
print(f'Validation Samples : {len(valid_idx)}')

In [5]:
def _label_mean(indices):
    """Return the mean of the integer-parsed label column over `train_data[indices]`.

    With 0/1 labels this is the fraction of selected rows labelled 1.
    """
    selected_rows = train_data[indices]
    return sum(int(row[1]) for row in selected_rows) / len(indices)


# Class balance of the training subset.
train_yes = _label_mean(train_idx)
train_no = 1 - train_yes
print(f'Train Yes % : {train_yes}')
print(f'Train No % : {train_no}')

print('-'*50)

# Class balance of the validation subset.
valid_yes = _label_mean(valid_idx)
valid_no = 1 - valid_yes
print(f'Valid Yes % : {valid_yes}')
print(f'Valid No % : {valid_no}')

In [6]:
class BrainTumorDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs read from disk.

    Args:
        data: Indexable collection of rows where ``row[0]`` is an image path
            and ``row[1]`` is a label convertible with ``int()``.
        transforms: Callable invoked as ``transforms(image=image)`` that returns
            a mapping with an ``'image'`` entry, or a falsy value to return the
            RGB image unchanged.
    """

    def __init__(self, data, transforms):
        self.data = data
        self.transforms = transforms

    def __len__(self):
        """Return the number of rows in this dataset's own ``data``."""
        # Must be self.data: a bare `data` would look up a module-level name
        # that has nothing to do with this instance.
        return len(self.data)

    def __getitem__(self, index):
        """Load, colour-convert and transform the sample at ``index``.

        Returns:
            Tuple ``(image, label)`` with ``label`` as a Python ``int``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        current_data = self.data[index]
        img_loc = current_data[0]
        label = int(current_data[1])

        image = cv2.imread(img_loc)
        if image is None:
            # cv2.imread signals a missing/unreadable file by returning None;
            # fail here with the path rather than inside cvtColor.
            raise FileNotFoundError(f'Could not read image file: {img_loc}')

        # OpenCV loads channels as BGR; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transforms:
            image = self.transforms(image=image)['image']
        return image, label

In [7]:
def _common_steps():
    """Return fresh instances of the steps shared by both pipelines: grayscale, then resize to 224x224."""
    return [
        A.ToGray(always_apply=True),
        A.Resize(224, 224),
    ]


# Training pipeline: shared steps, then flip/rotate/colour-jitter augmentation,
# then conversion to a tensor.
# NOTE(review): neither pipeline has an A.Normalize step, so pixel values are
# presumably left in their original range. Confirm this is intended.
train_transforms = A.Compose(
    _common_steps()
    + [A.HorizontalFlip(), A.Rotate(), A.ColorJitter()]
    + [ToTensorV2()]
)

# Validation pipeline: shared steps and tensor conversion only, no augmentation.
valid_transforms = A.Compose(_common_steps() + [ToTensorV2()])

In [9]:
# Both datasets wrap the same full `train_data` table and differ only in their
# transform pipeline; which rows each one serves is decided by the sampler
# given to its DataLoader.
BTdataset_train, BTdataset_valid = (
    BrainTumorDataset(train_data, pipeline)
    for pipeline in (train_transforms, valid_transforms)
)

In [10]:
batch_size = 16

# Settings shared by both loaders.
_loader_kwargs = dict(batch_size=batch_size, num_workers=3)

# The training loader drops a trailing partial batch; the validation loader
# keeps it so every validation sample is served.
dataloaders = {
    'train': DataLoader(BTdataset_train, sampler=train_sampler, drop_last=True, **_loader_kwargs),
    'valid': DataLoader(BTdataset_valid, sampler=valid_sampler, drop_last=False, **_loader_kwargs),
}

In [11]:
def _show_first_batch(loader, title):
    """Plot the first batch served by `loader` as a 4-column image grid.

    Does nothing when the loader yields no batches.
    """
    batch = next(iter(loader), None)
    if batch is None:
        return
    images, _ = batch
    grid = make_grid(images, nrow=4, padding=5, pad_value=250)
    plt.figure(figsize=(10, 10))
    plt.title(title)
    # make_grid returns (C, H, W) while imshow expects (H, W, C), so the axis
    # order is (1, 2, 0). permute(2, 1, 0) would give (W, H, C) and draw every
    # image transposed.
    plt.imshow(grid.permute(1, 2, 0))


_show_first_batch(dataloaders['train'], 'Train Samples')
_show_first_batch(dataloaders['valid'], 'Valid Samples')

In [12]:
# Start from a pretrained ResNet-34 and freeze every existing parameter.
clf = models.resnet34(pretrained=True)
clf.requires_grad_(False)

# Replace the final fully connected layer with a small MLP head. These layers
# are created after the freeze, so they are the only trainable parameters.
in_features = clf.fc.in_features
hidden_units = 64
head_layers = [
    nn.Linear(in_features=in_features, out_features=hidden_units),
    nn.ReLU(),
    nn.Dropout(),
    nn.Linear(in_features=hidden_units, out_features=hidden_units),
    nn.ReLU(),
    nn.Dropout(),
    nn.Linear(in_features=hidden_units, out_features=2),
    # NOTE: the head ends in Softmax, so the network emits class probabilities,
    # not raw logits. Any loss paired with it must expect probabilities.
    nn.Softmax(dim=1),
]
clf.fc = nn.Sequential(*head_layers)

clf = clf.to(device)

In [13]:
def _run_epoch(model, batches, loss, metric, device, desc, optimizer=None):
    """Run one full pass over `batches` and return the mean per-batch loss.

    Trains when `optimizer` is given; otherwise evaluates with the model in
    eval mode and autograd disabled. `metric` is reset first, so after the
    call `metric.compute()` reflects this pass only.
    """
    training = optimizer is not None
    model.train(training)
    metric.reset()
    total = 0.0
    # Gradients are only needed when weights are updated.
    with torch.set_grad_enabled(training):
        for x, y in tqdm(batches, desc=desc):
            x = x.float().to(device)
            y = y.to(device)
            preds = model(x)
            metric(preds, y)
            cost = loss(preds, y)
            total += cost.item()
            if training:
                # Zero first so gradients left over from earlier code cannot
                # leak into the first update.
                optimizer.zero_grad()
                cost.backward()
                optimizer.step()
    # max(..., 1) avoids dividing by zero when a loader yields no batches.
    return total / max(len(batches), 1)


def fit(model, loader, optimizer, loss, epochs=5):
    """Train `model`, validate after every epoch, and plot the learning curves.

    Args:
        model: Network to train; expected to already live on the device chosen
            here (CUDA when available, otherwise CPU).
        loader: Mapping with 'train' and 'valid' entries, each an iterable of
            ``(x, y)`` batches that supports ``len()``.
        optimizer: Optimizer that updates the model's trainable parameters.
        loss: Callable ``loss(preds, y)`` returning a scalar tensor.
        epochs: Number of passes over the training loader.

    Returns:
        The same `model` object, left in eval mode by the last validation pass.
    """
    TL, TA, VL, VA = [], [], [], []
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # NOTE(review): newer torchmetrics releases appear to require a `task`
    # argument for Accuracy. Confirm against the installed version.
    train_metric = Accuracy().to(device)
    valid_metric = Accuracy().to(device)

    for epoch in tqdm(range(epochs), desc='Epochs', leave=True):
        report = not epoch % 2  # print on every second epoch, starting at 0

        cst = _run_epoch(model, loader['train'], loss, train_metric, device,
                         'Train Step', optimizer)
        TL.append(cst)
        TA.append(train_metric.compute().item())
        if report:
            print(f"TL : {cst}\tTA : {train_metric.compute()}")

        cst = _run_epoch(model, loader['valid'], loss, valid_metric, device,
                         'Valid Step')
        VL.append(cst)
        VA.append(valid_metric.compute().item())
        if report:
            print(f"VL : {cst}\tVA : {valid_metric.compute()}")

    plt.figure(figsize=(20, 7))
    panels = (
        (121, 'Cost Curve', TL, VL, ['Train Loss', 'Valid Loss']),
        (122, 'Accuracy Curve', TA, VA, ['Train Acc', 'Valid Acc']),
    )
    for position, title, train_curve, valid_curve, labels in panels:
        plt.subplot(position)
        plt.plot(train_curve, '-xr', valid_curve, '-xg')
        plt.title(title)
        plt.legend(labels)
        plt.grid(True)  # per subplot; a single trailing call only affects the last axes
    plt.tight_layout()

    return model

In [14]:
lr = 1e-4
wd = 1e-4


class ProbabilityCrossEntropyLoss(nn.Module):
    """Cross-entropy for a model whose output is already class probabilities.

    `nn.CrossEntropyLoss` applies log-softmax to its input, so feeding it the
    output of a network that ends in `nn.Softmax` applies softmax twice and
    flattens the gradients. This loss takes the log of the probabilities
    directly and then the negative log-likelihood of the target class.

    Args:
        eps: Lower clamp applied to the probabilities before the log, so an
            exact zero cannot produce ``-inf``.
    """

    def __init__(self, eps=1e-12):
        super().__init__()
        self.eps = eps

    def forward(self, probs, target):
        """Return the mean negative log-likelihood of `target` under `probs` (shape ``(N, C)``)."""
        return nn.functional.nll_loss(torch.log(probs.clamp_min(self.eps)), target)


# `clf` ends in nn.Softmax(dim=1), so the loss must consume probabilities.
loss = ProbabilityCrossEntropyLoss()
optimizer = optim.Adam(lr=lr, params=clf.parameters(), weight_decay=wd)

In [15]:
# Train for 50 epochs; `fit` returns the model object it was given.
n_epochs = 50
md = fit(model=clf, loader=dataloaders, optimizer=optimizer, loss=loss, epochs=n_epochs)

In [16]:
print('---------------------------Train Classification Report---------------------------')
# Evaluate in eval mode: train mode would keep the head's Dropout layers
# active, making the reported predictions noisy and not representative of the
# trained model.
md.eval()
p, t = [], []
metric = Accuracy().to(device)
cst = 0
# Pure inference: no autograd graph is needed.
with torch.no_grad():
    for x, y in tqdm(dataloaders['train'], desc='Test Step'):
        x = x.float().to(device)
        y = y.to(device)
        preds = md(x)
        # Predicted class = index of the largest output per row.
        p += preds.argmax(dim=1).cpu().tolist()
        t += y.cpu().tolist()
        acc = metric(preds, y)
        cost = loss(preds, y)
        cst += cost.item()
# Accuracy accumulated over every batch served above.
acc = metric.compute()
p, t = np.array(p), np.array(t)
print(classification_report(t, p))
print('---------------------------Train Confusion Matrix---------------------------')
print(confusion_matrix(t, p))

In [17]:
print('---------------------------Valid Classification Report---------------------------')
md.eval()
p, t = [], []
metric = Accuracy().to(device)
cst = 0
# Pure inference: disable autograd so no graph is built for each batch.
with torch.no_grad():
    for x, y in tqdm(dataloaders['valid'], desc='Test Step'):
        x = x.float().to(device)
        y = y.to(device)
        preds = md(x)
        # Predicted class = index of the largest output per row.
        p += preds.argmax(dim=1).cpu().tolist()
        t += y.cpu().tolist()
        acc = metric(preds, y)
        cost = loss(preds, y)
        cst += cost.item()
# Accuracy accumulated over every validation batch.
acc = metric.compute()
p, t = np.array(p), np.array(t)
print(classification_report(t, p))
print('---------------------------Validation Confusion Matrix---------------------------')
print(confusion_matrix(t, p))