In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns 
from tqdm import tqdm 
import time, os, pickle, json, random  
from PIL import Image 
import cv2 
from sklearn.metrics import f1_score, accuracy_score, recall_score, precision_score, roc_auc_score
from sklearn.model_selection import train_test_split 
import requests 
from statistics import mean 

import torch 
from torch.utils.data import DataLoader 
from torchvision import transforms 
import torch.nn as nn 

# Const 

In [None]:
# Const
SEED = 42  # single seed shared by every RNG this notebook draws from
ROOT = "../input/rsna-process/"  # dataset root; CSV and .npy paths are appended to it
LR = 0.0005  # learning rate handed to the Adam optimizer
BATCH_SIZE = 32
EPOCH = 20  # epochs trained per MRI type

device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Seed all three RNGs. The stdlib `random` module decides whether a training
# volume is augmented, so leaving it unseeded makes runs non-reproducible.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
# Read the train / test index tables shipped with the processed dataset.
train_df = pd.read_csv(ROOT + "output/train.csv")
test_df = pd.read_csv(ROOT + "output/test.csv")

# Hold out 30% of the training rows for validation, keeping the label ratio
# the same in both parts (stratified on the `labels` column).
train_df, val_df = train_test_split(
    train_df,
    test_size=0.3,
    random_state=SEED,
    stratify=train_df["labels"],
)
print(len(train_df), len(val_df), len(test_df))

In [None]:
# Preview the first rows of the training split
train_df.head()

# Preprocess Class

In [None]:
# MRI sequence names; each one is also the DataFrame column that stores the
# relative path of that sequence's .npy volume.
mp_mri_type = ["FLAIR", "T1w", "T1wCE", "T2w"]


class Dataset():
    """Map-style dataset returning one tensor per MRI type for a DataFrame row.

    Args:
        df: table with one path column per entry of ``mp_mri_type`` and, for
            the "train" / "val" modes, a ``labels`` column.
        is_train: mode string. "train" enables random rotation (applied to a
            volume with probability 0.5); "train" and "val" attach the label;
            any other value (e.g. "test") yields inputs only.
    """

    def __init__(self, df, is_train="train"):
        self.df = df
        self.is_train = is_train
        # Augmenting pipeline: tensor conversion followed by a random rotation
        to_tensor = transforms.ToTensor()
        rotate = transforms.RandomRotation(degrees=(-20, 20))
        self.transform1 = transforms.Compose([to_tensor, rotate])
        # Plain pipeline: tensor conversion only
        self.transfrom2 = transforms.Compose([transforms.ToTensor()])

    def transpose(self, x):
        """Move the smallest axis of a 3-D array to the last position.

        Ties are resolved in favour of the earliest axis.
        """
        d0, d1, d2 = x.shape
        dims = (d0, d1, d2)
        smallest_axis = dims.index(min(dims))
        axis_orders = {0: (1, 2, 0), 1: (0, 2, 1), 2: (0, 1, 2)}
        return x.transpose(*axis_orders[smallest_axis])

    def load_npy(self, f):
        """Load the .npy volume at path ``f`` and convert it to a tensor."""
        volume = self.transpose(np.load(f))  # smallest (frame) axis last
        # The coin is only flipped in "train" mode, thanks to short-circuiting
        augment = self.is_train == "train" and random.random() > 0.5
        pipeline = self.transform1 if augment else self.transfrom2
        # Original author's note gives the result shape as (64, 224, 224) — TODO confirm
        return pipeline(volume)

    def __getitem__(self, idx):
        """Return a dict of per-type tensors (plus "labels" for train/val) on ``device``."""
        row = self.df.iloc[[idx]]
        sample = {}
        for name in mp_mri_type:
            # Stored paths are relative; prepend ROOT to get the real location
            sample[name] = self.load_npy(str(ROOT) + str(row[name].values[0]))
        if self.is_train in ["train", "val"]:
            sample["labels"] = torch.tensor(row["labels"].values[0], dtype=torch.float)
        return {key: value.to(device) for key, value in sample.items()}

    def __len__(self):
        """Number of rows (scans) in the backing DataFrame."""
        return self.df.shape[0]


In [None]:
sample_ds = Dataset(train_df)
sample_scan_id = 0
sample_num_frame = 8

# Fetch the scan ONCE. Every indexing of the dataset reloads all four volumes
# from disk and redraws the random augmentation, so indexing inside the
# plotting loop is slow and mixes differently rotated copies of the same scan.
sample = sample_ds[sample_scan_id]
sample_volumes = {mri_type: sample[mri_type].detach().cpu() for mri_type in mp_mri_type}

# One row per frame, one column per MRI type; squeeze=False keeps `axes` 2-D
# even when only a single frame is requested.
fig, axes = plt.subplots(sample_num_frame, len(mp_mri_type), figsize=(7, 14), squeeze=False)
for f in range(sample_num_frame):
    for i, mri_type in enumerate(mp_mri_type):
        ax = axes[f, i]
        ax.imshow(sample_volumes[mri_type][f], cmap="gray")
        ax.set_xticks([])
        ax.set_yticks([])
        if f == 0:
            # Column header on the first row only
            ax.set_title(mri_type, c="g")
plt.subplots_adjust(wspace=0, hspace=0)

# Model

In [None]:
class Net(nn.Module):
    """Small 2-D CNN that treats the 64 frames of a volume as input channels.

    Three conv blocks (64 -> 128 -> 256 -> 512 channels) are followed by two
    fully connected layers. The output is raw logits (no sigmoid).

    Args:
        n_classes: number of output logits. Defaults to 1 (binary logit).
    """

    def __init__(self, n_classes=1):
        super(Net, self).__init__()
        self.layer0 = self.conv2normRelu(64, 128)
        self.layer1 = self.conv2normRelu(128, 256)
        self.layer2 = self.conv2normRelu(256, 512)

        # 25088 = 512 * 7 * 7: a 224x224 input shrinks 224 -> 222 -> 74 -> 72
        # -> 24 -> 22 -> 7 through the three unpadded conv + 3x3 pool blocks.
        self.fc0 = nn.Linear(25088, 512)
        self.drop = nn.Dropout(0.2)
        # Honour n_classes (it used to be ignored and the head fixed at 1)
        self.fc1 = nn.Linear(512, n_classes)

    def conv2normRelu(self, in_c, out_c, kernel_size=3, stride=1, padding=0):
        """Build one block: Conv2d -> LeakyReLU(0.3) -> MaxPool2d(3x3) -> BatchNorm2d."""
        return nn.Sequential(
            nn.Conv2d(in_c, out_c, kernel_size, stride, padding=padding),
            nn.LeakyReLU(0.3, inplace=True),
            nn.MaxPool2d((3, 3)),
            nn.BatchNorm2d(out_c),
        )

    def forward(self, x):
        """Return logits of shape (batch, n_classes) for input ``x``."""
        out = self.layer2(self.layer1(self.layer0(x)))
        out = out.view(x.size()[0], -1)  # flatten everything but the batch axis
        out = self.fc1(self.drop(self.fc0(out)))
        return out

# Train 

In [None]:
def train_batch(train_dl, mri_type, net, criterion, optimizer, is_train=True):
    """Run one pass over ``train_dl`` and return the mean batch loss.

    Args:
        train_dl: iterable of batches; each batch is a dict holding the input
            under ``mri_type`` and the targets under "labels".
        mri_type: key of the input tensor inside each batch.
        net: model to run.
        criterion: loss called as ``criterion(out.view(-1), labels)``.
        optimizer: optimizer stepped after every batch; unused (may be None)
            when ``is_train`` is False.
        is_train: True to update weights, False for a pure evaluation pass.

    Returns:
        Mean of the per-batch loss values as a float.

    Raises:
        statistics.StatisticsError: if ``train_dl`` yields no batches.
    """
    # Put the model in the mode matching this pass. torch.no_grad() alone does
    # not disable dropout, nor does it stop BatchNorm from updating its running
    # statistics, so evaluation must run in eval mode.
    was_training = net.training
    net.train(is_train)
    total_loss = []
    try:
        for data in train_dl:
            x = data[mri_type]
            t = data["labels"]
            # Gradients are only tracked when we are going to back-propagate
            with torch.set_grad_enabled(is_train):
                out = net(x)
                loss = criterion(out.view(-1), t)
            if is_train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            total_loss.append(loss.item())
    finally:
        # Leave the model in whatever mode the caller had it in
        net.train(was_training)
    return mean(total_loss)

def train_epoch(train_dl, val_dl, mri_type):
    """Train a fresh ``Net`` on one MRI type for ``EPOCH`` epochs.

    Every epoch is checkpointed under its epoch index. The weights with the
    lowest validation loss are additionally checkpointed as "best". Loss
    curves are logged to disk and plotted at the end.

    Args:
        train_dl: loader yielding training batches.
        val_dl: loader yielding validation batches.
        mri_type: batch key / MRI sequence this model is trained on.
    """
    net = Net()
    net.to(device)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=LR)
    tr_loss, va_loss = [], []
    best_loss_va = np.inf
    best_saved = False
    for e in range(EPOCH):
        net.train()
        loss_tr = train_batch(train_dl, mri_type, net, criterion, optimizer)
        # Validate in eval mode so dropout is off and BatchNorm statistics are
        # not polluted by validation batches.
        net.eval()
        loss_va = train_batch(val_dl, mri_type, net, criterion, None, False)
        tr_loss.append(loss_tr)
        va_loss.append(loss_va)
        logger(loss_tr, mri_type, e, "train")
        logger(loss_va, mri_type, e, "val")
        checkpoint(mri_type, net, e)
        if best_loss_va > loss_va:
            best_loss_va = loss_va
            # Save immediately: keeping a reference to `net` would alias the
            # live model and end up holding the last epoch's weights instead.
            checkpoint(mri_type, net, "best")
            best_saved = True
    if not best_saved:
        # Validation loss never beat +inf (e.g. it was NaN every epoch). Fall
        # back to the final weights so later loading of "best" does not fail.
        print(f"warning: validation loss never improved for {mri_type}; saving final weights as best")
        checkpoint(mri_type, net, "best")
    results = {
        "train_loss": tr_loss,
        "val_loss": va_loss,
        "best_loss": best_loss_va,
        "type": mri_type,
        "epoch": e
    }
    logger(results, mri_type, None, None, is_print=False)
    show_losses(tr_loss, va_loss, mri_type)

def train_type(train_df, val_df):
    """Train one model per MRI type and report the total wall-clock time.

    Args:
        train_df: rows used for training (wrapped in a "train"-mode dataset).
        val_df: rows used for validation (wrapped in a "val"-mode dataset).
    """
    # Only the training loader is shuffled
    loaders = {
        "train": DataLoader(Dataset(train_df), batch_size=BATCH_SIZE, shuffle=True),
        "val": DataLoader(Dataset(val_df, is_train="val"), batch_size=BATCH_SIZE, shuffle=False),
    }
    started_at = time.time()
    for sequence in mp_mri_type:
        train_epoch(loaders["train"], loaders["val"], sequence)
    elapsed = time.time() - started_at
    torch.cuda.empty_cache()
    print(f"dulation time: {elapsed}s")


def checkpoint(mri_type, net, e):
    """Save ``net``'s state dict to ``./models/<mri_type>/<e>.pth``.

    Args:
        mri_type: sub-directory name under ``./models``.
        net: module whose ``state_dict()`` is written.
        e: file stem, e.g. an epoch index or the string "best".
    """
    model_dir = f"./models/{mri_type}"
    os.makedirs(model_dir, exist_ok=True)
    weights_path = f"{model_dir}/{e}.pth"
    torch.save(net.state_dict(), weights_path)
    print("successed saving model")


def logger(loss, mri_type, e, mode, is_print=True):
    """Print a one-line loss report, or pickle ``loss`` to the log file.

    Args:
        loss: a number when printing; any picklable object when writing.
        mri_type: MRI type shown in the line / used as the log sub-directory.
        e: zero-based epoch index (printed as ``e + 1``); unused when writing.
        mode: label such as "train" or "val"; unused when writing.
        is_print: True prints to stdout. False writes ``[loss]`` as a pickle to
            ``./log/<mri_type>/train.log``, overwriting any existing file.
    """
    if not is_print:
        log_dir = f"./log/{mri_type}"
        os.makedirs(log_dir, exist_ok=True)
        with open(f"{log_dir}/train.log", "wb") as sink:
            pickle.dump([loss], sink)
        return
    print(f"MRI: {mri_type} | EPOCH: {e+1} | MODE: {mode} | LOSS: {loss:.4f} |")
    
def show_losses(tr_loss, va_loss, mri_type):
    """Plot training (red) and validation (blue) loss per epoch.

    Args:
        tr_loss: sequence of training losses, one per epoch.
        va_loss: sequence of validation losses, one per epoch.
        mri_type: used as the figure title.
    """
    plt.figure(figsize=(16, 6))
    for curve, colour in ((tr_loss, "r"), (va_loss, "b")):
        epochs = list(range(len(curve)))  # x axis: 0-based epoch index
        plt.plot(epochs, curve, c=colour)
    plt.legend(["Train", "Val"])
    plt.grid()
    plt.title(mri_type)
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.show()

In [None]:
# Train one model per MRI type; checkpoints go to ./models and logs to ./log
train_type(train_df, val_df)

# Evaluate

In [None]:
def load_net(mri_type):
    """Load the "best" checkpoint for ``mri_type`` into a fresh ``Net``.

    Returns:
        The model, moved to ``device`` and switched to eval mode.
    """
    model = Net()
    weights_path = f"models/{mri_type}/best.pth"
    # Tensors saved from cuda:0 are first mapped to CPU, then moved below
    state = torch.load(weights_path, map_location={"cuda:0": "cpu"})
    model.load_state_dict(state)
    model.to(device)
    model.eval()
    return model

def metrics_score(pred, corr):
    """Compute classification metrics for binary predictions.

    Args:
        pred: predicted labels (0/1), one per sample.
        corr: ground-truth labels, one per sample, same order as ``pred``.

    Returns:
        Tuple ``(f1, precision, recall, accuracy, roc_auc)``.
    """
    # scikit-learn metrics take (y_true, y_pred). Passing them the other way
    # round swaps precision and recall, and treats predictions as ground truth
    # for ROC AUC.
    f1 = f1_score(corr, pred)
    prec = precision_score(corr, pred)
    rec = recall_score(corr, pred)
    acc = accuracy_score(corr, pred)
    # NOTE(review): ROC AUC is computed from hard 0/1 predictions because that
    # is all this function receives; probabilities would be more informative.
    roc = roc_auc_score(corr, pred)
    return f1, prec, rec, acc, roc

def val_batch(val_dl, mri_type):
    """Evaluate the best model of one MRI type on ``val_dl``.

    Args:
        val_dl: loader whose batches hold the input under ``mri_type`` and the
            targets under "labels".
        mri_type: MRI sequence whose checkpoint is loaded and evaluated.

    Returns:
        Tuple ``(result, predict_proba, correct)``: ``result`` is the list
        ``[f1, precision, recall, accuracy, roc_auc]``, ``predict_proba`` is a
        1-D array of sigmoid outputs, and ``correct`` is the list of labels.
    """
    net = load_net(mri_type)
    proba_chunks, correct = [], []
    with torch.no_grad():
        for data in val_dl:
            x = data[mri_type].to(device)
            t = data["labels"].to(device)

            # reshape(-1) rather than squeeze(): a final batch of size 1 must
            # stay 1-D, otherwise it collapses to a 0-d array and cannot be
            # iterated or concatenated.
            proba = torch.sigmoid(net(x)).reshape(-1).cpu().numpy()
            proba_chunks.append(proba)
            correct.extend(t.detach().cpu().numpy().tolist())
    predict_proba = np.concatenate(proba_chunks) if proba_chunks else np.array([])
    # Threshold at 0.5 to obtain hard labels
    predict = np.where(predict_proba > 0.5, 1, 0).tolist()
    f1, prec, rec, acc, roc = metrics_score(predict, correct)
    result = [f1, prec, rec, acc, roc]
    return result, predict_proba, correct
    

def val_type(val_df):
    """Score every per-type model and their mean ensemble on ``val_df``.

    Also writes the metrics table to ``result.csv``.

    Args:
        val_df: validation rows.

    Returns:
        Tuple ``(df, all_proba)``: ``df`` is the metrics table with one row
        per MRI type plus an "All" ensemble row, and ``all_proba`` is the list
        of per-type probability arrays in ``mp_mri_type`` order.
    """
    # "val" mode: labels attached, no random augmentation. The default mode is
    # "train", which would randomly rotate the volumes being evaluated.
    val_ds = Dataset(val_df, is_train="val")
    val_dl = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False)
    results, all_proba = [], []
    for mri_type in mp_mri_type:
        result, proba, correct = val_batch(val_dl, mri_type)
        results.append(result)
        all_proba.append(proba)

    # Ensemble: average the probabilities across types, then threshold at 0.5.
    # `correct` from the last type is reused; the loader is unshuffled, so the
    # label order is the same for every type.
    all_pred = np.where(np.mean(all_proba, axis=0) > 0.5, 1, 0).tolist()
    f1, prec, rec, acc, roc = metrics_score(all_pred, correct)
    results.append([f1, prec, rec, acc, roc])

    # Row labels follow mp_mri_type rather than a second hard-coded copy
    index = list(mp_mri_type) + ["All"]
    columns = ["f1_score", "Precision", "recall", "accuracy", "roc_score"]
    df = pd.DataFrame(results, index=index, columns=columns)
    # NOTE(review): index=False drops the row labels from the CSV; kept as-is
    # so the file layout does not change.
    df.to_csv("result.csv", index=False)

    torch.cuda.empty_cache()
    return df, all_proba

In [None]:
# Evaluate on the validation split: metrics table plus per-type probabilities
df, val_proba = val_type(val_df)

In [None]:
# Name each distribution through the data itself so seaborn builds the legend.
# A positional plt.legend(labels) is matched to the order in which artists were
# drawn, which is not guaranteed to be mp_mri_type order.
val_proba_by_type = pd.DataFrame(dict(zip(mp_mri_type, val_proba)))
sns.histplot(val_proba_by_type)
plt.title("Val")
plt.show()

In [None]:
# Show the metrics table with a coolwarm colour gradient applied to the cells
df.style.background_gradient(cmap="coolwarm")

In [None]:
def test_batch(test_dl, mri_type):
    """Predict probabilities for every sample of ``test_dl`` with one model.

    Args:
        test_dl: loader whose batches hold the input under ``mri_type``.
        mri_type: MRI sequence whose best checkpoint is loaded.

    Returns:
        1-D numpy array of sigmoid outputs, in loader order.
    """
    net = load_net(mri_type)
    proba = []
    with torch.no_grad():
        for data in test_dl:
            out = torch.sigmoid(net(data[mri_type]))
            # reshape(-1) rather than squeeze(): with a batch of one sample,
            # squeeze() yields a scalar and tolist() a bare float, which
            # cannot be iterated.
            proba.extend(out.reshape(-1).cpu().numpy().tolist())
    return np.array(proba)

def test_type(test_df):
    """Return the per-sample mean probability across all MRI-type models.

    Args:
        test_df: test rows (wrapped in a "test"-mode dataset, i.e. no labels).

    Returns:
        List of averaged probabilities, one per row of ``test_df``.
    """
    loader = DataLoader(Dataset(test_df, "test"), batch_size=BATCH_SIZE, shuffle=False)
    per_type = [test_batch(loader, sequence) for sequence in mp_mri_type]
    # Average over the model axis
    return np.mean(per_type, axis=0).tolist()

def test_type_flair(test_df):
    """Return test probabilities from the first MRI type's model only.

    The first entry of ``mp_mri_type`` is used (FLAIR in this notebook).
    """
    loader = DataLoader(Dataset(test_df, "test"), batch_size=BATCH_SIZE, shuffle=False)
    first_type = mp_mri_type[0]
    return test_batch(loader, first_type).tolist()
    

def submit(test_df):
    """Predict on ``test_df`` and write ``submission.csv``.

    The ensemble probabilities are assigned positionally to the rows of the
    competition's sample submission.
    NOTE(review): this assumes ``test_df`` rows are in the same order as the
    sample submission — confirm.

    Returns:
        The list of predicted probabilities.
    """
    proba = test_type(test_df)
    sample_path = "../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv"
    submission = pd.read_csv(sample_path, index_col="BraTS21ID")
    submission["MGMT_value"] = proba
    # Only the prediction column (plus the BraTS21ID index) is written
    prediction_column = submission["MGMT_value"]
    prediction_column.to_csv("submission.csv")
    print("successed submit")
    return proba

In [None]:
# Run test-set inference and write submission.csv
test_proba = submit(test_df)

In [None]:
# Distribution of the ensemble probabilities predicted for the test set
sns.histplot(test_proba)
plt.title("Test")
plt.show()