In [None]:
"""
Creates an EfficientNetV2 model as defined in:
Mingxing Tan, Quoc V. Le. (2021). 
EfficientNetV2: Smaller Models and Faster Training
arXiv preprint arXiv:2104.00298.
import from https://github.com/d-li14/mobilenetv2.pytorch
"""

import torch
import torch.nn as nn
import math

__all__ = ['effnetv2_s', 'effnetv2_m', 'effnetv2_l', 'effnetv2_xl']


def _make_divisible(v, divisor, min_value=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    :param v:
    :param divisor:
    :param min_value:
    :return:
    """
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v


# SiLU (Swish) activation: use the built-in module when this PyTorch has one.
try:
    SiLU = nn.SiLU
except AttributeError:
    # Fallback for old PyTorch versions that do not ship nn.SiLU.
    class SiLU(nn.Module):
        """Element-wise ``x * sigmoid(x)`` activation."""

        def forward(self, x):
            gate = torch.sigmoid(x)
            return x * gate

class SELayer(nn.Module):
    """
    Squeeze-and-excitation gate.

    The input is globally average-pooled to one value per channel, passed
    through a two-layer bottleneck MLP ending in a sigmoid, and the result is
    used to rescale the input channel-wise.

    :param inp: channel count the bottleneck width is derived from
                (``inp // reduction`` rounded to a multiple of 8)
    :param oup: number of channels of the tensor being gated
    :param reduction: bottleneck reduction factor
    """

    def __init__(self, inp, oup, reduction=4):
        super().__init__()
        squeezed = _make_divisible(inp // reduction, 8)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(oup, squeezed),
            SiLU(),
            nn.Linear(squeezed, oup),
            nn.Sigmoid(),
        )

    def forward(self, x):
        batch, channels, _height, _width = x.size()
        pooled = self.avg_pool(x).view(batch, channels)
        gate = self.fc(pooled).view(batch, channels, 1, 1)
        return x * gate


def conv_3x3_bn(inp, oup, stride):
    """Return a bias-free 3x3 convolution (padding 1) followed by BatchNorm and SiLU."""
    conv = nn.Conv2d(inp, oup, kernel_size=3, stride=stride, padding=1, bias=False)
    norm = nn.BatchNorm2d(oup)
    return nn.Sequential(conv, norm, SiLU())


def conv_1x1_bn(inp, oup):
    """Return a bias-free 1x1 convolution followed by BatchNorm and SiLU."""
    conv = nn.Conv2d(inp, oup, kernel_size=1, stride=1, padding=0, bias=False)
    norm = nn.BatchNorm2d(oup)
    return nn.Sequential(conv, norm, SiLU())


class MBConv(nn.Module):
    """
    Inverted-residual block in two flavours.

    With ``use_se`` truthy the block is: 1x1 expansion -> 3x3 depthwise
    convolution -> squeeze-and-excitation -> 1x1 linear projection.
    Otherwise it is the fused variant: a single 3x3 expansion convolution
    followed by the 1x1 linear projection.

    A residual connection is added when ``stride == 1`` and ``inp == oup``.

    :param inp: input channels
    :param oup: output channels
    :param stride: 1 or 2
    :param expand_ratio: multiplier for the hidden width
    :param use_se: selects the depthwise + SE variant when truthy
    """

    def __init__(self, inp, oup, stride, expand_ratio, use_se):
        super().__init__()
        assert stride in [1, 2]

        hidden_dim = round(inp * expand_ratio)
        self.identity = stride == 1 and inp == oup

        if use_se:
            head = [
                # pointwise expansion
                nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
                nn.BatchNorm2d(hidden_dim),
                SiLU(),
                # depthwise
                nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
                nn.BatchNorm2d(hidden_dim),
                SiLU(),
                SELayer(inp, hidden_dim),
            ]
        else:
            head = [
                # fused expansion
                nn.Conv2d(inp, hidden_dim, 3, stride, 1, bias=False),
                nn.BatchNorm2d(hidden_dim),
                SiLU(),
            ]

        # Linear (no activation) pointwise projection shared by both variants.
        tail = [
            nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
            nn.BatchNorm2d(oup),
        ]
        self.conv = nn.Sequential(*head, *tail)

    def forward(self, x):
        if not self.identity:
            return self.conv(x)
        return x + self.conv(x)


class EffNetV2(nn.Module):
    """
    EfficientNetV2 classifier assembled from a stage table.

    :param cfgs: iterable of ``[t, c, n, s, use_se]`` rows, where ``t`` is the
                 expansion ratio, ``c`` the base output width, ``n`` the number
                 of blocks, ``s`` the stride of the first block in the stage
                 and ``use_se`` selects the SE variant of ``MBConv``
    :param num_classes: size of the final linear layer
    :param width_mult: multiplier applied to every channel width
    """

    def __init__(self, cfgs, num_classes=1000, width_mult=1.):
        super().__init__()
        self.cfgs = cfgs

        # Stem: a stride-2 3x3 convolution.
        channels = _make_divisible(24 * width_mult, 8)
        stages = [conv_3x3_bn(3, channels, 2)]

        # Body: only the first block of each stage uses the stage stride.
        for expand_ratio, base_width, repeats, first_stride, use_se in self.cfgs:
            stage_width = _make_divisible(base_width * width_mult, 8)
            for block_idx in range(repeats):
                stride = first_stride if block_idx == 0 else 1
                stages.append(MBConv(channels, stage_width, stride, expand_ratio, use_se))
                channels = stage_width
        self.features = nn.Sequential(*stages)

        # Head: the 1792-wide layer is only scaled for multipliers above 1.
        if width_mult > 1.0:
            head_width = _make_divisible(1792 * width_mult, 8)
        else:
            head_width = 1792
        self.conv = conv_1x1_bn(channels, head_width)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = nn.Linear(head_width, num_classes)

        self._initialize_weights()

    def forward(self, x):
        feats = self.conv(self.features(x))
        pooled = self.avgpool(feats)
        flat = pooled.view(pooled.size(0), -1)
        return self.classifier(flat)

    def _initialize_weights(self):
        """Re-initialise every Conv2d, BatchNorm2d and Linear sub-module in place."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                kernel_h, kernel_w = module.kernel_size
                fan_out = kernel_h * kernel_w * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan_out))
                if module.bias is not None:
                    module.bias.data.zero_()
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()
            elif isinstance(module, nn.Linear):
                module.weight.data.normal_(0, 0.001)
                module.bias.data.zero_()


def effnetv2_s(**kwargs):
    """
    Build the EfficientNetV2-S variant.

    All keyword arguments are forwarded unchanged to ``EffNetV2``.
    """
    # Columns: expansion t, channels c, repeats n, first stride s, SE flag.
    stage_table = [
        [1,  24,  2, 1, 0],
        [4,  48,  4, 2, 0],
        [4,  64,  4, 2, 0],
        [4, 128,  6, 2, 1],
        [6, 160,  9, 1, 1],
        [6, 256, 15, 2, 1],
    ]
    return EffNetV2(stage_table, **kwargs)


def effnetv2_m(**kwargs):
    """
    Build the EfficientNetV2-M variant.

    All keyword arguments are forwarded unchanged to ``EffNetV2``.
    """
    # Columns: expansion t, channels c, repeats n, first stride s, SE flag.
    stage_table = [
        [1,  24,  3, 1, 0],
        [4,  48,  5, 2, 0],
        [4,  80,  5, 2, 0],
        [4, 160,  7, 2, 1],
        [6, 176, 14, 1, 1],
        [6, 304, 18, 2, 1],
        [6, 512,  5, 1, 1],
    ]
    return EffNetV2(stage_table, **kwargs)


def effnetv2_l(**kwargs):
    """
    Build the EfficientNetV2-L variant.

    All keyword arguments are forwarded unchanged to ``EffNetV2``.
    """
    # Columns: expansion t, channels c, repeats n, first stride s, SE flag.
    stage_table = [
        [1,  32,  4, 1, 0],
        [4,  64,  7, 2, 0],
        [4,  96,  7, 2, 0],
        [4, 192, 10, 2, 1],
        [6, 224, 19, 1, 1],
        [6, 384, 25, 2, 1],
        [6, 640,  7, 1, 1],
    ]
    return EffNetV2(stage_table, **kwargs)


def effnetv2_xl(**kwargs):
    """
    Build the EfficientNetV2-XL variant.

    All keyword arguments are forwarded unchanged to ``EffNetV2``.
    """
    # Columns: expansion t, channels c, repeats n, first stride s, SE flag.
    stage_table = [
        [1,  32,  4, 1, 0],
        [4,  64,  8, 2, 0],
        [4,  96,  8, 2, 0],
        [4, 192, 16, 2, 1],
        [6, 256, 24, 1, 1],
        [6, 512, 32, 2, 1],
        [6, 640,  8, 1, 1],
    ]
    return EffNetV2(stage_table, **kwargs)

In [None]:
import os
import json
import glob
import random
import collections

import numpy as np
import pandas as pd
import pydicom as dicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
import cv2
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
import sys 

import time

import torch
from torch import nn
from torch.utils import data as torch_data
from sklearn import model_selection as sk_model_selection
from torch.nn import functional as torch_functional

#from sklearn.model_selection import StratifiedKFold

## Support functions

In [None]:
def load_dicom(path):
    """
    Read a DICOM file and return it as a 256x256 8-bit RGB image.

    The pixel data is min-max normalised to [0, 1], scaled to 0..255,
    resized to 256x256 and expanded from grayscale to three identical
    channels.

    :param path: path of the DICOM file to read
    :return: ``uint8`` array; shape (256, 256, 3) for a single-frame
             grayscale image
    """
    image = dicom.dcmread(path)
    # Work in float so the subtraction below cannot overflow or wrap the
    # stored integer dtype.
    data = image.pixel_array.astype(np.float64)
    data = data - np.min(data)
    peak = np.max(data)
    if peak != 0:
        data = data / peak
    # Scale by 255, not 256: 1.0 * 256 does not fit in uint8, so the brightest
    # pixels used to overflow (wrapping to 0) instead of mapping to white.
    # NOTE(review): checkpoints trained with the old scaling saw marginally
    # different inputs - confirm this is acceptable before reusing them.
    data = (data * 255).astype(np.uint8)
    data = cv2.resize(data, (256, 256))
    data = cv2.cvtColor(data, cv2.COLOR_GRAY2RGB)
    return data

In [None]:
def is_valid_image(path, threshold=32768):
    """
    Tell whether the image at ``path`` holds enough non-zero values.

    :param path: DICOM file, loaded through ``load_dicom``
    :param threshold: the image is valid only if the number of non-zero
                      values in the loaded array exceeds this
    :return: ``True`` or ``False``
    """
    nonzero_values = np.count_nonzero(load_dicom(path))
    return bool(nonzero_values > threshold)

In [None]:
def set_seed(seed):
    """
    Seed Python's ``random``, NumPy and PyTorch, and export ``PYTHONHASHSEED``.

    When CUDA is available, every CUDA device is seeded too and cuDNN is
    switched to deterministic mode.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True


set_seed(42)

## Visualize on labels

In [None]:
class DataCustomer(torch_data.Dataset):
    """
    Dataset yielding ``{"X": image, "y": label}`` pairs for training.

    :param paths: indexable collection of DICOM file paths
    :param labels: indexable collection of labels, aligned with ``paths``
    """

    def __init__(self, paths, labels):
        self.paths, self.labels = paths, labels

    def __len__(self):
        # The dataset length follows the labels, not the paths.
        return len(self.labels)

    def __getitem__(self, index):
        pixels = load_dicom(self.paths[index])
        # NOTE(review): load_dicom's cvtColor output is channel-last, and
        # reshape only reinterprets the buffer as (3, 256, 256) without moving
        # the channel axis (a permute would). Kept as-is because existing
        # checkpoints were presumably trained on this layout - confirm.
        features = torch.reshape(torch.tensor(pixels).float(), (3,256,256))
        target = torch.tensor(self.labels[index]).float()
        return {"X": features, "y": target}

In [None]:
class LossMeter:
    """Running mean of the values passed to ``update``."""

    def __init__(self):
        self.avg = 0
        self.n = 0

    def update(self, val):
        """Fold one more value into the running mean."""
        count = self.n + 1
        # Incremental mean: new value weighted 1/count, old mean (count-1)/count.
        self.avg = val / count + (count - 1) / count * self.avg
        self.n = count

        
class AccMeter:
    """
    Running binary accuracy over batches.

    Predictions are thresholded at 0 (``>= 0`` counts as the positive class),
    so inputs that are always non-negative, such as sigmoid outputs, are all
    counted as positive.
    """

    def __init__(self):
        self.avg = 0
        self.n = 0

    def update(self, y_true, y_pred):
        """Fold one batch of target and prediction tensors into the accuracy."""
        labels = y_true.cpu().numpy().astype(int)
        decisions = y_pred.cpu().numpy() >= 0
        seen_before = self.n
        self.n = seen_before + len(labels)
        hits = np.sum(labels == decisions)
        # Incremental update weighted by the number of samples seen so far.
        self.avg = hits / self.n + seen_before / self.n * self.avg

In [None]:
class Trainer:
    """
    Minimal training loop with checkpointing and early stopping.

    The model's raw outputs (logits) are passed both to ``criterion`` and to
    the score meter. ``criterion`` must therefore accept logits (for example
    ``binary_cross_entropy_with_logits``) and the score meter must threshold
    logits (as ``AccMeter`` does with ``>= 0``).

    :param model: module mapping a batch ``X`` to outputs of shape (batch, 1)
    :param device: device the batches are moved to
    :param optimizer: optimizer stepping the model parameters
    :param criterion: callable ``criterion(logits, targets) -> loss``
    :param loss_meter: class (not instance) with ``update(val)`` and ``avg``
    :param score_meter: class (not instance) with ``update(y_true, y_pred)``
                        and ``avg``
    """

    def __init__(
        self,
        model,
        device,
        optimizer,
        criterion,
        loss_meter,
        score_meter
    ):
        self.model = model
        self.device = device
        self.optimizer = optimizer
        self.criterion = criterion
        self.loss_meter = loss_meter
        self.score_meter = score_meter

        self.best_valid_score = -np.inf
        self.n_patience = 0

        self.messages = {
            "epoch": "[Epoch {}: {}] loss: {:.5f}, score: {:.5f}, time: {} s",
            "checkpoint": "The score improved from {:.5f} to {:.5f}. Save model to '{}'",
            "patience": "\nValid score didn't improve last {} epochs."
        }

    def fit(self, epochs, train_loader, valid_loader, save_path, patience):
        """
        Train for up to ``epochs`` epochs.

        A checkpoint is written to ``save_path`` whenever the validation score
        improves; training stops early once it has failed to improve for
        ``patience`` consecutive epochs.
        """
        for n_epoch in range(1, epochs + 1):
            self.info_message("EPOCH: {}", n_epoch)

            train_loss, train_score, train_time = self.train_epoch(train_loader)
            valid_loss, valid_score, valid_time = self.valid_epoch(valid_loader)

            self.info_message(
                self.messages["epoch"], "Train", n_epoch, train_loss, train_score, train_time
            )

            self.info_message(
                self.messages["epoch"], "Valid", n_epoch, valid_loss, valid_score, valid_time
            )

            # The check was previously hard-wired to True, which made the
            # patience branch unreachable and the "improved" message untrue.
            if self.best_valid_score < valid_score:
                self.info_message(
                    self.messages["checkpoint"], self.best_valid_score, valid_score, save_path
                )
                self.best_valid_score = valid_score
                self.save_model(n_epoch, save_path)
                self.n_patience = 0
            else:
                self.n_patience += 1

            if self.n_patience >= patience:
                self.info_message(self.messages["patience"], patience)
                break

    def train_epoch(self, train_loader):
        """Run one optimisation pass; return (mean loss, score, seconds)."""
        self.model.train()
        t = time.time()
        train_loss = self.loss_meter()
        train_score = self.score_meter()

        for step, batch in enumerate(train_loader, 1):
            X = batch["X"].to(self.device)
            targets = batch["y"].to(self.device)
            self.optimizer.zero_grad()
            # Keep raw logits: applying sigmoid here would feed values in
            # (0, 1) to a logits-based criterion and to a meter thresholding
            # at 0, which would then count every sample as positive.
            logits = self.model(X).squeeze(1)

            loss = self.criterion(logits, targets)
            loss.backward()

            train_loss.update(loss.detach().item())
            train_score.update(targets, logits.detach())

            self.optimizer.step()

            _loss, _score = train_loss.avg, train_score.avg
            message = 'Train Step {}/{}, train_loss: {:.5f}, train_score: {:.5f}'
            self.info_message(message, step, len(train_loader), _loss, _score, end="\r")

        return train_loss.avg, train_score.avg, int(time.time() - t)

    def valid_epoch(self, valid_loader):
        """Evaluate without gradients; return (mean loss, score, seconds)."""
        self.model.eval()
        t = time.time()
        valid_loss = self.loss_meter()
        valid_score = self.score_meter()

        for step, batch in enumerate(valid_loader, 1):
            with torch.no_grad():
                X = batch["X"].to(self.device)
                targets = batch["y"].to(self.device)

                # Raw logits, for the same reason as in train_epoch.
                logits = self.model(X).squeeze(1)
                loss = self.criterion(logits, targets)

                valid_loss.update(loss.detach().item())
                valid_score.update(targets, logits)

            _loss, _score = valid_loss.avg, valid_score.avg
            message = 'Valid Step {}/{}, valid_loss: {:.5f}, valid_score: {:.5f}'
            self.info_message(message, step, len(valid_loader), _loss, _score, end="\r")

        return valid_loss.avg, valid_score.avg, int(time.time() - t)

    def save_model(self, n_epoch, save_path):
        """Write model and optimizer state plus bookkeeping to ``save_path``."""
        torch.save(
            {
                "model_state_dict": self.model.state_dict(),
                "optimizer_state_dict": self.optimizer.state_dict(),
                "best_valid_score": self.best_valid_score,
                "n_epoch": n_epoch,
            },
            save_path,
        )

    @staticmethod
    def info_message(message, *args, end="\n"):
        """Print ``message`` formatted with ``args``."""
        print(message.format(*args), end=end)

In [None]:
# Load the training index; the cell below relies on its 'patient_id' column.
df = pd.read_csv("../input/dfdxinnhathemattroi/filetrainxin.csv", index_col = False)

# Split by patient id rather than by row, so that every row of a given patient
# ends up in exactly one of the two subsets.
indx = df['patient_id'].unique()
indx_train, indx_val = sk_model_selection.train_test_split(
    indx,
    test_size = 0.2,
    random_state = 42,
)
df_train = df[df['patient_id'].isin(indx_train)]
df_valid = df[df['patient_id'].isin(indx_val)]
# Show how many distinct patients landed in each subset.
display(len(df_train['patient_id'].unique()))
display(len(df_valid['patient_id'].unique()))

In [None]:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# #device = "cpu"


# train_data_retriever = DataCustomer(
#     df_train["file_paths"].values, 
#     df_train["label"].values, 
# )

# valid_data_retriever = DataCustomer(
#     df_valid["file_paths"].values, 
#     df_valid["label"].values,
# )

# train_loader = torch_data.DataLoader(
#     train_data_retriever,
#     batch_size=64,
#     shuffle=True,
#     num_workers=8,
# )

# valid_loader = torch_data.DataLoader(
#     valid_data_retriever, 
#     batch_size=64,
#     shuffle=False,
#     num_workers=8,
# )

# model = effnetv2_s(num_classes = 1)
# model.to(device)

# checkpoint = torch.load("../input/v336epoch/best-modelv3.pth")
# model.load_state_dict(checkpoint["model_state_dict"])

# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# criterion = torch_functional.binary_cross_entropy_with_logits

# trainer = Trainer(
#     model, 
#     device, 
#     optimizer, 
#     criterion, 
#     LossMeter, 
#     AccMeter
# )

# history = trainer.fit(
#     7, 
#     train_loader, 
#     valid_loader, 
#     "best-modelv4.pth", 
#     100,
# )

# FILTER TESTDATA WITH THRESHOLD = 10 AND ONLY USE T2w SCAN TYPE

In [None]:
# Load the test index; its 'paths' column is read in the next cell.
sample_df = pd.read_csv('../input/dfdxinnhathemattroi/filetestxin.csv', index_col = False)
sample_df.shape

In [None]:
# File paths listed in the test index, used later to filter the files found
# on disk; the first one is displayed as a sanity check.
tmp = sample_df.paths.values
tmp[0]

In [None]:
IMG_PATH_TEST = "../input/rsna-miccai-brain-tumor-radiogenomic-classification/test"

# Collect every file below the test directory.
f = [
    os.path.join(dirpath, filename)
    for dirpath, dirnames, filenames in os.walk(IMG_PATH_TEST)
    for filename in filenames
]

test_file_paths_df = pd.DataFrame({'file_paths': f})
test_file_paths_df['directory'] = IMG_PATH_TEST

# Split each path on "/" once and pick the components by position; the
# positions assume the directory depth of IMG_PATH_TEST above.
path_parts = test_file_paths_df['file_paths'].str.split("/", n = 7, expand = True)
for column, position in (('dataset', 3), ('patient_id', 4), ('scan_type', 5), ('file', 6)):
    test_file_paths_df[column] = path_parts[position]

display(test_file_paths_df.head(2))
test_file_paths_df.shape[0]

In [None]:
# Keep only the files on disk that are also listed in the test index (tmp).
test_df=test_file_paths_df[test_file_paths_df['file_paths'].isin(tmp)]
display(test_df.shape)

In [None]:
# Build the list of inference models; range(1) means a single model for now.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
models = []
for i in range(1):
    model = effnetv2_s(num_classes = 1)
    model.to(device)
    
    # NOTE(review): torch.load unpickles the file - only load checkpoints
    # that come from a trusted source.
    checkpoint = torch.load("../input/v557epoch/v5-57epoch.pth",map_location=torch.device('cpu'))
    model.load_state_dict(checkpoint["model_state_dict"])
    # Switch to evaluation mode before predicting.
    model.eval()
    
    models.append(model)

In [None]:
# Integer patient ids aligned row-by-row with test_df; read as a global by
# TestDataCustomer.__getitem__.
_id = test_df['patient_id'].map(int).tolist()

In [None]:
class TestDataCustomer(torch_data.Dataset):
    """
    Dataset yielding ``{"X": image, "id": identifier}`` items for inference.

    :param paths: indexable collection of DICOM file paths
    :param ids: optional identifiers aligned with ``paths``. When omitted, the
                notebook-level ``_id`` list is used, as before, so existing
                callers keep working; passing it explicitly removes the
                dependence on that global being defined first.
    :raises ValueError: if ``ids`` is given but its length differs from
                        ``paths``
    """

    def __init__(self, paths, ids=None):
        if ids is not None and len(ids) != len(paths):
            raise ValueError(
                "ids and paths must have the same length, got {} and {}".format(
                    len(ids), len(paths)
                )
            )
        self.paths = paths
        self.ids = ids

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, index):
        data_path = self.paths[index]
        data = load_dicom(data_path)

        data = torch.tensor(data).float()
        # NOTE(review): load_dicom's cvtColor output is channel-last, and
        # reshape only reinterprets the buffer as (3, 256, 256) without moving
        # the channel axis (a permute would). Kept as-is because the loaded
        # checkpoint was presumably trained on this layout - confirm.
        data = torch.reshape(data, (3,256,256))

        sample_ids = _id if self.ids is None else self.ids
        return {"X": data, "id": sample_ids[index]}

In [None]:
# The sample submission is loaded here; a later cell rebinds `submission` to
# the frame built from the predictions.
submission = pd.read_csv("../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv")

test_data_retriever = TestDataCustomer( 
    test_df['file_paths'].values,
)

# shuffle=False keeps the batches in the order of test_df.
test_loader = torch_data.DataLoader(
    test_data_retriever,
    batch_size=64,
    shuffle=False,
    num_workers=8,
)

In [None]:
# Per-image predictions and the patient id of each image, in loader order.
y_pred = []
ids = []

with torch.no_grad():
    for e, batch in enumerate(test_loader):
        print(f"{e}/{len(test_loader)}", end="\r")
        # Move the batch to the device once instead of once per model.
        inputs = batch["X"].to(device)
        tmp_pred = np.zeros((inputs.shape[0], ))
        for model in models:
            # reshape(-1) always yields a 1-D array, even for a batch of one.
            tmp_pred += torch.sigmoid(model(inputs)).cpu().numpy().reshape(-1)
        # Average (not sum) over the ensemble so the value stays a probability
        # when more than one model is used; identical for a single model.
        y_pred.extend(tmp_pred / len(models))
        ids.extend(batch["id"].numpy().tolist())

In [None]:
# One row per image, then collapse to one row per patient by taking the median
# of that patient's image-level predictions.
submission = pd.DataFrame({"BraTS21ID": ids, "MGMT_value": y_pred})
submission = submission.groupby(['BraTS21ID'], as_index = False).median()
# NOTE(review): float_format writes floats with a single decimal, which makes
# many predictions tie in the CSV - confirm this rounding is intended.
submission.to_csv("submission.csv",float_format='{:.1f}'.format, encoding='utf-8', index=False)
submission

In [None]:
# Histogram of the per-patient predictions; the trailing ';' suppresses the
# textual repr of the hist() return value.
plt.figure(figsize=(5, 5))
plt.hist(submission["MGMT_value"]);

## WORK IN PROGRESS...

In [None]:
# # def is_valid_image(path, threshold=10):
# #     data = load_dicom(path)
# #     if np.mean(data)<threshold:
# #         return False
# #     else:
# #         return True


# def change_path(path):
#     path = path.replace("rsna-miccai-png","rsna-miccai-brain-tumor-radiogenomic-classification")
#     path = path.replace(".png", ".dcm")
#     return path