# Pipeline 3

In [None]:
package_path = "../input/efficientnet-pytorch/EfficientNet-PyTorch/EfficientNet-PyTorch-master/"
import sys 
sys.path.append(package_path)

import os
import glob
import time
import random

import numpy as np
import pandas as pd

import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
import cv2

import torch
from torch import nn
from torch.utils import data as torch_data
from torch.nn import functional as F

import efficientnet_pytorch

from torch.utils.data import Dataset, DataLoader


In [None]:
# Run on the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
seed = 123


def seed_everything(seed):
    """Seed every random number generator used by this pipeline.

    Seeds Python's ``random``, the ``PYTHONHASHSEED`` environment variable,
    NumPy and PyTorch (CPU and current CUDA device), and puts cuDNN into
    deterministic mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN pick kernels by timing them, which can vary
    # between runs; it has to be off for results to be reproducible.
    torch.backends.cudnn.benchmark = False


seed_everything(seed)

# Integer identifiers for four model-size variants (XS, S, M, L).
# NOTE(review): presumably used as positions into the per-variant lists of
# `x3d_config`; nothing in the visible code reads them -- confirm before use.
class X3D:
    XS=0
    S=1
    M=2
    L=3
    
# Per-variant hyper-parameters; every list has four entries.
# NOTE(review): entry order presumably follows X3D.XS, X3D.S, X3D.M, X3D.L --
# confirm. Not referenced by any visible inference code.
x3d_config = {
    'input_clip_length': [4, 13, 16, 16],
    'depth_factor': [2.2, 2.2, 2.2, 5.0],
    'width_factor': [1, 1, 1, 2.9]
}

# Configuration for the CNN + LSTM pipeline ("Pipeline 3").
class CFG:
    # Side length (pixels) every DICOM slice is resized to.
    img_size = 256
    # Number of slices sampled per MRI modality.
    n_frames = 10
    
    # Size of the per-slice feature vector produced by the CNN (LSTM input size).
    cnn_features = 256
    # Hidden size of the LSTM.
    lstm_hidden = 32
    
    # Number of fold checkpoints loaded for the ensemble.
    n_fold = 5
    # Not read by the visible inference code.
    n_epochs = 10

In [None]:
class CNN(nn.Module):
    """Per-slice feature extractor: 1x1 conv (4 -> 3 channels) + EfficientNet-B2.

    The EfficientNet is created with ``from_name`` and no checkpoint is loaded
    here; its classification head is replaced by a linear layer producing
    ``CFG.cnn_features`` values.
    """

    def __init__(self):
        super().__init__()
        # Attribute names (`map`, `net`) are part of the checkpoint key names.
        self.map = nn.Conv2d(in_channels=4, out_channels=3, kernel_size=1)
        self.net = efficientnet_pytorch.EfficientNet.from_name("efficientnet-b2")
        head_inputs = self.net._fc.in_features
        self.net._fc = nn.Linear(
            in_features=head_inputs,
            out_features=CFG.cnn_features,
            bias=True,
        )

    def forward(self, x):
        """Map a batch of 4-channel images to ``CFG.cnn_features`` features each."""
        three_channel = F.relu(self.map(x))
        return self.net(three_channel)

class Model(nn.Module):
    """CNN + LSTM classifier over a sequence of multi-channel slices.

    Each time step is encoded by ``CNN``; the resulting feature sequence is fed
    to a 2-layer LSTM and the final hidden state of the last layer is projected
    to a single logit.
    """

    def __init__(self):
        super().__init__()
        self.cnn = CNN()
        self.rnn = nn.LSTM(CFG.cnn_features, CFG.lstm_hidden, 2, batch_first=True)
        self.fc = nn.Linear(CFG.lstm_hidden, 1, bias=True)

    def forward(self, x):
        """Return one logit per sample.

        Args:
            x: Tensor unpacked as (batch, timesteps, C, H, W).

        Returns:
            Tensor of shape (batch, 1) with raw (pre-sigmoid) scores.
        """
        batch_size, timesteps, C, H, W = x.size()
        # reshape (not view) so non-contiguous inputs, e.g. a transposed
        # dataset item, are accepted as well.
        c_in = x.reshape(batch_size * timesteps, C, H, W)
        c_out = self.cnn(c_in)
        r_in = c_out.reshape(batch_size, timesteps, -1)
        _, (hn, _) = self.rnn(r_in)

        # hn[-1] is the final hidden state of the top LSTM layer.
        return self.fc(hn[-1])

In [None]:
def load_dicom(path):
    """Read one DICOM slice and return it as a min-max scaled float32 tensor.

    The pixel data is shifted so its minimum is 0, divided by its maximum
    (skipped for a constant image to avoid dividing by zero) and resized to
    ``CFG.img_size`` x ``CFG.img_size``.

    Args:
        path: Path of the DICOM file.

    Returns:
        2-D ``torch.float32`` tensor of shape (CFG.img_size, CFG.img_size).
    """
    # dcmread is the supported name; read_file is a deprecated alias.
    dicom = pydicom.dcmread(path)
    data = dicom.pixel_array
    data = data - np.min(data)
    peak = np.max(data)
    if peak != 0:
        data = data / peak

    data = np.float32(cv2.resize(data, (CFG.img_size, CFG.img_size)))
    return torch.tensor(data)

# def load_dicom_line(path):
#     t_paths = sorted(
#         glob.glob(os.path.join(path, "*")), 
#         key=lambda x: int(x[:-4].split("-")[-1]),
#     )
#     images = []
#     for filename in t_paths:
#         data = load_dicom(filename)
#         if data.max() == 0:
#             continue
#         images.append(data)
        
#     return images

# def load_image(path):
#     image = cv2.imread(path, 0)
#     if image is None:
#         return np.zeros((CFG.img_size, CFG.img_size))
    
#     image = cv2.resize(image, (CFG.img_size, CFG.img_size)) / 255
#     return torch.tensor(image)

# def get_valid_frames(t_paths):
#     res = []
#     for path in t_paths:
#         img = load_dicom(path)
#         if img.view(-1).mean(0) != 0:
#             res.append(path)
#     return res
    

def uniform_temporal_subsample(x, num_samples):
    '''
        Pick ``num_samples`` equispaced elements from a sequence.

        Modified from https://github.com/facebookresearch/pytorchvideo/blob/d7874f788bc00a7badfb4310a912f6e531ffd6d3/pytorchvideo/transforms/functional.py#L19
        Args:
            x: input list
            num_samples: The number of equispaced samples to be selected
        Returns:
            Output list of ``num_samples`` elements (elements repeat when
            ``num_samples`` exceeds ``len(x)``); an empty list when ``x`` is
            empty.
    '''
    t = len(x)
    if t == 0:
        # Nothing to sample from; without this guard the clamp below would
        # yield index -1 and raise IndexError.
        return []
    indices = torch.linspace(0, t - 1, num_samples)
    # Convert to plain ints so any sequence type can be indexed.
    indices = torch.clamp(indices, 0, t - 1).long().tolist()
    return [x[i] for i in indices]

In [None]:
class TestDataRetriever(Dataset):
    """Test-set dataset yielding, per patient, ``CFG.n_frames`` 4-channel frames.

    For each of the four MRI modalities the slices are sorted by the number at
    the end of the file name, subsampled to ``CFG.n_frames`` and loaded with
    ``load_dicom``. Modalities are stacked as channels.
    """

    def __init__(self, paths, transform=None):
        # `paths` holds the patient ids; `transform` is stored but not applied.
        self.paths = paths
        self.transform = transform

    def __len__(self):
        return len(self.paths)

    def read_video(self, vid_paths):
        """Load the given slices into a (T, H, W) tensor.

        Returns a zero tensor of ``CFG.n_frames`` frames when ``vid_paths`` is
        empty.
        """
        frames = [load_dicom(path) for path in vid_paths]
        if not frames:
            return torch.zeros(CFG.n_frames, CFG.img_size, CFG.img_size)
        return torch.stack(frames)  # T * H * W

    def __getitem__(self, index):
        """Return ``{"X": float tensor (n_frames, 4, H, W), "id": patient id}``."""
        _id = self.paths[index]
        patient_path = f"../input/rsna-miccai-brain-tumor-radiogenomic-classification/test/{str(_id).zfill(5)}/"
        num_samples = CFG.n_frames
        channels = []
        for t in ["FLAIR","T1w", "T1wCE", "T2w"]:
            t_paths = sorted(
                glob.glob(os.path.join(patient_path, t, "*")),
                key=lambda x: int(x[:-4].split("-")[-1]),
            )
            if len(t_paths) < num_samples:
                in_frames_path = t_paths
            else:
                in_frames_path = uniform_temporal_subsample(t_paths, num_samples)

            channel = self.read_video(in_frames_path)
            # A modality with fewer than `num_samples` slices would otherwise
            # produce a shorter tensor that cannot be stacked with the other
            # modalities or collated into a batch; pad it with blank frames.
            n_missing = num_samples - channel.shape[0]
            if n_missing > 0:
                padding = torch.zeros(
                    n_missing, CFG.img_size, CFG.img_size, dtype=channel.dtype
                )
                channel = torch.cat([channel, padding])
            channels.append(channel)

        # (4, T, H, W) -> (T, 4, H, W)
        channels = torch.stack(channels).transpose(0,1)
        return {"X": channels.float(), "id": _id}

In [None]:
# Load the training labels and preview the first ten rows.
# `df` is not read by any of the visible inference cells.
df = pd.read_csv("../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv")
df.head(10)
# df.info()

In [None]:
# Build one Model per fold and load its best checkpoint for ensembling.
models = []
for i in range(1, CFG.n_fold+1):
    model = Model()
    model.to(device)
    # map_location lets checkpoints saved on a GPU be restored when `device`
    # fell back to the CPU.
    checkpoint = torch.load(
        f"../input/modelweight34/modelweight34/best-model-{i}.pth",
        map_location=device,
    )
    model.load_state_dict(checkpoint["model_state_dict"])
    model.eval()

    models.append(model)

In [None]:
# Build the test dataset from the ids listed in the sample submission and wrap
# it in a DataLoader. shuffle=False keeps predictions in sample-submission order.
submission = pd.read_csv("../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv")
test_data_retriever = TestDataRetriever(
    submission["BraTS21ID"].values # ids in test data
)
# Debug output: prints the bound method object, not any data.
print(test_data_retriever.read_video)

test_loader = torch_data.DataLoader(
    test_data_retriever,
    batch_size=4,
    shuffle=False,
    num_workers=8,
)


In [None]:
# Sanity check: number of test patients and the tensor shape of one item.
# NOTE(review): index 85 is hard-coded and raises IndexError when the test set
# has fewer than 86 patients.
print(len(test_data_retriever))
print(test_data_retriever[85]['X'].shape)

In [None]:
# Notebook display only: just the last expression (the batch count) is shown.
test_loader.batch_size
len(test_loader) # no. of batches

In [None]:
# Average the sigmoid outputs of all fold models for every test batch.
y_pred = []
ids = []

for e, batch in enumerate(test_loader):
    print(f"{e}/{len(test_loader)}", end="\r")
    with torch.no_grad():
        print(batch["X"].shape)
        # Move the batch to the device once instead of once per model.
        inputs = batch["X"].to(device)
        tmp_pred = np.zeros((inputs.shape[0], ))
        for model in models:
            # reshape(-1) keeps a 1-D array even for a batch of one sample.
            tmp_res = torch.sigmoid(model(inputs)).cpu().numpy().reshape(-1)
            tmp_pred += tmp_res

        tmp_pred = tmp_pred/len(models)
        y_pred.extend(tmp_pred)
        ids.extend(batch["id"].numpy().tolist())

In [None]:
# Sorted by patient id with a fresh index, like submission2 and submission3, so
# the three frames can be blended row by row.
submission1 = (
    pd.DataFrame({"BraTS21ID": ids, "MGMT_value": y_pred})
    .sort_values(by="BraTS21ID")
    .reset_index(drop=True)
)
# submission.to_csv("submission.csv", index=False)

In [None]:
submission1

# Pipeline 2

In [None]:
pip install '../input/rsna-monai-packages/monai-0.6.0-202107081903-py3-none-any.whl'

In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import os

import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
import glob

In [None]:
import albumentations as A
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from albumentations.pytorch import ToTensorV2
from sklearn.metrics import roc_auc_score
from torch.optim import lr_scheduler
from tqdm import tqdm
import re

In [None]:
# Settings for the 3-D ResNet pipeline ("Pipeline 2").
NUM_IMAGES_3D = 64  # default number of slices per volume (depth)
TRAINING_BATCH_SIZE = 8  # not read by the visible inference code
TEST_BATCH_SIZE = 8  # not read by the visible code; the loader hard-codes 8
IMAGE_SIZE = 256  # default side length each slice is resized to
N_EPOCHS = 15  # not read by the visible inference code
do_valid = True  # not read by the visible inference code
n_workers = 4  # not read by the visible code; the loader hard-codes 4
type_ = "T1wCE"  # MRI modality used for the dataset and to select weight files

In [None]:
def load_dicom_image(path, img_size=IMAGE_SIZE, voi_lut=True, rotate=0):
    """Read one DICOM slice, optionally rotate it, and resize it.

    Args:
        path: Path of the DICOM file.
        img_size: Side length of the square output image.
        voi_lut: Apply the file's VOI LUT / windowing to the pixel data.
        rotate: 0 for no rotation; 1, 2, 3 select 90 degrees clockwise,
            90 degrees counter-clockwise and 180 degrees respectively.

    Returns:
        2-D array of shape (img_size, img_size); values are not normalised.

    Raises:
        IndexError: If ``rotate`` is greater than 3.
    """
    # dcmread is the supported name; read_file is a deprecated alias.
    dicom = pydicom.dcmread(path)
    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array

    if rotate > 0:
        # Position 0 is a placeholder so that `rotate` indexes directly.
        rot_choices = [
            0,
            cv2.ROTATE_90_CLOCKWISE,
            cv2.ROTATE_90_COUNTERCLOCKWISE,
            cv2.ROTATE_180,
        ]
        data = cv2.rotate(data, rot_choices[rotate])

    data = cv2.resize(data, (img_size, img_size))
    return data

In [None]:
import random

import cv2
from torch.utils.data import Dataset


class BrainRSNADataset(Dataset):
    """Dataset returning one single-modality 3-D MRI volume per patient.

    Args:
        data: DataFrame with a ``BraTS21ID`` column (and the target column when
            ``is_train`` is true).
        transform: Stored but not applied by this class.
        target: Name of the label column.
        mri_type: Modality sub-folder to read (e.g. "FLAIR", "T1wCE").
        is_train: Selects the "train" or "test" folder and the item layout.
    """

    def __init__(
        self, data, transform=None, target="MGMT_value", mri_type="FLAIR", is_train=True
    ):
        self.target = target
        self.data = data
        self.type = mri_type

        self.transform = transform
        self.is_train = is_train
        self.folder = "train" if self.is_train else "test"

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        """Return the volume plus the target (train) or the case id (test)."""
        # Positional lookup: samplers hand out 0..len-1, which only coincides
        # with index labels for a default RangeIndex.
        row = self.data.iloc[index]
        case_id = int(row.BraTS21ID)
        _3d_images = torch.tensor(self.load_dicom_images_3d(case_id)).float()
        if self.is_train:
            # The label is only needed, and only guaranteed to exist, in training.
            return {"image": _3d_images, "target": int(row[self.target])}
        return {"image": _3d_images, "case_id": case_id}

    def load_dicom_images_3d(
        self,
        case_id,
        num_imgs=NUM_IMAGES_3D,
        img_size=IMAGE_SIZE,
        rotate=0,
    ):
        """Load the central ``num_imgs`` slices of a case as a scaled volume.

        Slices are sorted in natural (numeric-aware) file-name order, the
        window around the middle slice is loaded, zero-padded along the depth
        axis up to ``num_imgs`` and min-max scaled to [0, 1].

        Returns:
            Array of shape (1, img_size, img_size, num_imgs). A zero volume is
            returned when no slice is found for the case.
        """
        case_id = str(case_id).zfill(5)

        path = f"../input/rsna-miccai-brain-tumor-radiogenomic-classification/{self.folder}/{case_id}/{self.type}/*.dcm"
#         path = f"../input/brain-tumor-test/test1/test1/{case_id}/{self.type}/*.dcm"

        files = sorted(
            glob.glob(path),
            key=lambda var: [
                int(x) if x.isdigit() else x for x in re.findall(r"[^0-9]|[0-9]+", var)
            ],
        )

        middle = len(files) // 2
        half_window = num_imgs // 2
        p1 = max(0, middle - half_window)
        p2 = min(len(files), middle + half_window)
        # Forward img_size so a non-default size stays consistent with the
        # zero padding below.
        image_stack = [
            load_dicom_image(f, img_size=img_size, rotate=rotate)
            for f in files[p1:p2]
        ]

        if not image_stack:
            # np.stack([]) raises; a missing/empty series yields a blank volume.
            return np.zeros((1, img_size, img_size, num_imgs))

        # (N, H, W) -> (W, H, N): depth becomes the last axis.
        img3d = np.stack(image_stack).T
        if img3d.shape[-1] < num_imgs:
            n_zero = np.zeros((img_size, img_size, num_imgs - img3d.shape[-1]))
            img3d = np.concatenate((img3d, n_zero), axis=-1)
        # Skip scaling for a constant volume to avoid dividing by zero.
        if np.min(img3d) < np.max(img3d):
            img3d = img3d - np.min(img3d)
            img3d = img3d / np.max(img3d)

        return np.expand_dims(img3d, 0)



In [None]:
ls ../input/

In [None]:
import monai

# 3-D ResNet-34 with one input channel and a single output logit.
model = monai.networks.nets.resnet34(spatial_dims=3, n_input_channels=1, n_classes=1)
# Fall back to the CPU when no GPU is visible, like the other pipelines.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device);
all_weights = os.listdir("../input/modelweights33/modelweights33")
# os.listdir order is arbitrary; sort so the fold order is reproducible.
fold_files = sorted(f for f in all_weights if type_ in f)
# print(np.array(fold_files).shape)
criterion = nn.BCEWithLogitsLoss()

In [None]:
fold_files

In [None]:
# Sample submission: supplies the test ids and is later overwritten with the
# pipeline 2 predictions.
sample = pd.read_csv("../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv")

In [None]:
tta_true_labels = []
tta_preds = []
test_dataset = BrainRSNADataset(data=sample, mri_type=type_, is_train=False)
test_dl = torch.utils.data.DataLoader(
        test_dataset, batch_size=8, shuffle=False, num_workers=4
    )

if not fold_files:
    raise FileNotFoundError(
        f"no weight files containing {type_!r} found in ../input/modelweights33/modelweights33"
    )

# Average over however many fold checkpoints were found instead of assuming 5.
n_folds = len(fold_files)
preds_f = np.zeros(len(sample))
for weight_file in fold_files:
    image_ids = []
    # map_location lets GPU-saved weights load when running on the CPU.
    model.load_state_dict(
        torch.load(
            f"../input/modelweights33/modelweights33/{weight_file}",
            map_location=device,
        )
    )
    model.eval()
    preds = []
    epoch_iterator_test = tqdm(test_dl)
    with torch.no_grad():
        for step, batch in enumerate(epoch_iterator_test):
            images = batch["image"].to(device)
            print(batch["image"].shape)
            outputs = model(images)
            preds.append(outputs.sigmoid().detach().cpu().numpy())
            image_ids.append(batch["case_id"].detach().cpu().numpy())

    # Each fold contributes an equal share of the final probability.
    preds_f += np.vstack(preds).T[0]/n_folds

    ids_f = np.hstack(image_ids)

In [None]:
# print(np.array(test_dataset).shape)
test_dataset[40]['image'].shape

In [None]:
# Overwrite the sample submission with the ids seen during inference and the
# fold-averaged probabilities (both in loader order, shuffle=False).
sample["BraTS21ID"] = ids_f
sample["MGMT_value"] = preds_f

In [None]:
# Pipeline 2 result, ordered by patient id with a fresh 0..n-1 index.
submission2 = sample.sort_values(by="BraTS21ID").reset_index(drop=True)

In [None]:
# submission2.to_csv("submission.csv", index=False)

In [None]:
submission2

# Pipeline 1

In [None]:
# ====================================================
# Directory settings
# ====================================================
import os

OUTPUT_DIR = './'
# exist_ok avoids the check-then-create race and the extra branch.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# TRAIN_PATH = '../input/rsna-miccai-png/train'
TEST_PATH = '../input/rsna-miccai-brain-tumor-radiogenomic-classification/test'

In [None]:
# ====================================================
# CFG
# ====================================================
# Configuration for the 2-D EfficientNet pipeline ("Pipeline 1").
# NOTE: this rebinds the name CFG used by the earlier CNN + LSTM pipeline.
class CFG:
    debug=False  # not read by the visible code
    num_workers=8  # DataLoader worker processes
    model_name='efficientnet_b4'  # timm architecture name
    size=512  # side length each slice is resized to
    batch_size=4  # DataLoader batch size
    seed=42  # passed to seed_torch
    target_size=2  # number of classifier outputs
    target_col='MGMT_value'  # not read by the visible code
    n_fold=5  # not read by the visible code
    trn_fold=[0,1,2,3,4]  # not read by the visible code
    inference=True  # not read by the visible code

In [None]:
# ====================================================
# Imports
# ====================================================
import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')

import os
import math
import time
import random
import shutil
from pathlib import Path
from contextlib import contextmanager
from collections import defaultdict, Counter

import scipy as sp
import numpy as np
import pandas as pd

from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold

from tqdm.auto import tqdm
from functools import partial

import cv2
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau

import albumentations as A
from albumentations.pytorch import ToTensorV2

import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

import timm

import warnings 
warnings.filterwarnings('ignore')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
def seed_torch(seed=42):
    """Seed Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

    Args:
        seed: Integer seed applied to every generator (default 42).
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True

seed_torch(seed=CFG.seed)

In [None]:
# One row per patient folder in the test directory.
# - TEST_PATH is reused instead of repeating the literal path.
# - Only all-digit names are kept so a stray file cannot break astype(int).
# - Sorted because os.listdir returns entries in arbitrary order.
test = sorted(name for name in os.listdir(TEST_PATH) if name.isdigit())
# print(np.array(test).shape)
test = pd.DataFrame({'BraTS21ID' : test})
# print(test)
test['BraTS21ID'] = test['BraTS21ID'].astype(int)
test

In [None]:
# ====================================================
# Dataset
# ====================================================
class TestDataset(Dataset):
    """Test dataset producing one 4-channel 2-D image per patient.

    For each modality up to 10 randomly chosen slices are windowed (VOI LUT),
    min-max scaled to 8 bit, resized to ``CFG.size`` and averaged; the four
    averaged images become the channels.

    Args:
        df: DataFrame with a ``BraTS21ID`` column.
        transform: Optional albumentations-style callable invoked as
            ``transform(image=...)``.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['BraTS21ID'].values
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return the (H, W, 4) image, or the transformed float tensor."""
        root = f'{TEST_PATH}/{str(self.file_names[idx]).zfill(5)}/'
        com = []
        for typ in ['FLAIR', 'T1w', 'T1wCE', 'T2w']:
            paths = os.listdir(root + typ)
            # Random slice choice: results depend on the `random` module state.
            rnd = random.sample(paths, min(10, len(paths)))
            typ_imgs = []
            for f in rnd:
                file_path = f'{root}{typ}/{f}'
                # dcmread is the supported name; read_file is deprecated.
                dicom = pydicom.dcmread(file_path)
                data = apply_voi_lut(dicom.pixel_array, dicom)
                if dicom.PhotometricInterpretation == "MONOCHROME1":
                    # MONOCHROME1 stores inverted intensities.
                    data = np.amax(data) - data
                data = data - np.min(data)
                peak = np.max(data)
                if peak > 0:
                    # Blank slices would otherwise divide by zero and cast NaN
                    # to uint8; they now stay all-zero.
                    data = data / peak
                image = (data * 255).astype(np.uint8)
                typ_imgs.append(cv2.resize(image, (CFG.size, CFG.size)))
            if typ_imgs:
                com.append(np.mean(typ_imgs, axis = 0))
            else:
                # Empty modality folder: use a blank channel so stacking works.
                com.append(np.zeros((CFG.size, CFG.size)))
        image = np.array(com).transpose((1,2,0)) / 255
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']
            image = image.float()
        return image

In [None]:
# ====================================================
# Transforms
# ====================================================
def get_transforms(*, data):
    """Return the transform pipeline for the given split.

    Args:
        data: Split name (keyword-only). Only ``'valid'`` is recognised.

    Returns:
        An ``A.Compose`` that converts the image to a tensor for ``'valid'``;
        ``None`` for any other value.
    """
    if data != 'valid':
        return None
    return A.Compose([
        ToTensorV2(),
    ])

In [None]:
# ====================================================
# MODEL
# ====================================================
class CustomEfficientNet(nn.Module):
    """timm backbone preceded by a 1x1 conv mapping 4 input channels to 3.

    Args:
        model_name: timm architecture name (defaults to ``CFG.model_name``).
        pretrained: Forwarded to ``timm.create_model``.
    """

    def __init__(self, model_name=CFG.model_name, pretrained=False):
        super().__init__()
        self.conv = nn.Conv2d(4,3,1)
        # Honour the argument; previously CFG.model_name was always used and
        # the parameter was silently ignored.
        self.model = timm.create_model(model_name, pretrained=pretrained)
        n_features = self.model.classifier.in_features
        self.model.classifier = nn.Linear(n_features, CFG.target_size)

    def forward(self, x):
        """Return ``CFG.target_size`` raw scores per input image."""
        x = self.conv(x)
        x = self.model(x)
        return x

In [None]:
# ====================================================
# Helper functions
# ====================================================
def load_state_eff(model_path, map_location="cpu"):
    """Load a checkpoint file and return the state dict stored under 'model'.

    Args:
        model_path: Path of the checkpoint written with ``torch.save``.
        map_location: Where tensors are placed on load. The default "cpu"
            lets GPU-saved checkpoints load on any machine and keeps the
            cached state dicts out of GPU memory; ``load_state_dict`` copies
            them onto the model's device later.

    Returns:
        The mapping found under the checkpoint's ``'model'`` key.
    """
    checkpoint = torch.load(model_path, map_location=map_location)
    return checkpoint['model']

def inference(model_eff, states, test_loader, device):
    """Predict class probabilities, averaged over several weight sets.

    For every batch each state dict in ``states`` is loaded into ``model_eff``
    in turn; the softmax outputs are averaged over the states.

    Args:
        model_eff: Model whose weights are swapped per state.
        states: Sequence of state dicts compatible with ``model_eff``.
        test_loader: Iterable yielding image batches.
        device: Device the model and batches are moved to.

    Returns:
        NumPy array with one row of averaged class probabilities per sample.
    """
    model_eff.to(device)
    tk0 = tqdm(enumerate(test_loader), total=len(test_loader))
    probs = []
    for i, (images) in tk0:
        images = images.to(device)
        avg_preds = []
        for state in states:
            model_eff.load_state_dict(state)
            model_eff.eval()
            with torch.no_grad():
                y_preds = model_eff(images)
                print(y_preds.shape) # (4, 2)
            # one (batch, classes) probability array per state
            avg_preds.append(y_preds.softmax(1).to('cpu').numpy())
        # mean over the state axis -> (batch, classes)
        avg_preds = np.mean(avg_preds, axis=0)
        probs.append(avg_preds)
    probs = np.concatenate(probs)
    return probs

In [None]:
model_eff = CustomEfficientNet(CFG.model_name, pretrained=False)

# One checkpoint per fold; file names are derived from the configuration
# instead of five copy-pasted literals.
states = [
    load_state_eff(
        f'../input/modelweight14/modelweight14/{CFG.model_name}_fold{fold}_best.pth'
    )
    for fold in CFG.trn_fold
]


In [None]:
# ====================================================
# inference
# ====================================================


# Run the fold ensemble over the test set (rebinds test_dataset/test_loader
# from the earlier pipelines).
test_dataset = TestDataset(test, transform=get_transforms(data='valid'))
test_loader = DataLoader(test_dataset, batch_size=CFG.batch_size, shuffle=False, 
                         num_workers=CFG.num_workers, pin_memory=True)
predictions = inference(model_eff, states, test_loader, device)

# submission
# Column 1 of the softmax output is used as the MGMT probability.
test['MGMT_value'] = predictions[:,1]
# test[['BraTS21ID', 'MGMT_value']].to_csv(OUTPUT_DIR+'submission.csv', index=False)
# test.head()
submission3 = test[['BraTS21ID', 'MGMT_value']]
# submission3.to_csv(OUTPUT_DIR+'submission.csv', index=False)
# submission3.head()

In [None]:
# Order pipeline 1 predictions by patient id with a fresh 0..n-1 index.
submission3 = submission3.sort_values(by="BraTS21ID").reset_index(drop=True)
submission3

# Final Submission

In [None]:
# Start the final submission from the sample file so ids and row order match
# what the competition expects.
final = pd.read_csv("../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv")
final.head()

In [None]:
# final["MGMT_value"] = (0.3*submission1["MGMT_value"] + 0.5*submission2["MGMT_value"] + 0.2*submission3["MGMT_value"])
# Look predictions up by patient id. Plain column assignment aligns on the row
# index, which would attach values to the wrong patient whenever the sample
# file is not in the same (sorted) order as submission3.
final["MGMT_value"] = final["BraTS21ID"].map(
    submission3.set_index("BraTS21ID")["MGMT_value"]
)

In [None]:
# final["MGMT_value"] = submission2["MGMT_value"]

In [None]:
# final["MGMT_value"] = (submission1["MGMT_value"] + submission2["MGMT_value"] + submission3["MGMT_value"])/3

In [None]:
final

In [None]:
# Write the competition submission file (without the index column) and show it.
final.to_csv("submission.csv", index=False)
final

In [None]:
# temp = pd.read_csv("../input/brain-tumor-test/train1_actual_labels.csv")
# actual_labels = temp["MGMT_value"].values
# predicted_labels = final["MGMT_value"].values
# temp

In [None]:
# pred = []
# for i in predicted_labels:
#     if i<0.5:
#         pred.append(0)
#     else:
#         pred.append(1)


In [None]:
# from sklearn.metrics import precision_score
# precision_score(actual_labels, pred, average="binary")

In [None]:
# submission1

In [None]:
# submission2

In [None]:
# submission3

In [None]:
# submission3 = submission3.sort_values(by = 'BraTS21ID')
# submission3

In [None]:
# final["MGMT_value"] = (submission1["MGMT_value"] + submission2["MGMT_value"] + submission3["MGMT_value"])/3

In [None]:
# precision_score(actual_labels, pred, average="binary")