In [1]:
import os
import sys
# Prepend the attached dataset directories so that the packages bundled there
# (timm, segmentation_models_pytorch and its dependencies, project utilities)
# are found before anything already on the default search path.
sys.path = [
    '/kaggle/input/abdominal-test',
    '../input/timm20221011/pytorch-image-models-master',
    '../input/smp20210127/segmentation_models.pytorch-master/segmentation_models.pytorch-master',
    '../input/smp20210127/pretrained-models.pytorch-master/pretrained-models.pytorch-master',
    '../input/smp20210127/EfficientNet-PyTorch-master/EfficientNet-PyTorch-master',
    '/kaggle/input/abdominal-utils'
] + sys.path 

In [2]:
# Install DICOM-related wheels from attached datasets (local files, not PyPI).
# The pydicom wheel below is left disabled; pydicom is still imported later, so
# presumably the environment's preinstalled version is used — TODO confirm.
# !pip -q install /kaggle/input/rsna2022whl/rsna-2022-whl/pydicom-2.3.0-py3-none-any.whl
!pip -q install ../input/pylibjpeg140py3/pylibjpeg-1.4.0-py3-none-any.whl
!pip install -q /kaggle/input/gdcm-0310/python_gdcm-3.0.22-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

!pip install -q /kaggle/input/dicomsdl--0-109-2/dicomsdl-0.109.2-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl

# Copy a second timm snapshot into the working directory under the name
# `timm4smp`, so it can be imported side by side with the `timm` on sys.path.
!cp -r ../input/timm-20220211/pytorch-image-models-master/timm ./timm4smp

In [3]:
import gc
import ast
import cv2
import time
import timm
import timm4smp
import pickle
import random
import pydicom
import dicomsdl
import argparse
import warnings
warnings.filterwarnings('ignore')
import threading
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
from glob import glob
import albumentations
import matplotlib.pyplot as plt
import segmentation_models_pytorch as smp
from segmentation_models_pytorch.unet.decoder import UnetDecoder
from segmentation_models_pytorch.base import SegmentationHead

import albumentations

import torch
import torch.nn as nn
import torch.optim as optim
import torch.cuda.amp as amp
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

import gdcm
import zipfile
from joblib import Parallel, delayed
from pydicom.pixel_data_handlers.util import apply_voi_lut
from pylab import rcParams

# Allow cuDNN to benchmark convolution algorithms for the input shapes it sees.
torch.backends.cudnn.benchmark = True

# Single source of truth for the target device: first GPU when available,
# otherwise CPU. (An earlier unconditional `torch.device('cuda')` assignment was
# immediately overwritten by this line and has been removed.)
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
print(device)

# Show the versions of the two timm snapshots and of pydicom as the cell output.
timm.__version__, timm4smp.__version__, pydicom.__version__

cuda:0


('0.6.12', '0.5.5', '2.4.2')

In [4]:
# DEBUG=True runs the pipeline on a few training series instead of the test set.
DEBUG = False

data_dir = '/kaggle/input/rsna-2023-abdominal-trauma-detection'

# Which competition split to read: 'train' while debugging, 'test' otherwise.
data_type = 'train' if DEBUG else 'test'

test_dir = os.path.join(data_dir, f'{data_type}_images')

# Series metadata, extended with a "<patient>_<series>" key and the folder
# holding that series' DICOM files.
df = pd.read_csv(os.path.join(data_dir, f'{data_type}_series_meta.csv')).assign(
    psid=lambda d: d['patient_id'].astype(str) + '_' + d['series_id'].astype(str),
    image_folder=lambda d: test_dir + '/' + d['patient_id'].astype(str) + '/' + d['series_id'].astype(str),
)

if DEBUG:
    # keep only the first 10 series for a quick run
    df = df.head(10).reset_index(drop=True)


sub = pd.read_csv(os.path.join(data_dir, 'sample_submission.csv'))

In [5]:
class Config:
    """Base settings inherited by every stage-specific config class."""
    # Segmentation input volume size: slices are resized to
    # (image_size_seg[1], image_size_seg[0]) and image_size_seg[2] slices are
    # sampled per series (see dicomsdl_read_one / load_dicomsdl_dir).
    image_size_seg = (128, 128, 128)
    msk_size = image_size_seg[0]
    # Number of neighbouring slice offsets iterated per sample in main()
    # (original note: "for segmentation linespace").
    n_ch = 5 # for segmentation linespace
    batch_size_seg = 1
    num_workers = 2

class SegConfig1(Config):
    """Checkpoint location for the segmentation ensemble.

    Weights are read from
    ``{model_dir_seg}/{backbone}/{kernel_type}_fold{fold}_best.pth``.
    """
    kernel_type = 'test'
    backbone = 'resnet18d'
    model_dir_seg = '/kaggle/input/abdominal-test/segmentations'
    
    
class OrganBowelConfig1(Config):
    """Organ and bowel classifier settings, EfficientNetV2-S variant.

    Checkpoints are read from ``{*_model_dir_cls}/{*_kernel_type}_fold{fold}_best.pth``
    for ``fold`` in ``range(folds)``.
    """
    image_size_cls = 224
    image_size_organ = 224 # output size after segmentation (for organ and bowel)
    image_size_bowel = 224
    # slices sampled per organ crop; the bowel uses n_slice_per_c * mul
    n_slice_per_c = 15
    mul = 2
    in_chans = 6
    out_dim = 3
    
    slice_type = 1
    folds = 5
    
    # organ classifier checkpoints
    organ_kernel_type = 'organ'
    organ_model_dir_cls = '/kaggle/input/abdominal-test/stage2-organ-type1/save/sz_224/min0_e20_lr1e4'
    organ_backbone = 'tf_efficientnetv2_s_in21ft1k'
    
    # bowel classifier checkpoints
    bowel_kernel_type = 'bowel'
    bowel_model_dir_cls = '/kaggle/input/abdominal-test/stage2-bowel-type1/ver1(clean data)'
    bowel_backbone = 'tf_efficientnetv2_s_in21ft1k'
    
    
class OrganBowelConfig2(Config):
    """Organ and bowel classifier settings, SE-ResNeXt50 variant.

    Same fields as OrganBowelConfig1; only the checkpoint directories and the
    backbone names differ.
    """
    image_size_cls = 224
    image_size_organ = 224 # output size after segmentation (for organ and bowel)
    image_size_bowel = 224
    # slices sampled per organ crop; the bowel uses n_slice_per_c * mul
    n_slice_per_c = 15
    mul = 2
    in_chans = 6
    out_dim = 3
    
    slice_type = 1
    folds = 5
    
    # organ classifier checkpoints
    organ_kernel_type = 'organ'
    organ_model_dir_cls = '/kaggle/input/abdominal-organ-seres-1ch/organ/seres50_lr1e4_e20_ll'
    organ_backbone = 'seresnext50_32x4d'
    
    # bowel classifier checkpoints
    bowel_kernel_type = 'bowel'
    bowel_model_dir_cls = '/kaggle/input/abdominal-organ-seres-1ch/bowel/seres1e4-ce5'
    bowel_backbone = 'seresnext50_32x4d'
    
    
class ExtraConfig1(Config):
    """
        Settings for the "extra" pipeline, SE-ResNeXt50 variant.

        # segmentation head (fix label)
        feature extractor : sliding_5_384_e3_seres50_seghead
        sequence model    : s5_m192_sz384_gru_nodropout
    """
    image_size_extra = 384
    in_chans = 5 # slide window
    # m_size, emb_dim and lstm_size are the attributes SeqGRUModel reads from
    # its config (attention step count, embedding width, GRU hidden size).
    m_size = 192
    emb_dim = 2048
    lstm_size = 512
    extra_bs = 8
    
    forward_type = 1
    
    # presumably passed to the feature extractor's U-Net decoder and
    # segmentation head — verify against the caller
    decoder_channels = (256, 128, 64, 32, 16)
    seg_head_input = 32
    
    feature_model_dir = '/kaggle/input/abdominal-extra-seg-gru/feature'
    feature_kernel_type = 'seg_head'
    feature_backbone = 'seresnext50_32x4d'
    
    sequence_model_dir = '/kaggle/input/abdominal-extra-seg-gru/sequence'
    sequence_kernel_type = 'seres_384_seghead_m192_gru_nodropout'
    
    
class ExtraConfig2(Config):
    """
        Settings for the "extra" pipeline, EfficientNetV2-S variant.

        feature extractor : sliding_5_384_e3_effv2s
        sequence model    : s5_m192_sz384_gru_nodropout
    """
    image_size_extra = 384
    in_chans = 5 # slide window
    # m_size, emb_dim and lstm_size are the attributes SeqGRUModel reads from
    # its config (attention step count, embedding width, GRU hidden size).
    m_size = 192
    emb_dim = 1280
    lstm_size = 512
    extra_bs = 8
    
    forward_type = 2
    
    # presumably passed to the feature extractor's U-Net decoder and
    # segmentation head — verify against the caller
    decoder_channels = (256, 160, 64, 48, 24)
    seg_head_input = 48
    
    feature_model_dir = '/kaggle/input/abdominal-extra-effv2s-seg-gru/feature'
    feature_kernel_type = 'effv2s_e3'
    feature_backbone = 'tf_efficientnetv2_s_in21ft1k'
    
    sequence_model_dir = '/kaggle/input/abdominal-extra-effv2s-seg-gru/sequence'
    sequence_kernel_type = 'effv2s_384_seghead_m192_gru_nodropout'

In [6]:
# One instance per stage configuration; ExtraConfig2 is currently disabled.
CFG = Config()
SegCFG1 = SegConfig1()
OBCFG1 = OrganBowelConfig1()
OBCFG2 = OrganBowelConfig2()
ExtraCFG1 = ExtraConfig1()
# ExtraCFG2 = ExtraConfig2()

# Segmentation ensembles to load: one list of fold models per entry.
seg_configs = [SegCFG1]

display(df)

Unnamed: 0,patient_id,series_id,aortic_hu,psid,image_folder
0,48843,295,401.25,48843_295,/kaggle/input/rsna-2023-abdominal-trauma-detec...
1,48843,62825,238.0,48843_62825,/kaggle/input/rsna-2023-abdominal-trauma-detec...
2,50046,24574,149.0,50046_24574,/kaggle/input/rsna-2023-abdominal-trauma-detec...
3,50046,60658,352.0,50046_60658,/kaggle/input/rsna-2023-abdominal-trauma-detec...
4,63706,39279,219.0,63706_39279,/kaggle/input/rsna-2023-abdominal-trauma-detec...
5,63706,41385,319.0,63706_41385,/kaggle/input/rsna-2023-abdominal-trauma-detec...


In [7]:
if not DEBUG:
    # fast_sub is True when the probe series folder holds exactly one file
    # (presumably the placeholder test set shown before rerun — TODO confirm).
    fast_sub = len(glob(os.path.join(test_dir, f"48843/62825/*"))) == 1
    print(fast_sub)

True


In [8]:
def dicomsdl_to_numpy_image(ds, index=0):
    """Copy one frame of a dicomsdl dataset into a new NumPy array.

    Args:
        ds: object exposing ``getPixelDataInfo()`` and ``copyFrameData()``.
        index: frame number to copy.

    Returns:
        Array of shape (Rows, Cols) with the dtype reported by the dataset.

    Raises:
        RuntimeError: if the image has more than one sample per pixel.
    """
    pixel_info = ds.getPixelDataInfo()
    # only single-plane images are supported
    if pixel_info['SamplesPerPixel'] != 1:
        raise RuntimeError('SamplesPerPixel != 1')

    frame = np.empty((pixel_info['Rows'], pixel_info['Cols']), dtype=pixel_info['dtype'])
    ds.copyFrameData(index, frame)
    return frame


def dicomsdl_read_one(path, img_type):
    """Read one DICOM slice and return it windowed to 8 bits.

    The slice is rescaled with RescaleSlope/RescaleIntercept, clipped to the
    file's own WindowCenter/WindowWidth, converted to uint8 and inverted for
    MONOCHROME1 images. It is then post-processed according to ``img_type``.

    Args:
        path: path of the ``.dcm`` file; the last three components are compared
            against a hard-coded file that is replaced by a blank slice.
        img_type: ``'seg'`` resizes to the segmentation input size taken from
            ``CFG.image_size_seg``; ``'extra'`` min-max normalizes to floats in
            [0, 1); ``'cls'`` (or anything else) returns the uint8 slice as is.

    Returns:
        2-D array: uint8 for ``'seg'`` and ``'cls'``, floating point for ``'extra'``.
    """
    if '/'.join(path.split('/')[-3:]) == "3124/5842/514.dcm":
        # This file is never opened (presumably unreadable — TODO confirm);
        # a blank slice stands in for it.
        norm = np.zeros((512, 512), dtype=np.uint8)
    else:
        dcm = dicomsdl.open(path)
        pixel_array = dicomsdl_to_numpy_image(dcm)
        if dcm.PixelRepresentation == 1:
            # Signed data stored in fewer bits than allocated: shift left then
            # arithmetic-shift right to sign-extend the stored bits.
            bit_shift = dcm.BitsAllocated - dcm.BitsStored
            dtype = pixel_array.dtype
            pixel_array = (pixel_array << bit_shift).astype(dtype) >> bit_shift

        # rescale, then window to [xmin, xmax] and quantize to uint8
        pixel_array = pixel_array.astype(np.float32)
        pixel_array = dcm.RescaleSlope * pixel_array + dcm.RescaleIntercept
        xmin = dcm.WindowCenter - 0.5 - (dcm.WindowWidth - 1) * 0.5
        xmax = dcm.WindowCenter - 0.5 + (dcm.WindowWidth - 1) * 0.5
        norm = np.empty_like(pixel_array, dtype=np.uint8)
        dicomsdl.util.convert_to_uint8(pixel_array, norm, xmin, xmax)

        if dcm.PhotometricInterpretation == 'MONOCHROME1':
            norm = 255 - norm

    # Post-processing runs for the blank placeholder too. Previously the
    # placeholder bypassed it, so in 'seg' mode a 512x512 slice would be
    # stacked with resized slices, and in 'extra' mode its dtype differed.
    if img_type == 'seg':
        norm = cv2.resize(norm, (CFG.image_size_seg[1], CFG.image_size_seg[0]), interpolation=cv2.INTER_LINEAR)
    elif img_type == 'extra':
        norm = (norm - norm.min()) / (norm.max() - norm.min() + 1e-6)
    return norm



def load_dicomsdl_dir(t_paths, img_type):
    """Load a fixed number of evenly spaced slices of one series as a volume.

    Args:
        t_paths: slice file paths in acquisition-number order.
        img_type: forwarded to ``dicomsdl_read_one``.

    Returns:
        ``(volume, t_paths)``: ``volume`` is a uint8 array with the slice axis
        last, rescaled so its minimum maps to 0; ``t_paths`` is the input list,
        reversed when the last slice lies below the first along z.
    """
    n_scans = len(t_paths)

    # Compare the z position of the two end slices to detect reversed ordering.
    head_dcm = dicomsdl.open(t_paths[0])
    tail_dcm = dicomsdl.open(t_paths[-1])
    _, _, z_head = head_dcm.ImagePositionPatient
    _, _, z_tail = tail_dcm.ImagePositionPatient
    if z_tail < z_head:
        t_paths = t_paths[::-1]

    # CFG.image_size_seg[2] slice indices spread evenly over the whole series
    fractions = np.linspace(0., 1., CFG.image_size_seg[2])
    picked = np.quantile(list(range(n_scans)), fractions).round().astype(int)

    volume = np.stack([dicomsdl_read_one(t_paths[i], img_type) for i in picked], -1)

    # shift to zero minimum, scale by the maximum, quantize back to uint8
    volume = volume - np.min(volume)
    volume = volume / (np.max(volume) + 1e-4)
    return (volume * 255).astype(np.uint8), t_paths


class SegTestDataset(Dataset):
    """Dataset yielding one segmentation input volume per series row."""

    def __init__(self, df):
        # reset_index() keeps the previous index as an 'index' column
        self.df = df.reset_index()

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        """Return ``(volume, slice_paths, psid)`` for row ``index``.

        ``volume`` is a float tensor with a leading channel axis repeated three
        times and values divided by 255; ``slice_paths`` is the ordered path
        list returned by ``load_dicomsdl_dir``.
        """
        record = self.df.iloc[index]
        psid = str(record['psid'])
        patient, series = str(record['patient_id']), str(record['series_id'])

        def _slice_number(p):
            # file names are "<number>.<ext>"
            return int(p.split('/')[-1].split(".")[0])

        slice_files = glob(os.path.join(test_dir, patient, series, "*"))
        slice_files.sort(key=_slice_number)

        volume, slice_files = load_dicomsdl_dir(slice_files, 'seg') # load : (0,1)

        if volume.ndim < 4:
            volume = np.expand_dims(volume, 0)
        # replicate the single channel to 3 channels, then scale to [0, 1]
        volume = volume.astype(np.float32).repeat(3, 0) / 255.

        return torch.tensor(volume).float(), slice_files, psid

## Model

In [9]:
from timm4smp.models.layers.conv2d_same import Conv2dSame
from conv3d_same import Conv3dSame

def convert_3d(module):
    """Recursively replace 2-D layers in ``module`` with 3-D counterparts.

    BatchNorm2d, Conv2dSame, Conv2d, MaxPool2d and AvgPool2d are swapped for
    their 3-D equivalents; every other module is kept and only its children
    are converted (in place). Convolution kernels are inflated by repeating
    the 2-D kernel ``kernel_size[0]`` times along a new trailing axis.
    Convolution biases are copied across as well, so a converted layer starts
    from the same parameters as its 2-D source rather than from a freshly
    initialized bias.

    Args:
        module: the ``torch.nn.Module`` to convert.

    Returns:
        The converted module (a new object for replaced layer types, otherwise
        ``module`` itself with converted children).
    """
    module_output = module
    if isinstance(module, torch.nn.BatchNorm2d):
        module_output = torch.nn.BatchNorm3d(
            module.num_features,
            module.eps,
            module.momentum,
            module.affine,
            module.track_running_stats,
        )
        if module.affine:
            with torch.no_grad():
                module_output.weight = module.weight
                module_output.bias = module.bias
        module_output.running_mean = module.running_mean
        module_output.running_var = module.running_var
        module_output.num_batches_tracked = module.num_batches_tracked
        if hasattr(module, "qconfig"):
            module_output.qconfig = module.qconfig

    elif isinstance(module, Conv2dSame):
        # Must be tested before plain Conv2d in case Conv2dSame subclasses it.
        module_output = Conv3dSame(
            in_channels=module.in_channels,
            out_channels=module.out_channels,
            kernel_size=module.kernel_size[0],
            stride=module.stride[0],
            padding=module.padding[0],
            dilation=module.dilation[0],
            groups=module.groups,
            bias=module.bias is not None,
        )
        module_output.weight = torch.nn.Parameter(module.weight.unsqueeze(-1).repeat(1,1,1,1,module.kernel_size[0]))
        if module.bias is not None:
            module_output.bias = torch.nn.Parameter(module.bias.detach().clone())

    elif isinstance(module, torch.nn.Conv2d):
        module_output = torch.nn.Conv3d(
            in_channels=module.in_channels,
            out_channels=module.out_channels,
            kernel_size=module.kernel_size[0],
            stride=module.stride[0],
            padding=module.padding[0],
            dilation=module.dilation[0],
            groups=module.groups,
            bias=module.bias is not None,
            padding_mode=module.padding_mode
        )
        module_output.weight = torch.nn.Parameter(module.weight.unsqueeze(-1).repeat(1,1,1,1,module.kernel_size[0]))
        if module.bias is not None:
            module_output.bias = torch.nn.Parameter(module.bias.detach().clone())

    elif isinstance(module, torch.nn.MaxPool2d):
        module_output = torch.nn.MaxPool3d(
            kernel_size=module.kernel_size,
            stride=module.stride,
            padding=module.padding,
            dilation=module.dilation,
            ceil_mode=module.ceil_mode,
        )
    elif isinstance(module, torch.nn.AvgPool2d):
        module_output = torch.nn.AvgPool3d(
            kernel_size=module.kernel_size,
            stride=module.stride,
            padding=module.padding,
            ceil_mode=module.ceil_mode,
        )

    # Convert children and attach them under the same names.
    for name, child in module.named_children():
        module_output.add_module(
            name, convert_3d(child)
        )

    return module_output

### segmentation model

In [10]:
# segmentation model

class TimmSegModel(nn.Module):
    """U-Net style segmentation network on a timm feature-pyramid encoder.

    Args:
        backbone: timm model name for the encoder.
        segtype: decoder type; only ``'unet'`` is supported.
        pretrained: whether timm should load pretrained encoder weights.
        n_blocks: number of encoder stages / decoder blocks to use. When left
            as ``None`` the module-level global ``n_blocks`` is used if it
            exists (the previous behaviour), otherwise 4.

    Raises:
        ValueError: if ``segtype`` is not ``'unet'``.
    """

    def __init__(self, backbone, segtype='unet', pretrained=False, n_blocks=None):
        super(TimmSegModel, self).__init__()

        if n_blocks is None:
            # Backward compatible: honour a notebook-level `n_blocks` if one
            # was defined, but no longer crash with NameError when this class
            # is built before the cell that defines it.
            n_blocks = globals().get('n_blocks', 4)
        self.n_blocks = n_blocks

        self.encoder = timm4smp.create_model(
            backbone,
            in_chans=3,
            features_only=True,
            pretrained=pretrained
        )
        # Probe the encoder once to discover the channel count of each stage.
        g = self.encoder(torch.rand(1, 3, 64, 64))
        encoder_channels = [1] + [_.shape[1] for _ in g]
        decoder_channels = [256, 128, 64, 32, 16]
        if segtype == 'unet':
            self.decoder = smp.unet.decoder.UnetDecoder(
                encoder_channels=encoder_channels[:n_blocks+1],
                decoder_channels=decoder_channels[:n_blocks],
                n_blocks=n_blocks,
            )
        else:
            # Previously this left `self.decoder` undefined and failed later
            # inside forward().
            raise ValueError(f"Unsupported segtype: {segtype!r} (only 'unet' is implemented)")
        # 5 output channels, one per segmentation class
        self.segmentation_head = nn.Conv2d(decoder_channels[n_blocks-1], 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

    def forward(self,x):
        # The leading 0 is a placeholder for the first encoder_channels entry.
        global_features = [0] + self.encoder(x)[:self.n_blocks]
        seg_features = self.decoder(*global_features)
        seg_features = self.segmentation_head(seg_features)
        return seg_features


### organ & bowel model

In [11]:
# Organ & Bowel Model

    
# Organ Model
class TimmModel(nn.Module):
    """Per-slice organ classifier: 2-D timm encoder, BiLSTM, 3-way head.

    Args:
        backbone: timm model name; must contain 'efficient', 'convnext',
            'seresnext' or 'resnet' so the classifier layer can be located.
        pretrained: whether timm should load pretrained encoder weights.

    Raises:
        ValueError: if the backbone family is not one of the supported ones.
    """

    def __init__(self, backbone, pretrained=False):
        super(TimmModel, self).__init__()

        self.encoder = timm.create_model(
            backbone,
            in_chans=6,
            num_classes=3,
            features_only=False,
            drop_rate=0,
            drop_path_rate=0,
            pretrained=pretrained
        )

        # Strip the encoder's own classifier and remember its input width.
        if 'efficient' in backbone:
            hdim = self.encoder.conv_head.out_channels
            self.encoder.classifier = nn.Identity()
        elif 'convnext' in backbone:
            hdim = self.encoder.head.fc.in_features
            self.encoder.head.fc = nn.Identity()
        elif 'seresnext' in backbone or 'resnet' in backbone:
            hdim = self.encoder.fc.in_features
            self.encoder.fc = nn.Identity()
        else:
            # Previously fell through and failed below with an unbound `hdim`.
            raise ValueError(f'Unsupported backbone for TimmModel: {backbone!r}')

        self.lstm = nn.LSTM(hdim, 256, num_layers=2, dropout=0, bidirectional=True, batch_first=True)
        self.head = nn.Sequential(
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.Dropout(0),
            nn.LeakyReLU(0.1),
            nn.Linear(256, 3),
        )

    def forward(self, x):  # (bs, nslice, ch, sz, sz)
        """Return logits of shape (bs, n_slice_per_c, 3)."""
        bs, n_slice_per_c, in_chans, image_size, image_size = x.shape

        # fold the slice axis into the batch for the 2-D encoder
        x = x.view(bs * n_slice_per_c, in_chans, image_size, image_size)
        feat = self.encoder(x)
        feat = feat.view(bs, n_slice_per_c, -1)
        feat, _ = self.lstm(feat)  # (bs, n_slice_per_c, 512)
        feat = feat.contiguous().view(bs * n_slice_per_c, -1)
        feat = self.head(feat) # (bs * n_slice_per_c, 3)
        feat = feat.view(bs, n_slice_per_c, 3).contiguous()
        return feat # (bs, n_slice_per_c, 3)


# Bowel Model
class TimmBowelModel(nn.Module):
    """Bowel classifier with a per-slice head and a per-study head.

    A 2-D timm encoder embeds every slice, a BiLSTM mixes the slice sequence,
    then one head scores each slice and another scores the whole study from
    max- and mean-pooled sequence features.

    Args:
        backbone: timm model name; must contain 'efficient', 'convnext',
            'seresnext' or 'resnet' so the classifier layer can be located.
        pretrained: whether timm should load pretrained encoder weights.

    Raises:
        ValueError: if the backbone family is not one of the supported ones.
    """

    def __init__(self, backbone, pretrained=False):
        super(TimmBowelModel, self).__init__()

        self.encoder = timm.create_model(
            backbone,
            in_chans=6,
            num_classes=1, # 1
            features_only=False,
            drop_rate=0,
            drop_path_rate=0,
            pretrained=pretrained
        )

        # Strip the encoder's own classifier and remember its input width.
        if 'efficient' in backbone:
            hdim = self.encoder.conv_head.out_channels
            self.encoder.classifier = nn.Identity()
        elif 'convnext' in backbone:
            hdim = self.encoder.head.fc.in_features
            self.encoder.head.fc = nn.Identity()
        elif 'seresnext' in backbone or 'resnet' in backbone:
            hdim = self.encoder.fc.in_features
            self.encoder.fc = nn.Identity()
        else:
            # Previously fell through and failed below with an unbound `hdim`.
            raise ValueError(f'Unsupported backbone for TimmBowelModel: {backbone!r}')

        self.lstm = nn.LSTM(hdim, 256, num_layers=2, dropout=0, bidirectional=True, batch_first=True)
        self.image_head = nn.Sequential(
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.Dropout(0),
            nn.LeakyReLU(0.1),
            nn.Linear(256, 1)
        ) # output : (bs * n_slice_per_c, 1)

        self.study_head = nn.Sequential(
            nn.Linear(1024, 256),
            nn.BatchNorm1d(256),
            nn.Dropout(0),
            nn.LeakyReLU(0.1),
            nn.Linear(256, 1)
        )

    def forward(self, x):  # (bs, nslice, ch, sz, sz)
        """Return ``(study_logit, image_logit)``.

        ``study_logit`` has shape (bs, 1) and ``image_logit`` has shape
        (bs, n_slice_per_c, 1).
        """
        bs, n_slice_per_c, in_chans, image_size, image_size = x.shape
        # fold the slice axis into the batch for the 2-D encoder
        x = x.view(bs * n_slice_per_c, in_chans, image_size, image_size)
        feat = self.encoder(x)
        feat = feat.view(bs, n_slice_per_c, -1)
        feat, _ = self.lstm(feat)  # (bs, n_slice_per_c, 512)

        # image level
        image_feat = feat.contiguous().view(bs * n_slice_per_c, -1)
        image_logit = self.image_head(image_feat) # (bs * n_slice_per_c, 1)
        image_logit = image_logit.view(bs, n_slice_per_c, 1).contiguous() # (bs, n_slice_per_c, 1)

        # study level
        avg_pool = torch.mean(feat, 1)   # (bs, 512)
        max_pool = torch.max(feat, 1)[0] # (bs, 512)
        study_feat = torch.cat((max_pool, avg_pool), 1) # (bs, 1024)
        study_logit = self.study_head(study_feat)

        return study_logit, image_logit



### extra models (feature extractor + sequence)

In [12]:
# Extra Model
class Attention(nn.Module):
    """Additive attention pooling over the step axis of a sequence.

    Args:
        feature_dim: size of the last axis of the input.
        step_dim: number of steps (size of axis 1 of the input).
        bias: whether to add a learned per-step bias to the scores.
    """

    def __init__(self, feature_dim, step_dim, bias=True, **kwargs):
        super(Attention, self).__init__(**kwargs)

        self.supports_masking = True

        self.bias = bias
        self.feature_dim = feature_dim
        self.step_dim = step_dim
        self.features_dim = 0

        weight = torch.zeros(feature_dim, 1)
        nn.init.xavier_uniform_(weight)
        self.weight = nn.Parameter(weight)

        if bias:
            self.b = nn.Parameter(torch.zeros(step_dim))

    def forward(self, x, mask=None):
        """Pool ``x`` of shape (batch, step_dim, feature_dim) to (batch, feature_dim).

        ``mask``, when given, is multiplied into the per-step weights before
        normalization, so steps with mask 0 are ignored.
        """
        feature_dim = self.feature_dim
        step_dim = self.step_dim

        # one scalar score per step: (batch * step, feature) @ (feature, 1)
        eij = torch.mm(
            x.contiguous().view(-1, feature_dim),
            self.weight
        ).view(-1, step_dim)

        if self.bias:
            eij = eij + self.b

        eij = torch.tanh(eij)
        a = torch.exp(eij)

        if mask is not None:
            a = a * mask

        # The epsilon belongs in the denominator. It was previously added after
        # the division, which left a fully masked row as 0/0 = NaN.
        a = a / (torch.sum(a, 1, keepdim=True) + 1e-10)

        weighted_input = x * torch.unsqueeze(a, -1)
        return torch.sum(weighted_input, 1)
        
        
# Seghead Model
class TimmFeatExtractorSeg(nn.Module):
    """Feature extractor with study/image logits and an auxiliary segmentation head.

    Args:
        backbone: timm model name; must contain 'efficient' or
            'seresnext50_32x4d'.
        decoder_channels: channel widths handed to the U-Net decoder.
        pretrained: whether timm should load pretrained encoder weights.

    Raises:
        ValueError: if the backbone is not one of the supported ones.
    """

    def __init__(self, backbone, decoder_channels, pretrained=False):
        super(TimmFeatExtractorSeg, self).__init__()

        self.encoder = timm.create_model(backbone, in_chans=5, features_only=True, drop_rate=0, drop_path_rate=0, pretrained=pretrained)

        if 'efficient' in backbone:
            output_dim = 1280
        elif 'seresnext50_32x4d' in backbone:
            output_dim = self.encoder.feature_info.channels()[-1]
        else:
            # Previously fell through and failed below with an unbound `output_dim`.
            raise ValueError(f'Unsupported backbone for TimmFeatExtractorSeg: {backbone!r}')

        self.decoder = UnetDecoder(encoder_channels=self.encoder.feature_info.channels(), decoder_channels=decoder_channels, n_blocks=5, use_batchnorm=True)
        self.upsample = nn.UpsamplingBilinear2d(scale_factor=2)
        self.seg_head = SegmentationHead(in_channels=32, out_channels=1, activation=None, kernel_size=3)

        self.pool = nn.AdaptiveAvgPool2d((1,1))

        self.study_linear = nn.Sequential(nn.Linear(output_dim, 256), nn.BatchNorm1d(256), nn.Dropout(0), nn.LeakyReLU(0.1), nn.Linear(256, 1))
        self.image_linear = nn.Sequential(nn.Linear(output_dim, 256), nn.BatchNorm1d(256), nn.Dropout(0), nn.LeakyReLU(0.1), nn.Linear(256, 1))

    def forward(self, x):
        """Return ``(study_logit, image_logit, xseg)`` for a batch of images.

        Both logits have shape (bs, 1); ``xseg`` is the segmentation logit map
        with its channel axis removed.
        """
        features = self.encoder(x)
        xseg = self.decoder(*features)
        xseg = self.upsample(xseg)
        xseg = self.seg_head(xseg) # (bs, 1, 384, 384)
        xseg = xseg.squeeze(1)

        pool = self.pool(features[-1])  # (bs, C, 1, 1)
        # flatten(1) keeps the batch axis. The former `.squeeze().squeeze()`
        # also removed it when bs == 1, which BatchNorm1d then rejected.
        pool = pool.flatten(1)

        study_logit = self.study_linear(pool)
        image_logit = self.image_linear(pool)
        return study_logit, image_logit, xseg
    

class SeqGRUModel(nn.Module):
    """Two stacked bidirectional GRUs over a slice-embedding sequence.

    Reads ``emb_dim``, ``lstm_size`` and ``m_size`` from ``config``. The input
    feature width is ``emb_dim * 3``.
    """

    def __init__(self, config):
        super().__init__()
        hidden = config.lstm_size
        bi_hidden = hidden * 2  # forward + backward states

        self.lstm1 = nn.GRU(config.emb_dim*3, hidden, bidirectional=True, batch_first=True)
        self.lstm2 = nn.GRU(bi_hidden, hidden, bidirectional=True, batch_first=True)
        self.image_linear = nn.Linear(bi_hidden, 1)

        self.study_linear = nn.Sequential(
            nn.Linear(hidden*4, 256),
            nn.BatchNorm1d(256),
            nn.Dropout(0),
            nn.LeakyReLU(0.1),
            nn.Linear(256, 1),
        )

        self.attention = Attention(bi_hidden, config.m_size)

    def forward(self, x, mask):
        """Return ``(study_logits, image_logits)``.

        Per-step image logits come from the first GRU's output; the study logit
        comes from max pooling and attention pooling of the second GRU's output.
        """
        first_pass, _ = self.lstm1(x)
        image_logits = self.image_linear(first_pass)

        second_pass, _ = self.lstm2(first_pass)
        pooled_max = torch.max(second_pass, 1)[0]
        pooled_att = self.attention(second_pass, mask)

        study_logits = self.study_linear(torch.cat((pooled_max, pooled_att), 1))
        return study_logits, image_logits
    
    
class TimmEffFeatExtractorSeg(nn.Module):
    """Feature extractor that reuses the classifier tail of the timm model.

    Two timm models are created from the same backbone: a full classifier,
    from which only the three children before the last one are kept as
    ``hidden_layer`` (conv_head, bn, pooling per the original note), and a
    ``features_only`` encoder that feeds the U-Net decoder.
    """

    def __init__(self, backbone, decoder_channels, pretrained=False):
        super().__init__()

        timm_kwargs = dict(in_chans=5, drop_rate=0, drop_path_rate=0, pretrained=pretrained)

        # Temporary full model: only its tail modules survive.
        full_model = timm.create_model(backbone, features_only=False, **timm_kwargs)
        self.hidden_layer = nn.Sequential(*list(full_model.children())[-4:-1]) # conv_head, bn, pooling
        del full_model
        gc.collect()
        torch.cuda.empty_cache()

        self.encoder = timm.create_model(backbone, features_only=True, **timm_kwargs)

        if 'efficient' in backbone:
            output_dim = 1280
        elif 'seresnext50_32x4d' in backbone:
            output_dim = self.encoder.feature_info.channels()[-1]

        self.decoder = UnetDecoder(
            encoder_channels=self.encoder.feature_info.channels(),
            decoder_channels=decoder_channels,
            n_blocks=5,
            use_batchnorm=True,
        )
        self.upsample = nn.UpsamplingBilinear2d(scale_factor=2)
        self.seg_head = SegmentationHead(in_channels=48, out_channels=1, activation=None, kernel_size=3)
        self.pool = nn.AdaptiveAvgPool2d((1,1))

        def _logit_head():
            # output_dim -> 256 -> 1 with BatchNorm and LeakyReLU in between
            return nn.Sequential(
                nn.Linear(output_dim, 256),
                nn.BatchNorm1d(256),
                nn.Dropout(0),
                nn.LeakyReLU(0.1),
                nn.Linear(256, 1)
            )

        self.study_linear = _logit_head()
        self.image_linear = _logit_head()

    def forward(self, x):
        """Return ``(study_logit, image_logit, xseg)`` for a batch of images."""
        pyramid = self.encoder(x)

        seg_map = self.seg_head(self.upsample(self.decoder(*pyramid))) # (bs, 1, 384, 384)
        seg_map = seg_map.squeeze(1)

        embedding = self.hidden_layer(pyramid[-1])

        return self.study_linear(embedding), self.image_linear(embedding), seg_map



## Load Models

In [13]:
# One list of fold models per segmentation config.
models_seg_all = []

for seg_config in seg_configs:

    models_seg = []

    # Encoder/decoder depth; TimmSegModel picks this up from module scope.
    n_blocks = 4
    for fold in range(5):
        model = TimmSegModel(seg_config.backbone, pretrained=False)
        model = convert_3d(model)
        model = model.to(device)
        load_model_file = os.path.join(seg_config.model_dir_seg, f'{seg_config.backbone}/{seg_config.kernel_type}_fold{fold}_best.pth')
        # Deserialize onto the CPU so the checkpoint also loads when `device`
        # fell back to 'cpu'; load_state_dict copies into the model's device.
        sd = torch.load(load_model_file, map_location='cpu')
        if 'model_state_dict' in sd.keys():
            sd = sd['model_state_dict']
        # drop the 'module.' prefix left by DataParallel-style wrappers
        sd = {k[7:] if k.startswith('module.') else k: sd[k] for k in sd.keys()}
        model.load_state_dict(sd, strict=True)
        model.eval()
        models_seg.append(model)

    models_seg_all.append(models_seg)

del models_seg, model, load_model_file, sd
gc.collect()

print('models_seg_all :', len(models_seg_all), '/', len(models_seg_all[0]))

models_seg_all : 1 / 5


In [14]:
def _load_fold_models(model_cls, backbone, model_dir, kernel_type, n_folds):
    """Build ``n_folds`` models of ``model_cls`` and load their fold checkpoints.

    Checkpoints are read from ``{model_dir}/{kernel_type}_fold{fold}_best.pth``.
    Each model is moved to ``device`` and switched to eval mode.
    """
    models = []
    for fold in range(n_folds):
        model = model_cls(backbone, pretrained=False)
        load_model_file = os.path.join(model_dir, f'{kernel_type}_fold{fold}_best.pth')
        # Deserialize onto the CPU so the checkpoint also loads when `device`
        # fell back to 'cpu'; the model is moved to `device` afterwards.
        sd = torch.load(load_model_file, map_location='cpu')
        if 'model_state_dict' in sd.keys():
            sd = sd['model_state_dict']
        # drop the 'module.' prefix left by DataParallel-style wrappers
        sd = {k[7:] if k.startswith('module.') else k: sd[k] for k in sd.keys()}
        model.load_state_dict(sd, strict=True)
        model = model.to(device)
        model.eval()
        models.append(model)
    return models


# (organ models, bowel models) for each organ/bowel configuration, in order.
_ob_models = []
for ob_config in [OBCFG1, OBCFG2]:
    organ_models = _load_fold_models(
        TimmModel, ob_config.organ_backbone, ob_config.organ_model_dir_cls,
        ob_config.organ_kernel_type, ob_config.folds,
    )
    bowel_models = _load_fold_models(
        TimmBowelModel, ob_config.bowel_backbone, ob_config.bowel_model_dir_cls,
        ob_config.bowel_kernel_type, ob_config.folds,
    )
    _ob_models.append((organ_models, bowel_models))
    del organ_models, bowel_models
    gc.collect()
    torch.cuda.empty_cache()

(models_organ_1, models_bowel_1), (models_organ_2, models_bowel_2) = _ob_models
del _ob_models

print('1:', len(models_organ_1), len(models_bowel_1))
print('2:', len(models_organ_2), len(models_bowel_2))
gc.collect()
torch.cuda.empty_cache()

1: 5 5
2: 5 5


## Predict

In [15]:
# Sanity check before inference: first rows and (rows, columns) of the series table.
display(df.head(), df.shape)

Unnamed: 0,patient_id,series_id,aortic_hu,psid,image_folder
0,48843,295,401.25,48843_295,/kaggle/input/rsna-2023-abdominal-trauma-detec...
1,48843,62825,238.0,48843_62825,/kaggle/input/rsna-2023-abdominal-trauma-detec...
2,50046,24574,149.0,50046_24574,/kaggle/input/rsna-2023-abdominal-trauma-detec...
3,50046,60658,352.0,50046_60658,/kaggle/input/rsna-2023-abdominal-trauma-detec...
4,63706,39279,219.0,63706_39279,/kaggle/input/rsna-2023-abdominal-trauma-detec...


(6, 5)

In [16]:
def main():
    """Run segmentation, class-wise cropping and organ/bowel classification.

    For every batch of a `SegTestDataset(df)` loader (batch size 1) this:
      1. averages the sigmoid masks of all models in `models_seg_all`;
      2. for each classifier config (`OBCFG1`, `OBCFG2`) crops DICOM slices
         around the five segmented classes and runs the matching organ and
         bowel fold models;
      3. blends the two configs with fixed 0.8 / 0.2 weights.

    Relies on notebook globals: `df`, `CFG`, `OBCFG1`, `OBCFG2`, `device`,
    `models_seg_all`, `models_organ_1/2`, `models_bowel_1/2`,
    `SegTestDataset` and `dicomsdl_read_one`.

    Returns:
        (liver_outputs, spleen_outputs, kidney_outputs, bowel_outputs, d):
        the first four are lists holding one blended prediction per loader
        batch; `d` maps each `psid` to the array of slice paths of that series.
    """
    def load_bone(msk, cid, t_paths, config, cropped_images):
        """Crop the slices of class `cid` and store them in `cropped_images[cid]`.

        msk            : per-class probability volume, indexed as msk[cid, x, y, z]
                         (presumably (5, 128, 128, 128) -- TODO confirm).
        cid            : class index; 4 is the bowel, which is sampled
                         `config.mul` times more densely than the other classes.
        t_paths        : ordered slice paths of the series.
        cropped_images : shared output list; this call only writes slot `cid`.

        x/y/z are bounds in mask coordinates, xx/yy/zz the same bounds rescaled
        to image / slice-index coordinates.
        """
        n_scans = len(t_paths)
        msk_size = 128
        n_slices = config.n_slice_per_c * config.mul if cid == 4 else config.n_slice_per_c
        bone = []

        try:
            msk_b = msk[cid] > 0.2
            msk_c = msk[cid] > 0.05

            x = np.where(msk_b.sum(1).sum(1) > 0)[0]
            y = np.where(msk_b.sum(0).sum(1) > 0)[0]
            z = np.where(msk_b.sum(0).sum(0) > 0)[0]

            # Nothing above the strict threshold: retry with the looser one.
            if len(x) == 0 or len(y) == 0 or len(z) == 0:
                x = np.where(msk_c.sum(1).sum(1) > 0)[0]
                y = np.where(msk_c.sum(0).sum(1) > 0)[0]
                z = np.where(msk_c.sum(0).sum(0) > 0)[0]

            # Still empty -> x[0] raises IndexError and the fallback below is used.
            x1, x2 = max(0, x[0] - 1), min(msk.shape[1], x[-1] + 1)
            y1, y2 = max(0, y[0] - 1), min(msk.shape[2], y[-1] + 1)
            z1, z2 = max(0, z[0] - 1), min(msk.shape[3], z[-1] + 1)
            # Rescale the z-range from mask depth to the number of slices.
            zz1, zz2 = int(z1 / msk_size * n_scans), int(z2 / msk_size * n_scans)

            inds = np.linspace(zz1, zz2-1, n_slices).astype(int)   # slice indices
            inds_ = np.linspace(z1, z2-1, n_slices).astype(int)    # mask-depth indices

            for ind, ind_ in zip(inds, inds_):

                msk_this = msk[cid, :, :, ind_]

                images = []
                for i in range(-config.n_ch//2+1, config.n_ch//2+1):
                    try:
                        if config.slice_type == 1:
                            # NOTE(review): reads slice ind+1 for every channel (the
                            # original author marked this line "prob") -- confirm
                            # that this matches how the models were trained.
                            image = dicomsdl_read_one(t_paths[ind+1], 'cls') # uint8 (0,255) (512, 512) (w/o resize)
                        elif config.slice_type == 2:
                            # NOTE(review): a negative ind+i wraps to the end of
                            # t_paths instead of taking the zero-padding branch.
                            image = dicomsdl_read_one(t_paths[ind+i], 'cls')

                        images.append(image)
                    except Exception:
                        # Out-of-range neighbour or unreadable slice: pad with zeros.
                        images.append(np.zeros((512, 512)))

                data = np.stack(images, -1)
                msk_this = msk_this[x1:x2, y1:y2]

                xx1 = int(x1 / msk_size * data.shape[0])
                xx2 = int(x2 / msk_size * data.shape[0])
                yy1 = int(y1 / msk_size * data.shape[1])
                yy2 = int(y2 / msk_size * data.shape[1])

                data = data[xx1:xx2, yy1:yy2]

                data = np.stack(
                    [cv2.resize(data[:, :, i], (config.image_size_cls, config.image_size_cls),
                                interpolation = cv2.INTER_LINEAR)
                        for i in range(5)], -1
                )

                msk_this = (msk_this * 255).astype(np.uint8)
                msk_this = cv2.resize(msk_this, (config.image_size_cls, config.image_size_cls), interpolation = cv2.INTER_LINEAR)

                # Append the cropped mask as an extra (6th) channel.
                data = np.concatenate([data, msk_this[:, :, np.newaxis]], -1)

                bone.append(torch.tensor(data))

        except Exception:
            # Best-effort fallback: emit constant placeholders. Any slices gathered
            # before the failure are discarded and the count matches the normal
            # path, so the fixed per-class offsets used by the caller stay valid.
            bone = [
                torch.ones((config.image_size_cls, config.image_size_cls, 5+1)).int()
                for _ in range(n_slices)
            ]

        cropped_images[cid] = torch.stack(bone, 0)


    def load_cropped_images(msk, t_paths, config, n_ch=5):
        """Crop all five classes in parallel threads and concatenate in class order.

        `n_ch` is unused and kept only for signature compatibility.
        """
        n_classes = 5
        # Per-call buffers: nothing is shared with (or rebound by) the caller.
        cropped_images = [None] * n_classes
        threads = [
            threading.Thread(target=load_bone, args=(msk, cid, t_paths, config, cropped_images))
            for cid in range(n_classes)
        ]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()

        return torch.cat(cropped_images, 0)


    ################################# predict organs ##################################################################

    liver_outputs = []
    spleen_outputs = []
    kidney_outputs = []
    bowel_outputs = []

    dataset_seg = SegTestDataset(df) # (3, 128, 128, 128)

    loader_seg = torch.utils.data.DataLoader(
        dataset_seg,
        batch_size=1,
        shuffle=False,
        num_workers=CFG.num_workers
    )

    # bs 1 / by psid level

    d = {}

    with torch.no_grad():
        for images, t_paths, psid in tqdm(loader_seg, total=len(loader_seg)):
            images = images.to(device)

            t_paths = np.array(t_paths)
            if len(t_paths.shape) != 1:
                t_paths = t_paths.squeeze(-1)

            # Remember the slice paths so later stages can reuse them by psid.
            d[psid[0]] = t_paths

            ####### 1) Segmentation #######
            pred_masks = []
            for models_seg in models_seg_all:
                pred_mask = []
                for model in models_seg:
                    pmask = model(images).sigmoid().float().detach() # (1, 5, 128, 128, 128)
                    pred_mask.append(pmask)

                pred_mask = torch.stack(pred_mask, 0).mean(0).cpu().numpy()
                pred_masks.append(pred_mask)
            pred_masks = np.stack(pred_masks, 0).mean(0) # (1, 5, 128, 128, 128)

            del images
            gc.collect()

            ####### 2) Build cls input #######

            _liver_outputs = []
            _spleen_outputs = []
            _kidney_outputs = []
            _bowel_outputs = []

            for cls_config, cls_organ_models, cls_bowel_models in zip([OBCFG1, OBCFG2],
                                                                      [models_organ_1, models_organ_2],
                                                                      [models_bowel_1, models_bowel_2]):

                cls_inp = []
                for i in range(pred_masks.shape[0]): # 1
                    crops = load_cropped_images(pred_masks[i], t_paths, cls_config) # bigger size
                    cls_inp.append(crops.permute(0, 3, 1, 2).float() / 255.)
                cls_inp = torch.stack(cls_inp, 0).to(device)  # e.g. (1, 105, 6, 224, 224)

                del crops
                gc.collect()


                ####### 2-1) organ models #######
                liver_preds, spleen_preds, kidney_preds = [], [], []

                # Classes are concatenated in cid order with n slices each
                # (liver, spleen, left kidney, right kidney); the bowel follows.
                n = cls_config.n_slice_per_c
                liver = cls_inp[0, 0*n:1*n, :, :, :]
                spleen = cls_inp[0, 1*n:2*n, :, :, :]
                l_kidney = cls_inp[0, 2*n:3*n, :, :, :]
                r_kidney = cls_inp[0, 3*n:4*n, :, :, :]

                # Squeeze each kidney to half width and place them side by side so
                # both kidneys share one input of the original spatial size.
                kidney = []
                for ind in range(n):
                    _kidney = torch.cat(
                        [
                            F.interpolate(l_kidney[ind].unsqueeze(0), (l_kidney.shape[-2], l_kidney.shape[-1]//2), mode='bilinear'),
                            F.interpolate(r_kidney[ind].unsqueeze(0), (r_kidney.shape[-2], r_kidney.shape[-1]//2), mode='bilinear')
                        ], dim=-1)
                    kidney.append(_kidney)
                kidney = torch.cat(kidney, dim=0)

                for model in cls_organ_models:

                    liver_logits = model(liver.unsqueeze(0))
                    spleen_logits = model(spleen.unsqueeze(0))
                    kidney_logits = model(kidney.unsqueeze(0))

                    liver_preds.append(F.softmax(liver_logits, dim=-1).detach().cpu().numpy())
                    spleen_preds.append(F.softmax(spleen_logits, dim=-1).detach().cpu().numpy())
                    kidney_preds.append(F.softmax(kidney_logits, dim=-1).detach().cpu().numpy())

                del liver, spleen, l_kidney, r_kidney, kidney
                gc.collect()

                ####### 2-2) bowel models #######
                bowel_preds = []

                bowel = cls_inp[0, 4*n:, :, :, :]

                for model in cls_bowel_models:
                    bowel_logits, _ = model(bowel.unsqueeze(0))
                    bowel_preds.append(bowel_logits.sigmoid().detach().cpu().numpy())

                del bowel
                gc.collect()

                # Average over the fold models of this config.
                _liver_outputs.append(np.mean(liver_preds, axis=0)) # append (1,15,3)
                _spleen_outputs.append(np.mean(spleen_preds, axis=0))
                _kidney_outputs.append(np.mean(kidney_preds, axis=0))
                _bowel_outputs.append(np.mean(bowel_preds))

            # Fixed-weight blend of the two classifier configurations.
            liver_outputs.append(_liver_outputs[0] * 0.8 + _liver_outputs[1] * 0.2)
            spleen_outputs.append(_spleen_outputs[0] * 0.8 + _spleen_outputs[1] * 0.2)
            kidney_outputs.append(_kidney_outputs[0] * 0.8 + _kidney_outputs[1] * 0.2)
            bowel_outputs.append(_bowel_outputs[0] * 0.8 + _bowel_outputs[1] * 0.2)
    # output : (n, 15, 3)
    return liver_outputs, spleen_outputs, kidney_outputs, bowel_outputs, d
            
if __name__ == '__main__' and not (DEBUG or fast_sub):
    # Full inference path only: skipped in DEBUG mode and for fast submissions.
    liver_outputs, spleen_outputs, kidney_outputs, bowel_outputs, d = main()

    # The segmentation and organ/bowel models are not referenced again below;
    # drop them and release the cached CUDA memory.
    del models_organ_1, models_bowel_1, models_organ_2, models_bowel_2, models_seg_all
    gc.collect()
    torch.cuda.empty_cache()

## Extra

In [17]:
# seghead extra model
if 1:
    models_feat_1, models_seq_1 = [], []

    # Only one extravasation config is active; `i` selects the destination lists.
    for i, extra_config in enumerate([ExtraCFG1], start=1):

        models_feat, models_seq = [], []

        # feature extractors: one checkpoint per fold
        for fold in range(5):
            model = TimmFeatExtractorSeg(extra_config.feature_backbone, (256, 128, 64, 32, 16), pretrained=False).to(device)
            load_model_file = os.path.join(
                extra_config.feature_model_dir,
                f'{extra_config.feature_kernel_type}',
                f'extra-feat-sliding-seg_fold{fold}_best.pth',
            )
            sd = torch.load(load_model_file)
            # Checkpoints may wrap the weights in a 'model_state_dict' entry.
            if 'model_state_dict' in sd:
                sd = sd['model_state_dict']
            # Strip the 'module.' prefix left by DataParallel-style wrappers.
            sd = {(name[7:] if name.startswith('module.') else name): weight for name, weight in sd.items()}
            model.load_state_dict(sd, strict=True)
            model.eval()
            models_feat.append(model)

        # sequence models: one checkpoint per fold
        for fold in range(5):
            model = SeqGRUModel(extra_config).to(device)
            load_model_file = os.path.join(
                extra_config.sequence_model_dir,
                f'{extra_config.sequence_kernel_type}',
                f'extra-feat-sliding-seg-seq_fold{fold}_best.pth',
            )
            sd = torch.load(load_model_file)
            if 'model_state_dict' in sd:
                sd = sd['model_state_dict']
            sd = {(name[7:] if name.startswith('module.') else name): weight for name, weight in sd.items()}
            model.load_state_dict(sd, strict=True)
            model.eval()
            models_seq.append(model)

        if i == 1:
            models_feat_1, models_seq_1 = models_feat, models_seq

        # Drop the temporary names; the models stay alive through the *_1 lists.
        del models_feat, models_seq, model, sd
        gc.collect()
        torch.cuda.empty_cache()

    print(1, len(models_feat_1), len(models_seq_1))

1 5 5


In [18]:
# seghead extra model EfficientNet
if 0:
    # Disabled: EfficientNet variant of the extravasation feature/sequence models.
    models_feat_2, models_seq_2 = [], []

    for i, extra_config in enumerate([ExtraCFG2], start=1):

        models_feat, models_seq = [], []

        # feature extractors: one checkpoint per fold
        for fold in range(5):
            model = TimmEffFeatExtractorSeg(extra_config.feature_backbone, (256, 160, 64, 48, 24), pretrained=False).to(device)
            load_model_file = os.path.join(
                extra_config.feature_model_dir,
                f'{extra_config.feature_kernel_type}',
                f'extra-feat-sliding-seg_fold{fold}_best.pth',
            )
            sd = torch.load(load_model_file)
            # Checkpoints may wrap the weights in a 'model_state_dict' entry.
            if 'model_state_dict' in sd:
                sd = sd['model_state_dict']
            # Strip the 'module.' prefix left by DataParallel-style wrappers.
            sd = {(name[7:] if name.startswith('module.') else name): weight for name, weight in sd.items()}
            model.load_state_dict(sd, strict=True)
            model.eval()
            models_feat.append(model)

        # sequence models: one checkpoint per fold
        for fold in range(5):
            model = SeqGRUModel(extra_config).to(device)
            load_model_file = os.path.join(
                extra_config.sequence_model_dir,
                f'{extra_config.sequence_kernel_type}',
                f'extra-feat-sliding-seg-seq_fold{fold}_best.pth',
            )
            sd = torch.load(load_model_file)
            if 'model_state_dict' in sd:
                sd = sd['model_state_dict']
            sd = {(name[7:] if name.startswith('module.') else name): weight for name, weight in sd.items()}
            model.load_state_dict(sd, strict=True)
            model.eval()
            models_seq.append(model)

        if i == 1:
            models_feat_2, models_seq_2 = models_feat, models_seq

        # Drop the temporary names; the models stay alive through the *_2 lists.
        del models_feat, models_seq, model, sd
        gc.collect()
        torch.cuda.empty_cache()

    print(2, len(models_feat_2), len(models_seq_2))

In [19]:
def load_extra(t_paths, config, slide=5):
    """Read sliding 5-slice stacks from a series for the extravasation model.

    A stack is built around every slice index `c` with `2 <= c < len(t_paths) - 2`
    and `c % slide == 2`: slices `c-2 .. c+2` are read with
    `dicomsdl_read_one(path, 'extra')`, stacked on the last axis and resized to
    `config.image_size_extra` square.

    Returns:
        Float tensor of shape (n_stacks, 5, image_size_extra, image_size_extra).
    """
    neighbour_offsets = (-2, -1, 0, 1, 2)
    stacks = []

    for center in range(2, len(t_paths) - 2):
        if center % slide != 2:
            continue

        # Each slice is a 2-D image; add a trailing channel axis before joining.
        planes = [
            dicomsdl_read_one(t_paths[center + offset], 'extra')[:, :, np.newaxis]
            for offset in neighbour_offsets
        ]
        stack = np.concatenate(planes, axis=-1)  # (H, W, 5)

        stack = cv2.resize(
            stack,
            (config.image_size_extra, config.image_size_extra),
            interpolation=cv2.INTER_LINEAR,
        )

        # Channels-first layout for the feature extractor.
        stacks.append(torch.tensor(stack.transpose(2, 0, 1)).float())

    return torch.stack(stacks, 0)

class ExtraTestDataset(Dataset):
    """Dataset yielding, per series, its sliding 5-slice stacks and slice paths.

    Args:
        df: frame with a `psid` column; one row per series.
        d: mapping from `psid` (as str) to the ordered slice paths of the series.
        config: config object forwarded to `load_extra`.
    """

    def __init__(self, df, d, config):
        self.df = df.reset_index()
        self.d = d
        self.config = config

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, index):
        row = self.df.iloc[index]
        psid = str(row['psid'])
        t_paths = self.d[psid]
        images = load_extra(t_paths, self.config, slide=5)
        # `load_extra` already returns a tensor: `as_tensor` reuses it instead of
        # copy-constructing a new one (which `torch.tensor(tensor)` warns about).
        # Shape is (n_stacks, 5, size, size) as produced by `load_extra`.
        return torch.as_tensor(images).float(), list(t_paths)
    

In [20]:
def extra_main_seghead(df, d, config, models_feat, models_seq):
    """Predict one extravasation probability per series.

    For every series the sliding 5-slice stacks are embedded in chunks of
    `config.extra_bs` by each model in `models_feat` (embeddings averaged over
    the models), the embedding sequence is padded or resized to
    `config.m_size` steps, augmented with forward/backward differences, and
    scored by each model in `models_seq`; the sigmoid outputs are averaged.

    Args:
        df: frame with a `psid` column, one row per series.
        d: mapping psid -> slice paths (as returned by `main`).
        config: needs `extra_bs`, `m_size`, `forward_type` (1 or 2) plus
            whatever `ExtraTestDataset` / `load_extra` read from it.
        models_feat: feature-extractor models.
        models_seq: sequence models taking `(embs, mask)`.

    Returns:
        List with one mean probability (numpy scalar) per series.

    Raises:
        ValueError: if `config.forward_type` is not 1 or 2.
    """
    extra_bs = config.extra_bs
    m_size = config.m_size

    def _embed_batch(img_batch):
        """Return the model-averaged embedding, shape (bs, emb_size), of one chunk."""
        if len(img_batch.shape) < 4:
            img_batch = torch.unsqueeze(img_batch, dim=0)
        # Move the chunk to the device once instead of once per model.
        img_batch = img_batch.to(device)

        per_model = []
        for model in models_feat:
            feats = model.encoder(img_batch)
            if config.forward_type == 1:
                emb = model.pool(feats[-1]).squeeze().squeeze() # (bs, emb_size)
            elif config.forward_type == 2:
                emb = model.hidden_layer(feats[-1])
            else:
                raise ValueError(f'unsupported forward_type: {config.forward_type}')
            per_model.append(emb)

        per_model = torch.stack(per_model, 0) # (n_models, bs, emb_size)
        # A chunk of one stack loses its batch axis in squeeze(); restore it.
        if len(per_model.shape) == 2:
            per_model = per_model.unsqueeze(1)
        return torch.mean(per_model, 0) # (bs, emb_size)

    extra_outputs = []

    extra_ds = ExtraTestDataset(df, d, config)
    loader_extra = torch.utils.data.DataLoader(extra_ds, batch_size=1, shuffle=False, num_workers=CFG.num_workers)

    with torch.no_grad():
        for imgs, _ in tqdm(loader_extra, total=len(loader_extra)):
            imgs = imgs[0]

            ####### 3-1) feature models #######
            # Full chunks of extra_bs stacks followed by the (shorter) remainder.
            embs = [
                _embed_batch(imgs[start:start + extra_bs])
                for start in range(0, imgs.shape[0], extra_bs)
            ]
            embs = torch.cat(embs).detach().cpu().numpy() # (seq_len, emb_size)

            del imgs
            gc.collect()

            # make data for seq model
            if len(embs) < m_size:
                # Left-pad with zeros; mask is 0 on padding and 1 on real steps.
                pad_sz = (m_size - len(embs))
                pad = np.zeros((pad_sz, embs.shape[-1]))
                mask = np.concatenate([np.zeros(pad_sz,), np.ones(len(embs),)])
                embs = np.concatenate([pad, embs])
            else: # resize
                # Interpolate along the sequence axis down (or up) to m_size steps.
                embs = cv2.resize(embs, (embs.shape[-1], m_size), interpolation=cv2.INTER_LINEAR)
                mask = np.ones(len(embs),)

            # Backward and forward first differences along the sequence axis.
            diff1 = np.zeros_like(embs)
            diff1[1:] = embs[1:] - embs[:-1]
            diff2 = np.zeros_like(embs)
            diff2[:-1] = embs[:-1] - embs[1:]

            embs = np.concatenate([embs, diff1, diff2], axis=-1)

            embs = torch.tensor(embs).float().unsqueeze(0) # (m_size, emb_dim * 3) (1, 192, 6144)
            mask = torch.tensor(mask).float().unsqueeze(0) # (m_size)

            ####### 3-2) sequence model #######
            extra_preds = []
            for model in models_seq:
                logit, _ = model(embs.to(device), mask.to(device))
                extra_preds.append(logit.sigmoid().detach().cpu().numpy())

            extra_outputs.append(np.mean(extra_preds))
            del embs, mask, diff1, diff2, extra_preds
            gc.collect()

    return extra_outputs
            
if __name__ == "__main__" and not (DEBUG or fast_sub):
    # Full inference path only: skipped in DEBUG mode and for fast submissions.
    extra_outputs_1 = extra_main_seghead(df, d, ExtraCFG1, models_feat_1, models_seq_1)
    # The second (EfficientNet) extravasation ensemble is disabled, so there is
    # nothing to blend: the first ensemble's output is used as-is.
    del ExtraCFG1, models_feat_1, models_seq_1
    extra_outputs = extra_outputs_1
    gc.collect()

In [21]:
if not (DEBUG or fast_sub):
    # Full inference path: turn the per-series model outputs into one
    # submission row per patient.

    # Average over the second-to-last axis of the stacked organ outputs and
    # drop the singleton batch axis.
    liver_outputs, spleen_outputs, kidney_outputs = (
        np.mean(np.stack(organ_outputs), axis=-2).squeeze(1)
        for organ_outputs in (liver_outputs, spleen_outputs, kidney_outputs)
    )
    bowel_outputs = np.stack(bowel_outputs)
    extra_outputs = np.stack(extra_outputs)

    tmp = pd.DataFrame({'psid': df['psid'].values})
    # psid is '<patient_id>_<series_id>'.
    tmp['patient_id'] = [int(psid.split('_')[0]) for psid in tmp['psid']]
    tmp['series_id'] = [int(psid.split('_')[1]) for psid in tmp['psid']]

    tmp['bowel_injury'] = bowel_outputs
    tmp['bowel_healthy'] = 1 - tmp['bowel_injury']
    # NOTE(review): the injury score is scaled by 3 while the healthy score uses
    # the unscaled value, so the pair does not sum to 1 -- presumably a
    # deliberate adjustment for the competition metric; confirm.
    tmp['extravasation_injury'] = extra_outputs * 3
    tmp['extravasation_healthy'] = 1 - extra_outputs

    for organ, organ_probs in (('kidney', kidney_outputs),
                               ('spleen', spleen_outputs),
                               ('liver', liver_outputs)):
        tmp[[f'{organ}_healthy', f'{organ}_low', f'{organ}_high']] = organ_probs

    tmp = tmp.drop(columns=['psid', 'series_id'])

    # A patient can have several series: average their predictions.
    sub = tmp.groupby('patient_id').mean().reset_index()

    sub.to_csv("submission.csv", index=False)
    print('sub')

elif not DEBUG and fast_sub:
    # Fast path: write out the `sub` frame prepared earlier in the notebook.
    print('fast_sub')
    sub.to_csv("submission.csv", index=False)

fast_sub


In [22]:
sub

Unnamed: 0,patient_id,bowel_healthy,bowel_injury,extravasation_healthy,extravasation_injury,kidney_healthy,kidney_low,kidney_high,liver_healthy,liver_low,liver_high,spleen_healthy,spleen_low,spleen_high
0,48843,0.5,0.5,0.5,0.5,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333
1,50046,0.5,0.5,0.5,0.5,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333
2,63706,0.5,0.5,0.5,0.5,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333
