In [None]:
# !conda install -c conda-forge gdcm -y -q
!pip install ../input/pytorchcv/pytorchcv-0.0.55-py2.py3-none-any.whl --quiet

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import re
import cv2
from tqdm import tqdm

import pydicom
from sklearn.cluster import KMeans
from skimage import morphology, measure
from scipy.ndimage.interpolation import zoom

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.cuda import amp

import warnings

import random
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch, exports
    ``PYTHONHASHSEED`` and configures cuDNN for repeatable execution.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    # torch.manual_seed seeds the CPU generator and the CUDA generators.
    torch.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN auto-select kernels per input shape, which can
    # vary between runs; it must be off for the deterministic flag above to
    # give repeatable results.
    torch.backends.cudnn.benchmark = False

seed_everything(42)

In [None]:
def _content_span(is_background):
    """Return ``(start, stop)`` of the first run of non-background entries.

    ``start`` is the index of the first falsy flag; ``stop`` is the index of
    the first truthy flag after it. Either is ``None`` when it does not occur,
    which makes the pair directly usable as slice bounds.
    """
    start = stop = None
    for idx, background in enumerate(is_background):
        if start is None:
            if not background:
                start = idx
        elif background:
            stop = idx
            break
    return start, stop


def crop(image):
    """Trim a uniform border from a 3-D volume, judged on its middle slice.

    The value at the top-left corner of the middle slice is taken as the
    background. If the first row and first column of that slice are not
    entirely background, the volume is returned untouched. Otherwise the
    first contiguous band of non-background rows and of non-background
    columns is located and every slice is cut to that rectangle.

    Args:
        image: Array indexed as (slice, row, column).

    Returns:
        The cropped view of ``image``, or ``image`` itself when no uniform
        border is detected.
    """
    mid_img = image[int(image.shape[0] / 2)]
    is_background = mid_img == mid_img[0, 0]

    # Only crop when the slice is framed by background on its top and left.
    if not (is_background[0, :].all() and is_background[:, 0].all()):
        return image

    r_min, r_max = _content_span(is_background.all(axis=1))
    c_min, c_max = _content_span(is_background.all(axis=0))
    return image[:, r_min:r_max, c_min:c_max]

def resize(image, shape=(40,256,256)):
    """Resample ``image`` so that its shape becomes ``shape``.

    Each axis is scaled by ``target_size / current_size`` and the volume is
    interpolated with ``zoom`` using ``mode='nearest'`` for points beyond
    the edges.

    Args:
        image: N-dimensional array to resample.
        shape: Target size per axis; must have one entry per axis of ``image``.

    Returns:
        The resampled array.
    """
    zoom_factors = np.divide(shape, image.shape)
    return zoom(image, zoom_factors, mode='nearest')

def window(img, WL=50, WW=350):
    """Apply an intensity window and rescale the result to ``uint8``.

    Values are clipped to ``[WL - WW // 2, WL + WW // 2]`` and then min-max
    normalised over the clipped array itself, so the darkest value present
    maps to 0 and the brightest to 255.

    Args:
        img: Array of intensities. It is not modified.
        WL: Window level (centre of the window).
        WW: Window width.

    Returns:
        ``uint8`` array with the same shape as ``img``. An input that is
        constant after clipping yields all zeros.
    """
    upper, lower = WL + WW // 2, WL - WW // 2
    # np.clip returns a new array, so the caller's data is left untouched.
    X = np.clip(img, lower, upper)
    X = X - np.min(X)
    peak = np.max(X)
    if peak == 0:
        # Constant slice: dividing by zero would produce NaNs whose cast to
        # uint8 is undefined, so return an explicit black image instead.
        return np.zeros(X.shape, dtype='uint8')
    X = X / peak
    return (X * 255.0).astype('uint8')

In [None]:
import glob
import re

def load_scan(path):
    """Load every ``*.dcm`` file directly under ``path`` as one volume.

    Files are first ordered by the number embedded in their file name. When
    every slice carries a usable ``ImagePositionPatient``, the slices are then
    re-ordered by its third component; otherwise the file-name order is kept.

    Args:
        path: Directory containing the DICOM slices of one scan.

    Returns:
        Tuple ``(image, first_slice, slices)`` where ``image`` is a float
        array stacking the pixel data of all slices along axis 0,
        ``first_slice`` is ``slices[0]`` and ``slices`` is the ordered list
        of datasets.

    Raises:
        ValueError: If ``path`` contains no ``.dcm`` files.
    """
    paths = glob.glob(path + '/*.dcm')
    if not paths:
        raise ValueError('no .dcm files found in {!r}'.format(path))

    def _slice_number(file_path):
        # Look only at the file name: digits in parent directories (e.g. a
        # patient id) must not take part in the ordering.
        digits = re.sub(r'\D', '', os.path.basename(file_path))
        return int(digits) if digits else 0

    paths.sort(key=_slice_number)
    # NOTE(review): a former `if len(paths) < 51: paths = paths[:50]` guard was
    # a no-op (a list shorter than 51 is unchanged by [:50]) and was dropped.
    # If the intent was to cap long scans at 50 slices, confirm against the
    # training pipeline before adding a real limit.

    slices = [pydicom.dcmread(p) for p in paths]
    try:
        slices.sort(key=lambda s: float(s.ImagePositionPatient[2]))
    except (AttributeError, TypeError, ValueError, IndexError):
        # Position tag missing or malformed on at least one slice: the sort
        # keys are computed before any reordering, so file-name order is kept.
        pass

    image = np.stack([s.pixel_array.astype(float) for s in slices])
    return image, slices[0], slices

In [None]:
# Directory of the competition's test scans.
# NOTE(review): despite the name, IN_TRAIN points at the *test* folder.
IN_TRAIN = '../input/osic-pulmonary-fibrosis-progression/test/'
# Sorted names of the entries in that directory.
paths = os.listdir(IN_TRAIN)
paths.sort()

In [None]:
# Load the competition tables. Training rows that share a (Patient, Weeks)
# pair are all discarded (keep=False), not de-duplicated.
train = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/train.csv')
train = train.drop_duplicates(keep=False, subset=['Patient','Weeks'])
sub = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/sample_submission.csv')
test = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/test.csv')

# Split "<patient>_<week>" into its parts, then attach each patient's row of
# test.csv (minus its own Weeks column) to every week that must be predicted.
sub['Patient'] = sub['Patient_Week'].apply(lambda x:x.split('_')[0])
sub['Weeks'] = sub['Patient_Week'].apply(lambda x: int(x.split('_')[-1]))
sub =  sub[['Patient','Weeks','Confidence','Patient_Week']]
sub = sub.merge(test.drop('Weeks', axis=1), on="Patient")

# NOTE(review): the labels look swapped (submission rows are tagged 'test' and
# test.csv rows are tagged 'sub'), but later cells select WHERE == 'test' to
# pick the rows to predict, so the labels must stay as they are.
sub['WHERE'] = 'test'
test['WHERE'] = 'sub'
train['WHERE'] = 'train'

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the same
# frames in the same order and keeps their original index labels.
data = pd.concat([train, test, sub])

In [None]:
def _min_max_scale(values):
    """Linearly rescale a Series to [0, 1] using its own minimum and maximum."""
    return (values - values.min()) / (values.max() - values.min())


# Earliest week per patient, ignoring the rows tagged 'test' (the weeks to be
# predicted), so only weeks with an actual measurement are considered.
data['min_week'] = data['Weeks']
data.loc[data.WHERE=='test','min_week'] = np.nan
data['min_week'] = data.groupby('Patient')['min_week'].transform('min')

# FVC measured at that earliest week; the first matching row per patient wins.
base = (
    data.loc[data.Weeks == data.min_week, ['Patient','FVC']]
    .drop_duplicates(subset='Patient')
    .rename(columns={'FVC': 'min_FVC'})
)

data = data.merge(base, on='Patient', how='left')
data['base_week'] = data['Weeks'] - data['min_week']
del base

# Min-max scaling is computed over all stacked rows (train, test and
# submission rows together).
data['age'] = _min_max_scale(data['Age'])
data['BASE'] = _min_max_scale(data['min_FVC'])
data['week'] = _min_max_scale(data['base_week'])
data['percent'] = _min_max_scale(data['Percent'])

# One indicator column per distinct value of each categorical column; the
# column is named after the value itself.
COLS = ['Sex','SmokingStatus'] #,'Age'
FE = []
FE += ['Patient']
for col in COLS:
    for mod in data[col].unique():
        FE.append(mod)
        data[mod] = (data[col] == mod).astype(int)
FE += ['age','percent','week','BASE', 'WHERE']

# Keep only the rows to predict. The explicit copy makes meta_df an
# independent frame, so adding 'fold' does not write into a slice of `data`.
meta_df = data.loc[data.WHERE=='test', FE].copy()
meta_df['fold'] = 0
meta_df.head()

In [None]:
def get_tab(df):
    """Encode the first row of ``df`` as a 4-element numeric vector.

    Layout: ``[(Age - 30) / 30, sex_flag, smoke_a, smoke_b]``.

    * ``sex_flag`` is 0 when ``Sex`` equals ``'male'`` exactly, else 1.
      NOTE(review): the comparison is case-sensitive; confirm the CSV really
      stores lowercase 'male', otherwise the flag is always 1.
    * ``(smoke_a, smoke_b)`` is (0, 0) for 'Never smoked', (1, 1) for
      'Ex-smoker', (0, 1) for 'Currently smokes' and (1, 0) for anything else.

    Args:
        df: Frame with ``Age``, ``Sex`` and ``SmokingStatus`` columns; only
            its first row is read.

    Returns:
        ``np.ndarray`` of length 4.
    """
    smoking_codes = {
        'Never smoked': (0, 0),
        'Ex-smoker': (1, 1),
        'Currently smokes': (0, 1),
    }
    scaled_age = (df.Age.values[0] - 30) / 30
    sex_flag = 0 if df.Sex.values[0] == 'male' else 1
    smoke_a, smoke_b = smoking_codes.get(df.SmokingStatus.values[0], (1, 0))
    return np.array([scaled_age, sex_flag, smoke_a, smoke_b])

# Build one metadata vector per test patient, in order of first appearance.
tab = []
for p in test.Patient.unique():
    # Deliberately not named `sub`: that global holds the submission frame
    # and must not be rebound to a single patient's rows.
    patient_rows = test.loc[test.Patient == p, :]
    tab.append(get_tab(patient_rows))

# Patient-level table; 'target' and 'fold' are constant placeholders.
patients = pd.DataFrame({'Patient': test.Patient.unique(), 'target': 0, 'meta': tab, 'fold': 0})
patients.head()

In [None]:
# Assign every training row to fold 0 (a single constant fold).
train['fold'] = 0

In [None]:
from pytorchcv.model_provider import get_model

class FCN(torch.nn.Module):
    """Image backbone fused with a small MLP over tabular features.

    The backbone output is flattened and passed through dropout, the tabular
    vector goes through a two-layer MLP with 100 hidden units, and the two
    are concatenated and mapped to ``num_classes`` outputs by one linear layer.

    Args:
        base: Module that turns the image batch into per-sample features
            which flatten to ``in_f`` values.
        in_f: Number of flattened backbone features per sample.
        num_classes: Number of outputs.
        in_meta: Length of the tabular feature vector.
        dropout: Accepted for interface compatibility; not read.
    """

    def __init__(self, base, in_f, num_classes, in_meta, dropout=True):
        super().__init__()
        self.base = base
        self.after_model = nn.Sequential(nn.Flatten(), nn.Dropout(0.5))
        self.meta = nn.Sequential(
            nn.Linear(in_meta, 100),
            nn.ReLU(),
            nn.Linear(100, 100),
            nn.ReLU(),
        )
        self.classification_meta = nn.Sequential(
            nn.Linear(in_f + 100, num_classes),
        )
        # The two heads below are not used by forward(). They are registered
        # anyway so that the set of state_dict keys, and the order in which
        # parameters are initialised, stay exactly as before.
        self.classification = nn.Sequential(
            nn.Linear(in_f, num_classes),
        )
        self.meta_head = nn.Linear(100, num_classes)

    def forward(self, x, meta):
        """Return the ``(batch, num_classes)`` predictions for images and metadata."""
        image_features = self.after_model(self.base(x))
        meta_features = self.meta(meta)
        fused = torch.cat((image_features, meta_features), dim=1)
        return self.classification_meta(fused)

def create_model(name, path=None):
    """Build an ``FCN`` around a pytorchcv backbone and optionally load weights.

    The backbone's last child (its output layer) is removed, the
    ``final_pool`` of the first remaining child is replaced by a global
    average pool, and the result is wrapped in ``FCN`` with the output and
    metadata sizes taken from ``config``.

    Args:
        name: Model name understood by ``pytorchcv``'s ``get_model``.
        path: Optional checkpoint file whose ``'model_state'`` entry is a
            state dict for the assembled ``FCN``.

    Returns:
        The assembled model, on CPU.
    """
    backbone = get_model(name, pretrained=False)
    children = list(backbone.children())

    output_layer = children[-1]
    try:
        features = output_layer.in_features
    except AttributeError:
        # The output stage is a container; its last layer holds the size.
        features = output_layer[-1].in_features

    backbone = nn.Sequential(*children[:-1]) # Remove original output layer
    backbone[0].final_pool = nn.Sequential(nn.AdaptiveAvgPool2d(1))
    model = FCN(backbone, features, config.n_seg_classes, config.meta_features, dropout=True)

    if path:
        print ('loading pretrained model {}'.format(path))
        # map_location keeps loading independent of the device the checkpoint
        # was saved from; the caller moves the model to its target device.
        pretrain = torch.load(path, map_location='cpu')['model_state']
        model.load_state_dict(pretrain)

    return model

In [None]:
from pytorchcv.model_provider import get_model

class FCN2(torch.nn.Module):
    """Image backbone fused with a wide MLP over tabular features.

    Same layout as ``FCN`` but the tabular branch is ``in_meta -> 1024 -> 512``,
    so the fused vector has ``in_f + 512`` entries.

    Args:
        base: Module that turns the image batch into per-sample features
            which flatten to ``in_f`` values.
        in_f: Number of flattened backbone features per sample.
        num_classes: Number of outputs.
        in_meta: Length of the tabular feature vector.
        dropout: Accepted for interface compatibility; not read.
    """

    def __init__(self, base, in_f, num_classes, in_meta, dropout=True):
        super().__init__()
        self.base = base
        self.after_model = nn.Sequential(nn.Flatten(), nn.Dropout(0.5))
        self.meta = nn.Sequential(
            nn.Linear(in_meta, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
        )
        self.classification_meta = nn.Sequential(
            nn.Linear(in_f + 512, num_classes),
        )
        # The two heads below are not used by forward(). They are registered
        # anyway so that the set of state_dict keys, and the order in which
        # parameters are initialised, stay exactly as before. meta_head keeps
        # its 100-wide input even though the tabular branch emits 512 values.
        self.classification = nn.Sequential(
            nn.Linear(in_f, num_classes),
        )
        self.meta_head = nn.Linear(100, num_classes)

    def forward(self, x, meta):
        """Return the ``(batch, num_classes)`` predictions for images and metadata."""
        image_features = self.after_model(self.base(x))
        meta_features = self.meta(meta)
        fused = torch.cat((image_features, meta_features), dim=1)
        return self.classification_meta(fused)

def create_model2(name, path=None):
    """Build an ``FCN2`` around a pytorchcv backbone and optionally load weights.

    The backbone's last child (its output layer) is removed, the
    ``final_pool`` of the first remaining child is replaced by a global
    average pool, and the result is wrapped in ``FCN2`` with the output and
    metadata sizes taken from ``config``.

    Args:
        name: Model name understood by ``pytorchcv``'s ``get_model``.
        path: Optional checkpoint file whose ``'model_state'`` entry is a
            state dict for the assembled ``FCN2``.

    Returns:
        The assembled model, on CPU.
    """
    backbone = get_model(name, pretrained=False)
    children = list(backbone.children())

    output_layer = children[-1]
    try:
        features = output_layer.in_features
    except AttributeError:
        # The output stage is a container; its last layer holds the size.
        features = output_layer[-1].in_features

    backbone = nn.Sequential(*children[:-1]) # Remove original output layer
    backbone[0].final_pool = nn.Sequential(nn.AdaptiveAvgPool2d(1))
    model = FCN2(backbone, features, config.n_seg_classes, config.meta_features, dropout=True)

    if path:
        print ('loading pretrained model {}'.format(path))
        # map_location keeps loading independent of the device the checkpoint
        # was saved from; the caller moves the model to its target device.
        pretrain = torch.load(path, map_location='cpu')['model_state']
        model.load_state_dict(pretrain)

    return model

In [None]:
class config:
    """Static settings read by the model factories."""
    # Volume depth / height / width.
    # NOTE(review): these three are not read by any visible cell; the
    # inference preprocessing hard-codes its own sizes.
    input_D = 30
    input_H = 512
    input_W = 512
    # Number of model outputs (passed as `num_classes` by the factories).
    # Presumably one output per entry of `quantiles` below; confirm.
    n_seg_classes = 3 # previously 1
#     meta_features = 4
    # Length of the tabular feature vector (passed as `in_meta`). It must equal
    # the number of meta_df columns left after dropping Patient, WHERE and fold.
    meta_features = 9
#     meta_features = 6
    seed = 42
    quantiles = (0.2, 0.5, 0.8)

# Ensemble definition: three training runs, five fold checkpoints each.
# Each entry is (factory, run tag used in the checkpoint file name). The
# trailing numbers are the scores noted next to each run by the author; the
# metric and data split they refer to are not stated here.
model_name = 'resnet50'
CHECKPOINT_DIR = '../input/osic-5fold-models2/'
ENSEMBLE_RUNS = [
    (create_model, '(2)'),   # 6.88
    (create_model, '(4)'),   # 6.890
    (create_model2, '(5)'),  # 6.9048
]
N_FOLDS = 5

nets = []
for build_model, run_tag in ENSEMBLE_RUNS:
    for fold_id in range(1, N_FOLDS + 1):
        checkpoint_path = '{}resnet3d-fld{}-2 {}.pth'.format(CHECKPOINT_DIR, fold_id, run_tag)
        model = build_model(model_name, path=checkpoint_path)
        model.cuda()
        # Inference only: switch off dropout and freeze batch-norm statistics
        # for every ensemble member, not just the last one loaded.
        model.eval()
        nets.append(model)

In [None]:
# err

In [None]:
import albumentations as A

# Flip-only pipelines that always apply (p=1.0). They are referenced only by
# the commented-out test-time-augmentation lines in the inference cell, so
# they currently have no effect on the predictions.
transform = A.Compose([
    A.HorizontalFlip(p=1.0)
])

transform2 = A.Compose([
    A.VerticalFlip(p=1.0)
])

In [None]:
def convertToHU(image, data):
    """Apply the DICOM rescale transform to stored pixel values.

    Uses the linear modality transform ``stored * RescaleSlope +
    RescaleIntercept``. The previous ``(stored + intercept) / slope`` form
    gives the same result only when the slope is 1.

    NOTE(review): if the checkpoints were trained with the previous
    expression, scans whose slope is not 1 will now be preprocessed
    differently from training; confirm against the training notebook.

    Args:
        image: Array of stored pixel values.
        data: Object exposing ``RescaleSlope`` and ``RescaleIntercept``
            (e.g. a DICOM dataset).

    Returns:
        Array of rescaled values with the same shape as ``image``.
    """
    return image * data.RescaleSlope + data.RescaleIntercept

SCAN_ROOT = '../input/osic-pulmonary-fibrosis-progression/test/'
RESAMPLED_SHAPE = (50, 512, 512)        # volume size after resampling
KEPT_SLICES = slice(10, 40)             # central 30 slices of the resampled volume
MODEL_SLICE = 15                        # index, within the kept slices, fed to the models
WINDOWS = ((-600, 1500), (100, 700), (40, 400))   # (level, width) per input channel


def _load_patient_image(patient):
    """Return the (3, 512, 512) model input for one patient's scan.

    The scan is rescaled with ``convertToHU`` using the first slice's
    metadata, border-cropped, resampled to ``RESAMPLED_SHAPE`` and trimmed to
    ``KEPT_SLICES``. Slice ``MODEL_SLICE`` is then rendered under each of the
    three intensity windows (one channel per window) and scaled to [0, 1].
    ``window`` normalises each slice independently, so only the slice that is
    actually used has to be windowed.
    """
    volume, first_slice, _ = load_scan(SCAN_ROOT + patient)
    volume = convertToHU(volume, first_slice)
    volume = crop(volume)
    volume = resize(volume, shape=RESAMPLED_SHAPE)
    chosen = volume[KEPT_SLICES][MODEL_SLICE]
    channels = [window(chosen, level, width) for level, width in WINDOWS]
    return np.stack(channels, axis=0) / 255.


# Every ensemble member must be in eval mode; otherwise dropout stays active
# and batch-norm layers keep updating their statistics during prediction.
for net in nets:
    net.eval()

meta_values = meta_df.values
last_pat = ''
image_tensor = None
o1 = []
with torch.no_grad():
    for row, patient in enumerate(tqdm(meta_df.Patient.values)):
        # Drop the leading Patient column and the trailing WHERE / fold columns.
        meta_batch = torch.from_numpy(meta_values[row][1:-2][None].astype(np.float32)).cuda().float()

        # The scan is reloaded only when the patient differs from the previous
        # row, so consecutive rows of the same patient share one image tensor.
        if patient != last_pat:
            last_pat = patient
            image = _load_patient_image(patient)
            image_tensor = torch.from_numpy(image[None]).cuda().float()

        # One (1, n_outputs) prediction per model, averaged across the ensemble.
        per_model = [net(image_tensor, meta_batch).cpu().numpy() for net in nets]
        o1.extend(np.mean(np.array(per_model), axis=0))

o1 = np.array(o1)

In [None]:
# Attach the ensemble output to the rows that were predicted: column 1 becomes
# FVC and the gap between columns 2 and 0 becomes Confidence. (Presumably the
# columns follow config.quantiles, i.e. low / middle / high; confirm.)
sub = data[data.WHERE=='test'].assign(
    FVC=o1[:, 1],
    Confidence=o1[:, 2] - o1[:, 0],
)
subm = sub.loc[:, ['Patient_Week','FVC','Confidence']].copy()
subm.head()

In [None]:
# Where test.csv already holds a measurement for a (patient, week) pair, use
# that FVC value instead of the prediction and set Confidence to 0.1.
otest = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/test.csv')
for known in otest.itertuples(index=False):
    is_known_row = subm['Patient_Week'] == '{}_{}'.format(known.Patient, known.Weeks)
    subm.loc[is_known_row, 'FVC'] = known.FVC
    subm.loc[is_known_row, 'Confidence'] = 0.1

In [None]:
# From here on `sub` is the final three-column submission frame.
sub = subm.copy()

In [None]:
sub.head()

In [None]:
# Write the submission file (columns: Patient_Week, FVC, Confidence) without
# the index column.
sub.to_csv("submission.csv", index=False)