In [None]:
# Notebook-wide settings.
image_size = 512  # side length passed to the Resize transform; also handed to the model constructor as its first argument
batch_size = 32  # batch size of the test DataLoader
num_workers = 4  # worker count of the test DataLoader

In [None]:
import pandas as pd
import numpy as np
import sys
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')
import os
import sys
import time
import cv2
import PIL.Image
import random
from sklearn.metrics import accuracy_score
from tqdm.notebook import tqdm
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
import albumentations
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import gc
from sklearn.metrics import roc_auc_score
%matplotlib inline
import seaborn as sns
from pylab import rcParams
import timm
from warnings import filterwarnings
from sklearn.preprocessing import LabelEncoder
import math
import glob
# Silence every warning category for the rest of the notebook.
filterwarnings("ignore")

# Prefer the GPU, but fall back to the CPU so the notebook can still be
# executed (slowly) on a kernel without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
def seed_everything(seed):
    """Seed every random number generator used by this notebook.

    Exports ``PYTHONHASHSEED``, seeds Python's ``random``, NumPy and PyTorch
    (CPU generator and current CUDA device), and puts cuDNN into
    deterministic mode with benchmarking switched off.

    Args:
        seed: integer seed shared by all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # The individual seeding calls are independent of one another.
    for apply_seed in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

    print(f'Setting all seeds to be {seed} to reproduce...')


seed_everything(42)

In [None]:
# Inference-time preprocessing: resize to image_size x image_size, then apply
# albumentations' Normalize with its default arguments.
transforms_valid = albumentations.Compose([
    albumentations.Resize(image_size, image_size),
    albumentations.Normalize()
])

In [None]:
class RANZCRDataset(Dataset):
    """Dataset that loads the images listed in a DataFrame's ``file_path`` column.

    Args:
        df: frame with a ``file_path`` column. A ``PatientID`` column is also
            read unless ``mode == 'test'``. The index is reset, so samples are
            addressed by position 0..len-1.
        mode: ``'test'`` yields only the image tensor; any other value yields
            ``(image, patient_id)``.
        transform: optional callable invoked as ``transform(image=img)`` that
            returns a mapping with an ``'image'`` entry.
    """

    def __init__(self, df, mode, transform=None):
        self.df = df.reset_index(drop=True)
        self.mode = mode
        self.transform = transform

    def __len__(self):
        """Number of rows (images) in the dataset."""
        return len(self.df)

    def __getitem__(self, index):
        """Return sample ``index`` as a float32 channel-first tensor.

        Returns:
            The image tensor in test mode, otherwise ``(image, patient_id)``
            with ``patient_id`` as a float tensor.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        row = self.df.loc[index]
        img = cv2.imread(row.file_path)
        if img is None:
            # cv2.imread does not raise on a missing/corrupt file; it returns
            # None, which would otherwise fail later inside cvtColor with an
            # error that does not mention the offending path.
            raise FileNotFoundError(f'Image is missing or unreadable: {row.file_path}')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            res = self.transform(image=img)
            img = res['image']

        # HWC -> CHW, as float32.
        img = img.astype(np.float32)
        img = img.transpose(2, 0, 1)

        if self.mode == 'test':
            return torch.tensor(img).float()
        else:
            return torch.tensor(img).float(), torch.tensor(row.PatientID).float()

In [None]:
class ArcModule(nn.Module):
    """Margin head producing scaled cosine logits with an angular margin on the target class.

    For the target class of each sample the logit is ``s * cos(theta + m)``
    (or ``s * (cos(theta) - mm)`` once ``cos(theta) <= cos(pi - m)``); every
    other class gets ``s * cos(theta)``. ``cos(theta)`` is the product of the
    inputs with the row-normalised weight matrix, so inputs are expected to be
    L2-normalised by the caller.

    Args:
        in_features: size of each input embedding.
        out_features: number of classes.
        s: scale applied to the final logits.
        m: angular margin in radians (the default 0 adds no margin).
    """

    def __init__(self, in_features, out_features, s=10, m=0):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_normal_(self.weight)

        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        # Threshold and penalty used where theta + m would exceed pi.
        self.th = torch.tensor(math.cos(math.pi - m))
        self.mm = torch.tensor(math.sin(math.pi - m) * m)

    def forward(self, inputs, labels):
        """Compute margin-adjusted logits.

        Args:
            inputs: ``(batch, in_features)`` embeddings.
            labels: class indices, shape ``(batch,)`` or ``(batch, 1)``; any
                numeric dtype is accepted and converted to ``long``.

        Returns:
            ``(batch, out_features)`` logits on the same device as ``inputs``.
        """
        cos_th = F.linear(inputs, F.normalize(self.weight)).clamp(-1, 1)
        sin_th = torch.sqrt(1.0 - torch.pow(cos_th, 2))
        # cos(theta + m) via the angle-addition formula.
        cos_th_m = cos_th * self.cos_m - sin_th * self.sin_m
        # Fall back to a linear penalty where cos(theta) <= cos(pi - m); the
        # original applied this same substitution twice, once is enough.
        cos_th_m = torch.where(cos_th > self.th, cos_th_m, cos_th - self.mm)

        if labels.dim() == 1:
            labels = labels.unsqueeze(-1)
        # Follow the device of the activations instead of forcing CUDA, so the
        # module also works on CPU and on non-default GPUs.
        labels = labels.to(device=cos_th.device, dtype=torch.long)
        onehot = torch.zeros_like(cos_th)
        onehot.scatter_(1, labels, 1.0)

        outputs = onehot * cos_th_m + (1.0 - onehot) * cos_th
        return outputs * self.s

In [None]:
class MetricLearningModel(nn.Module):
    """timm backbone followed by an embedding head and an ArcModule classifier.

    The backbone must expose ``features(x)`` and a ``classifier`` layer with an
    ``in_features`` attribute (as the default ``densenet121`` does).

    Args:
        channel_size: width of the output embedding.
        out_feature: number of classes for the margin head.
        dropout: probability used by the ``Dropout2d`` layer.
        backbone: timm model name.
        pretrained: forwarded to ``timm.create_model``.
        feature_map_size: spatial side length of the backbone's feature map.
            The flattened map must contain
            ``in_features * feature_map_size ** 2`` values; the default 16
            reproduces the previously hard-coded ``16 * 16``
            (presumably matching the notebook's 512-px inputs -- confirm).
    """

    def __init__(self, channel_size, out_feature, dropout=0.5, backbone='densenet121',
                 pretrained=False, feature_map_size=16):
        super(MetricLearningModel, self).__init__()
        self.backbone = timm.create_model(backbone, pretrained=pretrained)
        self.channel_size = channel_size
        self.out_feature = out_feature
        self.feature_map_size = feature_map_size
        self.in_features = self.backbone.classifier.in_features
        self.margin = ArcModule(in_features=self.channel_size, out_features = self.out_feature)
        self.bn1 = nn.BatchNorm2d(self.in_features)
        self.dropout = nn.Dropout2d(dropout, inplace=True)
        self.fc1 = nn.Linear(self.in_features * feature_map_size * feature_map_size, self.channel_size)
        self.bn2 = nn.BatchNorm1d(self.channel_size)

    def forward(self, x, labels=None):
        """Embed ``x``; with ``labels`` return margin logits instead.

        Args:
            x: image batch accepted by the backbone.
            labels: optional class indices. When given, the normalised
                embeddings are passed through the margin head.

        Returns:
            L2-normalised ``(batch, channel_size)`` embeddings, or the margin
            head's logits when ``labels`` is not None.
        """
        features = self.backbone.features(x)
        features = self.bn1(features)
        features = self.dropout(features)
        # Flatten (C, H, W) into one vector per sample for the linear layer.
        features = features.view(features.size(0), -1)
        features = self.fc1(features)
        features = self.bn2(features)
        features = F.normalize(features)
        if labels is not None:
            return self.margin(features, labels)
        return features


In [None]:
# NOTE(review): the embedding width (`channel_size`) is fed from `image_size`,
# so the two settings are tied together -- confirm this coupling is intended.
model = MetricLearningModel(channel_size=image_size, out_feature=30805)
# Map the checkpoint onto the active device rather than a hard-coded 'cuda:0'.
model.load_state_dict(torch.load('../input/feature-extractor/dense121_feature_extractor.pth', map_location=device))
model.to(device);

In [None]:
# Test manifest: one row per StudyInstanceUID, with the path of its .jpg image.
test = pd.read_csv('../input/ranzcr-clip-catheter-line-classification/sample_submission.csv')
test['file_path'] = [
    os.path.join('../input/ranzcr-clip-catheter-line-classification/test', uid) + '.jpg'
    for uid in test.StudyInstanceUID
]

# Unshuffled loader so extracted features stay in manifest order.
dataset_test = RANZCRDataset(test, 'test', transform=transforms_valid)
test_loader = DataLoader(
    dataset_test,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True,
)

In [None]:
def generate_test_features(test_loader):
    """Run the global ``model`` over ``test_loader`` and collect its outputs.

    The model is switched to eval mode and executed without gradient
    tracking on the global ``device``.

    Args:
        test_loader: iterable yielding image batches (tensors).

    Returns:
        NumPy array with the per-batch model outputs concatenated along
        the first axis, in loader order.
    """
    model.eval()

    with torch.no_grad():
        per_batch = [
            model(batch.to(device)).detach().cpu()
            for batch in tqdm(test_loader)
        ]

    return torch.cat(per_batch).cpu().numpy()

In [None]:
# Embed every test image, then move the matrix to the active device
# (rather than unconditionally to CUDA) for the similarity search.
FEAS = generate_test_features(test_loader)
FEAS = torch.tensor(FEAS).to(device)

In [None]:
# Metadata table of the external chest X-ray image collection.
chestx_df = pd.read_csv('../input/data/Data_Entry_2017.csv')
# NOTE(review): paths are attached purely by position -- this assumes the CSV
# rows are in the same order as the sorted file paths and that there is exactly
# one file per row. TODO confirm.
chestx_df['file_path'] = sorted(glob.glob('../input/data/images_*/*/*'))

In [None]:
# Precomputed feature matrix for the external images, placed on the active
# device so it can be multiplied with FEAS.
# NOTE(review): presumably row i belongs to row i of chestx_df -- confirm.
chestx_features = np.load('../input/chest-x-features/chest_x_features.npy')
chestx_features = torch.tensor(chestx_features).to(device)


In [None]:
# For every test embedding, pick the external image with the largest dot product.
idx = (FEAS@chestx_features.T).argmax(1)
# argmax returns row positions, so look the rows up positionally (iloc) with a
# NumPy index, once, instead of passing a torch tensor to label-based .loc twice.
nearest_rows = chestx_df.iloc[idx.detach().cpu().numpy()]
test['chest_x_image'] = nearest_rows['Image Index'].values
test['chest_x_file_path'] = nearest_rows['file_path'].values

In [None]:
#ensemble offline csv
def _read_fold_predictions(path):
    """Read one fold CSV and fill NaNs in every column after the first with that column's mean."""
    fold_df = pd.read_csv(path)
    for col in fold_df.columns.tolist()[1:]:
        mean = fold_df[col].dropna().values.mean()
        fold_df[col] = fold_df[col].fillna(mean)
    return fold_df


def get_df(model_type, n_folds=5):
    """Average the per-fold prediction CSVs of one model.

    Files are matched with ``../input/chestx-preds/*_{model_type}*``. In each
    file the first column is kept as-is and NaNs in the remaining columns are
    replaced by the column mean before the files are averaged.

    Args:
        model_type: tag embedded in the fold file names.
        n_folds: number of fold files that must match (default 5).

    Returns:
        DataFrame shaped like one fold file, with every column after the
        first holding the mean over all folds.

    Raises:
        AssertionError: if the number of matching files is not ``n_folds``.
    """
    # Sorted so the accumulation order (and thus floating-point rounding) does
    # not depend on the file-system listing order.
    csv_paths = sorted(glob.glob(f'../input/chestx-preds/*_{model_type}*'))
    assert len(csv_paths) == n_folds, f"num folds not equal {n_folds}"

    df = _read_fold_predictions(csv_paths[0])
    # NOTE(review): folds are combined by row index, not by the ID column, so
    # every fold file is assumed to list the same rows in the same order.
    for path in csv_paths[1:]:
        df.iloc[:, 1:] += _read_fold_predictions(path).iloc[:, 1:]
    df.iloc[:, 1:] /= len(csv_paths)
    return df

# Fold-averaged predictions of every ensemble member.
s1024_df = get_df('1024_f')
s1024b_df = get_df('1024_b')
b5_df = get_df('b5_f')
b5b_df = get_df('b5_b_')
sheep_df = get_df('sheep_')
b7_df = get_df('b7_')
sheep1_df = get_df('sheep1_')

# (weight, member) pairs, listed in the order in which they are summed.
#0.5-b5, 0.5-b5b, 1-s, 0.5-s1, 0.5-1024, 0.5-1024b, 1-b7
weighted_members = [
    (0.5, b5_df),
    (0.5, b5b_df),
    (1, sheep_df),
    (0.5, sheep1_df),
    (1, b7_df),
    (0.5, s1024_df),
    (0.5, s1024b_df),
]

# Weighted sum of the square-rooted predictions (all columns after the first).
blend = None
for weight, member in weighted_members:
    term = weight * member.iloc[:, 1:] ** 0.5
    blend = term if blend is None else blend + term

offline_df = b5_df.copy()
offline_df.iloc[:, 1:] = blend

# Divide by the total weight.
offline_df.iloc[:, 1:] /= 4.5

In [None]:
# Offline mapping table; the cells below use its 'chest_x_image' and 'file_path'
# columns (among others) -- presumably it links training studies to external
# chest X-ray images, verify against the dataset.
train_mapping_df = pd.read_csv('../input/ranzrc-offline/train_mapping.csv')

In [None]:
# Rename the ID column to 'file_path', the key the later concat and lookup use.
offline_df = offline_df.rename(columns={"StudyInstanceUID": "file_path"})

In [None]:
# Columns of the mapping table that are dropped before the concat below.
remove_cols = [
    'similarity_score',
    'StudyInstanceUID',
    'follow_up_number',
    'chest_x_labels',
    'chest_x_patient_age',
    'chest_x_image',
    'chest_x_patient_id',
    'PatientID',
]

# Keep one row per external image, drop the bookkeeping columns, and strip the
# 'chestx/' fragment from the paths.
train_mapping_df = (
    train_mapping_df
    .drop_duplicates(subset=['chest_x_image'])
    .drop(columns=remove_cols)
    .assign(file_path=lambda frame: frame['file_path'].apply(lambda path: path.replace('chestx/', '')))
)

In [None]:
# Stack the ensemble predictions and the mapping-table rows into one lookup table.
# NOTE(review): nothing de-duplicates 'file_path' here; a path present in both
# frames would appear twice in the later lookup -- confirm they are disjoint.
offline_df = pd.concat([offline_df, train_mapping_df])

In [None]:
# Replace the test image path with the matched external image's path, minus the
# '../input/data/' prefix, so it can be used as a key into offline_df.
test['file_path'] = test['chest_x_file_path'].apply(
    lambda path: path.replace('../input/data/', '')
)

In [None]:
# Prediction columns copied from offline_df into the submission frame.
target_cols = ['ETT - Abnormal',
 'ETT - Borderline',
 'ETT - Normal',
 'NGT - Abnormal',
 'NGT - Borderline',
 'NGT - Incompletely Imaged',
 'NGT - Normal',
 'CVC - Abnormal',
 'CVC - Borderline',
 'CVC - Normal',
 'Swan Ganz Catheter Present']

In [None]:
# Look up each test row's matched path in offline_df and copy the target columns
# across by position (.values).
# NOTE(review): this relies on every path being present exactly once in
# offline_df; duplicated keys would return extra rows -- confirm.
test[target_cols] = offline_df.set_index('file_path').loc[test.file_path][target_cols].values

In [None]:
# Remove the helper columns added in this notebook before writing the submission.
test = test.drop(['file_path', 'chest_x_image', 'chest_x_file_path'], axis=1)

In [None]:
# Write the submission file without the DataFrame index.
test.to_csv('submission.csv', index=False)