In [None]:
# Make the offline copy of `timm` importable: its directory has to be on
# sys.path before the `import timm` statements that follow.
import sys

package_paths = ['../input/timm-module']
sys.path.extend(package_paths)

import os
import pandas as pd
import torch
import timm
import albumentations as A
import matplotlib.pyplot as plt
import wandb
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import cv2

from torch.utils.data import Dataset, DataLoader
from albumentations.pytorch import ToTensorV2
from sklearn.metrics import multilabel_confusion_matrix
from timm import *
from fastai.vision.learner import _add_norm
from fastai.vision.all import *
from fastai.vision.learner import _update_first_layer
from fastai.callback.wandb import *

In [None]:
# Use the first GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


def seed_everything(seed):
    """Seed every random number generator used by this notebook.

    Seeds Python's `random`, NumPy and PyTorch, and exports PYTHONHASHSEED.
    When the module-level `device` is a CUDA device, the CUDA generators are
    seeded as well and cuDNN is switched to deterministic algorithms.

    Args:
        seed: integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    # `device` is a torch.device; comparing it with the string 'cuda:0' is
    # always False, so inspect its type instead.
    if device.type == 'cuda':
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True

In [None]:
class Config:
    """Class-level settings for the study-level classifier.

    Evaluating the class body has two side effects: it seeds the random
    number generators through `seed_everything` and prints the job name.
    """

    # --- reproducibility ---
    seed_val = 111
    seed_everything(seed_val)

    # --- run identification ---
    fold_num = 0
    job = 1

    # --- model / data ---
    num_classes = 4
    input_dims = 512
    model_arch = 'tf_efficientnetv2_m_in21ft1k'
    batch_size = 3
    num_workers = 0
    kfold = 5

    # --- objective and metrics ---
    # CrossEntropyLossFlat() or LabelSmoothingCrossEntropyFlat() for multi-class
    loss_func = nn.BCEWithLogitsLoss()
    metrics = [
        accuracy_multi,
        RocAucMulti(average='macro'),
        F1ScoreMulti(average='macro'),
    ]

    job_name = f'{model_arch}_fold{fold_num}_job{job}'
    print(f"Job Name: {job_name}")

    # --- experiment tracking ---
    wandb_project = 'SIIM_classifier_public'
    wandb_run_name = job_name

    # Half precision is enabled only when running on the first CUDA device.
    fp16 = (str(device) == 'cuda:0')


cfg = Config()

# Flat, serialisable view of the settings: dunder entries are dropped, plain
# ints are kept as-is and every other value (bools included) is stringified.
config_dict = {
    name: (value if type(value) is int else str(value))
    for name, value in vars(Config).items()
    if '__' not in name
}

In [None]:
def create_timm_body(arch:str, pretrained=True, cut=None, n_in=3):
    """Create a headless body from any model in the `timm` library.

    Args:
        arch: model name passed to `create_model`.
        pretrained: forwarded to `create_model` and `_update_first_layer`.
        cut: where to truncate the model. `None` cuts at the last child for
            which `has_pool_type` is true, an int keeps the first `cut`
            children, and a callable is applied to the whole model.
        n_in: number of input channels handed to `_update_first_layer`.

    Returns:
        `nn.Sequential` of the kept children, or whatever `cut(model)` returns.

    Raises:
        TypeError: if `cut` is neither `None`, an int nor a callable.
        StopIteration: if `cut` is `None` and no child has a pooling layer.
    """
    model = create_model(arch, pretrained=pretrained, num_classes=0, global_pool='')
    _update_first_layer(model, n_in, pretrained)
    children = list(model.children())
    if cut is None:
        # Walk the children back to front and cut at the last pooling layer.
        cut = next(i for i, o in reversed(list(enumerate(children))) if has_pool_type(o))
    if isinstance(cut, int):
        return nn.Sequential(*children[:cut])
    if callable(cut):
        return cut(model)
    # The previous `NamedError` does not exist, so this path used to fail
    # with an unrelated NameError instead of reporting the bad argument.
    raise TypeError("cut must be either integer or function")


def create_timm_model(arch: str, n_out, cut=None, pretrained=True, n_in=3,
                      init=nn.init.kaiming_normal_, custom_head=None,
                      concat_pool=True, **kwargs):
    """Create a body + head model using `arch` from the `timm` library.

    Args:
        arch: model name handed to `create_timm_body`.
        n_out: number of outputs of the generated head.
        cut: truncation spec forwarded to `create_timm_body`.
        pretrained: whether the body is created with pretrained weights.
        n_in: number of input channels.
        init: initialiser applied to the head via `apply_init`; `None` skips it.
        custom_head: module used as the head instead of a generated one.
        concat_pool: forwarded to `create_head` when the head is generated.
        **kwargs: extra arguments for `create_head`.

    Returns:
        `nn.Sequential(body, head)`.
    """
    # Forward `cut`; it used to be accepted but replaced by a hard-coded None.
    body = create_timm_body(arch, pretrained, cut, n_in)
    if custom_head is None:
        nf = num_features_model(nn.Sequential(*body.children()))
        head = create_head(nf, n_out, concat_pool=concat_pool, **kwargs)
    else:
        head = custom_head
    model = nn.Sequential(body, head)
    if init is not None:
        # Only the head (model[1]) is re-initialised; the body is left as built.
        apply_init(model[1], init)
    return model

In [None]:
class PlantDataset(Dataset):
    """Dataset yielding `(image, study_id)` pairs for inference.

    Args:
        df: frame with a `study_id` column (returned alongside each image)
            and an `image_path` column (file read with OpenCV).
        transform: optional callable invoked as `transform(image=image)` that
            returns a mapping with an `'image'` entry. When `None`, the RGB
            array is returned untouched.
    """

    def __init__(self, df, transform=None):
        self.image_id = df['study_id'].values
        self.transform = transform
        self.path = df['image_path'].values

    def __len__(self):
        return len(self.image_id)

    def __getitem__(self, idx):
        image_path = self.path[idx]
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None; fail
            # here with the path instead of an opaque cvtColor error.
            raise FileNotFoundError(f"Could not read image: {image_path!r}")
        # OpenCV loads BGR; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            image = self.transform(image=image)['image']
        return image, self.image_id[idx]


def get_transform():
    """Build the inference pipeline: square resize to `cfg.input_dims`, then tensor conversion."""
    side = cfg.input_dims
    steps = [
        A.Resize(side, side, p=1.0),
        ToTensorV2(),
    ]
    return A.Compose(steps, p=1.0)

In [None]:
class PlantPredictor():
    """Run a multi-label classifier over a loader and collect prediction strings.

    Args:
        net: model mapping an image batch to one logit per label.
        test_df: accepted for interface compatibility; not used.
        df_labels_idx: frame whose `labels` column lists the label names in
            the order of the model outputs; it must contain `'negative'`.
        dataloaders: iterable of `(inputs, ids)` batches.
        device: device the model and the inputs are moved to.
        threshold: sigmoid score above which a label counts as predicted.

    After `inference()`, `df_submit` holds one row per sample with columns
    `id` and `labels`.
    """

    def __init__(self, net, test_df, df_labels_idx, dataloaders, device, threshold=0.41):
        self.net = net
        self.df_labels_idx = df_labels_idx
        self.dataloaders = dataloaders
        self.df_submit = pd.DataFrame(columns = ['id', 'labels'])
        self.device = device
        self.threshold = threshold
        self.net.to(self.device)

    def inference(self):
        """Predict every batch and store the results in `self.df_submit`."""
        # Take the label names from the frame given to the constructor rather
        # than from a module-level variable defined in some other cell.
        label_names = self.df_labels_idx['labels'].to_numpy()
        negative_idx = list(label_names).index('negative')

        self.net.eval()
        ids = []
        labels_pre = []
        with torch.no_grad():
            for inputs, image_name in self.dataloaders:
                ids.extend(image_name)
                out = self.net(inputs.float().to(self.device)).cpu()
                hits = out.sigmoid().numpy() > self.threshold
                for row_logits, row_hits in zip(out, hits):
                    if row_hits[negative_idx] or not row_hits.any():
                        # 'negative' fired, or nothing passed the threshold:
                        # fall back to the single highest-scoring label
                        # (softmax is monotonic, so argmax of logits suffices).
                        tmp = label_names[int(np.argmax(row_logits.numpy()))]
                    else:
                        # NOTE(review): several labels are joined ahead of ONE
                        # "1 0 0 1 1" suffix; confirm this is the intended
                        # submission format for multi-label predictions.
                        tmp = ' '.join(label_names[row_hits])
                    labels_pre.append(tmp + ' 1 0 0 1 1')

        self.df_submit = pd.DataFrame({'id': ids, 'labels': labels_pre},
                                      columns=['id', 'labels'])

In [None]:
# Rebuild the classifier architecture (no pretrained download) and restore
# the trained weights from the checkpoint file.
net = create_timm_model(arch=cfg.model_arch, n_out=cfg.num_classes, pretrained=False, n_in=3)
# map_location='cpu' lets a checkpoint that was saved from a GPU load on a
# CPU-only machine; load_state_dict copies the values into `net` either way.
# The checkpoint keys are used exactly as stored.
state_dict = torch.load('../input/efficientnet0/e43ae05e11713613365669a4e1cb010d.pth', map_location='cpu')
net.load_state_dict(state_dict)

sub_df = pd.read_csv('/kaggle/input/siim-covid19-detection/sample_submission.csv')

# Split the sample submission into study rows and image rows using the
# last five characters of each id.
sub_df['level'] = sub_df['id'].map(lambda sample_id: sample_id[-5:])
is_study = sub_df['level'] == 'study'
is_image = sub_df['level'] == 'image'
study_df = sub_df[is_study].rename(columns={'id': 'study_id'})
image_df = sub_df[is_image].rename(columns={'id': 'image_id'})


def _path_component(path, position):
    "Return the `position`-th '/'-separated part of `path` with '.dcm' removed."
    return path.split('/')[position].replace('.dcm', '')


# One row per test DICOM: the file name gives the image id, the directory two
# levels above the file gives the study id.
dcm_path = glob.glob('/kaggle/input/siim-covid19-detection/test/**/*dcm', recursive=True)
test_meta = pd.DataFrame({'dcm_path': dcm_path})
test_meta['image_id'] = test_meta['dcm_path'].map(lambda p: _path_component(p, -1) + '_image')
test_meta['study_id'] = test_meta['dcm_path'].map(lambda p: _path_component(p, -3) + '_study')

image_df = image_df.merge(test_meta, on='image_id', how='left')

# Keep a single (first) image per study for the study-level classifier.
study_df = (
    study_df
    .merge(test_meta, on='study_id', how='left')
    .drop_duplicates(subset='study_id', keep='first')
)
study_dir = '../input/siim-covid19-images-metadata-256-512-768/images_metadata_256_512_768/test_512x512'
study_df['image_path'] = study_dir + '/' + study_df['image_id'] + '.png'

# Label names in the order of the classifier outputs, with 1-based indices.
labels = ['negative', 'typical', 'indeterminate', 'atypical']
labels_n = list(range(1, len(labels) + 1))
df_labels_idx = pd.DataFrame({'labels_n': labels_n, 'labels': labels})

# Study-level inference over one image per study, in frame order.
test_dataset = PlantDataset(study_df, transform=get_transform())
test_loader = DataLoader(dataset=test_dataset, batch_size=2, shuffle=False)

pre = PlantPredictor(
    net=net,
    test_df=study_df,
    df_labels_idx=df_labels_idx,
    dataloaders=test_loader,
    device=device,
)
pre.inference()
df_submit = pre.df_submit.copy()

In [None]:
# Directory of test images passed to detect.py as --source.
TEST_PATH = '/kaggle/input/siimcovid19resizedto256pxjpg/test/'
# YOLOv5 weights file passed to detect.py as --weights.
MODEL_PATH = '/kaggle/input/yolov5/YOLOV5.pt'

In [None]:
# Copy the YOLOv5 sources into the working directory and run from there;
# later cells use paths relative to it (detect.py, runs/detect/...).
# shutil.copytree raises FileExistsError when the target already exists, so
# skip the copy on a re-run to keep this cell idempotent.
if not os.path.isdir('/kaggle/working/yolov5'):
    shutil.copytree('/kaggle/input/kaggle/tmp/yolov5', '/kaggle/working/yolov5')
os.chdir('/kaggle/working/yolov5')

In [None]:
# Image side length in pixels: passed to detect.py as --img and used by
# correct_bbox_format to scale the normalised boxes back to pixels.
IMG_SIZE = 256
# Run YOLOv5 detection over TEST_PATH with the weights in MODEL_PATH.
# --save-txt / --save-conf presumably write one text file per image with a
# trailing confidence per box (that is the layout get_conf_bboxes parses).
!python detect.py --weights {MODEL_PATH} \
                  --source {TEST_PATH} \
                  --img {IMG_SIZE} \
                  --conf 0.281 \
                  --iou-thres 0.5 \
                  --max-det 3 \
                  --save-txt \
                  --save-conf

In [None]:
# Label-file directory, relative to the current working directory.
# NOTE(review): hard-codes the first run folder name ('exp'); confirm it
# still matches if detect.py is run more than once.
PRED_PATH = 'runs/detect/exp/labels'
# %cat /kaggle/working/yolov5/runs/detect/exp/labels/c6c9bf98487a.txt
# File names present in PRED_PATH; used later to test whether an image has
# a '<id>.txt' label file.
prediction_files = os.listdir(PRED_PATH)


In [None]:
def correct_bbox_format(bboxes, img_size=None):
    """Convert normalised centre-format boxes to integer pixel corners.

    Args:
        bboxes: iterable of `[x_center, y_center, width, height]`, each value
            a fraction of the image side.
        img_size: side length in pixels used for scaling; defaults to the
            module-level `IMG_SIZE`.

    Returns:
        List of `[xmin, ymin, xmax, ymax]` integer boxes, i.e. in the
        `xmin ymin xmax ymax` order the submission string expects.
    """
    size = IMG_SIZE if img_size is None else img_size
    # NOTE(review): coordinates are expressed in the resized `size` x `size`
    # image; confirm whether they must be rescaled to the original image
    # dimensions before submission.
    correct_bboxes = []
    for b in bboxes:
        xc, yc = int(np.round(b[0]*size)), int(np.round(b[1]*size))
        w, h = int(np.round(b[2]*size)), int(np.round(b[3]*size))

        half_w = int(np.round(w/2))
        half_h = int(np.round(h/2))

        # Previously emitted as [xmin, xmax, ymin, ymax], which swapped the
        # two middle coordinates in the prediction string.
        correct_bboxes.append([xc - half_w, yc - half_h, xc + half_w, yc + half_h])

    return correct_bboxes

# Read the txt file generated by YOLOv5 during inference and extract 
# confidence and bounding box coordinates.
def get_conf_bboxes(file_path):
    """Parse one label file into confidences and boxes.

    Every non-blank line is a whitespace-separated list of numbers: the first
    value is skipped, the last is the confidence and the values in between
    form the box.

    Args:
        file_path: path of the text file to read.

    Returns:
        `(confidence, bboxes)`: two parallel lists with one entry per line.

    Raises:
        ValueError: if a field cannot be parsed as a float.
    """
    confidence = []
    bboxes = []
    with open(file_path, 'r') as file:
        for line in file:
            # split() without arguments tolerates trailing spaces, tabs and
            # '\r\n' endings; blank lines are skipped instead of failing on
            # float('').
            fields = line.split()
            if not fields:
                continue
            preds = [float(field) for field in fields]
            confidence.append(preds[-1])
            bboxes.append(preds[1:-1])
    return confidence, bboxes

In [None]:
from tqdm import tqdm
# Re-read the sample submission so `sub_df` holds only the columns stored in
# the CSV (an earlier cell added a helper `level` column to its own copy).
sub_df = pd.read_csv('/kaggle/input/siim-covid19-detection/sample_submission.csv')
sub_df.tail()

In [None]:
# Prediction string for an image without any detected opacity. The class
# name is lower-case 'none'; the previous 'None' spelling did not match it.
NO_OPACITY_PREDICTION = 'none 1 0 0 1 1'

# Study id -> label string from the classifier, built once so each row is a
# dict lookup. keep='first' mirrors taking the first matching row.
study_predictions = (
    df_submit
    .drop_duplicates(subset='id', keep='first')
    .set_index('id')['labels']
    .to_dict()
)


def _image_prediction(id_name):
    "Return the opacity prediction string for one image id (without suffix)."
    if f'{id_name}.txt' not in prediction_files:
        # No label file was written for this image.
        return NO_OPACITY_PREDICTION
    confidence, bboxes = get_conf_bboxes(f'{PRED_PATH}/{id_name}.txt')
    bboxes = correct_bbox_format(bboxes)
    parts = [
        f'opacity {conf} ' + ' '.join(map(str, box))
        for conf, box in zip(confidence, bboxes)
    ]
    # A label file without any usable line would otherwise give an empty string.
    return ' '.join(parts) if parts else NO_OPACITY_PREDICTION


predictions = []

for sample_id in tqdm(sub_df['id']):
    id_parts = sample_id.split('_')
    id_name, id_level = id_parts[0], id_parts[-1]

    if id_level == 'study':
        # Study-level classification result computed earlier.
        predictions.append(study_predictions[sample_id])
    elif id_level == 'image':
        # Image-level result taken from the detector's output files.
        predictions.append(_image_prediction(id_name))
    else:
        # Skipping the row would leave `predictions` shorter than `sub_df`.
        raise ValueError(f"Unexpected id suffix in sample submission: {sample_id!r}")

In [None]:
# Attach the prediction strings (row-aligned with the sample submission) and
# write the final submission file.
sub_df = sub_df.assign(PredictionString=predictions)
sub_df.to_csv('/kaggle/working/submission.csv', index=False)
sub_df.tail()