In [None]:
# Install pinned conda packages from the local `pydicom-conda-helper` input
# dataset: libjpeg-turbo, libgcc-ng, gdcm, conda, certifi and openssl.
# NOTE(review): presumably gdcm is required so pydicom can decode compressed
# DICOM pixel data — confirm.
!conda install '/kaggle/input/pydicom-conda-helper/libjpeg-turbo-2.1.0-h7f98852_0.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/libgcc-ng-9.3.0-h2828fa1_19.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/gdcm-2.8.9-py37h500ead1_1.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/conda-4.10.1-py37h89c1867_0.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/certifi-2020.12.5-py37h89c1867_1.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/openssl-1.1.1k-h7f98852_0.tar.bz2' -c conda-forge -y

In [None]:
import sys

# Extra source trees to make importable; they are appended after the existing
# entries of sys.path, so already-installed packages keep precedence.
package_paths = [
    '../input/timm-pytorch-image-models/pytorch-image-models-master',
]
sys.path.extend(package_paths)

In [None]:
import glob
import os
import time
import random

import numpy as np  # linear algebra!
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import PIL

from sklearn.model_selection import GroupKFold, StratifiedKFold
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score

import cv2
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns

from tqdm import tqdm
from tqdm.contrib.concurrent import process_map
from multiprocessing import Pool, cpu_count

import torch
import torchvision
from torch.utils.data.dataset import Dataset
import torch.cuda.amp as amp

import timm

import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

import albumentations as A
from albumentations.pytorch import ToTensorV2
# --- Configuration for the classification stage ---
SIZE = (384, 384)  # side lengths of the resized test JPEGs; also passed to the dataset as `size`

MODEL_DIR = "/kaggle/input/classification-training-script"  # searched for `<MODEL_NAME>*/*.pth` checkpoints
DATA_DIR = RESIZE_DIR = "/kaggle/working/"  # both names are bound to the same path
FOLDS = 5  # NOTE(review): not referenced by the code visible in this notebook — confirm it is still needed
NUM_CLASSES = 4  # width of the classifier head and of the probability matrix
BATCHSIZE = 64  # the test DataLoader uses twice this value
SEED = 420  # passed to seed_everything
MODEL_NAME = "tf_efficientnetv2_s"  # timm architecture name; also the checkpoint directory prefix

In [None]:
# Load the competition's sample submission. The following cells split it into
# rows whose `id` contains "_study" and rows whose `id` contains "_image".
sub_df = pd.read_csv('/kaggle/input/siim-covid19-detection/sample_submission.csv')
print(len(sub_df))
sub_df.head()

In [None]:
# Study-level rows of the sample submission (ids containing "_study").
is_study_row = sub_df["id"].str.contains('_study')
study_df = sub_df.loc[is_study_row]
len(study_df)

In [None]:
# Image-level rows of the sample submission (ids containing "_image").
is_image_row = sub_df["id"].str.contains('_image')
image_df = sub_df.loc[is_image_row]
len(image_df)

In [None]:
def read_xray(path, voi_lut = True, fix_monochrome = True):
    """Read a DICOM file and return its pixels min-max scaled to 8 bits.

    Original from: https://www.kaggle.com/raddar/convert-dicom-to-np-array-the-correct-way

    Args:
        path: Path of the DICOM file to read.
        voi_lut: When True, the decoded pixels are passed through
            ``apply_voi_lut`` (VOI LUT, if provided by the DICOM device, gives
            a "human-friendly" view); otherwise the raw pixel array is used.
        fix_monochrome: When True, images whose ``PhotometricInterpretation``
            is ``"MONOCHROME1"`` are inverted so they do not look negative.

    Returns:
        ``np.uint8`` array with values in ``[0, 255]``. An image whose pixels
        are all equal comes back as all zeros.
    """
    dicom = pydicom.read_file(path)

    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array

    # Depending on this value, the X-ray may look inverted - fix that.
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    data = data - np.min(data)
    peak = np.max(data)
    # A constant image has peak == 0 after the shift; dividing would produce
    # NaNs (and an undefined uint8 cast), so leave the zeros untouched.
    if peak > 0:
        data = data / peak
    data = (data * 255).astype(np.uint8)

    return data

def resize_xray(array, size, keep_ratio=False, resample=Image.LANCZOS):
    """Turn a pixel array into a PIL image resized to a ``size`` x ``size`` box.

    Original from: https://www.kaggle.com/xhlulu/vinbigdata-process-and-resize-to-image

    Args:
        array: Pixel array accepted by ``Image.fromarray``.
        size: Edge length of the square target box.
        keep_ratio: When True the image is shrunk in place with
            ``Image.thumbnail``; otherwise it is resized to exactly
            ``(size, size)``.
        resample: Resampling filter forwarded to PIL.

    Returns:
        The resulting ``PIL.Image.Image``.
    """
    target = (size, size)
    image = Image.fromarray(array)

    if not keep_ratio:
        return image.resize(target, resample)

    image.thumbnail(target, resample)
    return image

In [None]:
TEST_PATH = f'/kaggle/working/test_{SIZE[0]}x{SIZE[1]}'

# Output folder for the resized JPEGs, and the list of DICOM files to convert.
os.makedirs(TEST_PATH, exist_ok=True)
filenames = glob.glob('/kaggle/input/siim-covid19-detection/test/*/*/*.dcm')

def persist_image(path):
    """Convert one DICOM file into a resized JPEG stored under TEST_PATH.

    Args:
        path: Path of the source ``.dcm`` file.

    Returns:
        ``[image_id, shape[0], shape[1]]`` where ``image_id`` is the file name
        without its extension and the shape is that of the array returned by
        ``read_xray`` (i.e. before resizing).
    """
    pixels = read_xray(path)
    stem, _extension = os.path.splitext(path)
    image_id = os.path.basename(stem)
    target_file = os.path.join(TEST_PATH, f"{image_id}.jpg")
    resize_xray(pixels, size=SIZE[0]).save(target_file)
    return [image_id, pixels.shape[0], pixels.shape[1]]

# One worker per CPU; results come back in the order of `filenames`.
with Pool(processes=cpu_count()) as pool:
    img_metadata = pool.map(persist_image, filenames)

In [None]:
print(f'Number of test images: {len(os.listdir(TEST_PATH))}')

In [None]:
test_imgs_study_mapping = pd.DataFrame(img_metadata, columns=['image_id', 'dim0', 'dim1'])

# Associate image-level ids with study-level ids.
# Note that a study might have more than one image-level id.
# The directory tree is walked once into a dict and joined with a single
# vectorised `map`, instead of scanning the whole frame for every file. Ids are
# taken with `splitext`, the same way `persist_image` builds `image_id`.
test_root = '../input/siim-covid19-detection/test'
image_to_study = {}
for study_dir in os.listdir(test_root):
    study_path = os.path.join(test_root, study_dir)
    for series in os.listdir(study_path):
        for image in os.listdir(os.path.join(study_path, series)):
            image_to_study[os.path.splitext(image)[0]] = study_dir

# Images without a matching file get NaN, and the column always exists.
test_imgs_study_mapping['study_id'] = test_imgs_study_mapping['image_id'].map(image_to_study)

test_imgs_study_mapping.head()

In [None]:
# !mkdir "/kaggle/working/test_{SIZE[0]}x{SIZE[1]}"
# !tar -xzf "/kaggle/input/train-{SIZE[0]}x{SIZE[1]}/test_{SIZE[0]}x{SIZE[1]}.tar.gz" -C "/kaggle/working/test_{SIZE[0]}x{SIZE[1]}" .

In [None]:
# Make results reproducible
def seed_everything(seed):
    """Seed every random number source used here and make cuDNN deterministic.

    Args:
        seed: Value handed to Python's ``random``, NumPy, PyTorch (CPU and
            CUDA) and written to the ``PYTHONHASHSEED`` environment variable.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)

    # Same order as before: Python, NumPy, torch CPU, torch CUDA.
    for seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)

    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True


seed_everything(SEED)

In [None]:
# Collect every `.pth` checkpoint stored in a MODEL_DIR sub-folder whose name
# starts with MODEL_NAME.
model_pattern = str(MODEL_NAME)
checkpoint_glob = os.path.join(MODEL_DIR, MODEL_NAME + "*", "*.pth")
modelnames = glob.glob(checkpoint_glob)
print("{}: Models Found".format(len(modelnames)))

In [None]:
# test_imgs_study_mapping = pd.read_csv("/kaggle/input/test-image-to-study-mapping/test_study_id_to_image_mapping.csv")

In [None]:
# TEST_DATA_PATH = os.path.join(RESIZE_DIR, f"test_{SIZE[0]}x{SIZE[1]}")
print("Test Data Path {}".format(TEST_PATH))
def get_img_path(row):
    """Return the path of the resized JPEG that belongs to ``row["image_id"]``.

    Args:
        row: Mapping-like row (e.g. a DataFrame row) with an ``image_id``
            entry.

    Returns:
        ``<TEST_PATH>/<image_id>.jpg`` when that file exists. Otherwise the
        first file (in sorted order) matching ``<image_id>*.jpg`` in
        TEST_PATH, or ``None`` when nothing matches.
    """
    img_id = row["image_id"]

    # Prefer the exact file name: a bare prefix glob could hand back a
    # different image whose id merely starts with this id.
    exact_path = os.path.join(TEST_PATH, "{}.jpg".format(img_id))
    if os.path.isfile(exact_path):
        return exact_path

    # Fallback kept for files saved with a suffix after the id. The id is
    # escaped so glob metacharacters in it are matched literally, and the
    # matches are sorted so the choice does not depend on directory order.
    pattern = os.path.join(TEST_PATH, "{}*.jpg".format(glob.escape(str(img_id))))
    candidates = sorted(glob.glob(pattern))
    return candidates[0] if candidates else None

In [None]:
test_imgs_study_mapping["path"] = test_imgs_study_mapping.apply(get_img_path, axis=1)

In [None]:
test_imgs_study_mapping[test_imgs_study_mapping["path"].isna()]

In [None]:
def load_model(modelname):
    """Rebuild the classifier and load the weights stored in a checkpoint.

    Args:
        modelname: Path of a ``.pth`` checkpoint. The file must hold a dict
            with the keys ``epoch``, ``map_at_2``, ``auc`` and ``state_dict``.

    Returns:
        A ``MODEL_NAME`` timm model whose ``classifier`` layer has
        ``NUM_CLASSES`` outputs, with the checkpoint weights loaded (strict
        key matching). The weights are on the CPU; the caller moves the model
        to the target device.
    """
    # map_location="cpu" lets checkpoints saved from a GPU be opened on a
    # machine without CUDA.
    # NOTE: torch.load unpickles the file — only load trusted checkpoints.
    summary = torch.load(modelname, map_location="cpu")
    print(f"Loaded model {modelname}")
    print(f"Epoch {summary['epoch']}")
    print(f"Map@2 {summary['map_at_2']}")
    print(f"AUC@2 {summary['auc']}")

    model = timm.create_model(model_name=MODEL_NAME, pretrained=False, in_chans=3)
    # Swap the stock head for one with NUM_CLASSES outputs before loading, so
    # the state dict matches under strict=True.
    model.classifier = torch.nn.Linear(
        in_features=model.classifier.in_features, out_features=NUM_CLASSES
    )
    model.load_state_dict(summary["state_dict"], strict=True)
    return model

In [None]:
class XRayDatasetFromDF(Dataset):
    """Dataset that serves ``(image, label)`` pairs described by a DataFrame.

    Each row of ``df`` needs a ``path`` column; in training mode it also needs
    an ``int_label`` column. Rows are addressed through the sorted index of
    ``df``.

    Args:
        df: DataFrame with one row per image.
        train: When True the label is read from ``int_label``; otherwise every
            label is ``-1``.
        augment: When True, random flips, shift/scale/rotate and
            brightness/contrast are applied, followed by ImageNet
            normalisation and ``ToTensorV2``.
        normalize: Only consulted when ``augment`` is False: when True the
            image is normalised and converted with ``ToTensorV2``. When both
            flags are False the transform list is empty.
        size: Forwarded to ``dicom2array`` for paths that do not end in
            ``.jpg``.
    """

    def __init__(self, df, train=True, augment=True, normalize=False, size=(384, 384)):
        self.df = df
        self.name_to_label_map = {
            "Negative": 0,
            "Typical": 1,
            "Indeterminate": 2,
            "Atypical": 3,
        }
        # Sorted index labels of `df`; position `idx` in __getitem__ refers to
        # this ordering.
        self.study_ids = df.index.sort_values()
        self.path_suffix = (
            os.path.join(DATA_DIR, "train") if train else os.path.join(DATA_DIR, "test")
        )
        self._train = train
        self._augment = augment
        self._normalize = normalize
        self._size = size
        self._transform_list = []

        if self._augment:
            self._transform_list.extend(
                [
                    A.VerticalFlip(p=0.5),
                    A.HorizontalFlip(p=0.5),
                    A.ShiftScaleRotate(
                        scale_limit=0.20,
                        rotate_limit=10,
                        shift_limit=0.1,
                        p=0.5,
                        border_mode=cv2.BORDER_CONSTANT,
                        value=0,
                    ),
                    A.RandomBrightnessContrast(p=0.5),
                ]
            )
            self._transform_list.extend(self._to_model_input())
        elif self._normalize:  # test mode without augmentation
            self._transform_list.extend(self._to_model_input())
        self._transforms = A.Compose(self._transform_list)

    @staticmethod
    def _to_model_input():
        """Return the shared tail of the pipeline: ImageNet normalise, then tensor."""
        return [
            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            ToTensorV2(),
        ]

    def __len__(self):
        """Number of rows served by the dataset."""
        return len(self.study_ids)

    def assign_label(self, row):
        """Return the integer label of the first truthy class column in ``row``.

        Columns are checked in the order of ``name_to_label_map``; ``None`` is
        returned when none of them is truthy.
        """
        for k in self.name_to_label_map:
            if row[k]:
                return self.name_to_label_map[k]

    def __getitem__(self, idx):
        """Load, colour-convert and transform the image at position ``idx``.

        Returns:
            ``(img, label)`` where ``img`` is the output of the transform
            pipeline and ``label`` is ``int_label`` in training mode, else
            ``-1``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the ``.jpg`` at the row's
                ``path``.
        """
        study_id = self.study_ids[idx]
        record = self.df.loc[study_id]

        path = record["path"]
        label = record["int_label"] if self._train else -1

        if path.endswith(".jpg"):
            pixels = cv2.imread(path)
            # cv2.imread reports failure by returning None instead of raising;
            # surface it here rather than as an opaque cvtColor error.
            if pixels is None:
                raise FileNotFoundError(f"cv2.imread could not read image: {path}")
        else:
            # NOTE(review): dicom2array is not defined in the visible part of
            # this notebook — confirm it exists before feeding non-JPEG paths.
            pixels = dicom2array(path, size=self._size)

        img = cv2.cvtColor(pixels, cv2.COLOR_BGR2RGB)
        img = self._transforms(image=img)["image"]

        return img, label

In [None]:
# Test-time dataset: train=False makes every label -1. augment=True keeps the
# random flip/shift/brightness transforms enabled, so the repeated passes made
# by predict() see different views of each image (test-time augmentation).
test_ds = XRayDatasetFromDF(df=test_imgs_study_mapping, train=False, augment=True, normalize=False, size=SIZE)
# shuffle=False and drop_last=False keep the batches complete and in dataset
# order, which predict() relies on when it fills rows by a running offset.
test_dl = torch.utils.data.DataLoader(
        dataset=test_ds,
        batch_size=BATCHSIZE * 2,
        pin_memory=True,
        num_workers=8,
        drop_last=False,
        shuffle=False,
        prefetch_factor=8,
    )

In [None]:
def predict(model, slide_dl, tta_times=10):
    """Average softmax class probabilities over repeated passes of a loader.

    Args:
        model: Callable mapping an image batch to scores with ``NUM_CLASSES``
            columns; the softmax is taken over dimension 1. It is called as
            is, so the caller is responsible for putting it in eval mode and
            on ``dev``.
        slide_dl: DataLoader yielding ``(images, labels)`` batches in a fixed
            order. Rows of the result follow that order.
        tta_times: Number of full passes over ``slide_dl`` to average.

    Returns:
        ``np.ndarray`` of shape ``(len(slide_dl.dataset), NUM_CLASSES)``
        holding the mean probability of each class per sample.

    Raises:
        ValueError: If ``tta_times`` is smaller than 1.
    """
    if tta_times < 1:
        raise ValueError("tta_times must be at least 1, got {}".format(tta_times))

    sample_size = len(slide_dl.dataset)
    print("Predicting on {} Images {} times".format(sample_size, tta_times))
    probs = np.zeros((sample_size, NUM_CLASSES))

    with torch.no_grad():
        for _ in range(tta_times):
            # Row offset of the current batch inside `probs`; restarts on every pass.
            offset = 0

            for img, _label in tqdm(slide_dl):
                curr_batch_size = img.shape[0]

                pred = model(img.to(dev))
                prob = pred.softmax(dim=1)

                probs[offset : offset + curr_batch_size, :] += prob.cpu().numpy()
                offset += curr_batch_size

    return probs / tta_times

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(dev)

In [None]:
# Run every checkpoint over the test loader and keep one snapshot of the
# mapping frame (with that model's class probabilities) per checkpoint.
predictions = []
for modelname in modelnames:
    model = load_model(modelname).to(dev).eval()
    probs = predict(model, test_dl)

    # Write each class column from the matching column of the probability matrix.
    for class_name, col_idx in test_ds.name_to_label_map.items():
        test_imgs_study_mapping[class_name] = probs[:, col_idx]
    test_imgs_study_mapping["modelname"] = modelname

    # Copy, because the same frame is overwritten on the next iteration.
    predictions.append(test_imgs_study_mapping.copy())

In [None]:
predictions_df = pd.concat(predictions)

In [None]:
# Average the class probabilities of all rows (images x models) that share a study.
class_columns = ["Negative", "Typical", "Indeterminate", "Atypical"]
mean_predictions_df = (
    predictions_df
    .groupby("study_id")[class_columns]
    .mean()
    .reset_index()
)

In [None]:
mean_predictions_df["id"] = mean_predictions_df["study_id"] + "_study"

In [None]:
mean_predictions_df

In [None]:
OPBB = "0 0 1 1"
def generate_cls_prediction_strings(row):
    """Build the study-level PredictionString for one row of probabilities.

    For every class name in ``test_ds.name_to_label_map`` (in that mapping's
    order) three space-separated tokens are emitted: the lower-cased class
    name, ``str`` of the row's value for that class, and ``OPBB``.

    Args:
        row: Mapping-like row indexed by the class names.

    Returns:
        The tokens of all classes joined by single spaces.
    """
    tokens = [
        token
        for class_name in test_ds.name_to_label_map
        for token in (class_name.lower(), str(row[class_name]), OPBB)
    ]
    return " ".join(tokens)

In [None]:
mean_predictions_df["PredictionString"] = mean_predictions_df.apply(generate_cls_prediction_strings, axis=1)

In [None]:
cls_submission_df = mean_predictions_df[["id", "PredictionString"]]

In [None]:
import tensorflow as tf
print(tf.__version__)
import torch
print(f"Setup complete. Using torch {torch.__version__} ({torch.cuda.get_device_properties(0).name if torch.cuda.is_available() else 'CPU'})")

import gc
import glob
from tqdm import tqdm
from shutil import copyfile

In [None]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs, so enable
        # it on every physical GPU first.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        # Listing logical devices initialises the runtime, after which physical
        # devices can no longer be reconfigured. It therefore has to happen
        # once, after the loop, not after the first GPU.
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)

In [None]:
# Re-read the sample submission for the detection stage (same file that was
# loaded earlier in the notebook for the classification stage).
sub_df = pd.read_csv('/kaggle/input/siim-covid19-detection/sample_submission.csv')
print(len(sub_df))
sub_df.head()

In [None]:
# Study-level rows of the sample submission (ids containing "_study").
is_study_row = sub_df["id"].str.contains('_study')
study_df = sub_df.loc[is_study_row]
len(study_df)

In [None]:
# Image-level rows of the sample submission (ids containing "_image").
is_image_row = sub_df["id"].str.contains('_image')
image_df = sub_df.loc[is_image_row]
len(image_df)

In [None]:
def read_xray(path, voi_lut = True, fix_monochrome = True):
    """Read a DICOM file and return its pixels min-max scaled to 8 bits.

    NOTE: this redefines the function of the same name declared earlier in the
    notebook.

    Original from: https://www.kaggle.com/raddar/convert-dicom-to-np-array-the-correct-way

    Args:
        path: Path of the DICOM file to read.
        voi_lut: When True, the decoded pixels are passed through
            ``apply_voi_lut`` (VOI LUT, if provided by the DICOM device, gives
            a "human-friendly" view); otherwise the raw pixel array is used.
        fix_monochrome: When True, images whose ``PhotometricInterpretation``
            is ``"MONOCHROME1"`` are inverted so they do not look negative.

    Returns:
        ``np.uint8`` array with values in ``[0, 255]``. An image whose pixels
        are all equal comes back as all zeros.
    """
    dicom = pydicom.read_file(path)

    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array

    # Depending on this value, the X-ray may look inverted - fix that.
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    data = data - np.min(data)
    peak = np.max(data)
    # A constant image has peak == 0 after the shift; dividing would produce
    # NaNs (and an undefined uint8 cast), so leave the zeros untouched.
    if peak > 0:
        data = data / peak
    data = (data * 255).astype(np.uint8)

    return data

def resize_xray(array, size, keep_ratio=False, resample=Image.LANCZOS):
    """Turn a pixel array into a PIL image resized to a ``size`` x ``size`` box.

    NOTE: this redefines the function of the same name declared earlier in the
    notebook.

    Original from: https://www.kaggle.com/xhlulu/vinbigdata-process-and-resize-to-image

    Args:
        array: Pixel array accepted by ``Image.fromarray``.
        size: Edge length of the square target box.
        keep_ratio: When True the image is shrunk in place with
            ``Image.thumbnail``; otherwise it is resized to exactly
            ``(size, size)``.
        resample: Resampling filter forwarded to PIL.

    Returns:
        The resulting ``PIL.Image.Image``.
    """
    target = (size, size)
    image = Image.fromarray(array)

    if not keep_ratio:
        return image.resize(target, resample)

    image.thumbnail(target, resample)
    return image

In [None]:
# NOTE: rebinds TEST_PATH (set earlier for the classifier images) to the folder
# that holds the detector's input images.
TEST_PATH = '/kaggle/tmp/test/'
IMG_SIZE = 512

os.makedirs(TEST_PATH, exist_ok=True)
filenames = glob.glob('/kaggle/input/siim-covid19-detection/test/*/*/*.dcm')

def persist_image(path):
    """Convert one DICOM file into an IMG_SIZE x IMG_SIZE JPEG under TEST_PATH.

    Args:
        path: Path of the source ``.dcm`` file.

    Returns:
        ``[image_id, shape[0], shape[1]]`` where ``image_id`` is the file name
        without its extension and the shape is that of the array returned by
        ``read_xray`` (i.e. before resizing).
    """
    pixels = read_xray(path)
    stem, _extension = os.path.splitext(path)
    image_id = os.path.basename(stem)
    target_file = os.path.join(TEST_PATH, f"{image_id}.jpg")
    resize_xray(pixels, size=IMG_SIZE).save(target_file)
    return [image_id, pixels.shape[0], pixels.shape[1]]

# One worker per CPU; results come back in the order of `filenames`.
with Pool(processes=cpu_count()) as pool:
    img_metadata = pool.map(persist_image, filenames)

In [None]:
print(f'Number of test images: {len(os.listdir(TEST_PATH))}')

In [None]:
meta_df = pd.DataFrame(img_metadata, columns=['image_id', 'dim0', 'dim1'])

# Associate image-level ids with study-level ids.
# Note that a study might have more than one image-level id.
# The directory tree is walked once into a dict and joined with a single
# vectorised `map`, instead of scanning the whole frame for every file. Ids are
# taken with `splitext`, the same way `persist_image` builds `image_id`.
test_root = '../input/siim-covid19-detection/test'
image_to_study = {}
for study_dir in os.listdir(test_root):
    study_path = os.path.join(test_root, study_dir)
    for series in os.listdir(study_path):
        for image in os.listdir(os.path.join(study_path, series)):
            image_to_study[os.path.splitext(image)[0]] = study_dir

# Images without a matching file get NaN, and the column always exists.
meta_df['study_id'] = meta_df['image_id'].map(image_to_study)

meta_df.head()

In [None]:
%cp -r /kaggle/input/yolomodelsm6allfolds /kaggle/working

In [None]:
# Run YOLOv5's detect.py on the JPEGs in TEST_PATH, passing the five fold
# checkpoints together to --weights. --save-txt and --save-conf make it write
# text label files including confidences; later cells read them from PRED_PATH.
# NOTE(review): presumably detect.py ensembles the listed weights — confirm.
!python /kaggle/input/siimcovidyolov5l/yolov5/detect.py --weights /kaggle/working/yolomodelsm6allfolds/yolov5m6fold0.pt /kaggle/working/yolomodelsm6allfolds/yolov5m6fold1.pt /kaggle/working/yolomodelsm6allfolds/yolov5m6fold2.pt /kaggle/working/yolomodelsm6allfolds/yolov5m6fold3.pt /kaggle/working/yolomodelsm6allfolds/yolov5m6fold4.pt \
                                      --source {TEST_PATH} \
                                      --img 512 \
                                      --conf 0.25 \
                                      --iou-thres 0.5 \
                                      --max-det 10 \
                                      --save-txt \
                                      --save-conf \
                                      --augment

In [None]:
# Folder (relative to the current working directory) from which the detector's
# label .txt files are read.
# NOTE(review): assumes the detect.py run wrote to `exp`; a leftover
# `runs/detect/exp` from an earlier run would presumably shift it — confirm.
PRED_PATH = 'runs/detect/exp/labels'
prediction_files = os.listdir(PRED_PATH)
print(f'Number of opacity predicted by YOLOv5: {len(prediction_files)}')

In [None]:
def correct_bbox_format(bboxes, img_size=None):
    """Convert normalised centre/size boxes to integer pixel corner boxes.

    Args:
        bboxes: Iterable of boxes whose first four entries are
            ``(x_center, y_center, width, height)`` as fractions of the image
            side.
        img_size: Side length in pixels of the (square) image the fractions
            refer to. Defaults to the notebook-level ``IMG_SIZE``.

    Returns:
        List of ``[xmin, ymin, xmax, ymax]`` lists of Python ints. Values are
        not clipped to the image bounds.
    """
    if img_size is None:
        img_size = IMG_SIZE

    correct_bboxes = []
    for b in bboxes:
        xc, yc = int(np.round(b[0]*img_size)), int(np.round(b[1]*img_size))
        w, h = int(np.round(b[2]*img_size)), int(np.round(b[3]*img_size))

        # Half extents are rounded once and reused for both corners.
        half_w = int(np.round(w/2))
        half_h = int(np.round(h/2))

        correct_bboxes.append([xc - half_w, yc - half_h, xc + half_w, yc + half_h])

    return correct_bboxes

def scale_bboxes_to_original(row, bboxes, img_size=None):
    """Map pixel boxes from the resized square image back to the original size.

    Args:
        row: Object with ``dim0`` and ``dim1`` attributes. ``dim0`` scales the
            y coordinates and ``dim1`` the x coordinates.
        bboxes: Iterable of ``[xmin, ymin, xmax, ymax]`` boxes in the resized
            image.
        img_size: Side length in pixels of the resized image. Defaults to the
            notebook-level ``IMG_SIZE``.

    Returns:
        List of ``[xmin, ymin, xmax, ymax]`` lists of Python ints in original
        image coordinates.
    """
    if img_size is None:
        img_size = IMG_SIZE

    # Scaling factors original -> resized; dividing by them undoes the resize.
    scale_x = img_size/row.dim1
    scale_y = img_size/row.dim0

    scaled_bboxes = []
    for xmin, ymin, xmax, ymax in bboxes:
        scaled_bboxes.append([
            int(np.round(xmin/scale_x)),
            int(np.round(ymin/scale_y)),
            int(np.round(xmax/scale_x)),
            int(np.round(ymax/scale_y)),
        ])

    return scaled_bboxes

# Read the txt file generated by YOLOv5 during inference and extract
# confidence and bounding box coordinates.
def get_conf_bboxes(file_path):
    """Parse a label file into confidences and boxes.

    Every non-blank line is split on whitespace and converted to floats. The
    last value is taken as the confidence, the first value is dropped, and the
    values in between form the box.

    Args:
        file_path: Path of the ``.txt`` file to read.

    Returns:
        ``(confidence, bboxes)``: a list of floats and a list of float lists,
        one entry per parsed line, in file order.

    Raises:
        ValueError: If a field cannot be converted to ``float``.
    """
    confidence = []
    bboxes = []
    with open(file_path, 'r') as file:
        for line in file:
            # split() with no argument tolerates trailing spaces and repeated
            # separators; blank lines yield no fields and are skipped.
            fields = line.split()
            if not fields:
                continue
            preds = list(map(float, fields))
            confidence.append(preds[-1])
            bboxes.append(preds[1:-1])
    return confidence, bboxes

In [None]:
# Build one image-level PredictionString per row of meta_df, in row order.
image_pred_strings = []
ctr = 0  # detections discarded because the study was classified as negative

NONE_PREDICTION = "none 1 0 0 1 1"
# Detections are dropped when "Negative" is the top study class with more than
# this mean probability.
NEGATIVE_OVERRIDE_THRESHOLD = 0.7

# Set membership is O(1) per image instead of scanning the directory listing.
prediction_file_set = set(prediction_files)

# Iterate over meta_df itself: the strings are later assigned back to meta_df,
# so their count and order must follow its rows, not the sample submission's.
for row in tqdm(meta_df.itertuples(index=False), total=len(meta_df)):
    id_name = row.image_id

    if f'{id_name}.txt' not in prediction_file_set:
        image_pred_strings.append(NONE_PREDICTION)
        continue

    # Opacity label: read the boxes and map them back to original pixels.
    confidence, bboxes = get_conf_bboxes(f'{PRED_PATH}/{id_name}.txt')
    bboxes = correct_bbox_format(bboxes)
    ori_bboxes = scale_bboxes_to_original(row, bboxes)

    pred_string = ' '.join(
        f'opacity {conf} ' + ' '.join(map(str, box))
        for conf, box in zip(confidence, ori_bboxes)
    )

    # Study-level class probabilities for the study this image belongs to.
    study_row = mean_predictions_df.loc[mean_predictions_df['study_id'] == row.study_id]
    neg = study_row.Negative.item()
    typ = study_row.Typical.item()
    ind = study_row.Indeterminate.item()
    atp = study_row.Atypical.item()
    output_class = np.argmax(np.array([neg,typ,ind,atp]))

    if output_class == 0 and neg > NEGATIVE_OVERRIDE_THRESHOLD:
        ctr+=1
        image_pred_strings.append(NONE_PREDICTION)
    else:
        image_pred_strings.append(pred_string)
print('Number of images that were detected as opacity but are forced to none on the basis of classification output are :' + str(ctr))

In [None]:
# Attach the prediction strings to meta_df (the list must hold one entry per
# meta_df row, in row order), then keep only the columns needed to build the
# image-level part of the submission.
meta_df['PredictionString'] = image_pred_strings
image_df = meta_df[['study_id','image_id', 'PredictionString']]
# image_df.insert(0, 'id', image_df.apply(lambda row: row.image_id+'_image', axis=1))
# image_df = image_df.drop('image_id', axis=1)
image_df.head(20)

In [None]:
# Add the submission id ("<image_id>_image") as the first column. `assign`
# returns a new frame, so the cell can be re-run (an in-place `insert` raises
# once the column exists) and no slice of meta_df is mutated.
image_df = image_df.assign(id=image_df['image_id'] + '_image')
image_df = image_df[['id'] + [col for col in image_df.columns if col != 'id']]

In [None]:
image_df_new = image_df[['id','PredictionString']]

In [None]:
# Stack the study-level rows and the image-level rows into one submission.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in 2.0; ignore_index=True gives the same fresh 0..n-1 index that
# `.reset_index(drop=True)` produced.
# NOTE: re-running this cell stacks the image rows again, because
# cls_submission_df is rebound to the combined frame.
cls_submission_df = pd.concat([cls_submission_df, image_df_new], ignore_index=True)
cls_submission_df.to_csv('/kaggle/working/submission.csv',index = False)

In [None]:
# Remove intermediate artefacts: the detector's `runs` folder, the resized
# classifier images and the copied YOLO weights.
# NOTE(review): `runs` and `test_384x384` are relative paths, and the latter is
# hard-coded; it only matches the folder created earlier while SIZE == (384, 384)
# and the working directory is /kaggle/working — confirm.
%rm -rf runs
%rm -rf test_384x384
%rm -rf /kaggle/working/yolomodelsm6allfolds

In [None]:
# ## MMDetection compatible torch installation
# !pip install '/kaggle/input/pytorch-170-cuda-toolkit-110221/torch-1.7.0+cu110-cp37-cp37m-linux_x86_64.whl' --no-deps
# !pip install '/kaggle/input/pytorch-170-cuda-toolkit-110221/torchvision-0.8.1+cu110-cp37-cp37m-linux_x86_64.whl' --no-deps
# !pip install '/kaggle/input/pytorch-170-cuda-toolkit-110221/torchaudio-0.7.0-cp37-cp37m-linux_x86_64.whl' --no-deps

# ## Compatible Cuda Toolkit installation
# !mkdir -p /kaggle/tmp && cp /kaggle/input/pytorch-170-cuda-toolkit-110221/cudatoolkit-11.0.221-h6bb024c_0 /kaggle/tmp/cudatoolkit-11.0.221-h6bb024c_0.tar.bz2 && conda install /kaggle/tmp/cudatoolkit-11.0.221-h6bb024c_0.tar.bz2 -y --offline

# ## MMDetection Offline Installation
# !pip install '/kaggle/input/mmdetectionv2140/addict-2.4.0-py3-none-any.whl' --no-deps
# !pip install '/kaggle/input/mmdetectionv2140/yapf-0.31.0-py2.py3-none-any.whl' --no-deps
# !pip install '/kaggle/input/mmdetectionv2140/terminal-0.4.0-py3-none-any.whl' --no-deps
# !pip install '/kaggle/input/mmdetectionv2140/terminaltables-3.1.0-py3-none-any.whl' --no-deps
# !pip install '/kaggle/input/mmdetectionv2140/mmcv_full-1_3_8-cu110-torch1_7_0/mmcv_full-1.3.8-cp37-cp37m-manylinux1_x86_64.whl' --no-deps
# !pip install '/kaggle/input/mmdetectionv2140/pycocotools-2.0.2/pycocotools-2.0.2' --no-deps
# !pip install '/kaggle/input/mmdetectionv2140/mmpycocotools-12.0.3/mmpycocotools-12.0.3' --no-deps

# !cp -r /kaggle/input/swintrans1024repo/covid-detection /kaggle/working/
# # !mv /kaggle/working/mmdetection-2.14.0 /kaggle/working/mmdetection
# %cd /kaggle/working/covid-detection
# !pip install -v -e . --no-deps
# %cd /kaggle/working/

In [None]:
# import sys
# sys.path.append('/opt/conda/lib/python3.7/site-packages/mmdet.egg-link')
# sys.path.append('/kaggle/working/mmdetection')

In [None]:
# sub_df = pd.read_csv('/kaggle/input/siim-covid19-detection/sample_submission.csv')

# # Form image dataframe
# sub_df['level'] = sub_df.id.map(lambda idx: idx[-5:])
# image_df = sub_df[sub_df.level=='image'].rename({'id':'image_id'}, axis=1)

# dcm_path = glob.glob('/kaggle/input/siim-covid19-detection/test/**/*dcm', recursive=True)
# test_meta = pd.DataFrame({'dcm_path':dcm_path})
# test_meta['image_id'] = test_meta.dcm_path.map(lambda x: x.split('/')[-1].replace('.dcm', '')+'_image')
# test_meta['study_id'] = test_meta.dcm_path.map(lambda x: x.split('/')[-3].replace('.dcm', '')+'_study')

# image_df = image_df.merge(test_meta, on='image_id', how='left')
# image_df

In [None]:
# IMAGE_DIMS = (1024, 1024)

# image_dir = f'/kaggle/tmp/test/image/'
# os.makedirs(image_dir, exist_ok=True)

# def read_xray(path, voi_lut = True, fix_monochrome = True):
#     # Original from: https://www.kaggle.com/raddar/convert-dicom-to-np-array-the-correct-way
#     dicom = pydicom.read_file(path)
    
#     # VOI LUT (if available by DICOM device) is used to transform raw DICOM data to 
#     # "human-friendly" view
#     if voi_lut:
#         data = apply_voi_lut(dicom.pixel_array, dicom)
#     else:
#         data = dicom.pixel_array
               
#     # depending on this value, X-ray may look inverted - fix that:
#     if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
#         data = np.amax(data) - data
        
#     data = data - np.min(data)
#     data = data / np.max(data)
#     data = (data * 255).astype(np.uint8)
#     return data

# def resize(array, size, keep_ratio=False, resample=Image.LANCZOS):
#     # Original from: https://www.kaggle.com/xhlulu/vinbigdata-process-and-resize-to-image
#     im = Image.fromarray(array)
    
#     if keep_ratio:
#         im.thumbnail((size, size), resample)
#     else:
#         im = im.resize((size, size), resample)
#     return im

# image_df['dim0'] = -1
# image_df['dim1'] = -1
# img_ext = '.jpg'
# for index, row in tqdm(image_df[['image_id', 'dcm_path', 'dim0', 'dim1']].iterrows(), total=image_df.shape[0]):
#     # set keep_ratio=True to have original aspect ratio
#     xray = read_xray(row['dcm_path'])
#     im = resize(xray, size=IMAGE_DIMS[0])  
#     im.save(os.path.join(image_dir, row['image_id']+img_ext))
#     image_df.loc[image_df.image_id==row.image_id, 'dim0'] = xray.shape[0]
#     image_df.loc[image_df.image_id==row.image_id, 'dim1'] = xray.shape[1]

In [None]:
# %cd /kaggle/working/covid-detection

In [None]:
# from mmdet.apis import init_detector, inference_detector
# import torch
# import mmcv
# import os
# from tqdm import tqdm

# config_file = 'configs/swin/mask_rcnn_swin_tiny_patch4_window7_mstrain_480-800_adamw_3x_coco.py'
# checkpoint_file = '/kaggle/input/swinmodels1024/epoch_11.pth'

# model = init_detector(config_file, checkpoint_file, device='cuda:0')

# classes = ['opacity']
# imgNoBB = []
# pred_dir = 'preds'
# os.makedirs(pred_dir,exist_ok=True)
# for file in tqdm(os.listdir(image_dir)):
#     # # test a single image and show the results
#     img = os.path.join(image_dir,file) # or img = mmcv.imread(img), which will only load it once
#     try:
#         result = inference_detector(model, img)
#     except RuntimeError:
#         imgNoBB.append(file.split('.')[0])
#         pass
#     # f.write(classes[int(npcid[0][i][0])] + " " + str(npscore[0][i][0]) + " " + str(xmin) + " " + str(ymin) + " " + str(
#     #     xmax) + " " + str(ymax) + "\n")
#     for i,cls in enumerate(classes):
#         for arr in result[0][i]:
#             if arr[-1] > 0.6:
#                 with open(os.path.join(pred_dir,file.replace(img_ext,'.txt')),'a') as f:
#     #                 if arr[-1] > 0.5:
#                     f.write(str(arr[-1]) + ' ' + str(arr[0]) + ' ' + str(arr[1]) + ' ' + str(arr[2]) + ' ' + str(arr[3]) + "\n")
# # print('Images with no opacities : '+str(len(imgNoBB)))

In [None]:
# prediction_files = os.listdir(pred_dir)
# print('No of predictions by Swin Transformer are : '+ str(len(prediction_files)))

In [None]:
# IMG_SIZE = 1024
# def scale_bboxes_to_original(row, bboxes):
#     # Get scaling factor
#     scale_x = IMG_SIZE/row.dim1
#     scale_y = IMG_SIZE/row.dim0
    
#     scaled_bboxes = []
#     for bbox in bboxes:
#         xmin, ymin, xmax, ymax = bbox
        
#         xmin = int(np.round(xmin/scale_x))
#         ymin = int(np.round(ymin/scale_y))
#         xmax = int(np.round(xmax/scale_x))
#         ymax = int(np.round(ymax/scale_y))
        
#         scaled_bboxes.append([xmin, ymin, xmax, ymax])
        
#     return scaled_bboxes

# def get_conf_bboxes(file_path):
#     confidence = []
#     bboxes = []
#     with open(file_path, 'r') as file:
#         for line in file:
#             preds = line.strip('\n').split(' ')
#             preds = list(map(float, preds))
#             confidence.append(preds[0])
#             bboxes.append(preds[1:])
#     return confidence, bboxes

In [None]:
# image_pred_strings = []
# ctr = 0
# for i in tqdm(range(len(image_df))):
#     row = image_df.loc[i]
#     id_name = row.image_id
# #     print(prediction_files[0])
# #     print(id_name)
# #     break
#     if f'{id_name}.txt' in prediction_files:
#         # opacity label
#         confidence, bboxes = get_conf_bboxes(os.path.join(pred_dir,f'{id_name}.txt'))
#         ori_bboxes = scale_bboxes_to_original(row, bboxes)
        
#         pred_string = ''
#         for j, conf in enumerate(confidence):
#             pred_string += f'opacity {conf} ' + ' '.join(map(str, ori_bboxes[j])) + ' '
        
#         row = mean_predictions_df.loc[mean_predictions_df['study_id'] == row.study_id.split("_")[0]]
#         neg = row.Negative.item()
#         typ = row.Typical.item()
#         ind = row.Indeterminate.item()
#         atp = row.Atypical.item()
#         output_class = np.argmax(np.array([neg,typ,ind,atp]))
#         if output_class == 0 and neg > 0.7:
#             ctr+=1
#             image_pred_strings.append("none 1 0 0 1 1")
#         else:
#             image_pred_strings.append(pred_string[:-1])
#     else:
#         image_pred_strings.append("none 1 0 0 1 1")
# print('Number of images that were detected as opacity but are forced to none on the basis of classification output are :' + str(ctr))

In [None]:
# image_df['PredictionString'] = image_pred_strings
# image_df = image_df[['study_id','image_id', 'PredictionString']]
# # image_df.insert(0, 'id', image_df.apply(lambda row: row.image_id+'_image', axis=1))
# # image_df = image_df.drop('image_id', axis=1)
# image_df.head(20)

In [None]:
# image_df.insert(0, 'id', image_df.apply(lambda row: row.image_id, axis=1))

In [None]:
# image_df_new = image_df[['id','PredictionString']]

In [None]:
# %cd /kaggle/working

In [None]:
# cls_submission_df = cls_submission_df.append(image_df_new).reset_index(drop=True)
# cls_submission_df.to_csv('/kaggle/working/submission.csv',index = False)

In [None]:
# %rm -rf covid-detection
# %rm -rf test_384x384