# FasterRCNN

I am a little bit confused about the repeated instances of boxes. By exploding the dataset into multiple instances of the same ID, we are viewing every image N times, where N is the number of boxes (Opacity Count).

In [1]:
# imports
import torch
from torch import nn
from torchvision.models import resnet18
from torch.utils.data import Dataset
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import platform
from torch.autograd import Variable
import time
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import  FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
import glob
import os
from sklearn import metrics
from sklearn.model_selection import StratifiedKFold

# vis
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm
import sklearn.metrics
from math import ceil
import cv2
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
import PIL

In [2]:
# load the exploded training annotations (several rows can share one image id)
train = pd.read_csv(filepath_or_buffer='../input/train_exploded_filled.csv')

In [3]:
class Config:
    """Notebook-wide settings, read as plain class attributes (never instantiated)."""

    # reproducibility: used as random_state for the stratified fold split
    seed = 2021

    # data
    train_pcent = 0.8
    reshape_size = (400, 400)  # default for SIIM's reshape_size argument
    num_classes = 4

    # model / training (presumably batch sizes and epoch count; confirm at use site)
    model_name = 'FastRCNN'
    NB_EPOCHS = 3
    TRAIN_BS = VALID_BS = 4

# Splits

In [4]:
# Give every row a validation-fold number (0-4), stratified on integer_label.
# NOTE(review): the split is made over rows, and the frame can hold several
# rows with the same id, so rows of one image may fall into different folds
# -- confirm this is intended before using the folds for validation.
df_folds = train.copy()
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=Config.seed)
for n, (train_index, val_index) in enumerate(
    skf.split(X=df_folds.index, y=df_folds['integer_label'])
):
    # val_index holds row positions; translate them to index labels for .loc
    df_folds.loc[df_folds.index[val_index], 'fold'] = n
# the column holds NaN until every fold has been written, so cast it afterwards
df_folds['fold'] = df_folds['fold'].astype(int)
# rows per (fold, label) pair, to check the stratification
print(df_folds.groupby(['fold', df_folds['integer_label']]).size())

fold  integer_label
0     0                 334
      1                1160
      2                 286
      3                 121
1     0                 334
      1                1160
      2                 286
      3                 121
2     0                 334
      1                1160
      2                 286
      3                 121
3     0                 334
      1                1160
      2                 286
      3                 121
4     0                 333
      1                1159
      2                 286
      3                 122
dtype: int64


In [7]:
# peek at the ids as an Index; an id that repeats owns more than one row
pd.Index(df_folds['id'])

Index(['000a312787f2', '000a312787f2', '0012ff7358bc', '0012ff7358bc',
       '001398f4ff4f', '001bd15d1891', '001bd15d1891', '0022227f5adf',
       '002e9b2128d0', '002e9b2128d0',
       ...
       'ff01229b525c', 'ff03d1d41968', 'ff0743bee789', 'ff4cd60f14b7',
       'ff6ee6ae167b', 'ff7659762b75', 'ff9f10a24c27', 'ffa9fef3c7bf',
       'ffcc6edd9445', 'ffd91a2c4ca0'],
      dtype='object', name='id', length=9504)

# Dataset & Dataloader

In [None]:
class SIIM(Dataset):
    """Detection dataset that reads DICOM images and their box annotations.

    Every item is addressed by a row position ``idx`` of ``df``. Rows that
    share the same ``path`` describe the same image, so an item carries all
    boxes found on rows with that path. An image with N rows is therefore
    still visited N times per epoch, each time with its full set of boxes.

    Args:
        image_ids: Image ids supplied by the caller. Stored on the instance;
            the methods of this class locate data through ``df`` only.
        df: Annotation frame. Must contain a ``path`` column with the DICOM
            file path, and, for training, the label and box columns named by
            ``label_column`` / ``box_columns``.
        is_train: When True ``__getitem__`` returns ``(image, target)``,
            otherwise only the image.
        augments: Optional callable invoked as ``augments(image=image)``.
        reshape_size: Stored on the instance; not applied by this class.
        box_columns: Names of the four box-coordinate columns of ``df``, in
            the order the detector expects (torchvision detection models
            document ``x1, y1, x2, y2``). Required when ``is_train`` is True.
        label_column: Name of the per-row class-label column.
    """

    def __init__(self, image_ids, df, is_train=True, augments=None,
                 reshape_size=Config.reshape_size, box_columns=None,
                 label_column='integer_label'):
        super().__init__()
        self.image_ids = image_ids
        self.df = df
        self.is_train = is_train
        self.augments = augments
        self.reshape_size = reshape_size
        self.box_columns = box_columns
        self.label_column = label_column

    def __len__(self) -> int:
        """Return the number of rows in ``df`` (one item per row)."""
        # shape[0] is already an int; wrapping it in len() raised TypeError
        return self.df.shape[0]

    @staticmethod
    def dicom2array(path: str, voi_lut=True, fix_monochrome=True):
        """Read a DICOM file and return its pixels as a uint8 array in 0-255.

        Args:
            path: Path of the DICOM file.
            voi_lut: Apply the VOI LUT stored in the file, which transforms
                raw DICOM data to a "human-friendly" view.
            fix_monochrome: Invert MONOCHROME1 images, which otherwise look
                inverted.

        Returns:
            ``np.uint8`` array with the same shape as the file's pixel data,
            min-max scaled to 0-255. A constant image yields all zeros.
        """
        # dcmread is the current name of the legacy read_file alias
        dicom = pydicom.dcmread(path)
        if voi_lut:
            data = apply_voi_lut(dicom.pixel_array, dicom)
        else:
            data = dicom.pixel_array
        if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
            data = np.amax(data) - data
        data = data - np.min(data)
        peak = np.max(data)
        # a constant image has peak 0; skip the division instead of producing NaN
        if peak > 0:
            data = data / peak
        data = (data * 255).astype(np.uint8)
        return data

    @staticmethod
    def _to_chw_tensor(image):
        """Convert an HxW or HxWxC array to a float32 CxHxW tensor."""
        if isinstance(image, torch.Tensor):
            # already converted by the augmentation pipeline; leave untouched
            return image
        image = np.asarray(image)
        if image.ndim == 2:
            # grayscale pixel data has no channel axis; add one so the
            # (2, 0, 1) transpose below is valid
            image = image[:, :, np.newaxis]
        # NOTE(review): values stay in dicom2array's 0-255 range, while
        # torchvision detection models document a 0-1 input range -- scale
        # before feeding the model if that applies.
        chw = np.ascontiguousarray(np.transpose(image, (2, 0, 1)))
        return torch.tensor(chw.astype(np.float32))

    def load_bbox_labels(self, idx: int):
        """Collect the boxes and labels of the image stored at row ``idx``.

        Args:
            idx: Row position in ``df``.

        Returns:
            ``(boxes, labels)``: a float32 array of shape ``(N, 4)`` read from
            ``box_columns`` and an int64 array of shape ``(N,)`` read from
            ``label_column``, taken from every row whose ``path`` equals the
            path at ``idx``.

        Raises:
            ValueError: If ``box_columns`` was not provided.
        """
        if self.box_columns is None:
            raise ValueError(
                "box_columns is not set; pass the names of the four "
                "box-coordinate columns of df when constructing SIIM"
            )
        paths = self.df['path'].values
        # rows pointing at the same file are the boxes of the same image
        rows = self.df.loc[paths == paths[idx]]
        boxes = rows[list(self.box_columns)].to_numpy(dtype=np.float32)
        labels = rows[self.label_column].to_numpy(dtype=np.int64)
        return boxes, labels

    def __getitem__(self, idx: int):
        """Return the image at row ``idx``, plus its target when training.

        Returns:
            ``image`` when ``is_train`` is False, else ``(image, target)``
            where ``target`` is ``{'boxes': FloatTensor[N, 4],
            'labels': Int64Tensor[N]}``.
        """
        image_path = self.df['path'].values[idx]
        image = self.dicom2array(image_path)

        if self.augments:
            augmented = self.augments(image=image)
            # pipelines called with keyword arguments (Albumentations-style)
            # return a dict; unwrap the image from it
            if isinstance(augmented, dict):
                augmented = augmented['image']
            image = augmented
            # NOTE(review): boxes are not passed through `augments`, so any
            # geometric transform would leave them misaligned with the image.
        image = self._to_chw_tensor(image)

        if self.is_train:
            boxes, labels = self.load_bbox_labels(idx)
            target = {
                'boxes': torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4),
                'labels': torch.as_tensor(labels, dtype=torch.int64),
            }
            return image, target

        return image