In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import glob
import cv2
import os
from matplotlib import pyplot as plt
import os
import cv2
import numpy as np
import pandas as pd
from torch.utils.data import TensorDataset, DataLoader,Dataset
import albumentations as albu
from skimage.color import gray2rgb
import functools
import torch
from tqdm.auto import tqdm

# Locations of the label table and of the pre-rendered JPEG slices.
train_csv_path = '../input/rsna-str-pulmonary-embolism-detection/train.csv'
jpeg_dir = '../input/rsna-str-pe-detection-jpeg-256/train-jpegs'

train_df = pd.read_csv(train_csv_path)
train_df.head()

# Preview one slice.  The first three columns of a row locate its JPEG on disk.
# They are read with .iloc because plain integer keys on a label-indexed Series
# are deprecated positional lookups in recent pandas.
row = train_df.iloc[100]
img = cv2.imread(glob.glob(f"{jpeg_dir}/{row.iloc[0]}/{row.iloc[1]}/*{row.iloc[2]}.jpg")[0])

# Show the three stored channels of the JPEG side by side.
plt.figure(figsize=[12,6])
for channel in range(3):
    plt.subplot(1, 3, channel + 1)
    plt.imshow(img[:,:,channel],cmap='gray')

In [None]:
def get_training_augmentation(y=256,x=256):
    """Build the albumentations pipeline used for training images.

    The steps run in this order: a vertical flip and a horizontal flip (each
    applied with probability 0.5), a Downscale step that is always applied
    with scale bounds 0.35 and 0.75, and finally ``albu.Resize(y, x)``.

    Args:
        y: first positional argument forwarded to ``albu.Resize``.
        x: second positional argument forwarded to ``albu.Resize``.

    Returns:
        An ``albu.Compose`` wrapping the four steps.
    """
    return albu.Compose([
        albu.VerticalFlip(p=0.5),
        albu.HorizontalFlip(p=0.5),
        albu.Downscale(p=1.0, scale_min=0.35, scale_max=0.75),
        albu.Resize(y, x),
    ])


# Keyword arguments bound onto preprocess_input via functools.partial.
# 'input_size' has no matching named parameter there and lands in **kwargs.
# NOTE(review): mean/std look like the usual ImageNet channel statistics - confirm.
formatted_settings = {
    'input_size': [3, 224, 224],
    'input_range': [0, 1],
    'mean': [0.485, 0.456, 0.406],
    'std': [0.229, 0.224, 0.225],
}
def preprocess_input(
    x, mean=None, std=None, input_space="RGB", input_range=None, **kwargs
):
    """Normalise an image array and return the result.

    Steps, each applied only when its condition holds:

    1. ``input_space == "BGR"``: reverse the last axis (on a copy).
    2. ``input_range`` is given, its upper bound is 1 and ``x`` contains a
       value above 1: divide by 255.
    3. ``mean`` is given: subtract it.
    4. ``std`` is given: divide by it.

    Args:
        x: array with channels on the last axis.
        mean: optional per-channel offsets.
        std: optional per-channel divisors.
        input_space: ``"BGR"`` triggers the channel reversal; any other value
            leaves the channel order alone.
        input_range: optional ``[low, high]`` pair; only ``high`` is inspected.
        **kwargs: accepted and ignored.

    Returns:
        The transformed array (``x`` itself when no step applies).
    """
    if input_space == "BGR":
        x = x[..., ::-1].copy()

    wants_unit_range = (
        input_range is not None and x.max() > 1 and input_range[1] == 1
    )
    if wants_unit_range:
        x = x / 255.0

    if mean is not None:
        x = x - np.array(mean)

    if std is not None:
        x = x / np.array(std)

    return x

def get_preprocessing(preprocessing_fn):
    """Compose the per-image preprocessing pipeline.

    Args:
        preprocessing_fn: callable applied to the image first.

    Returns:
        An ``albu.Compose`` that runs ``preprocessing_fn`` on the image and
        then ``to_tensor`` on the image and, if present, the mask.
    """
    normalise_step = albu.Lambda(image=preprocessing_fn)
    layout_step = albu.Lambda(image=to_tensor, mask=to_tensor)
    return albu.Compose([normalise_step, layout_step])

def get_validation_augmentation(y=256,x=256):
    """Build the validation pipeline: a single ``albu.Resize(y, x)`` step.

    No padding and no random augmentation is applied.
    """
    resize_only = albu.Resize(y, x)
    return albu.Compose([resize_only])

def to_tensor(x, **kwargs):
    """Move the last axis of a 3-D array to the front and cast to float32.

    Args:
        x: array with three axes; axis 2 becomes axis 0.
        **kwargs: accepted and ignored.

    Returns:
        A float32 array with axes ordered ``(2, 0, 1)`` relative to ``x``.
    """
    channel_first = x.transpose((2, 0, 1))
    return channel_first.astype('float32')

class CTDataset2D(Dataset):
    """Dataset that yields one JPEG image, its labels and its UIDs per row.

    Rows come from ``df.values``.  Columns 0-2 are used to locate the JPEG
    under the module-level ``jpeg_dir``; columns 3 onwards are the labels.

    With ``mode='val'`` every row is served.  With any other mode the rows are
    reduced by ``update_train_df`` so that rows whose column 3 equals 0 do not
    outnumber rows whose column 3 equals 1.

    ``size`` is stored on the instance but not read anywhere in this class.
    """

    def __init__(self,df,transforms = albu.Compose([albu.HorizontalFlip()]),preprocessing=None,size=1,mode='val'):
        self.df_main = df.values
        self.transforms = transforms
        self.preprocessing = preprocessing
        self.size = size

        if mode != 'val':
            self.update_train_df()
        else:
            self.df = self.df_main

    def __len__(self):
        """Number of rows currently served."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, labels, uids)`` for row ``idx``.

        ``labels`` is an integer tensor built from columns 3 onwards; when its
        first entry is not 1, every entry from index 2 on is set to 0.
        ``uids`` is the slice of the first three columns of the row.
        """
        record = self.df[idx]
        pattern = f"{jpeg_dir}/{record[0]}/{record[1]}/*{record[2]}.jpg"
        img = cv2.imread(glob.glob(pattern)[0])

        # astype() copies, so zeroing entries below never touches self.df.
        label = record[3:].astype(int)
        UID_new = record[0:3]   # returned so callers can identify the image
        if label[0] != 1:
            label[2:] = 0

        if self.transforms:
            img = self.transforms(image=img)['image']
        if self.preprocessing:
            img = self.preprocessing(image=img)['image']

        return img, torch.from_numpy(label.reshape(-1)), UID_new

    def update_train_df(self):
        """Rebuild ``self.df`` from ``self.df_main`` with balanced column 3.

        Keeps all rows whose column 3 equals 1 and, ahead of them, the first
        ``len(positives)`` rows whose column 3 equals 0 (no shuffling).
        """
        flag = self.df_main[:, 3]
        negatives = self.df_main[flag == 0]
        positives = self.df_main[flag == 1]
        kept_negatives = negatives[:len(positives)]
        self.df = np.concatenate([kept_negatives, positives], axis=0)
        

def norm(img):
    """Min-max scale ``img`` to the range [0, 1] and return a new array.

    The input array is left untouched; the shift is done on a copy rather
    than in place.  A constant input, whose value range is zero, yields an
    all-zero array instead of NaNs from a 0/0 division.

    Args:
        img: numpy array with at least one element.

    Returns:
        Array of the same shape with minimum 0 and, unless the input was
        constant, maximum 1.
    """
    shifted = img - img.min()
    peak = shifted.max()
    if peak == 0:
        # Divide by one of the same dtype so the result dtype matches the
        # non-constant path.
        return shifted / peak.dtype.type(1)
    return shifted / peak

# Split by study so that no study contributes rows to both subsets.
# The unique IDs are sorted because set iteration order over strings changes
# between interpreter runs (hash randomisation); without sorting, every kernel
# restart would silently produce a different train/validation split.
StudyInstanceUID = sorted(set(train_df['StudyInstanceUID']))
print(len(StudyInstanceUID))
# First 6500 studies go to training, the remainder to validation.
t_df = train_df[train_df['StudyInstanceUID'].isin(StudyInstanceUID[0:6500])]
v_df = train_df[train_df['StudyInstanceUID'].isin(StudyInstanceUID[6500:])]

class config:
    """Run settings, read as class attributes (``config.batch_size`` etc.).

    Defining the class also creates the ``MODEL_PATH`` directory.
    """
    model_name="resnet18"
    batch_size = 1
    WORKERS = 4
    classes =14
    resume = False
    epochs = 10
    MODEL_PATH = 'log/cpt'
    # exist_ok avoids the check-then-create race and is a no-op when the
    # directory is already there.
    os.makedirs(MODEL_PATH, exist_ok=True)
        
# Bind the normalisation settings once so the pipeline can call the function
# with just the image.
preprocessing_fn = functools.partial(preprocess_input, **formatted_settings)

# Training rows are class-balanced (mode='train') and randomly augmented;
# validation rows are only resized.
train_dataset = CTDataset2D(
    t_df,
    transforms=get_training_augmentation(),
    preprocessing=get_preprocessing(preprocessing_fn),
    mode='train',
)
val_dataset = CTDataset2D(
    v_df,
    transforms=get_validation_augmentation(),
    preprocessing=get_preprocessing(preprocessing_fn),
)

# Both loaders share every option except the batch size.
_loader_options = dict(shuffle=False, num_workers=config.WORKERS, pin_memory=True)
train = DataLoader(train_dataset, batch_size=config.batch_size, **_loader_options)
val = DataLoader(val_dataset, batch_size=config.batch_size*2, **_loader_options)

# Pull a single sample and show its shape, labels and the dataset sizes.
x, y, Uid = train_dataset[-400]
(x.shape, len(y), y, len(train_dataset), len(val_dataset))

In [None]:
# x is channel-first after preprocessing.  Move the channel axis last with an
# explicit transpose; swapaxes(0, 2) would also exchange height and width and
# display the slice transposed.
_preview = np.transpose(x, (1, 2, 0))
# The standardised values fall outside [0, 1], which imshow would clip, so
# rescale to the displayable range (for display only).
_lo, _hi = _preview.min(), _preview.max()
plt.imshow((_preview - _lo) / (_hi - _lo) if _hi > _lo else _preview - _lo)

In [None]:

"""
This script reads training and validation images from folders, extracts a feature vector for each one, and saves the results to a CSV file.

@author: Dipu
"""
import torch.nn as nn


from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os



In [None]:
# Backbone with pretrained weights; its classifier head is swapped out later.
model_ft = models.resnet50(pretrained=True)

# Switch matplotlib to interactive mode.
plt.ion()

class NullNet(nn.Module):
    """Pass-through module: ``forward`` returns its input unchanged.

    Assigning an instance in place of a network's last layer makes the network
    output whatever that layer would have received.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``x`` as is."""
        return x
    
    
# Replace the classifier head with a pass-through so the model returns the
# features that would have been fed to ``fc``.
model_ft.fc = NullNet()

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model_ft = model_ft.to(device)
# The model is used for feature extraction only.  Evaluation mode makes
# batch-norm layers use their stored statistics instead of per-batch ones
# (and stops them from being updated) and disables dropout.
model_ft.eval()

print(device)
import torchvision.transforms as T
trf = T.Compose([T.ToTensor()])


#%%


import csv


def _write_feature_rows(dataset, writer, model, device, done, total, since):
    """Run ``model`` on every sample of ``dataset`` and write one CSV row each.

    A row is the concatenation of the sample's UID fields, its labels and the
    flattened model output.  Progress is printed whenever the running sample
    count satisfies ``count % 20000 == 500``.

    Args:
        dataset: iterable yielding ``(image, labels, uids)`` where ``image``
            is a channel-first numpy array and ``labels`` a torch tensor.
        writer: ``csv.writer`` that receives the rows.
        model: feature extractor, already placed on ``device``.
        device: torch device the input is moved to.
        done: number of samples processed before this call.
        total: denominator used for the percentage printout.
        since: ``time.time()`` value taken at the start of the run.

    Returns:
        The updated running sample count.
    """
    for inputs, labels, UIDs_new in dataset:
        # The dataset already yields channel-first float32 arrays, so only a
        # batch axis is added.  A swapaxes(0, 2) + ToTensor round trip would
        # exchange height and width and feed the model transposed images.
        batch = torch.from_numpy(np.ascontiguousarray(inputs)).unsqueeze(0).to(device)

        # No gradients are needed for feature extraction; skipping the
        # autograd graph saves memory and time.
        with torch.no_grad():
            features = torch.flatten(model(batch)).cpu().numpy()

        writer.writerow(np.concatenate((UIDs_new, labels.numpy(), features), axis=0))
        done += 1

        if done % 20000 == 500:
            time_elapsed = time.time() - since
            print('Time from start {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))
            print('Percentage complete: {:4f}'.format(100*done/total))

    return done


since = time.time()
iter1 = 0

# Make sure batch-norm uses its stored statistics; with one image per forward
# pass, training mode would normalise each image by its own statistics.
model_ft.eval()

total_samples = len(val_dataset) + len(train_dataset)

# Rows are appended: re-running this cell adds to an existing file.
with open('train_features.csv', 'a+', newline='') as file:
    write = csv.writer(file)
    # Training samples first, then validation samples, into the same file.
    for _dataset in (train_dataset, val_dataset):
        iter1 = _write_feature_rows(
            _dataset, write, model_ft, device, iter1, total_samples, since
        )