In [None]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset
from torch import nn
from sklearn.model_selection import train_test_split
from glob import glob
import shutil
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from sklearn.model_selection import GroupKFold
from tqdm.notebook import tqdm
import seaborn as sns
import json
import math
from albumentations import Compose
import albumentations as A
from PIL import Image
import os
import warnings
from multiprocessing import Pool

import sys
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')
import timm

In [None]:
# Central experiment configuration; later cells read these values by key.
CFG = dict(
    model="tf_efficientnet_b4_ns",  # timm architecture name given to CovidClassifier
    img_size=224,                   # square image side in pixels (512 and 256 were tried before)
    epochs=5,                       # number of train + validate passes
    lr=1e-3,                        # Adam learning rate (1e-5 was tried before)
    n_class=4,                      # size of the classification head
    batch_size=8,                   # DataLoader batch size
    num_workers=4,                  # DataLoader worker processes
    device="cuda:0",                # torch device string
)

In [None]:
# Scratch cell: works out the crop bounds for one sample image by hand.
# The same steps are wrapped up in trim1() in the next cell.
import cv2


img_add = '../input/siim-covid19-resized-to-512px-png/train/001398f4ff4f.png'
# IMREAD_COLOR asks OpenCV for a 3-channel image.
img = cv2.imread(img_add, cv2.IMREAD_COLOR)

# cv2.imshow('image',img)
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Boolean mask of pixels brighter than 10% of the mean of the non-zero pixels.
im = img_gray > 0.1 * np.mean(img_gray[img_gray != 0])
# Number of mask pixels in every row / column.
row_sums = np.sum(im, axis=1)
col_sums = np.sum(im, axis=0)
# Keep the rows (columns) whose mask count exceeds 3% of the image width (height).
rows = np.where(row_sums > img.shape[1] * 0.03)[0]
cols = np.where(col_sums > img.shape[0] * 0.03)[0]
# Bounding box of the kept rows and columns.
min_row, min_col = np.min(rows), np.min(cols)
max_row, max_col = np.max(rows), np.max(cols)
print(max_row)


In [None]:
def trim1(im, percentage=0.03):
    """Crop away the dark border around the bright content of an image.

    A pixel counts as content when its grey level exceeds 10% of the mean of
    the non-zero pixels. A row (column) is kept when more than ``percentage``
    of the image width (height) is content, and the result is the bounding
    box of the kept rows and columns.

    Args:
        im: PIL image or array-like, either single-channel ``(H, W)`` or
            multi-channel ``(H, W, C)``.
        percentage: Fraction of a row/column that must be content for it to
            be kept. Defaults to the previously hard-coded 0.03.

    Returns:
        PIL.Image.Image: The cropped image. The image is returned uncropped
        when it has no non-zero pixel or when no row/column passes the
        threshold (the original code raised on ``np.min`` of an empty array).
    """
    img = np.array(im)

    if img.ndim == 2:
        # Already single-channel: cv2.cvtColor(..., COLOR_BGR2GRAY) rejects
        # 2-D input, which made grey PNGs opened with PIL crash here.
        img_gray = img
    else:
        # NOTE(review): arrays made from PIL images are RGB, so this
        # conversion swaps the red/blue weights. Kept as-is so existing
        # crops do not change; it is irrelevant for grey x-rays.
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    nonzero = img_gray[img_gray != 0]
    if nonzero.size == 0:
        # Completely black image: there is nothing to crop to.
        return Image.fromarray(img)

    content = img_gray > 0.1 * np.mean(nonzero)
    row_sums = np.sum(content, axis=1)
    col_sums = np.sum(content, axis=0)
    rows = np.where(row_sums > img.shape[1] * percentage)[0]
    cols = np.where(col_sums > img.shape[0] * percentage)[0]
    if rows.size == 0 or cols.size == 0:
        # Content too sparse for any row/column to qualify.
        return Image.fromarray(img)

    min_row, max_row = rows.min(), rows.max()
    min_col, max_col = cols.min(), cols.max()
    im_crop = img[min_row : max_row + 1, min_col : max_col + 1]
    return Image.fromarray(im_crop)

def resize_maintain_aspect(image, desired_size):
    """Scale ``image`` to fit a square canvas and centre it on black padding.

    Args:
        image: PIL image to resize; ``image.size`` is ``(width, height)``.
        desired_size: Side length, in pixels, of the square output.

    Returns:
        PIL.Image.Image: New ``"RGB"`` image of size
        ``(desired_size, desired_size)`` with the scaled input pasted in the
        centre.
    """
    old_size = image.size  # (width, height)
    ratio = float(desired_size) / max(old_size)
    # Clamp to one pixel so very elongated inputs never produce a
    # zero-sized side.
    new_size = tuple(max(1, int(side * ratio)) for side in old_size)
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
    # under its current name.
    im = image.resize(new_size, Image.LANCZOS)
    new_im = Image.new("RGB", (desired_size, desired_size))
    offset = ((desired_size - new_size[0]) // 2, (desired_size - new_size[1]) // 2)
    new_im.paste(im, offset)
    return new_im

def save_single(args):
    """Trim, resize and save one image; worker function for ``Pool.imap``.

    Args:
        args: Tuple ``(img_file, input_path_folder, output_path_folder,
            output_size)``. ``output_size`` may be an int or a sequence whose
            first element is the side of the square output.
    """
    img_file, input_path_folder, output_path_folder, output_size = args
    side = output_size if isinstance(output_size, int) else output_size[0]

    # Close the source file promptly: this runs once per image in
    # long-lived worker processes.
    with Image.open(os.path.join(input_path_folder, img_file)) as image_original:
        image = trim1(image_original)
    image = resize_maintain_aspect(image, desired_size=side)
    # Join folder and file name as separate components; the previous string
    # concatenation only worked when the folder ended with a path separator.
    image.save(os.path.join(output_path_folder, img_file))
    

def fast_image_resize(input_path_folder, output_path_folder, output_size=None):
    """Trim and resize every file of a folder in parallel.

    Each regular file in ``input_path_folder`` is handed to ``save_single``
    in a ``multiprocessing.Pool`` and written under the same name to
    ``output_path_folder``, which is created when missing.

    Args:
        input_path_folder: Folder containing the source images.
        output_path_folder: Folder receiving the processed images.
        output_size: Target size, e.g. ``(150, 150)``; required.

    Raises:
        ValueError: If ``output_size`` is missing or empty. The previous
            version warned and called ``exit()``, which shuts the kernel
            down when run inside a notebook.
    """
    if not output_size:
        raise ValueError("Need to specify output_size! For example: output_size=(150, 150)")

    os.makedirs(output_path_folder, exist_ok=True)

    # Skip sub-directories: Image.open would fail on them inside a worker.
    jobs = [
        (file, input_path_folder, output_path_folder, output_size)
        for file in sorted(os.listdir(input_path_folder))
        if os.path.isfile(os.path.join(input_path_folder, file))
    ]

    with Pool() as p:
        # list() drains the lazy iterator so every job actually runs while
        # tqdm shows the progress.
        list(tqdm(p.imap_unordered(save_single, jobs), total=len(jobs)))


# if __name__ == "__main__":
# fast_image_resize("../train/images/", "../train/images_resized_150/", output_size=(150, 150))
# fast_image_resize("../test/images/", "../test/images_resized_150/", output_size=(150, 150))





In [None]:
# Build one table with a row per training image: image-level annotations,
# the study-level labels of the image's study, and the resized-image metadata.
train_dir = "../input/siim-covid19-detection/train"
train_image_level = pd.read_csv("../input/siim-covid19-detection/train_image_level.csv")
train_study_level = pd.read_csv("../input/siim-covid19-detection/train_study_level.csv")
def trim(x):
   """Return ``x`` without its last six characters.

   Presumably strips an id suffix such as "_study" / "_image" — confirm
   against the CSV contents.
   """
   return x[:-6]

# Expose the trimmed study id under the column name used by the image-level table.
train_study_level['id'] = train_study_level['id'].apply(trim)
train_study_level['StudyInstanceUID'] = train_study_level['id']

# Both tables have an 'id' column, so the merge yields 'id_x' (image side) and 'id_y' (study side).
train_data = train_image_level.merge(train_study_level, on='StudyInstanceUID', how='inner')
del train_data['id_y']
train_data['id_x'] = train_data['id_x'].apply(trim)
train_data = train_data.rename(columns={'id_x':"image_id"})
# Inner join: images without a row in meta.csv are dropped.
meta = pd.read_csv("../input/siim-covid19-resized-to-512px-png/meta.csv")
train_data = train_data.merge(meta, on='image_id', how='inner')

# train_data = pd.read_csv("../input/trained-data-an-scaled-v3csv/train_data_an_scaled_v3.csv")



#train_data
# boxes_split=train_data.boxes.str.split(expand=True)
# train_data = train_data.join(boxes_split)
# train_data.to_csv('train_data_an.csv')

train_data.head()


In [None]:
# Columns that play no role in study-level classification.
train_data = train_data.drop(columns=["split", "boxes", "label"])

# Location of the resized PNG belonging to each image id.
train_data["image_path"] = (
    "../input/siim-covid19-resized-to-512px-png/train/" + train_data["image_id"] + ".png"
)

# Collapse the four study-level indicator columns into one integer class id;
# a column's position in this list is the id it contributes.
label_columns = [
    "Negative for Pneumonia",
    "Typical Appearance",
    "Indeterminate Appearance",
    "Atypical Appearance",
]
train_data["class_id"] = sum(
    class_id * train_data[column] for class_id, column in enumerate(label_columns)
)
train_data = train_data.drop(columns=label_columns)

In [None]:
# Human-readable class names; the list position matches the integer class_id.
classes = [
    "negative",
    "typical",
    "indeterminate",
    "atypical"
]

# train, test = train_test_split(train_data, test_size=0.5, random_state=42)
# 70/30 random split with a fixed seed so the partition is reproducible.
# NOTE(review): the split is made per row (per image); if a study has several
# images they can end up on both sides — consider a grouped split on
# StudyInstanceUID. TODO confirm whether that matters for this data.
train, val = train_test_split(train_data, test_size=0.3, random_state=42)
# Give both frames a fresh 0..n-1 index; without drop=True the previous index
# is kept as an extra 'index' column.
train = train.reset_index()
val = val.reset_index()

In [None]:
# Peek at the first rows of the validation split.
val.head()

In [None]:
class ImageDataset(Dataset):
    def __init__(self, df, output_label=True, transform=None):
        super().__init__()
        self.df = df
        self.image_paths = df['image_path']
        self.output_label = output_label
        if output_label:
            self.labels = df['class_id']
        self.transform = transform

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, index):
        if self.output_label:
            label = self.labels[index]
        img_path = self.df.loc[index]['image_path']
        image = mpimg.imread(img_path)
        if self.transform is not None:
            image = self.transform(image=image)["image"]
        image = np.array([image, image, image])
        if self.output_label:
            return image, label
        else:
            return image
        
train_transforms = Compose([
    A.Transpose(p=0.5),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.ShiftScaleRotate(p=0.5),
    # A.Resize(width = 256, height = 256),
    # A.Normalize( ),
    #        mean=[0.3199, 0.2240, 0.1609],
    #        std=[0.3020, 0.2183, 0.1741],
    #        max_pixel_value=255.0,
    #    ),
    A.Resize(width = 224, height = 224),
    A.Normalize( 
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
        ),
    A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
    A.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),
], p=1.)

In [None]:
train_ds = ImageDataset(train, train_transforms)
valid_ds = ImageDataset(val)

train_ds
train_loader = torch.utils.data.DataLoader(
    train_ds,
    batch_size=CFG['batch_size'],
    pin_memory=False,
    drop_last=False,
    shuffle=True,        
    num_workers=CFG['num_workers'],
)
val_loader = torch.utils.data.DataLoader(
    valid_ds, 
    batch_size=CFG['batch_size'],
    pin_memory=False,
    shuffle=False,
    num_workers=CFG['num_workers'],        
)

In [None]:
class CovidClassifier(nn.Module):
    """timm backbone whose ``classifier`` layer is replaced by a fresh linear head.

    Args:
        model_arch: Architecture name forwarded to ``timm.create_model``.
        n_class: Number of outputs of the new head.
        pretrained: Forwarded to ``timm.create_model``.
    """

    def __init__(self, model_arch, n_class, pretrained=True):
        super().__init__()
        backbone = timm.create_model(model_arch, pretrained=pretrained)
        # Reuse the input width of the stock head for the replacement.
        head_inputs = backbone.classifier.in_features
        backbone.classifier = nn.Linear(head_inputs, n_class)
        self.model = backbone

    def forward(self, x):
        """Return the backbone output for batch ``x``."""
        return self.model(x)

In [None]:
def train_epoch(model, loss_fn, optimizer, scheduler, loader, device):
    """Run one optimisation pass over ``loader``.

    Args:
        model: Network to train; switched to training mode.
        loss_fn: Callable ``loss_fn(predictions, labels)`` returning a scalar tensor.
        optimizer: Optimizer holding the model parameters.
        scheduler: Accepted for interface compatibility; it is not stepped
            here (the call is left disabled, as before).
        loader: Iterable of ``(images, labels)`` batches that supports ``len``.
        device: Device the batches are moved to.

    Returns:
        float: Mean of the per-batch loss values, or ``nan`` when the loader
        yields no batch. Earlier versions returned ``None``.
    """
    model.train()

    loss_sum, n_batches = 0.0, 0
    pbar = tqdm(enumerate(loader), total=len(loader))
    for step, (imgs, image_labels) in pbar:
        imgs = imgs.to(device)
        image_labels = image_labels.to(device)

        optimizer.zero_grad()
        image_preds = model(imgs)
        loss = loss_fn(image_preds, image_labels)
        loss.backward()
        optimizer.step()
#         scheduler.step()

        # Show the running mean so a diverging run is visible immediately.
        loss_sum += loss.item()
        n_batches += 1
        pbar.set_postfix(loss=loss_sum / n_batches)

    return loss_sum / n_batches if n_batches else float("nan")
        
def validate_epoch(model, loss_fn, loader, device):
    """Evaluate ``model`` on ``loader`` and print accuracy and average loss.

    The loss is averaged per sample. Batch losses are weighted by batch size
    unless ``loss_fn.reduction == "sum"``; the earlier version divided a sum
    of per-batch means by the number of samples and therefore under-reported
    the loss by roughly the batch size.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        loss_fn: Callable ``loss_fn(predictions, labels)`` returning a scalar tensor.
        loader: Iterable of ``(images, labels)`` batches that supports ``len``.
        device: Device the batches are moved to.

    Returns:
        tuple[float, float]: ``(average_loss, accuracy)`` with accuracy in
        [0, 1]; both are ``nan`` when the loader yields no samples.
    """
    model.eval()
    loss_is_summed = getattr(loss_fn, "reduction", "mean") == "sum"
    test_loss, correct, size = 0.0, 0, 0

    pbar = tqdm(enumerate(loader), total=len(loader))
    with torch.no_grad():
        for step, (imgs, image_labels) in pbar:
            imgs = imgs.to(device)
            image_labels = image_labels.to(device)
            image_preds = model(imgs)

            batch_size = image_labels.size(0)
            batch_loss = loss_fn(image_preds, image_labels).item()
            test_loss += batch_loss if loss_is_summed else batch_loss * batch_size
            correct += (image_preds.argmax(1) == image_labels).sum().item()
            # Count the samples actually seen instead of relying on loader.dataset.
            size += batch_size

    if size:
        test_loss /= size
        correct /= size
    else:
        test_loss = correct = float("nan")
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    return test_loss, correct

In [None]:
# Build the model, optimizer and loss from CFG, then alternate one training
# pass and one validation pass per epoch.
device = torch.device(CFG['device'])
model = CovidClassifier(CFG["model"], CFG["n_class"]).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=CFG['lr'])
# NOTE(review): the scheduler is created (T_max = half the number of training
# batches, a float) and handed to train_epoch, but train_epoch does not step
# it, so the learning rate stays at CFG['lr'] for the whole run.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, len(train_loader)/2)
loss_fn = nn.CrossEntropyLoss().to(device)
for epoch in range(CFG["epochs"]):
    train_epoch(model, loss_fn, optimizer, scheduler, train_loader, device)
    validate_epoch(model, loss_fn, val_loader, device)

In [None]:
# Persist only the learned parameters (the state_dict) to the file "model1";
# loading them later requires rebuilding CovidClassifier first.
torch.save(model.state_dict(), "model1")