In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print each directory with its file count.
for dirname, _, filenames in os.walk('/kaggle/input'):
    file_count = len(filenames)
    print(dirname, file_count)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim 
import torch.nn.functional as F
import torchvision

from torch.utils.data import  TensorDataset, DataLoader 
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms

# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Using device", device)

# `from cv2 import cv2` raises ImportError on recent opencv-python wheels;
# importing the top-level package works on old and new releases alike.
import cv2
from PIL import Image
import matplotlib.pyplot as plt

import time
import copy
import random

from tqdm import tqdm

# One seed shared by every random number generator the notebook touches.
random_seed = 42

# Python and NumPy generators.
random.seed(random_seed)
np.random.seed(random_seed)

# PyTorch generators: CPU, the current GPU, and every GPU.
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed) # if use multi-GPU

# Ask cuDNN for deterministic kernels and switch off its autotuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

%matplotlib inline

In [None]:
# Single root for the competition data. The CSVs used to be read through a
# working-directory-relative "../input" path while the image folders were
# absolute; deriving everything from one absolute root removes the dependency
# on the notebook's current directory.
data_dir = "/kaggle/input/aptos2019-blindness-detection/"
train_path = data_dir + "train_images/"
test_path = data_dir + "test_images/"
train_data = pd.read_csv(data_dir + "train.csv")
test_data = pd.read_csv(data_dir + "test.csv")

In [None]:
# Quick look at both tables: shape first, then the first ten rows.
for frame in (train_data, test_data):
    print(frame.shape)
    print(frame.head(10))

# Share of each diagnosis value among the training labels, as a pie chart.
label_counts = train_data["diagnosis"].value_counts()
label_counts.plot(kind="pie")

# grayscale

In [None]:
def crop_image1(img,tol=7):
    """Drop every row and column of a 2-D image that has no pixel above ``tol``.

    Args:
        img: 2-D array of pixel intensities.
        tol: intensity threshold; values <= tol are treated as background.

    Returns:
        The sub-array made of the rows and columns that contain at least one
        pixel brighter than ``tol`` (empty when no pixel qualifies).
    """
    bright = img > tol
    keep_rows = bright.any(axis=1)
    keep_cols = bright.any(axis=0)
    return img[np.ix_(keep_rows, keep_cols)]

def crop_image(img,tol=7):
    """Remove the dark border of an image.

    Args:
        img: 2-D (single-channel) or 3-D (height, width, channels) array.
        tol: intensity threshold; values <= tol are treated as background.

    Returns:
        For 2-D input, the rows/columns that contain at least one pixel above
        ``tol`` (the shape generally shrinks). For 3-D input, the cropped image
        resized back to the input's (height, width), so the shape is unchanged;
        when no pixel exceeds ``tol`` the input is returned untouched. Any other
        rank falls through and returns None.
    """
    if img.ndim ==2:
        mask = img>tol
        return img[np.ix_(mask.any(1),mask.any(0))]
    elif img.ndim==3:
        h,w,_=img.shape
        # One mask shared by all channels: cropping each channel with its own
        # mask and stretching it back shifts the channels against each other.
        mask = (img>tol).any(axis=2)
        rows, cols = mask.any(1), mask.any(0)
        if not rows.any():
            # Nothing above tol: the crop would be empty and cv2.resize rejects
            # empty input, so hand the image back unchanged.
            return img
        cropped = np.ascontiguousarray(img[np.ix_(rows, cols)])
        # Build a new array instead of writing into the caller's image.
        return cv2.resize(cropped,(w,h))
    
# Preview grid of grayscale, border-cropped samples: five random images per diagnosis value.
fig = plt.figure(figsize=(25,16))

grades = sorted(train_data['diagnosis'].unique())
for class_id in grades:
    picks = train_data.loc[train_data['diagnosis'] == class_id].sample(5)
    for i, (idx, row) in enumerate(picks.iterrows()):
        # Slot class_id*5+i+1 puts each class on its own row of the 5x5 grid
        # (assumes the class ids are the integers 0-4 -- TODO confirm).
        slot = class_id * 5 + i + 1
        ax = fig.add_subplot(5, 5, slot, xticks=[], yticks=[])
        img = cv2.imread(train_path +'/' + row['id_code'] +'.png')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = cv2.resize(crop_image(img),(300,300))
        ax.imshow(img,cmap='gray')
        ax.set_title('Label: %d-%d-%s' % (class_id, idx, row['id_code']) )

# ben color

In [None]:
def load_ben_color(path, sigmaX=10):
    """Load an image and sharpen it against a Gaussian-blurred copy of itself.

    Args:
        path: image file to read with OpenCV.
        sigmaX: standard deviation handed to ``cv2.GaussianBlur``.

    Returns:
        A 300x300 RGB array computed as 4*image - 4*blur + 128.
    """
    rgb = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)
    resized = cv2.resize(crop_image(rgb), (300, 300))
    blurred = cv2.GaussianBlur(resized, (0,0), sigmaX)
    # Subtracting the blur removes the smooth background; 128 re-centres the result.
    return cv2.addWeighted(resized, 4, blurred, -4, 128)


# Preview grid of load_ben_color output: five random images per diagnosis value.
fig = plt.figure(figsize=(25, 16))
for class_id in sorted(train_data['diagnosis'].unique()):
    for i, (idx, row) in enumerate(train_data.loc[train_data['diagnosis'] == class_id].sample(5).iterrows()):
        ax = fig.add_subplot(5,5, class_id * 5 + i + 1, xticks=[], yticks=[])
        # Reuse train_path rather than repeating a second, relative spelling of the folder.
        path = train_path + row['id_code'] + '.png'
        image = load_ben_color(path,sigmaX=30)
        # Draw on the axes just created instead of relying on pyplot's implicit current axes.
        ax.imshow(image)
        ax.set_title('%d-%d-%s' % (class_id, idx, row['id_code']))

In [None]:
def crop_image1(img,tol=7):
    """Drop every row and column of a 2-D image that has no pixel above ``tol``.

    Args:
        img: 2-D array of pixel intensities.
        tol: intensity threshold; values <= tol are treated as background.

    Returns:
        The sub-array made of the rows and columns that contain at least one
        pixel brighter than ``tol`` (empty when no pixel qualifies).
    """
    bright = img > tol
    keep_rows = bright.any(axis=1)
    keep_cols = bright.any(axis=0)
    return img[np.ix_(keep_rows, keep_cols)]

def crop_image(img,tol=7):
    """Remove the dark border of an image.

    Args:
        img: 2-D (single-channel) or 3-D (height, width, channels) array.
        tol: intensity threshold; values <= tol are treated as background.

    Returns:
        For 2-D input, the rows/columns that contain at least one pixel above
        ``tol`` (the shape generally shrinks). For 3-D input, the cropped image
        resized back to the input's (height, width), so the shape is unchanged;
        when no pixel exceeds ``tol`` the input is returned untouched. Any other
        rank falls through and returns None.
    """
    if img.ndim ==2:
        mask = img>tol
        return img[np.ix_(mask.any(1),mask.any(0))]
    elif img.ndim==3:
        h,w,_=img.shape
        # One mask shared by all channels: cropping each channel with its own
        # mask and stretching it back shifts the channels against each other.
        mask = (img>tol).any(axis=2)
        rows, cols = mask.any(1), mask.any(0)
        if not rows.any():
            # Nothing above tol: the crop would be empty and cv2.resize rejects
            # empty input, so hand the image back unchanged.
            return img
        cropped = np.ascontiguousarray(img[np.ix_(rows, cols)])
        # Build a new array instead of writing into the caller's image.
        return cv2.resize(cropped,(w,h))

In [None]:
def circle_crop(path):
    """Read an image, crop its dark border, and keep only a centred disc.

    Args:
        path: image file to read with OpenCV.

    Returns:
        The image with everything outside the centred circle (radius = half of
        the shorter side) set to zero, passed through ``crop_image`` once more.
    """
    cropped = crop_image(cv2.imread(path))
    rows, cols, _channels = cropped.shape

    cx = int(cols/2)
    cy = int(rows/2)
    radius = int(np.amin((cx, cy)))

    # Single-channel mask: 1 inside the filled circle, 0 elsewhere.
    disk = np.zeros((rows, cols), np.uint8)
    cv2.circle(disk, (cx, cy), radius, 1, thickness=-1)

    masked = cv2.bitwise_and(cropped, cropped, mask=disk)
    return crop_image(masked)

def change_ben_color(image, sigmaX=10):
    """Convert a BGR image to RGB and apply two blur-difference passes.

    Pass one computes 4*img - 4*blur + 128; pass two applies the opposite
    signs (-4*img + 4*blur + 128) to the result of pass one.

    Args:
        image: BGR image array.
        sigmaX: standard deviation handed to ``cv2.GaussianBlur`` in both passes.

    Returns:
        The processed RGB image.
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    first_blur = cv2.GaussianBlur(rgb, (0,0), sigmaX)
    enhanced = cv2.addWeighted(rgb, 4, first_blur, -4, 128)
    second_blur = cv2.GaussianBlur(enhanced, (0,0), sigmaX)
    return cv2.addWeighted(enhanced, -4, second_blur, 4, 128)
def change_gray_color(image, sigmaX=10):
    """Convert a BGR image to grayscale and apply two blur-difference passes.

    Pass one computes 4*img - 4*blur + 128; pass two applies the opposite
    signs (-4*img + 4*blur + 128) to the result of pass one.

    Args:
        image: BGR image array.
        sigmaX: standard deviation handed to ``cv2.GaussianBlur`` in both passes.

    Returns:
        The processed single-channel image.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    first_blur = cv2.GaussianBlur(gray, (0,0), sigmaX)
    enhanced = cv2.addWeighted(gray, 4, first_blur, -4, 128)
    second_blur = cv2.GaussianBlur(enhanced, (0,0), sigmaX)
    return cv2.addWeighted(enhanced, -4, second_blur, 4, 128)

# Preview grid of circle-cropped, colour-processed samples: five random images per diagnosis value.
fig = plt.figure(figsize=(25, 16))
for class_id in sorted(train_data['diagnosis'].unique()):
    for i, (idx, row) in enumerate(train_data.loc[train_data['diagnosis'] == class_id].sample(5).iterrows()):
        ax = fig.add_subplot(5,5, class_id * 5 + i + 1, xticks=[], yticks=[])
        # Reuse train_path rather than repeating a second, relative spelling of the folder.
        path = train_path + row['id_code'] + '.png'
        image = circle_crop(path)
        image = change_ben_color(image, 30)
        image = cv2.resize(image, (300, 300))
        # Draw on the axes just created instead of relying on pyplot's implicit current axes.
        ax.imshow(image, cmap='gray')

        ax.set_title('%d-%d-%s' % (class_id, idx, row['id_code']))

In [None]:
from sklearn.model_selection import train_test_split

# Split ids/labels; only the "train" side is kept as the working training table.
# NOTE(review): test_size=0.9 leaves just 10% of the rows for training -- confirm
# this is deliberate subsampling and not a swapped ratio.
train_ids, val_set, _train_labels, val_label = train_test_split(
    train_data['id_code'], train_data['diagnosis'], test_size=0.9, random_state=42)

# Pick the kept rows by index label in their original order. This replaces the
# previous trick of assigning the shorter Series back into the columns and
# dropping the resulting NaN rows, which also turned the integer labels into floats.
train_data = train_data.loc[train_data.index.isin(train_ids.index)]
train_data = train_data.reset_index(drop=True)
train_data.head(10)

# augmentation 1 
# transforms

In [None]:
# torchvision pipeline for PIL images: fixed 224x224 size, light flip/rotation
# augmentation, tensor conversion, then per-channel normalisation.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(10),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# 1. No color conversion

In [None]:
from PIL import Image

class Dataset1():
    """Map-style dataset of (image tensor, diagnosis) pairs without colour enhancement.

    Images are read with OpenCV, masked to a centred circle, cropped with the
    notebook-level ``crop_image`` helper, converted to a PIL RGB image and
    passed through ``transform``.
    """

    def __init__(self, data, root, transform):
        """Store file paths, labels and the transform.

        Args:
            data: table with an ``id_code`` column (file stems) and a
                ``diagnosis`` column (labels).
            root: prefix concatenated as a plain string in front of every
                ``id_code``, so it has to end with a path separator.
            transform: callable applied to the PIL image.
        """
        self.files = list(root + data['id_code'] + '.png')
        # Re-number the labels 0..n-1 so the positional idx used in __getitem__
        # stays valid even when `data` carries a non-default index.
        self.targets = data['diagnosis'].reset_index(drop=True)
        self.transform = transform
        
    def __len__(self):
        """Return the number of images."""
        return len(self.files)
    
    def circle_crop(self,path):
        """Read ``path``, zero everything outside the centred circle, then run ``crop_image``.

        Raises:
            FileNotFoundError: if OpenCV cannot read the file.
        """
        img = cv2.imread(path)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError('Could not read image: {}'.format(path))
        height, width, depth = img.shape       
        x = int(width/2)
        y = int(height/2)
        r = np.amin((x,y))
        circle_img = np.zeros((height, width), np.uint8)
        cv2.circle(circle_img, (x,y), int(r), 1, thickness=-1)
        img = cv2.bitwise_and(img, img, mask=circle_img)
        img = crop_image(img)
        return img
    
    def __getitem__(self, idx):
        """Return ``(x, y)``: the transformed image and the label as a float tensor of shape (1,)."""
        img = self.circle_crop(self.files[idx])
        # OpenCV decodes to BGR, while PIL and the RGB-ordered normalisation
        # statistics used with the pretrained network expect RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img).convert('RGB')
        x = self.transform(img)
        y = torch.tensor(self.targets[idx]).unsqueeze(0).float()
        return x, y

In [None]:
# Wrap the training table in Dataset1 and serve it in shuffled batches of 64.
train_dataset = Dataset1(data=train_data, root=train_path, transform=transform)
dataloader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)

In [None]:
# Sanity check: shapes of the first sample's image tensor and label tensor.
x, y = train_dataset[0]
for tensor in (x, y):
    print(tensor.shape)

In [None]:
# Sanity check: pull one batch and show the image/label batch shapes.
first_batch = next(iter(dataloader))
images, labels = first_batch
(images.shape, labels.shape)

In [None]:
# ResNet-18 backbone initialised from torchvision's pretrained weights.
model_ft = models.resnet18(pretrained=True)

In [None]:
# Number of parameter tensors, and the size of the first one.
params = [p for p in model_ft.parameters()]
(len(params), params[0].size())

In [None]:
# Swap the classifier head for a single-output linear layer, then move the
# whole network to the selected device.
num_features = model_ft.fc.in_features
model_ft.fc = nn.Linear(in_features=num_features, out_features=1)

model_ft = model_ft.to(device)

In [None]:
# The head has a single output trained as a regression, so the loss is MSE.
# (CrossEntropyLoss was set here before, but it does not fit a 1-output head
# and the training cell replaced it with MSELoss anyway.)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(lr=1e-4, params=model_ft.parameters())
# StepLR with its default decay factor, applied every 10 scheduler steps.
scheduler = lr_scheduler.StepLR(optimizer, step_size=10)

In [None]:
since = time.time()
# Mean-squared error on the single regression output of the network.
criterion = torch.nn.MSELoss()
num_epochs = 15
for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)
    model_ft.train()
    running_loss = 0.0  # sum of per-sample losses seen so far this epoch
    seen = 0            # number of samples processed so far this epoch
    tk0 = tqdm(dataloader, total=int(len(dataloader)))
    for inputs, labels in tk0:
        inputs = inputs.to(device, dtype=torch.float)
        labels = labels.to(device, dtype=torch.float)
        optimizer.zero_grad()
        with torch.set_grad_enabled(True):
            outputs = model_ft(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        # loss is a batch mean, so weight it by the real batch size; the last
        # batch can be smaller than dataloader.batch_size.
        running_loss += loss.item() * inputs.size(0)
        seen += inputs.size(0)
        tk0.set_postfix(loss=(running_loss / seen))
    # Per-sample mean. Dividing the sample-weighted sum by the number of
    # batches, as before, inflated the reported loss by roughly the batch size.
    epoch_loss = running_loss / max(seen, 1)
    print('Training Loss: {:.4f}'.format(epoch_loss))
    scheduler.step()

time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
torch.save(model_ft.state_dict(), "model1.bin")

# 2. change ben color

In [None]:
from PIL import Image

class Dataset2():
    """Map-style dataset of (image tensor, diagnosis) pairs using ``change_ben_color``.

    Images are read with OpenCV, masked to a centred circle, cropped with the
    notebook-level ``crop_image`` helper, colour-processed, converted to a PIL
    RGB image and passed through ``transform``.
    """

    def __init__(self, data, root, transform):
        """Store file paths, labels and the transform.

        Args:
            data: table with an ``id_code`` column (file stems) and a
                ``diagnosis`` column (labels).
            root: prefix concatenated as a plain string in front of every
                ``id_code``, so it has to end with a path separator.
            transform: callable applied to the PIL image.
        """
        self.files = list(root + data['id_code'] + '.png')
        # Re-number the labels 0..n-1 so the positional idx used in __getitem__
        # stays valid even when `data` carries a non-default index.
        self.targets = data['diagnosis'].reset_index(drop=True)
        self.transform = transform
        
    def __len__(self):
        """Return the number of images."""
        return len(self.files)
    
    def circle_crop(self,path):
        """Read ``path``, zero everything outside the centred circle, then run ``crop_image``.

        Raises:
            FileNotFoundError: if OpenCV cannot read the file.
        """
        img = cv2.imread(path)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError('Could not read image: {}'.format(path))
        height, width, depth = img.shape       
        x = int(width/2)
        y = int(height/2)
        r = np.amin((x,y))
        circle_img = np.zeros((height, width), np.uint8)
        cv2.circle(circle_img, (x,y), int(r), 1, thickness=-1)
        img = cv2.bitwise_and(img, img, mask=circle_img)
        img = crop_image(img)
        return img

    def change_ben_color(self,image, sigmaX=30):
        """BGR -> RGB, then 4*img - 4*blur + 128 followed by -4*img + 4*blur + 128."""
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.addWeighted(image,4, cv2.GaussianBlur(image , (0,0),sigmaX) ,-4 ,128)        
        image = cv2.addWeighted(image,-4, cv2.GaussianBlur(image , (0,0),sigmaX) ,4 ,128)        
        return image
    
    def change_gray_color(self,image, sigmaX=40):
        """BGR -> grayscale, then the same two blur-difference passes as ``change_ben_color``."""
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = cv2.addWeighted(image,4, cv2.GaussianBlur(image , (0,0),sigmaX) ,-4 ,128)        
        image = cv2.addWeighted(image,-4, cv2.GaussianBlur(image , (0,0),sigmaX) ,4 ,128)        
        return image
    
    def __getitem__(self, idx):
        """Return ``(x, y)``: the transformed image and the label as a float tensor of shape (1,)."""
        img = self.circle_crop(self.files[idx])
        # change_ben_color also converts the BGR array from OpenCV to RGB.
        img = self.change_ben_color(img) 
        img = Image.fromarray(img).convert('RGB')
        x = self.transform(img)
        y = torch.tensor(self.targets[idx]).unsqueeze(0).float()
        return x, y

In [None]:
# Wrap the training table in Dataset2 and serve it in shuffled batches of 64.
train_dataset = Dataset2(data=train_data, root=train_path, transform=transform)
dataloader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)

In [None]:
# Pretrained ResNet-18 with its head replaced by a single-output linear layer.
model2=models.resnet18(pretrained=True)

num_features=model2.fc.in_features
model2.fc=nn.Linear(num_features,1)

model2=model2.to(device)

# The head has a single output trained as a regression, so the loss is MSE.
# (CrossEntropyLoss was set here before, but it does not fit a 1-output head
# and the training cell replaced it with MSELoss anyway.)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(lr=1e-4, params=model2.parameters())
scheduler = lr_scheduler.StepLR(optimizer, step_size=10)

In [None]:
since = time.time()
# Mean-squared error on the single regression output of the network.
criterion = torch.nn.MSELoss()
num_epochs = 15
for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)
    model2.train()
    running_loss = 0.0  # sum of per-sample losses seen so far this epoch
    seen = 0            # number of samples processed so far this epoch
    tk0 = tqdm(dataloader, total=int(len(dataloader)))
    for inputs, labels in tk0:
        inputs = inputs.to(device, dtype=torch.float)
        labels = labels.to(device, dtype=torch.float)
        optimizer.zero_grad()
        with torch.set_grad_enabled(True):
            outputs = model2(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        # loss is a batch mean, so weight it by the real batch size; the last
        # batch can be smaller than dataloader.batch_size.
        running_loss += loss.item() * inputs.size(0)
        seen += inputs.size(0)
        tk0.set_postfix(loss=(running_loss / seen))
    # Per-sample mean. Dividing the sample-weighted sum by the number of
    # batches, as before, inflated the reported loss by roughly the batch size.
    epoch_loss = running_loss / max(seen, 1)
    print('Training Loss: {:.4f}'.format(epoch_loss))
    scheduler.step()

time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
torch.save(model2.state_dict(), "model2.bin")

# 3. change gray color

In [None]:
from PIL import Image

class Dataset3():
    """Map-style dataset of (image tensor, diagnosis) pairs using ``change_gray_color``.

    Images are read with OpenCV, masked to a centred circle, cropped with the
    notebook-level ``crop_image`` helper, converted to processed grayscale,
    expanded to a PIL RGB image and passed through ``transform``.
    """

    def __init__(self, data, root, transform):
        """Store file paths, labels and the transform.

        Args:
            data: table with an ``id_code`` column (file stems) and a
                ``diagnosis`` column (labels).
            root: prefix concatenated as a plain string in front of every
                ``id_code``, so it has to end with a path separator.
            transform: callable applied to the PIL image.
        """
        self.files = list(root + data['id_code'] + '.png')
        # Re-number the labels 0..n-1 so the positional idx used in __getitem__
        # stays valid even when `data` carries a non-default index.
        self.targets = data['diagnosis'].reset_index(drop=True)
        self.transform = transform
        
    def __len__(self):
        """Return the number of images."""
        return len(self.files)
    
    def circle_crop(self,path):
        """Read ``path``, zero everything outside the centred circle, then run ``crop_image``.

        Raises:
            FileNotFoundError: if OpenCV cannot read the file.
        """
        img = cv2.imread(path)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError('Could not read image: {}'.format(path))
        height, width, depth = img.shape       
        x = int(width/2)
        y = int(height/2)
        r = np.amin((x,y))
        circle_img = np.zeros((height, width), np.uint8)
        cv2.circle(circle_img, (x,y), int(r), 1, thickness=-1)
        img = cv2.bitwise_and(img, img, mask=circle_img)
        img = crop_image(img)
        return img

    def change_ben_color(self,image, sigmaX=30):
        """BGR -> RGB, then 4*img - 4*blur + 128 followed by -4*img + 4*blur + 128."""
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.addWeighted(image,4, cv2.GaussianBlur(image , (0,0),sigmaX) ,-4 ,128)        
        image = cv2.addWeighted(image,-4, cv2.GaussianBlur(image , (0,0),sigmaX) ,4 ,128)        
        return image
    
    def change_gray_color(self,image, sigmaX=40):
        """BGR -> grayscale, then the same two blur-difference passes as ``change_ben_color``."""
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = cv2.addWeighted(image,4, cv2.GaussianBlur(image , (0,0),sigmaX) ,-4 ,128)        
        image = cv2.addWeighted(image,-4, cv2.GaussianBlur(image , (0,0),sigmaX) ,4 ,128)        
        return image
    
    def __getitem__(self, idx):
        """Return ``(x, y)``: the transformed image and the label as a float tensor of shape (1,)."""
        img = self.circle_crop(self.files[idx])
        # Single-channel result; PIL's convert('RGB') below replicates it to three channels.
        img = self.change_gray_color(img) 
        img = Image.fromarray(img).convert('RGB')
        x = self.transform(img)
        y = torch.tensor(self.targets[idx]).unsqueeze(0).float()
        return x, y

In [None]:
# Wrap the training table in Dataset3 and serve it in shuffled batches of 64.
train_dataset = Dataset3(data=train_data, root=train_path, transform=transform)
dataloader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)

In [None]:
# Pretrained ResNet-18 with its head replaced by a single-output linear layer.
model3=models.resnet18(pretrained=True)

num_features=model3.fc.in_features
model3.fc=nn.Linear(num_features,1)

model3=model3.to(device)

# The head has a single output trained as a regression, so the loss is MSE.
# (CrossEntropyLoss was set here before, but it does not fit a 1-output head
# and the training cell replaced it with MSELoss anyway.)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(lr=1e-4, params=model3.parameters())
scheduler = lr_scheduler.StepLR(optimizer, step_size=10)

In [None]:
since = time.time()
# Mean-squared error on the single regression output of the network.
criterion = torch.nn.MSELoss()
num_epochs = 15
for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)
    model3.train()
    running_loss = 0.0  # sum of per-sample losses seen so far this epoch
    seen = 0            # number of samples processed so far this epoch
    tk0 = tqdm(dataloader, total=int(len(dataloader)))
    for inputs, labels in tk0:
        inputs = inputs.to(device, dtype=torch.float)
        labels = labels.to(device, dtype=torch.float)
        optimizer.zero_grad()
        with torch.set_grad_enabled(True):
            outputs = model3(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        # loss is a batch mean, so weight it by the real batch size; the last
        # batch can be smaller than dataloader.batch_size.
        running_loss += loss.item() * inputs.size(0)
        seen += inputs.size(0)
        tk0.set_postfix(loss=(running_loss / seen))
    # Per-sample mean. Dividing the sample-weighted sum by the number of
    # batches, as before, inflated the reported loss by roughly the batch size.
    epoch_loss = running_loss / max(seen, 1)
    print('Training Loss: {:.4f}'.format(epoch_loss))
    scheduler.step()

time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
torch.save(model3.state_dict(), "model3.bin")

# augmentation 2 
# 1. albumentations

In [None]:
import albumentations
import albumentations.pytorch

In [None]:
# Disabled copy of the torchvision pipeline, kept only for comparison with the
# albumentations version below. It is a bare string literal, so nothing is
# executed; as the cell's only expression the notebook echoes the string.
'''
transform = torchvision.transforms.Compose([
    transforms.Resize((224,224)),
    transforms.RandomHorizontalFlip(), #0.5
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
'''

In [None]:
# This one! (original author's emphasis)
# albumentations pipeline for numpy images: resize, random flip / 90-degree
# rotation, normalise, then convert to a tensor.
albumentations_transform = albumentations.Compose(
    [
        albumentations.Resize(height=224, width=224),
        albumentations.HorizontalFlip(),  # Same with transforms.RandomHorizontalFlip()
        albumentations.RandomRotate90(),
        albumentations.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
        albumentations.pytorch.transforms.ToTensorV2(),
    ]
)

In [None]:

class Dataset4():
    """Map-style dataset of (image tensor, diagnosis) pairs for an albumentations transform.

    Images are read with OpenCV, masked to a centred circle, cropped with the
    notebook-level ``crop_image`` helper, converted to RGB and passed to
    ``transform`` as the ``image`` keyword.
    """

    def __init__(self, data, root, transform):
        """Store file paths, labels and the transform.

        Args:
            data: table with an ``id_code`` column (file stems) and a
                ``diagnosis`` column (labels).
            root: prefix concatenated as a plain string in front of every
                ``id_code``, so it has to end with a path separator.
            transform: callable invoked as ``transform(image=array)`` that
                returns a mapping with an ``'image'`` entry.
        """
        self.files = list(root + data['id_code'] + '.png')
        # Re-number the labels 0..n-1 so the positional idx used in __getitem__
        # stays valid even when `data` carries a non-default index.
        self.targets = data['diagnosis'].reset_index(drop=True)
        self.transform = transform
        
    def __len__(self):
        """Return the number of images."""
        return len(self.files)
    
    def circle_crop(self,path):
        """Read ``path``, zero everything outside the centred circle, then run ``crop_image``.

        Raises:
            FileNotFoundError: if OpenCV cannot read the file.
        """
        img = cv2.imread(path)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError('Could not read image: {}'.format(path))
        height, width, depth = img.shape       
        x = int(width/2)
        y = int(height/2)
        r = np.amin((x,y))
        circle_img = np.zeros((height, width), np.uint8)
        cv2.circle(circle_img, (x,y), int(r), 1, thickness=-1)
        img = cv2.bitwise_and(img, img, mask=circle_img)
        img = crop_image(img)
        return img

    def change_ben_color(self,image, sigmaX=30):
        """BGR -> RGB, then 4*img - 4*blur + 128 followed by -4*img + 4*blur + 128 (not used by __getitem__)."""
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.addWeighted(image,4, cv2.GaussianBlur(image , (0,0),sigmaX) ,-4 ,128)        
        image = cv2.addWeighted(image,-4, cv2.GaussianBlur(image , (0,0),sigmaX) ,4 ,128)        
        return image
    
    def change_gray_color(self,image, sigmaX=40):
        """BGR -> grayscale, then the same two blur-difference passes (not used by __getitem__)."""
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = cv2.addWeighted(image,4, cv2.GaussianBlur(image , (0,0),sigmaX) ,-4 ,128)        
        image = cv2.addWeighted(image,-4, cv2.GaussianBlur(image , (0,0),sigmaX) ,4 ,128)        
        return image
    
    def __getitem__(self, idx):
        """Return ``(x, y)``: the transformed image and the label as a float tensor of shape (1,)."""
        img = self.circle_crop(self.files[idx])
        # OpenCV decodes to BGR, while the RGB-ordered normalisation statistics
        # used with the pretrained network expect RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        x = self.transform(image=img)['image'] #dictionary 
        y = torch.tensor(self.targets[idx]).unsqueeze(0).float()
        return x, y

In [None]:
# Wrap the training table in Dataset4 (albumentations pipeline) and serve it in shuffled batches of 64.
train_dataset = Dataset4(data=train_data, root=train_path, transform=albumentations_transform)
dataloader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)

In [None]:
# Pretrained ResNet-18 with its head replaced by a single-output linear layer.
model4=models.resnet18(pretrained=True)

num_features=model4.fc.in_features
model4.fc=nn.Linear(num_features,1)

# Move model4 itself to the device. This line used to read `model3.to(device)`,
# which rebound model4 to the already-trained model3: the fresh network above
# was discarded and this experiment kept training (and later saved) model3.
model4=model4.to(device)

# The head has a single output trained as a regression, so the loss is MSE.
# (CrossEntropyLoss was set here before, but it does not fit a 1-output head
# and the training cell replaced it with MSELoss anyway.)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(lr=1e-4, params=model4.parameters())
scheduler = lr_scheduler.StepLR(optimizer, step_size=10)

In [None]:
# Sanity check: shapes of one batch from the albumentations-backed loader.
first_batch = next(iter(dataloader))
images, labels = first_batch
print(images.shape, labels.shape)

# Number of parameter tensors in model4, and the size of the first one.
params = [p for p in model4.parameters()]
(len(params), params[0].size())

In [None]:
since = time.time()
# Mean-squared error on the single regression output of the network.
criterion = torch.nn.MSELoss()
num_epochs = 15
for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)
    model4.train()
    running_loss = 0.0  # sum of per-sample losses seen so far this epoch
    seen = 0            # number of samples processed so far this epoch
    tk0 = tqdm(dataloader, total=int(len(dataloader)))
    for inputs, labels in tk0:
        inputs = inputs.to(device, dtype=torch.float)
        labels = labels.to(device, dtype=torch.float)
        optimizer.zero_grad()
        with torch.set_grad_enabled(True):
            outputs = model4(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        # loss is a batch mean, so weight it by the real batch size; the last
        # batch can be smaller than dataloader.batch_size.
        running_loss += loss.item() * inputs.size(0)
        seen += inputs.size(0)
        tk0.set_postfix(loss=(running_loss / seen))
    # Per-sample mean. Dividing the sample-weighted sum by the number of
    # batches, as before, inflated the reported loss by roughly the batch size.
    epoch_loss = running_loss / max(seen, 1)
    print('Training Loss: {:.4f}'.format(epoch_loss))
    scheduler.step()

time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
torch.save(model4.state_dict(), "model4.bin")

# 2. albumentations OneOf

1.ResizedRandomCrop

In [None]:
# Resize to 256x256, take a random 224x224 crop, then always apply exactly one
# of: horizontal flip, 90-degree rotation, vertical flip.
albumentations_transform_oneof = albumentations.Compose(
    [
        albumentations.Resize(height=256, width=256),
        albumentations.RandomCrop(height=224, width=224),
        albumentations.OneOf(
            [
                albumentations.HorizontalFlip(p=1),
                albumentations.RandomRotate90(p=1),
                albumentations.VerticalFlip(p=1),
            ],
            p=1,
        ),
        albumentations.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
        albumentations.pytorch.transforms.ToTensorV2(),
    ]
)

In [None]:
class Dataset5():
    """Map-style dataset of (image tensor, diagnosis) pairs for an albumentations transform.

    Images are read with OpenCV, masked to a centred circle, cropped with the
    notebook-level ``crop_image`` helper, converted to RGB and passed to
    ``transform`` as the ``image`` keyword.
    """

    def __init__(self, data, root, transform):
        """Store file paths, labels and the transform.

        Args:
            data: table with an ``id_code`` column (file stems) and a
                ``diagnosis`` column (labels).
            root: prefix concatenated as a plain string in front of every
                ``id_code``, so it has to end with a path separator.
            transform: callable invoked as ``transform(image=array)`` that
                returns a mapping with an ``'image'`` entry.
        """
        self.files = list(root + data['id_code'] + '.png')
        # Re-number the labels 0..n-1 so the positional idx used in __getitem__
        # stays valid even when `data` carries a non-default index.
        self.targets = data['diagnosis'].reset_index(drop=True)
        self.transform = transform
        
    def __len__(self):
        """Return the number of images."""
        return len(self.files)
    
    def circle_crop(self,path):
        """Read ``path``, zero everything outside the centred circle, then run ``crop_image``.

        Raises:
            FileNotFoundError: if OpenCV cannot read the file.
        """
        img = cv2.imread(path)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError('Could not read image: {}'.format(path))
        height, width, depth = img.shape       
        x = int(width/2)
        y = int(height/2)
        r = np.amin((x,y))
        circle_img = np.zeros((height, width), np.uint8)
        cv2.circle(circle_img, (x,y), int(r), 1, thickness=-1)
        img = cv2.bitwise_and(img, img, mask=circle_img)
        img = crop_image(img)
        return img

    def change_ben_color(self,image, sigmaX=30):
        """BGR -> RGB, then 4*img - 4*blur + 128 followed by -4*img + 4*blur + 128 (not used by __getitem__)."""
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.addWeighted(image,4, cv2.GaussianBlur(image , (0,0),sigmaX) ,-4 ,128)        
        image = cv2.addWeighted(image,-4, cv2.GaussianBlur(image , (0,0),sigmaX) ,4 ,128)        
        return image
    
    def change_gray_color(self,image, sigmaX=40):
        """BGR -> grayscale, then the same two blur-difference passes (not used by __getitem__)."""
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = cv2.addWeighted(image,4, cv2.GaussianBlur(image , (0,0),sigmaX) ,-4 ,128)        
        image = cv2.addWeighted(image,-4, cv2.GaussianBlur(image , (0,0),sigmaX) ,4 ,128)        
        return image
    
    def __getitem__(self, idx):
        """Return ``(x, y)``: the transformed image and the label as a float tensor of shape (1,)."""
        img = self.circle_crop(self.files[idx])
        # OpenCV decodes to BGR, while the RGB-ordered normalisation statistics
        # used with the pretrained network expect RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        x = self.transform(image=img)['image'] #dictionary 
        y = torch.tensor(self.targets[idx]).unsqueeze(0).float()
        return x, y

In [None]:
# Wrap the training table in Dataset5 (resize + random crop + OneOf pipeline) and serve it in shuffled batches of 64.
train_dataset = Dataset5(data=train_data, root=train_path, transform=albumentations_transform_oneof)
dataloader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)

In [None]:
# Pretrained ResNet-18 with its head replaced by a single-output linear layer.
model5=models.resnet18(pretrained=True)

num_features=model5.fc.in_features
model5.fc=nn.Linear(num_features,1)

model5=model5.to(device)

# The head has a single output trained as a regression, so the loss is MSE.
# (CrossEntropyLoss was set here before, but it does not fit a 1-output head
# and the training cell replaced it with MSELoss anyway.)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(lr=1e-4, params=model5.parameters())
scheduler = lr_scheduler.StepLR(optimizer, step_size=10)

In [None]:
since = time.time()
# Mean-squared error on the single regression output of the network.
criterion = torch.nn.MSELoss()
num_epochs = 15
for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)
    model5.train()
    running_loss = 0.0  # sum of per-sample losses seen so far this epoch
    seen = 0            # number of samples processed so far this epoch
    tk0 = tqdm(dataloader, total=int(len(dataloader)))
    for inputs, labels in tk0:
        inputs = inputs.to(device, dtype=torch.float)
        labels = labels.to(device, dtype=torch.float)
        optimizer.zero_grad()
        with torch.set_grad_enabled(True):
            outputs = model5(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        # loss is a batch mean, so weight it by the real batch size; the last
        # batch can be smaller than dataloader.batch_size.
        running_loss += loss.item() * inputs.size(0)
        seen += inputs.size(0)
        tk0.set_postfix(loss=(running_loss / seen))
    # Per-sample mean. Dividing the sample-weighted sum by the number of
    # batches, as before, inflated the reported loss by roughly the batch size.
    epoch_loss = running_loss / max(seen, 1)
    print('Training Loss: {:.4f}'.format(epoch_loss))
    scheduler.step()

time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
torch.save(model5.state_dict(), "model5.bin")

2. just resize

In [None]:
# Plain 224x224 resize followed by a OneOf group (horizontal flip / 90-degree
# rotation / vertical flip) left at OneOf's default probability.
albumentations_transform_resize = albumentations.Compose(
    [
        albumentations.Resize(height=224, width=224),
        albumentations.OneOf(
            [
                albumentations.HorizontalFlip(p=1),
                albumentations.RandomRotate90(p=1),
                albumentations.VerticalFlip(p=1),
            ]
        ),
        albumentations.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
        albumentations.pytorch.transforms.ToTensorV2(),
    ]
)

In [None]:
class Dataset6():
    """Map-style dataset of (image tensor, diagnosis) pairs for an albumentations transform.

    Images are read with OpenCV, masked to a centred circle, cropped with the
    notebook-level ``crop_image`` helper, converted to RGB and passed to
    ``transform`` as the ``image`` keyword.
    """

    def __init__(self, data, root, transform):
        """Store file paths, labels and the transform.

        Args:
            data: table with an ``id_code`` column (file stems) and a
                ``diagnosis`` column (labels).
            root: prefix concatenated as a plain string in front of every
                ``id_code``, so it has to end with a path separator.
            transform: callable invoked as ``transform(image=array)`` that
                returns a mapping with an ``'image'`` entry.
        """
        self.files = list(root + data['id_code'] + '.png')
        # Re-number the labels 0..n-1 so the positional idx used in __getitem__
        # stays valid even when `data` carries a non-default index.
        self.targets = data['diagnosis'].reset_index(drop=True)
        self.transform = transform
        
    def __len__(self):
        """Return the number of images."""
        return len(self.files)
    
    def circle_crop(self,path):
        """Read ``path``, zero everything outside the centred circle, then run ``crop_image``.

        Raises:
            FileNotFoundError: if OpenCV cannot read the file.
        """
        img = cv2.imread(path)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError('Could not read image: {}'.format(path))
        height, width, depth = img.shape       
        x = int(width/2)
        y = int(height/2)
        r = np.amin((x,y))
        circle_img = np.zeros((height, width), np.uint8)
        cv2.circle(circle_img, (x,y), int(r), 1, thickness=-1)
        img = cv2.bitwise_and(img, img, mask=circle_img)
        img = crop_image(img)
        return img

    def __getitem__(self, idx):
        """Return ``(x, y)``: the transformed image and the label as a float tensor of shape (1,)."""
        img = self.circle_crop(self.files[idx])
        # OpenCV decodes to BGR, while the RGB-ordered normalisation statistics
        # used with the pretrained network expect RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        x = self.transform(image=img)['image'] #dictionary 
        y = torch.tensor(self.targets[idx]).unsqueeze(0).float()
        return x, y

In [None]:
# Wrap the training table in Dataset6 (resize + default-probability OneOf pipeline) and serve it in shuffled batches of 64.
train_dataset = Dataset6(data=train_data, root=train_path, transform=albumentations_transform_resize)
dataloader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)

In [None]:
# Pretrained ResNet-18 with its head replaced by a single-output linear layer.
model6=models.resnet18(pretrained=True)

num_features=model6.fc.in_features
model6.fc=nn.Linear(num_features,1)

model6=model6.to(device)

# The head has a single output trained as a regression, so the loss is MSE.
# (CrossEntropyLoss was set here before, but it does not fit a 1-output head
# and the training cell replaced it with MSELoss anyway.)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(lr=1e-4, params=model6.parameters())
scheduler = lr_scheduler.StepLR(optimizer, step_size=10)

In [None]:
since = time.time()
# Mean-squared error on the single regression output of the network.
criterion = torch.nn.MSELoss()
num_epochs = 15
for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)
    model6.train()
    running_loss = 0.0  # sum of per-sample losses seen so far this epoch
    seen = 0            # number of samples processed so far this epoch
    tk0 = tqdm(dataloader, total=int(len(dataloader)))
    for inputs, labels in tk0:
        inputs = inputs.to(device, dtype=torch.float)
        labels = labels.to(device, dtype=torch.float)
        optimizer.zero_grad()
        with torch.set_grad_enabled(True):
            outputs = model6(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        # loss is a batch mean, so weight it by the real batch size; the last
        # batch can be smaller than dataloader.batch_size.
        running_loss += loss.item() * inputs.size(0)
        seen += inputs.size(0)
        tk0.set_postfix(loss=(running_loss / seen))
    # Per-sample mean. Dividing the sample-weighted sum by the number of
    # batches, as before, inflated the reported loss by roughly the batch size.
    epoch_loss = running_loss / max(seen, 1)
    print('Training Loss: {:.4f}'.format(epoch_loss))
    scheduler.step()

time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
torch.save(model6.state_dict(), "model6.bin")

3. p =1

In [None]:
# Plain 224x224 resize, then always apply exactly one of: horizontal flip,
# 90-degree rotation, vertical flip (OneOf with p=1).
albumentations_transform_ = albumentations.Compose(
    [
        albumentations.Resize(height=224, width=224),
        albumentations.OneOf(
            [
                albumentations.HorizontalFlip(p=1),
                albumentations.RandomRotate90(p=1),
                albumentations.VerticalFlip(p=1),
            ],
            p=1,
        ),
        albumentations.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
        albumentations.pytorch.transforms.ToTensorV2(),
    ]
)

In [None]:
class Dataset7():
    """Map-style dataset of (image tensor, diagnosis) pairs for an albumentations transform.

    Images are read with OpenCV, masked to a centred circle, cropped with the
    notebook-level ``crop_image`` helper, converted to RGB and passed to
    ``transform`` as the ``image`` keyword.
    """

    def __init__(self, data, root, transform):
        """Store file paths, labels and the transform.

        Args:
            data: table with an ``id_code`` column (file stems) and a
                ``diagnosis`` column (labels).
            root: prefix concatenated as a plain string in front of every
                ``id_code``, so it has to end with a path separator.
            transform: callable invoked as ``transform(image=array)`` that
                returns a mapping with an ``'image'`` entry.
        """
        self.files = list(root + data['id_code'] + '.png')
        # Re-number the labels 0..n-1 so the positional idx used in __getitem__
        # stays valid even when `data` carries a non-default index.
        self.targets = data['diagnosis'].reset_index(drop=True)
        self.transform = transform
        
    def __len__(self):
        """Return the number of images."""
        return len(self.files)
    
    def circle_crop(self,path):
        """Read ``path``, zero everything outside the centred circle, then run ``crop_image``.

        Raises:
            FileNotFoundError: if OpenCV cannot read the file.
        """
        img = cv2.imread(path)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError('Could not read image: {}'.format(path))
        height, width, depth = img.shape       
        x = int(width/2)
        y = int(height/2)
        r = np.amin((x,y))
        circle_img = np.zeros((height, width), np.uint8)
        cv2.circle(circle_img, (x,y), int(r), 1, thickness=-1)
        img = cv2.bitwise_and(img, img, mask=circle_img)
        img = crop_image(img)
        return img

    def __getitem__(self, idx):
        """Return ``(x, y)``: the transformed image and the label as a float tensor of shape (1,)."""
        img = self.circle_crop(self.files[idx])
        # OpenCV decodes to BGR, while the RGB-ordered normalisation statistics
        # used with the pretrained network expect RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        x = self.transform(image=img)['image'] #dictionary 
        y = torch.tensor(self.targets[idx]).unsqueeze(0).float()
        return x, y

In [None]:
# Wrap the training table in Dataset7 (resize + OneOf p=1 pipeline) and serve it in shuffled batches of 64.
train_dataset = Dataset7(data=train_data, root=train_path, transform=albumentations_transform_)
dataloader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)

In [None]:
# Pretrained ResNet-18 with its head replaced by a single-output linear layer.
model7=models.resnet18(pretrained=True)

num_features=model7.fc.in_features
model7.fc=nn.Linear(num_features,1)

model7=model7.to(device)

# The head has a single output trained as a regression, so the loss is MSE.
# (CrossEntropyLoss was set here before, but it does not fit a 1-output head
# and the training cell replaced it with MSELoss anyway.)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(lr=1e-4, params=model7.parameters())
scheduler = lr_scheduler.StepLR(optimizer, step_size=10)

In [None]:
since = time.time()
# Mean-squared error on the single regression output of the network.
criterion = torch.nn.MSELoss()
num_epochs = 15
for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)
    model7.train()
    running_loss = 0.0  # sum of per-sample losses seen so far this epoch
    seen = 0            # number of samples processed so far this epoch
    tk0 = tqdm(dataloader, total=int(len(dataloader)))
    for inputs, labels in tk0:
        inputs = inputs.to(device, dtype=torch.float)
        labels = labels.to(device, dtype=torch.float)
        optimizer.zero_grad()
        with torch.set_grad_enabled(True):
            outputs = model7(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        # loss is a batch mean, so weight it by the real batch size; the last
        # batch can be smaller than dataloader.batch_size.
        running_loss += loss.item() * inputs.size(0)
        seen += inputs.size(0)
        tk0.set_postfix(loss=(running_loss / seen))
    # Per-sample mean. Dividing the sample-weighted sum by the number of
    # batches, as before, inflated the reported loss by roughly the batch size.
    epoch_loss = running_loss / max(seen, 1)
    print('Training Loss: {:.4f}'.format(epoch_loss))
    scheduler.step()

time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
torch.save(model7.state_dict(), "model7.bin")

4. not augmented

In [None]:
# Baseline pipeline without augmentation: resize, normalise, convert to tensor.
albumentations_transform__ = albumentations.Compose(
    [
        albumentations.Resize(height=224, width=224),
        albumentations.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
        albumentations.pytorch.transforms.ToTensorV2(),
    ]
)

In [None]:
class Dataset8():
    """Map-style dataset yielding (image, target) pairs for regression training.

    Args:
        data: DataFrame with an ``id_code`` column (image file stem) and a
            ``diagnosis`` column (numeric label).
        root: Directory prefix (including trailing separator) prepended to each file name.
        transform: Albumentations-style callable invoked as ``transform(image=img)``
            and returning a dict with an ``'image'`` entry.
    """

    def __init__(self, data, root, transform):
        self.files = list(root + data['id_code'] + '.png')
        self.targets = data['diagnosis']
        self.transform = transform

    def __len__(self):
        """Number of samples (one per row of ``data``)."""
        return len(self.files)

    def circle_crop(self, path):
        """Load an image, zero everything outside the centred inscribed circle, then crop.

        Raises:
            OSError: if OpenCV cannot read ``path``.
        """
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError("cv2.imread could not read image: {}".format(path))
        # NOTE(review): cv2.imread yields channels in BGR order and no conversion is
        # visible here, while the Normalize statistics are listed in the usual RGB
        # order - confirm whether crop_image or the inference path accounts for this.
        height, width = img.shape[:2]
        cx, cy = width // 2, height // 2
        radius = min(cx, cy)
        # Single-channel mask: 1 inside the circle, 0 outside.
        mask = np.zeros((height, width), np.uint8)
        cv2.circle(mask, (cx, cy), int(radius), 1, thickness=-1)
        img = cv2.bitwise_and(img, img, mask=mask)
        # crop_image is defined elsewhere in the notebook (presumably trims the
        # blank border - verify against its definition).
        return crop_image(img)

    def __getitem__(self, idx):
        """Return ``(x, y)``: the transformed image and a float target tensor of shape (1,)."""
        img = self.circle_crop(self.files[idx])
        x = self.transform(image=img)['image']  # the transform returns a dictionary
        # Positional lookup: `self.files` is a positional list, so the target must be
        # fetched by position too. Plain `self.targets[idx]` is label-based and breaks
        # when `data` carries a non-default index (e.g. after a split or filter).
        y = torch.tensor(self.targets.iloc[idx]).unsqueeze(0).float()
        return x, y

In [None]:
# Non-augmented training set and its shuffled loader (mini-batches of 64).
train_dataset = Dataset8(
    data=train_data,
    root=train_path,
    transform=albumentations_transform__,
)
dataloader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)

In [None]:
# ResNet-18 initialised with torchvision's pretrained weights, fine-tuned here as a regressor.
model8 = models.resnet18(pretrained=True)

# Replace the classification head with a single linear unit so the network
# emits one scalar per image.
num_features = model8.fc.in_features
model8.fc = nn.Linear(num_features, 1)

model8 = model8.to(device)

# MSE is the loss that matches a one-unit regression head and float targets.
# (CrossEntropyLoss over a single logit is degenerate: softmax of one value is always 1.)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(lr=1e-4, params=model8.parameters())
# Learning rate is multiplied by StepLR's default gamma (0.1) every 10 scheduler steps.
scheduler = lr_scheduler.StepLR(optimizer, step_size=10)

In [None]:
# Fine-tune model8 for `num_epochs` epochs with an MSE objective and report the
# mean per-sample training loss after every epoch.
since = time.time()
criterion = torch.nn.MSELoss()
num_epochs = 15
for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)
    model8.train()
    running_loss = 0.0  # sum of per-sample losses accumulated in this epoch
    samples_seen = 0    # number of samples behind `running_loss`
    tk0 = tqdm(dataloader, total=len(dataloader))
    for inputs, labels in tk0:
        inputs = inputs.to(device, dtype=torch.float)
        labels = labels.to(device, dtype=torch.float)
        optimizer.zero_grad()
        # Explicitly enable autograd in case an earlier cell disabled it globally.
        with torch.set_grad_enabled(True):
            outputs = model8(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        # `loss` is a batch mean, so weight it by the real batch size; the last
        # batch of an epoch can be smaller than `dataloader.batch_size`.
        running_loss += loss.item() * inputs.size(0)
        samples_seen += inputs.size(0)
        tk0.set_postfix(loss=(running_loss / samples_seen))
    # Normalise by the number of samples (not the number of batches) so the value
    # is a true per-sample mean; max(..., 1) guards against an empty loader.
    epoch_loss = running_loss / max(samples_seen, 1)
    print('Training Loss: {:.4f}'.format(epoch_loss))
    scheduler.step()

time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
torch.save(model8.state_dict(), "model8.bin")

# augmentation 3

# randaugment

- batch를 추출할 때마다 여러 Augmentation 옵션들 중에서 random하게 추출해서 적용
- 전체 transform 중에 몇 개씩 뽑을 지(N)와 Augmentation의 강도를 어느 정도로 줄지(M)이 hyper parameter

In [None]:
!pip install randaugment
from randaugment import RandAugment

In [None]:

# RandAugment pipeline for PIL images: resize, random horizontal flip,
# RandAugment with the package defaults, tensor conversion, then normalisation.
transform = transforms.Compose(
    transforms=[
        transforms.Resize(size=(224, 224)),
        transforms.RandomHorizontalFlip(p=0.5),
        RandAugment(),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [None]:
class Dataset9():
    """Map-style dataset yielding (image, target) pairs for regression training.

    Args:
        data: DataFrame with an ``id_code`` column (image file stem) and a
            ``diagnosis`` column (numeric label).
        root: Directory prefix (including trailing separator) prepended to each file name.
        transform: Callable applied to a PIL image, invoked as ``transform(img)``.
    """

    def __init__(self, data, root, transform):
        self.files = list(root + data['id_code'] + '.png')
        self.targets = data['diagnosis']
        self.transform = transform

    def __len__(self):
        """Number of samples (one per row of ``data``)."""
        return len(self.files)

    def circle_crop(self, path):
        """Load an image, zero everything outside the centred inscribed circle, then crop.

        Raises:
            OSError: if OpenCV cannot read ``path``.
        """
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError("cv2.imread could not read image: {}".format(path))
        # NOTE(review): cv2.imread yields channels in BGR order and no conversion is
        # visible here; Image.fromarray below will treat the array as RGB - confirm
        # whether crop_image or the inference path accounts for this.
        height, width = img.shape[:2]
        cx, cy = width // 2, height // 2
        radius = min(cx, cy)
        # Single-channel mask: 1 inside the circle, 0 outside.
        mask = np.zeros((height, width), np.uint8)
        cv2.circle(mask, (cx, cy), int(radius), 1, thickness=-1)
        img = cv2.bitwise_and(img, img, mask=mask)
        # crop_image is defined elsewhere in the notebook (presumably trims the
        # blank border - verify against its definition).
        return crop_image(img)

    def __getitem__(self, idx):
        """Return ``(x, y)``: the transformed image and a float target tensor of shape (1,)."""
        img = self.circle_crop(self.files[idx])
        # The transform pipeline operates on PIL images, not numpy arrays.
        img = Image.fromarray(img).convert('RGB')
        x = self.transform(img)
        # Positional lookup: `self.files` is a positional list, so the target must be
        # fetched by position too. Plain `self.targets[idx]` is label-based and breaks
        # when `data` carries a non-default index (e.g. after a split or filter).
        y = torch.tensor(self.targets.iloc[idx]).unsqueeze(0).float()
        return x, y

In [None]:
# RandAugment training set and its shuffled loader (mini-batches of 64).
train_dataset = Dataset9(train_data, train_path, transform)
dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# ResNet-18 initialised with torchvision's pretrained weights, fine-tuned here as a regressor.
model9 = models.resnet18(pretrained=True)

# Replace the classification head with a single linear unit so the network
# emits one scalar per image.
num_features = model9.fc.in_features
model9.fc = nn.Linear(num_features, 1)

model9 = model9.to(device)

# MSE is the loss that matches a one-unit regression head and float targets.
# (CrossEntropyLoss over a single logit is degenerate: softmax of one value is always 1.)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(lr=1e-4, params=model9.parameters())
# Learning rate is multiplied by StepLR's default gamma (0.1) every 10 scheduler steps.
scheduler = lr_scheduler.StepLR(optimizer, step_size=10)

In [None]:
# Fine-tune model9 for `num_epochs` epochs with an MSE objective and report the
# mean per-sample training loss after every epoch.
since = time.time()
criterion = torch.nn.MSELoss()
num_epochs = 15
for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)
    model9.train()
    running_loss = 0.0  # sum of per-sample losses accumulated in this epoch
    samples_seen = 0    # number of samples behind `running_loss`
    tk0 = tqdm(dataloader, total=len(dataloader))
    for inputs, labels in tk0:
        inputs = inputs.to(device, dtype=torch.float)
        labels = labels.to(device, dtype=torch.float)
        optimizer.zero_grad()
        # Explicitly enable autograd in case an earlier cell disabled it globally.
        with torch.set_grad_enabled(True):
            outputs = model9(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        # `loss` is a batch mean, so weight it by the real batch size; the last
        # batch of an epoch can be smaller than `dataloader.batch_size`.
        running_loss += loss.item() * inputs.size(0)
        samples_seen += inputs.size(0)
        tk0.set_postfix(loss=(running_loss / samples_seen))
    # Normalise by the number of samples (not the number of batches) so the value
    # is a true per-sample mean; max(..., 1) guards against an empty loader.
    epoch_loss = running_loss / max(samples_seen, 1)
    print('Training Loss: {:.4f}'.format(epoch_loss))
    scheduler.step()

time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
torch.save(model9.state_dict(), "model9.bin")

# augmentation 4
# uniform augmentation
- search 없이 random하게 augmentation을 확률적으로 적용


In [None]:
!pip install git+https://github.com/tgilewicz/uniformaugment/
from UniformAugment import UniformAugment

# UniformAugment pipeline for PIL images. UniformAugment runs first (index 0),
# ahead of the resize, and is constructed with its defaults; the original note
# here states that num_ops=2 is optimal.
transform = transforms.Compose(
    transforms=[
        UniformAugment(),
        transforms.Resize(size=(224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [None]:
class Dataset10():
    """Map-style dataset yielding (image, target) pairs for regression training.

    Args:
        data: DataFrame with an ``id_code`` column (image file stem) and a
            ``diagnosis`` column (numeric label).
        root: Directory prefix (including trailing separator) prepended to each file name.
        transform: Callable applied to a PIL image, invoked as ``transform(img)``.
    """

    def __init__(self, data, root, transform):
        self.files = list(root + data['id_code'] + '.png')
        self.targets = data['diagnosis']
        self.transform = transform

    def __len__(self):
        """Number of samples (one per row of ``data``)."""
        return len(self.files)

    def circle_crop(self, path):
        """Load an image, zero everything outside the centred inscribed circle, then crop.

        Raises:
            OSError: if OpenCV cannot read ``path``.
        """
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError("cv2.imread could not read image: {}".format(path))
        # NOTE(review): cv2.imread yields channels in BGR order and no conversion is
        # visible here; Image.fromarray below will treat the array as RGB - confirm
        # whether crop_image or the inference path accounts for this.
        height, width = img.shape[:2]
        cx, cy = width // 2, height // 2
        radius = min(cx, cy)
        # Single-channel mask: 1 inside the circle, 0 outside.
        mask = np.zeros((height, width), np.uint8)
        cv2.circle(mask, (cx, cy), int(radius), 1, thickness=-1)
        img = cv2.bitwise_and(img, img, mask=mask)
        # crop_image is defined elsewhere in the notebook (presumably trims the
        # blank border - verify against its definition).
        return crop_image(img)

    def __getitem__(self, idx):
        """Return ``(x, y)``: the transformed image and a float target tensor of shape (1,)."""
        img = self.circle_crop(self.files[idx])
        # The transform pipeline operates on PIL images, not numpy arrays.
        img = Image.fromarray(img).convert('RGB')
        x = self.transform(img)
        # Positional lookup: `self.files` is a positional list, so the target must be
        # fetched by position too. Plain `self.targets[idx]` is label-based and breaks
        # when `data` carries a non-default index (e.g. after a split or filter).
        y = torch.tensor(self.targets.iloc[idx]).unsqueeze(0).float()
        return x, y

In [None]:
# UniformAugment training set and its shuffled loader (mini-batches of 64).
train_dataset = Dataset10(train_data, train_path, transform)
dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# ResNet-18 initialised with torchvision's pretrained weights, fine-tuned here as a regressor.
model10 = models.resnet18(pretrained=True)

# Replace the classification head with a single linear unit so the network
# emits one scalar per image.
num_features = model10.fc.in_features
model10.fc = nn.Linear(num_features, 1)

model10 = model10.to(device)

# MSE is the loss that matches a one-unit regression head and float targets.
# (CrossEntropyLoss over a single logit is degenerate: softmax of one value is always 1.)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(lr=1e-4, params=model10.parameters())
# Learning rate is multiplied by StepLR's default gamma (0.1) every 10 scheduler steps.
scheduler = lr_scheduler.StepLR(optimizer, step_size=10)

In [None]:
# Fine-tune model10 for `num_epochs` epochs with an MSE objective and report the
# mean per-sample training loss after every epoch.
since = time.time()
criterion = torch.nn.MSELoss()
num_epochs = 15
for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)
    model10.train()
    running_loss = 0.0  # sum of per-sample losses accumulated in this epoch
    samples_seen = 0    # number of samples behind `running_loss`
    tk0 = tqdm(dataloader, total=len(dataloader))
    for inputs, labels in tk0:
        inputs = inputs.to(device, dtype=torch.float)
        labels = labels.to(device, dtype=torch.float)
        optimizer.zero_grad()
        # Explicitly enable autograd in case an earlier cell disabled it globally.
        with torch.set_grad_enabled(True):
            outputs = model10(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        # `loss` is a batch mean, so weight it by the real batch size; the last
        # batch of an epoch can be smaller than `dataloader.batch_size`.
        running_loss += loss.item() * inputs.size(0)
        samples_seen += inputs.size(0)
        tk0.set_postfix(loss=(running_loss / samples_seen))
    # Normalise by the number of samples (not the number of batches) so the value
    # is a true per-sample mean; max(..., 1) guards against an empty loader.
    epoch_loss = running_loss / max(samples_seen, 1)
    print('Training Loss: {:.4f}'.format(epoch_loss))
    scheduler.step()

time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
torch.save(model10.state_dict(), "model10.bin")