In [1]:
import numpy as np 
import pandas as pd 
import os
import cv2
import torch.nn.init as init
import torch
import torch.nn as nn
# import timm

In [2]:
from PIL import Image, ImageFilter
from sklearn.model_selection import train_test_split, StratifiedKFold
from torch.utils.data import Dataset
from torchvision import transforms
from torch.optim import Adam, SGD, RMSprop
import time

In [3]:
from torch.autograd import Variable
import torch.functional as F
from tqdm import tqdm
from sklearn import metrics
import urllib
import pickle
import cv2
import torch.nn.functional as F

In [4]:
from torchvision import models
import seaborn as sns
import random

In [5]:
from sklearn.metrics import roc_auc_score
import sys
sys.path.append('./pytorch-auto-augment')
from auto_augment import AutoAugment, Cutout

In [6]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True

In [7]:
train_path = r'C:\Users\Xing\Projects\SIIM2020\data_512\train'
test_path = r'C:\Users\Xing\Projects\SIIM2020\data_512\test'
train_csv = pd.read_csv(r'C:\Users\Xing\Projects\SIIM2020\data\train.csv')
test_csv = pd.read_csv(r'C:\Users\Xing\Projects\SIIM2020\data\test.csv')
sample = pd.read_csv(r'C:\Users\Xing\Projects\SIIM2020\data\sample_submission.csv')

In [8]:
train_csv.head()

Unnamed: 0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge,diagnosis,benign_malignant,target
0,ISIC_2637011,IP_7279968,male,45.0,head/neck,unknown,benign,0
1,ISIC_0015719,IP_3075186,female,45.0,upper extremity,unknown,benign,0
2,ISIC_0052212,IP_2842074,female,50.0,lower extremity,nevus,benign,0
3,ISIC_0068279,IP_6890425,female,45.0,head/neck,unknown,benign,0
4,ISIC_0074268,IP_8723313,female,55.0,upper extremity,unknown,benign,0


In [9]:
class MyDataset(Dataset):
    
    def __init__(self, dataframe, transform=None, test=False):
        self.df = dataframe
        self.transform = transform
        self.test = test
    
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        
        label = self.df.target.values[idx]
        p = self.df.image_name.values[idx]
        
        if self.test == False:
            p_path = train_path + p + '.png'
        else:
            p_path = test_path + p + '.png'
            
        image = cv2.imread(p_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
#         image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
        image = transforms.ToPILImage()(image)
        
        if self.transform:
            image = self.transform(image)
        
        return image, label

In [11]:
seed_everything(2020)
num_classes = 2
bs = 80
lr = 1e-3
IMG_SIZE = 512

In [12]:
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    AutoAugment(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
])

test_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
])


testset      = MyDataset(sample, transform=test_transform, test=True)
test_loader  = torch.utils.data.DataLoader(testset, batch_size=bs, shuffle=False, num_workers=4)

In [13]:
class AverageMeter:
    """
    Computes and stores the average and current value
    """
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

In [14]:
def train_model(model, epoch):
    model.train() 
    
    losses = AverageMeter()
    avg_loss = 0.

    optimizer.zero_grad()
    
    tk = tqdm(train_loader, total=len(train_loader), position=0, leave=True)
    for idx, (imgs, labels) in enumerate(tk):
        print(idx)
        imgs_train, labels_train = imgs.cuda(), labels.cuda().long()
        output_train = model(imgs_train)

        loss = criterion(output_train, labels_train)
        loss.backward()

        optimizer.step() 
        optimizer.zero_grad() 
        
        avg_loss += loss.item() / len(train_loader)
        
        losses.update(loss.item(), imgs_train.size(0))

        tk.set_postfix(loss=losses.avg)
        
    return avg_loss


def test_model(model):    
    model.eval()
    
    losses = AverageMeter()
    avg_val_loss = 0.
    
    valid_preds, valid_targets = [], []
    
    with torch.no_grad():
        tk = tqdm(val_loader, total=len(val_loader), position=0, leave=True)
        for idx, (imgs, labels) in enumerate(tk):
            imgs_valid, labels_valid = imgs.cuda(), labels.cuda().long()
            output_valid = model(imgs_valid)
            
            loss = criterion(output_valid, labels_valid)
            
            avg_val_loss += loss.item() / len(val_loader)

            losses.update(loss.item(), imgs_valid.size(0))
            
            tk.set_postfix(loss=losses.avg)
            
            valid_preds.append(torch.softmax(output_valid,1)[:,1].detach().cpu().numpy())
            valid_targets.append(labels_valid.detach().cpu().numpy())
            
        valid_preds = np.concatenate(valid_preds)
        valid_targets = np.concatenate(valid_targets)
        auc =  roc_auc_score(valid_targets, valid_preds) 
            
    return avg_val_loss, auc

In [15]:
kf = StratifiedKFold(5, shuffle=True, random_state=0)

cv = []

In [16]:
sys.path.append('./EfficientUnet-PyTorch/')
from efficientunet import *

In [20]:
fold = 0

for trn_ind, val_ind in kf.split(train_csv.image_name, train_csv.target):
    fold += 1
    print('fold:', fold)

    train_df = train_csv.loc[trn_ind]
    val_df = train_csv.loc[val_ind]
    train_df.reset_index(drop=True, inplace=True)
    val_df.reset_index(drop=True, inplace=True)

    trainset = MyDataset(train_df, transform=train_transform)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=bs, shuffle=True, num_workers=4)
   
    valset = MyDataset(val_df, transform=test_transform)
    val_loader = torch.utils.data.DataLoader(valset, batch_size=bs, shuffle=False, num_workers=4)

#     model = timm.create_model('tf_efficientnet_b3_ns', pretrained=True, num_classes=num_classes)
    model = EfficientNet.from_name('efficientnet-b5', n_classes=2, pretrained=False).cuda()
#     model.cuda()
    
    os.environ["CUDA_VISIBLE_DEVICES"] = '3,2,1,0'
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model).cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=0.001)
    criterion = nn.CrossEntropyLoss()
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.3)

    best_auc = 0
    n_epochs = 20
    es = 0

    for epoch in range(n_epochs):
        avg_loss = train_model(model, epoch)
        avg_val_loss, auc = test_model(model)

        if auc > best_auc:
            best_auc = auc
            torch.save(model.state_dict(), str(fold) + 'weight.pt')
        else:
            es += 1
            if es > 1:
                break
        print('current_val_auc:', auc, 'best_val_auc:', best_auc)
        
        scheduler.step()

    cv.append(best_auc)

fold: 1


  0%|                                                                                          | 0/332 [00:00<?, ?it/s]

BrokenPipeError: [Errno 32] Broken pipe