### Installing XLA

In [None]:
!curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
!python pytorch-xla-env-setup.py --version nightly --apt-packages libomp5 libopenblas-dev

In [None]:
import os

# `!export ...` runs in a separate shell process that exits immediately, so
# the variable never reached this notebook's Python process. Setting it via
# os.environ makes it visible to this process and to processes started from
# it. This cell runs before the torch_xla imports further down.
os.environ["XLA_USE_BF16"] = "1"

### Importing Libraries

In [None]:

from PIL import Image
import numpy as np
import pandas as pd
import time
import os
import torch
import torch.nn as nn
import torch.utils.data as D
from torch.optim.lr_scheduler import ExponentialLR, ReduceLROnPlateau

from torchvision import models as M, transforms as T

from tqdm import tqdm

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold, GroupKFold
from glob import glob

import torch_xla.core.xla_model as xm
import torch_xla.distributed.parallel_loader as pl
import torch_xla.distributed.xla_multiprocessing as xmp

import gc
import matplotlib.pyplot as plt
import pickle
import warnings
warnings.filterwarnings("ignore")
from joblib import Parallel, delayed
from sklearn.metrics import accuracy_score, roc_auc_score
print("Required libraries installed /-\-/-\...")

In [None]:
!pip install efficientnet_pytorch

In [None]:
from efficientnet_pytorch import EfficientNet

### Seeding

In [None]:
def seed_torch(seed_value):
    """Seed the Python, NumPy and PyTorch random number generators.

    Also requests deterministic cuDNN behaviour when cuDNN is available.

    Args:
        seed_value: Integer seed applied to every generator.
    """
    # Local import: the notebook does not import `random` at the top.
    import random

    random.seed(seed_value)  # Python
    np.random.seed(seed_value)  # NumPy
    torch.manual_seed(seed_value)  # PyTorch CPU
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)  # all GPUs
    # `is_available` is a function and must be called; the bare attribute is
    # a function object and therefore always truthy.
    if torch.backends.cudnn.is_available():
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False


seed_torch(42)

### Reading CSVs

In [None]:
# Directory holding the competition CSV files (train / test / sample submission).
BASE_PATH = "../input/jpeg-melanoma-256x256"
df_train = pd.read_csv(f"{BASE_PATH}/train.csv")
df_test = pd.read_csv(f"{BASE_PATH}/test.csv")
df_sub = pd.read_csv(f"{BASE_PATH}/sample_submission.csv")

In [None]:
# Show a single training image with the axis ticks removed.
temp = plt.imread("../input/siic-isic-224x224-images/train/ISIC_4232172.png")
plt.xticks([])
plt.yticks([])
plt.imshow(temp)

In [None]:
# One patient id per training row, in row order; used as the `groups`
# argument of the grouped cross-validation split.
groups_by_patient = df_train["patient_id"].tolist()

### Training Images

In [None]:
# Root directory of the image files. ImagesDS joins it with the mode
# ("train" / "test") and "<image_name>.png" to build each file path.
BINGO_PATH = "/kaggle/input/siic-isic-224x224-images"

### Defining Architecture

In [None]:
# Number of target classes. Net does not read this name; it hard-codes
# two outputs.
classes = 2


class Net(nn.Module):
    """Wrap a backbone and replace its pooling layer and classifier head.

    The backbone passed in is modified in place: its ``_avg_pool`` attribute
    becomes an adaptive average pool and its ``_fc`` attribute becomes a new
    1280 -> 2 linear layer. The same backbone object is kept as ``self.arch``,
    so two ``Net`` instances built from one backbone share all weights.

    Args:
        arch: Backbone module whose ``_avg_pool`` and ``_fc`` are replaced.
    """

    def __init__(self, arch):
        super().__init__()
        # NOTE(review): output_size=32 yields a 32x32 map per channel; whether
        # the backbone's forward uses `_avg_pool` and how that interacts with
        # the 1280-feature head depends on the backbone version -- confirm.
        pooling = nn.AdaptiveAvgPool2d(output_size=32)
        head = nn.Linear(in_features=1280, out_features=2, bias=True)
        self.arch = arch
        self.arch._avg_pool = pooling
        self.arch._fc = head

    def forward(self, x):
        """Return the backbone's output for ``x``."""
        return self.arch(x)

### Preparing Data

In [None]:
class ImagesDS(D.Dataset):
    """Dataset of PNG images listed in a DataFrame.

    Each file path is ``<dir>/<mode>/<image_name>.png``, where ``image_name``
    comes from the DataFrame row.

    Args:
        df: DataFrame with an ``image_name`` column (and a ``target`` column
            when ``mode == "train"``).
        dir: Root directory of the images.
        mode: Sub-directory name; when it equals ``"train"`` items are
            ``(image, target)`` pairs, otherwise just the image.
        transforms: ``"train"`` selects the augmenting pipeline; any other
            value selects the plain tensor + normalisation pipeline.
    """

    def __init__(self, df, dir, mode = "train", transforms = None):
        self.transforms = transforms
        self.mode = mode
        self.dir = dir
        self.records = df.to_records(index = False)
        self.len = df.shape[0]

    @staticmethod
    def _load_train_img_as_tensor(filename):
        """Open ``filename`` and return it augmented, as a normalised tensor."""
        augment = T.Compose([
            T.RandomResizedCrop(size = 224, scale = (0.7, 1.0)),
            T.RandomHorizontalFlip(),
            T.RandomVerticalFlip(),
            T.ColorJitter(brightness = 32. / 255., saturation = 0.5),
            T.ToTensor(),
            T.Normalize(mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225]),
        ])
        with Image.open(filename) as img:
            return augment(img)

    @staticmethod
    def _load_test_img_as_tensor(filename):
        """Open ``filename`` and return it as a normalised tensor (no augmentation)."""
        to_normalised_tensor = T.Compose([
            T.ToTensor(),
            T.Normalize(mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225]),
        ])
        with Image.open(filename) as img:
            return to_normalised_tensor(img)

    def _get_image_path(self, index):
        """Return the file path of the image at row ``index``."""
        file_name = f"{self.records[index].image_name}.png"
        return "/".join((self.dir, self.mode, file_name))

    def __getitem__(self, index):
        """Load item ``index``: ``(image, target)`` in train mode, else the image."""
        path = self._get_image_path(index)

        use_augmentation = self.transforms == "train"
        load = self._load_train_img_as_tensor if use_augmentation else self._load_test_img_as_tensor
        img = load(path)

        if self.mode != "train":
            return img
        return img, self.records[index].target

    def __len__(self):
        """Return the number of rows in the source DataFrame."""
        return self.len

In [None]:
class AverageMeter:
    """
    Tracks the latest value together with a weighted running average.

    Attributes set by ``reset`` / ``update``: ``val`` (last value), ``sum``
    (weighted total), ``count`` (total weight) and ``avg`` (``sum / count``).
    """
    def __init__(self):
        self.reset()

    def reset(self):
        # Clear all statistics back to zero.
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        # Record `val` as seen `n` times and refresh the running average.
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

In [None]:
def reduce_fn(values):
    """Return the arithmetic mean of ``values`` (a non-empty sequence)."""
    total = sum(values)
    return total / len(values)

In [None]:
# Three-way grouped splitter; Fitter.start calls it with the patient ids as
# groups.
gkf = GroupKFold(n_splits = 3)
# Fitter.start appends the best validation AUC of each fold to this list.
cv = []

### Fitter

In [None]:
# Load a pretrained EfficientNet-B0 and wrap it in Net. This notebook-level
# `net` is the model that Fitter.start moves to the device and trains.
model = EfficientNet.from_pretrained("efficientnet-b0")
net = Net(arch = model)

In [None]:
class Fitter:
    """Train and validate the notebook-level ``net`` over the folds of ``gkf``.

    One instance is meant to be created inside each spawned XLA process. The
    class reads these notebook-level names: ``net``, ``gkf``, ``df_train``,
    ``groups_by_patient``, ``BINGO_PATH``, ``cv``, ``ImagesDS``,
    ``AverageMeter`` and ``reduce_fn``.

    Args:
        batch_size: Batch size of the per-process training and validation loaders.
        epoch: Number of epochs to run in every fold.
    """

    def __init__(self, batch_size, epoch):
        self.device = xm.xla_device()
        xm.master_print(f'Fitter prepared. Device is {self.device}')
        xm.master_print(f"Device: {xm.get_ordinal()}, Num_Replicas: {xm.xrt_world_size()}")
        self.best_auc = 0
        self.n_epochs = epoch
        self.bs = batch_size

    def train_model(self, model, epoch, loader, device):
        """Run one training epoch.

        Args:
            model: Module to train.
            epoch: Epoch index (not used by the loop itself).
            loader: ``pl.ParallelLoader`` wrapping the training DataLoader.
            device: XLA device to iterate the loader on.

        Returns:
            The sample-weighted mean of the cross-replica averaged batch losses.
        """
        model.train()
        losses = AverageMeter()
        for inputs, labels in loader.per_device_loader(device):
            inputs, labels = inputs.to(device), labels.to(device)
            # Forward + backward + optimize
            self.optimizer.zero_grad()

            outputs = model(inputs)
            loss = self.criterion(outputs, labels)
            loss.backward()
            xm.optimizer_step(self.optimizer, barrier = True)
            # Average the batch loss over all replicas before recording it.
            reduced_loss = xm.mesh_reduce("loss_reduce", loss, reduce_fn)
            losses.update(reduced_loss.item(), inputs.size(0))
        # Return the tracked average (a never-updated 0 was returned before).
        return losses.avg

    def test_model(self, model, val_loader, device):
        """Evaluate ``model`` on the validation loader.

        Predictions and targets of all replicas are gathered before scoring,
        so every process gets the same AUC and accuracy. That keeps the
        checkpoint decision and the LR scheduler in step across processes.

        Args:
            model: Module to evaluate.
            val_loader: ``pl.ParallelLoader`` wrapping the validation DataLoader.
            device: XLA device to iterate the loader on.

        Returns:
            Tuple ``(avg_val_loss, val_auc, val_acc)``.
        """
        model.eval()

        losses = AverageMeter()
        valid_preds, valid_targets = [], []

        with torch.no_grad():
            for inputs, labels in val_loader.per_device_loader(device):
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = self.criterion(outputs, labels)

                reduced_loss = xm.mesh_reduce("loss_reduce", loss, reduce_fn)
                losses.update(reduced_loss.item(), inputs.size(0))
                # Probability of class 1 for each sample of the batch.
                valid_preds.append(torch.softmax(outputs, 1)[:, 1].detach().cpu().numpy())
                valid_targets.append(labels.detach().cpu().numpy())

        # Gather every replica's shard; both gathers use the same replica
        # order, so predictions and targets stay aligned.
        valid_preds = xm.mesh_reduce("val_preds_gather", np.concatenate(valid_preds), np.concatenate)
        valid_targets = xm.mesh_reduce("val_targets_gather", np.concatenate(valid_targets), np.concatenate)
        val_auc = roc_auc_score(valid_targets, valid_preds)
        val_acc = accuracy_score(valid_targets, np.round(valid_preds))

        # Return the tracked average (a never-updated 0. was returned before).
        return losses.avg, val_auc, val_acc

    def start(self):
        """Run the full cross-validation: train, validate and checkpoint per fold.

        For each fold the best-AUC weights are written to ``<fold>weight.pt``
        and the best AUC is appended to ``cv``.
        """
        # CPU snapshot of the starting weights, taken before the model is
        # first moved to the device. Restoring it at the start of each fold
        # stops a fold from inheriting weights already trained on what is
        # now its validation split.
        initial_state = {name: tensor.detach().cpu().clone() for name, tensor in net.state_dict().items()}

        folds = gkf.split(X = np.zeros(len(df_train)), y = df_train["target"], groups = groups_by_patient)
        for fold, (train_idx, val_idx) in enumerate(folds, 1):
            xm.master_print("*"*40, "Fold ", fold, "*"*40)
            xm.master_print("xla:", xm.get_ordinal())
            self.best_auc = 0
            ds = ImagesDS(df_train.iloc[train_idx], BINGO_PATH, mode = "train", transforms = "train")
            ds_val = ImagesDS(df_train.iloc[val_idx], BINGO_PATH, mode = "train", transforms = "test")

            train_sampler = D.distributed.DistributedSampler(ds, num_replicas = xm.xrt_world_size(), rank = xm.get_ordinal(), shuffle = True)
            loader = D.DataLoader(ds, batch_size = self.bs, sampler = train_sampler, num_workers = 0)

            # Validation order does not matter, so no shuffling.
            val_sampler = D.distributed.DistributedSampler(ds_val, num_replicas = xm.xrt_world_size(), rank = xm.get_ordinal(), shuffle = False)
            val_loader = D.DataLoader(ds_val, batch_size = self.bs, sampler = val_sampler, num_workers = 0)

            net.to(self.device)
            net.load_state_dict(initial_state)

            self.criterion = nn.CrossEntropyLoss()
            self.optimizer = torch.optim.Adam(net.parameters(), lr = 0.0001)
            self.scheduler = ReduceLROnPlateau(self.optimizer, mode = "max", patience = 3, verbose = True, factor = 0.2)

            for epoch in range(self.n_epochs):
                # Without set_epoch the sampler yields the same order every epoch.
                train_sampler.set_epoch(epoch)

                para_loader = pl.ParallelLoader(loader, [self.device])
                avg_loss = self.train_model(net, epoch, para_loader, self.device)

                para_loader = pl.ParallelLoader(val_loader, [self.device])
                avg_val_loss, val_auc, val_acc = self.test_model(net, para_loader, self.device)

                # val_auc is identical on every replica, so all processes
                # take this branch together and reach xm.save together.
                if val_auc > self.best_auc:
                    self.best_auc = val_auc
                    xm.save(net.state_dict(), str(fold) + 'weight.pt')
                xm.master_print('current_val_auc: ', val_auc, '| best_val_auc: ', self.best_auc, "| Average loss: ", avg_loss, "| Average val loss: ", avg_val_loss, "| Validation accuracy: ", val_acc)

                self.scheduler.step(val_auc)

            # NOTE(review): when run through a fork-based xmp.spawn, each
            # worker presumably appends to its own copy of `cv`, so the list
            # in the parent notebook may stay empty -- confirm. The fold
            # result is therefore also printed.
            cv.append(self.best_auc)
            xm.master_print("Fold ", fold, "| best_val_auc: ", self.best_auc)

In [None]:
def _mp_fn(rank, flags):
    """Per-process entry point: build a Fitter and run the whole training.

    Args:
        rank: Index of the process; rank 0 waits one second before starting.
        flags: Extra arguments forwarded by the spawn call (not read here).
    """
    torch.set_default_tensor_type('torch.FloatTensor')
    batch_size, n_epochs = 64, 5
    fitter = Fitter(batch_size, n_epochs)
    if rank == 0:
        time.sleep(1)
    fitter.start()


FLAGS = {}
# Start 8 forked processes, each running _mp_fn(rank, FLAGS).
xmp.spawn(_mp_fn, args=(FLAGS,), nprocs=8, start_method='fork')

In [None]:
print(cv)

In [None]:
device = xm.xla_device()


def _load_fold_model(fold):
    """Build a fresh Net and load the checkpoint saved for ``fold``.

    Net keeps (and modifies in place) the backbone object it is given, so
    each fold model needs its own backbone. Wrapping one shared backbone
    three times made all three models share weights, and the last
    ``load_state_dict`` overwrote the others.

    Args:
        fold: Fold number; the checkpoint read is ``./<fold>weight.pt``.

    Returns:
        The model with the fold's weights, moved to ``device``.
    """
    fold_net = Net(arch = EfficientNet.from_pretrained("efficientnet-b0"))
    fold_net.load_state_dict(torch.load(f"./{fold}weight.pt"))
    return fold_net.to(device)


model1 = _load_fold_model(1)
model2 = _load_fold_model(2)
model3 = _load_fold_model(3)

In [None]:
# Put the three fold models into evaluation mode before inference.
model1.eval()
model2.eval()
model3.eval()

In [None]:
# `batch_size` was never defined at notebook level (it only exists as a
# Fitter constructor parameter), so this cell raised NameError. 64 matches
# the value passed to Fitter in _mp_fn.
batch_size = 64

# mode="test" reads images from the "test" sub-directory and yields images
# only (no targets); transforms is left unset, which selects the
# non-augmenting pipeline.
ds_test = ImagesDS(df_test, BINGO_PATH, mode = "test")
test_loader = D.DataLoader(ds_test, batch_size = batch_size, shuffle = False, num_workers = 4)

In [None]:
# Ensemble inference over the three fold models.
#
# The previous version ran the same three forward passes four times on the
# same (non-augmented) batch and summed all twelve logit tensors. That was
# 4x the compute and the softmax was taken over a sum rather than a mean.
# Each model is now evaluated once and the logits are averaged. Because the
# class-1 probability is a monotonic function of the logit difference, the
# ranking of the predictions is unchanged.
test_pred = np.zeros((len(df_test),))

# Running write position; avoids relying on a notebook-level batch size and
# handles a shorter final batch.
offset = 0
with torch.no_grad():
    for images in tqdm(test_loader, position = 0, leave = True):
        images = images.to(device)

        logits = (model1(images) + model2(images) + model3(images)) / 3

        # Probability of class 1 for each image in the batch.
        pred = torch.softmax(logits, 1).cpu().detach().numpy()[:, 1]

        test_pred[offset: offset + len(pred)] = pred
        offset += len(pred)

In [None]:
print(test_pred)

In [None]:
import seaborn as sns

In [None]:
sns.kdeplot(pd.Series(test_pred.reshape(-1, )))

In [None]:
df_sub.target = test_pred

In [None]:
df_sub.to_csv("submission.csv", index = False)