# EfficientNet Regression
In this kernel I use an EfficientNet architecture. It borrows the tiling concept from iafoss' kernels; however, the tiles are then aggregated into a single picture and fed through the network.

Afterwards, I use optimized thresholds to turn the regression output back into a classification.

In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import fastai
from fastai.vision import *
from fastai.callbacks import SaveModelCallback
import os
#from sklearn.model_selection import KFold
from radam import *
from csvlogger import *
from mish_activation import *
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import cohen_kappa_score,confusion_matrix
import warnings
import scipy as sp
import skimage.io
import cv2

warnings.filterwarnings("ignore")

# remove this cell if run locally
!mkdir 'cache'
!mkdir 'cache/torch'
!mkdir 'cache/torch/checkpoints'
torch.hub.DEFAULT_CACHE_DIR = 'cache'

import PIL.Image
PIL.Image.MAX_IMAGE_PIXELS = 933120000

# EfficientNet imports
import sys
package_path = '../input/efficientnet-pytorch/EfficientNet-PyTorch/EfficientNet-PyTorch-master'
sys.path.append(package_path)
from efficientnet_pytorch import EfficientNet

from albumentations import Compose, Normalize, HorizontalFlip, VerticalFlip, Rotate, RandomScale
from albumentations.pytorch import ToTensorV2

In [None]:
def seed_everything(seed):
    """Seed every random number generator used by this notebook.

    Params:
    seed: integer seed applied to Python's `random`, NumPy and PyTorch
          (CPU and all CUDA devices), and exported as PYTHONHASHSEED.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Also cover every additional GPU, not only the current device.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select different kernels from run to run,
    # which defeats the deterministic flag above, so it has to be off.
    torch.backends.cudnn.benchmark = False

SEED = 42
seed_everything(SEED)

In [None]:
# Root directory under which the label file, images and pretrained weights are looked up
PATH = "../input/"
# List the entries available under PATH
os.listdir(PATH)

In [None]:
# Edge length of each square tile (passed to tile() as `sz`)
tile_sz = 224
# Batch size used by the data loaders
bs = 8
# Number of epochs passed to fit_one_cycle
n_epochs = 8
# Number of StratifiedKFold splits
nfolds = 5
# Number of tiles kept per image (passed to tile() as `N`)
N = 12
# CSV file with one row per image
LABELS = PATH + 'prostate-cancer-grade-assessment/train.csv'
# Directory containing the `<image_id>.tiff` files
IMAGES = PATH + 'prostate-cancer-grade-assessment/train_images/'

# Data Preparation and Cleaning
Data cleaning is based on this Kernel: https://www.kaggle.com/tanulsingh077/prostate-cancer-in-depth-understanding-eda-model

Thank you a lot for your EDA!

In [None]:
df = pd.read_csv(LABELS).set_index('image_id')

# Wrongly labeled data: rows whose ISUP grade is 2 although the Gleason score is 4+3.
wrong_label = df[(df['isup_grade'] == 2) & (df['gleason_score'] == '4+3')]
display(wrong_label)
# Drop every matching row; unlike `wrong_label.index[0]` this is also safe
# when the selection is empty.
df.drop(wrong_label.index, inplace=True)
df = df.reset_index()

# Inconsistency between "0+0" and "negative": unify both spellings to "0+0"
df['gleason_score'] = df['gleason_score'].replace('negative', '0+0')

splits = StratifiedKFold(n_splits=nfolds, random_state=SEED, shuffle=True)
splits = list(splits.split(df, df.isup_grade))

# The held-out rows of the last split get split id 0, every other row gets 1.
# `int` replaces the `np.int` alias, which no longer exists in current NumPy.
folds_splits = np.ones(len(df), dtype=int)
folds_splits[splits[nfolds - 1][1]] = 0

df['split'] = folds_splits
df.head(10)

## Data Loaders

In [None]:
def tile(img, sz=128, N=16):
    """ Subdivide large image in tiles and return most significant squares

    The image is padded with white (255) to a multiple of `sz` in both
    spatial dimensions, cut into `sz` x `sz` tiles, and the `N` tiles with
    the lowest pixel sum (i.e. the least white ones) are kept.

    Params:
    img: large input image, array of shape (height, width, channels)
    sz: size of tiles
    N: number of most important tiles

    Returns: array of shape (N, sz, sz, channels) holding the N most
    significant tiles, ordered by ascending pixel sum. If the image yields
    fewer than N tiles, all-white tiles are appended.
    """
    height, width, channels = img.shape
    pad0, pad1 = (sz - height % sz) % sz, (sz - width % sz) % sz

    # Pad symmetrically (the extra pixel goes to the bottom/right) with white.
    img = np.pad(img, [[pad0 // 2, pad0 - pad0 // 2], [pad1 // 2, pad1 - pad1 // 2], [0, 0]],
                 constant_values=255)
    # (rows, sz, cols, sz, channels) -> (rows * cols, sz, sz, channels)
    img = img.reshape(img.shape[0] // sz, sz, img.shape[1] // sz, sz, channels)
    img = img.transpose(0, 2, 1, 3, 4).reshape(-1, sz, sz, channels)

    if len(img) < N:
        # Not enough tiles: fill up with white ones so that N can be returned.
        img = np.pad(img, [[0, N - len(img)], [0, 0], [0, 0], [0, 0]], constant_values=255)

    idxs = np.argsort(img.reshape(img.shape[0], -1).sum(-1))[:N]
    return img[idxs]

In [None]:
class TrainDataset(Dataset):
    """Dataset yielding the selected tiles of one image together with its label.

    Params:
    df: table with an `image_id` column; rows are addressed by position
    labels: labels aligned with `df` by position (pandas Series, list or array)
    transform: optional callable invoked as `transform(image=img)` that
               returns a dict with an `image` entry
    """
    def __init__(self, df, labels, transform=None):
        self.df = df
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def _label_at(self, idx):
        """Return the label at *position* idx.

        The image id is looked up by position, so the label has to be as well.
        Plain `labels[idx]` on a pandas Series is a lookup by index label and
        pairs the wrong rows (or raises KeyError) whenever the Series does not
        carry a default 0..n-1 index.
        """
        labels = self.labels
        if hasattr(labels, 'iloc'):
            return labels.iloc[idx]
        return labels[idx]

    def __getitem__(self, idx):
        """
        Prepares Items before feeding them in a neural network

        Params:
        self: class
        idx: current index of dataset

        Returns: Tuple of the stacked tiles (one tensor) and the label as a
        float tensor
        """
        file_name = self.df['image_id'].values[idx]
        file_path = IMAGES + f'{file_name}.tiff'
        # Entry 1 of the multi-image file is used (presumably the
        # intermediate resolution level - confirm against the data).
        images = skimage.io.MultiImage(file_path)[1]
        images = tile(images, sz=tile_sz, N=N)
        new_images = []
        for img in images:
            # NOTE(review): this swaps the first and third channel. If
            # skimage already delivers RGB, the result is BGR - confirm that
            # the swap is intended before changing it (trained weights
            # depend on it).
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # Transformations
            if self.transform:
                img = self.transform(image=img)['image']

            new_images.append(img)

        label = torch.tensor(self._label_at(idx)).float()
        # torch.stack needs tensors, so the transform is expected to return them.
        new_images = torch.stack(new_images)
        return new_images, label

In [None]:
def get_transforms(*, data):
    """
    Build the image transformation pipeline for one dataset split.

    Params:
    data: 'train' (random flips and rotation, then normalization and tensor
          conversion) or 'valid' (normalization and tensor conversion only)

    Returns: the composed transformation pipeline
    """
    assert data in ('train', 'valid')

    if data == 'valid':
        # Validation data is only normalized and converted to a tensor.
        return Compose([Normalize(), ToTensorV2()])

    # Training data: random augmentations first; rotation fills the
    # uncovered border with white.
    augmentations = [
        HorizontalFlip(p=0.25),
        Rotate(border_mode=cv2.BORDER_CONSTANT, value=(255, 255, 255), p=0.25),
        VerticalFlip(p=0.25),
    ]
    return Compose(augmentations + [Normalize(), ToTensorV2()])

In [None]:
train_dataset = TrainDataset(df, df["isup_grade"], transform=get_transforms(data='train'))
train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=False)

# Visual sanity check on the first batch only:
# one row of plots per sample, one column per tile.
for x, y in train_loader:
    print(x.shape)
    fig, axs = plt.subplots(bs, N, figsize=(30, 12), facecolor='w', edgecolor='k')
    axs = axs.ravel()
    for row, sample_tiles in enumerate(x):
        first_axis = row * N
        for col, tile_tensor in enumerate(sample_tiles):
            # Channels-first tensor -> channels-last array for imshow
            channels_last = tile_tensor.permute(1, 2, 0).numpy()
            axs[first_axis + col].imshow(channels_last)
    break

## Model

In [None]:
class Model(nn.Module):
    """EfficientNet-B0 feature extractor with a single-output regression head.

    Every tile of a sample is passed through the backbone on its own; the
    resulting feature maps are concatenated along the height axis and pooled
    by the head, which emits one value per sample.

    Params:
    pre: if True, load the pretrained EfficientNet-B0 weights from PATH
    """
    def __init__(self, pre=True):
        super().__init__()

        # Load model backbone
        backbone = EfficientNet.from_name('efficientnet-b0')

        # Get preloaded model
        if pre:
            backbone.load_state_dict(torch.load(PATH + 'efficientnet-pytorch/efficientnet-b0-08094119.pth'))

        # Encoder, runs through the pretrained efficientnet
        self.backbone = backbone

        # Number of channels produced by the feature extractor. It is read
        # from the backbone's own classifier layer by name, because taking
        # the last child module depends on registration order and breaks
        # when another module (e.g. an activation) is registered after it.
        nc = backbone._fc.in_features

        # Regression head applied to the aggregated feature maps.
        # AdaptiveConcatPool2d doubles the channel count, hence 2*nc.
        self.head = nn.Sequential(AdaptiveConcatPool2d(),
                                  Flatten(),
                                  nn.Linear(2*nc,512),
                                  Mish(),
                                  nn.BatchNorm1d(512),
                                  nn.Dropout(0.5),
                                  nn.Linear(512,1))


    def forward(self, x):
        """ Forward run through the neural network
        Params:
        x: torch tensor - input with five dimensions; dimension 1 indexes
           the tiles of a sample

        returns: output after feed forward, one value per sample.
        """
        # Merge batch and tile dimension so each tile is processed separately
        shape = x.shape
        n = shape[1]
        x = x.view(-1,shape[2],shape[3],shape[4])

        #x: bs*N x 3 x 224 x 224
        # Go through convolutional layers
        x = self.backbone.extract_features(x)

        # Regroup the tiles per sample and stack their feature maps along
        # the height axis: (bs*n, C, H, W) -> (bs, C, n*H, W)
        shape = x.shape
        x = x.view(-1,n,shape[1],shape[2],shape[3]).permute(0,2,1,3,4).contiguous()\
          .view(-1,shape[1],shape[2]*n,shape[3])

        # Go through the regression head
        x = self.head(x)
        return x

### Regression to Classification conversion using the OptimizedRounder

In [None]:
# inspired by https://www.kaggle.com/tanlikesmath/intro-aptos-diabetic-retinopathy-eda-starter
class KappaOptimizer(nn.Module):
    """Turns regression outputs into classes via thresholds and scores them.

    `coef` holds the ascending cut points between neighbouring classes. `fit`
    tunes them to maximize the quadratic weighted kappa; calling the module
    returns that kappa, so an instance can be passed as a metric.
    """
    def __init__(self):
        super().__init__()
        self.coef = [0.5, 1.5, 2.5, 3.5, 4.5]
        # define score function:
        self.func = self.quad_kappa


    def predict(self, preds):
        """Discretize `preds` with the current thresholds."""
        return self._predict(self.coef, preds)


    @classmethod
    def _predict(cls, coef, preds):
        """Map each prediction to the index of the first threshold above it.

        Params:
        coef: sequence of thresholds
        preds: tensor or array-like of regression outputs (any shape)

        Returns: flat int tensor; a value that is below no threshold (this
        includes NaN) gets class `len(coef)`.
        """
        if isinstance(preds, torch.Tensor):
            # reshape (unlike view) also accepts non-contiguous input
            y_hat = preds.detach().reshape(-1)
        else:
            y_hat = torch.as_tensor(preds, dtype=torch.float32).reshape(-1)
        if not y_hat.is_floating_point():
            # Compare in floating point so fractional thresholds keep their value.
            y_hat = y_hat.float()

        thresholds = torch.as_tensor(coef, dtype=y_hat.dtype, device=y_hat.device)
        # 1 where the prediction is NOT below the threshold. The number of
        # leading ones per row is the index of the first threshold above the
        # prediction - exactly what an if/elif chain over `coef` yields, but
        # vectorized, which matters because `fit` evaluates this many times.
        not_below = 1 - (y_hat.unsqueeze(1) < thresholds).long()
        return not_below.cumprod(dim=1).sum(dim=1).int()


    def quad_kappa(self, preds, y):
        """Quadratic weighted kappa of `preds` against `y` with the current thresholds."""
        return self._quad_kappa(self.coef, preds, y)


    @staticmethod
    def _to_numpy(values):
        """Move tensors to host memory as arrays; pass anything else through."""
        if isinstance(values, torch.Tensor):
            return values.detach().cpu().numpy()
        return values


    @classmethod
    def _quad_kappa(cls, coef, preds, y):
        """Quadratic weighted kappa of `preds` discretized with `coef` against `y`."""
        y_hat = cls._predict(coef, preds)
        # Explicit device handling replaces the former bare `except:` retry,
        # which also hid unrelated errors.
        return cohen_kappa_score(cls._to_numpy(y), cls._to_numpy(y_hat), weights='quadratic')


    def fit(self, preds, y):
        ''' maximize quad_kappa '''
        # Local import: `import scipy as sp` alone does not guarantee that
        # the `optimize` submodule has been loaded.
        from scipy.optimize import minimize

        def neg_kappa(coef):
            return -self._quad_kappa(coef, preds, y)

        opt_res = minimize(neg_kappa, x0=self.coef, method='nelder-mead',
                           options={'maxiter':150, 'fatol':1e-20, 'xatol':1e-20})
        self.coef = opt_res.x


    def forward(self, preds, y):
        ''' the pytorch loss function '''
        return torch.tensor(self.quad_kappa(preds, y))

kappa_opt = KappaOptimizer()

# Training

In [None]:
# Prepare databunch: rows whose split id equals `fold` form the validation set
fold = 0
train_idx = df[df['split'] != fold].index
val_idx = df[df['split'] == fold].index

# Renumber the rows from 0 so that positions and index labels coincide
train_df = df.loc[train_idx].reset_index(drop=True)
valid_df = df.loc[val_idx].reset_index(drop=True)

train_dataset = TrainDataset(train_df, train_df["isup_grade"],
                             transform=get_transforms(data='train'))
valid_dataset = TrainDataset(valid_df, valid_df["isup_grade"],
                             transform=get_transforms(data='valid'))

# Only the training loader shuffles
train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True, num_workers=8)
valid_loader = DataLoader(valid_dataset, batch_size=bs, shuffle=False, num_workers=8)

data = DataBunch(train_dl=train_loader, valid_dl=valid_loader)

In [None]:
# Base name used for the log file and the saved weights file
fname = 'EFFNETB0_REGRESSION'
model = Model()

# Regression setup: smooth L1 loss on the single model output, RAdam as
# optimizer, and the KappaOptimizer instance as metric.
learn = Learner(data, 
                model, 
                loss_func=torch.nn.SmoothL1Loss(),
                opt_func=RAdam, 
                metrics=[kappa_opt])

# NOTE(review): `logger` is not passed to any fit call in the code visible
# here - confirm that CSVLogger registers itself or add it to the callbacks.
logger = CSVLogger(learn, f'log_{fname}_{fold}')

# Two layer groups: backbone and head.
learn.split([model.backbone, model.head])
# presumably freezes nothing (no layer group before index 0) - TODO confirm
# against the fastai version in use
learn.freeze_to(0)

In [None]:
# Train for n_epochs with one learning rate per layer group, tracking the
# model with the lowest validation loss
best_model_saver = SaveModelCallback(learn,
                                     name=f'model',
                                     mode='min',
                                     monitor='valid_loss')
learn.fit_one_cycle(n_epochs, (1e-4, 1e-3), callbacks=[best_model_saver])

# Store the weights of the learner's model
weights_path = f'{fname}_{fold}.pth'
torch.save(learn.model.state_dict(), weights_path)

## Evaluation

In [None]:
def _collect_predictions(dl):
    """Run the model over `dl` and return (predictions, targets) as lists of CPU tensors."""
    preds, targets = [], []
    for x, y in progress_bar(dl, total=len(dl)):
        preds.append(learn.model(x).float().cpu())
        targets.append(y.cpu())
    return preds, targets


learn.model.eval()
with torch.no_grad():
    # The training predictions are required by the threshold fitting below.
    # They used to be disabled inside a string literal, which left the lists
    # empty and made the later torch.cat(train_pred) fail. Note that this
    # adds one full inference pass over the training data.
    train_pred, train_target = _collect_predictions(data.dl(DatasetType.Train))
    pred, target = _collect_predictions(data.dl(DatasetType.Valid))

In [None]:
# Validation metrics with the current (not yet optimized) thresholds.
# `p` keeps the raw regression outputs; the discretized classes go into a
# separate variable so that later thresholding still sees the raw values.
p = torch.cat(pred)
t = torch.cat(target)
p_cls = kappa_opt.predict(p)
print(cohen_kappa_score(t,p_cls,weights='quadratic'), "\n")
print(confusion_matrix(t,p_cls), "\n")
print(kappa_opt.coef)

In [None]:
# Optimize the thresholds on the training predictions, if any were collected;
# torch.cat on an empty list would raise.
if train_pred:
    p_train = torch.cat(train_pred)
    t_train = torch.cat(train_target)
    kappa_opt.fit(p_train,t_train)
else:
    print("No training predictions collected - keeping the current thresholds.")

# Threshold the raw validation outputs. They are rebuilt from `pred` because
# `p` may already hold discretized classes at this point.
t = torch.cat(target)
p = kappa_opt.predict(torch.cat(pred))
print(cohen_kappa_score(t,p,weights='quadratic'), "\n")
print(confusion_matrix(t,p), "\n")
print(kappa_opt.coef)

In [None]:
!rm -r 'cache'