## EfficientNet + Quantile Regression Model in Pytorch

This notebook generates predictions from DICOM images combined with tabular patient data.

### Acknowledgements 

* efficientnets-quantile-regression-inference: https://www.kaggle.com/leoisleo1/efficientnets-quantile-regression-inference

* Training EfficientNet with pytorch: https://www.kaggle.com/noelmat/training-efficientnet-with-pytorch

* melanoma-pytorch-starter-efficientnet: https://www.kaggle.com/nroman/melanoma-pytorch-starter-efficientnet

## Imports

In [None]:
import pandas as pd
import numpy as np
import torch
from torchvision import models
from pathlib import Path
Path.ls = lambda x: list(x.iterdir())

import cv2 
import pydicom
from tqdm import tqdm
from matplotlib import pyplot as plt
from torchvision import transforms

from torch import nn
# from efficientnet_pytorch import EfficientNet
# from efficientnet_pytorch.utils import MemoryEfficientSwish
import warnings

import random
from torch.optim import Adam
from torch.optim.lr_scheduler import OneCycleLR, ReduceLROnPlateau
import pydicom
from pathlib import Path
Path.ls = lambda x: list(x.iterdir())
import sys

from sklearn.model_selection import GroupKFold
from torch.utils.data import DataLoader, Subset
from torch.optim.lr_scheduler import StepLR
from datetime import datetime, timedelta
from time import time
import torch.nn.functional as F
import copy
import matplotlib.pyplot as plt

In [None]:
package_path = '../input/efficientnet-pytorch/EfficientNet-PyTorch/EfficientNet-PyTorch-master'
sys.path.append(package_path)

In [None]:
#!pip install resnet_pytorch

In [None]:
from efficientnet_pytorch import EfficientNet


## Config

In [None]:
warnings.simplefilter('ignore')
def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices) and configures cuDNN for repeatable runs.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seeds every visible GPU rather than only the current one; this call is
    # silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels from run to run, which
    # would defeat the determinism requested above, so it has to stay off.
    torch.backends.cudnn.benchmark = False
    
seed_everything(42)

In [None]:
class Config:
    """Run settings shared by the training and inference cells."""

    def __init__(self):
        # Use the GPU when one is available, otherwise fall back to the CPU.
        if torch.cuda.is_available():
            device_name = "cuda"
        else:
            device_name = 'cpu'
        self.DEVICE = torch.device(device_name)

        # Cross-validation layout and training length.
        self.FOLDS = 2
        self.EPOCHS = 1

        # Batch sizes for the training and validation loaders.
        self.TRAIN_BS = 32
        self.VALID_BS = 128

        # Backbone key and the training/validation criterion.
        self.model_type = 'efficientnet-b3'
        self.loss_fn = nn.L1Loss()
        
config = Config()

In [None]:
path = Path('/kaggle/input/osic-pulmonary-fibrosis-progression/')
path.ls()

### Load dataset

In [None]:
train_df = pd.read_csv(path/'train.csv')
train_df.head()

In [None]:
# Remove every row of two specific patients and renumber the remaining rows.
# NOTE(review): the reason for excluding these two IDs is not visible here.
train_df = train_df[
    ~train_df['Patient'].isin(['ID00011637202177653955184', 'ID00052637202186188008618'])
].reset_index(drop=True)

## Preprocessing

In [None]:
def get_tab(df):
    """Encode the first row of a patient frame as a 4-element feature vector.

    The vector is ``[(Weeks - 30) / 30, sex flag, smoking flag 1, smoking
    flag 2]``. Only the first value of each column is read.

    Args:
        df: Frame with ``Weeks``, ``Sex`` and ``SmokingStatus`` columns.

    Returns:
        A NumPy array holding the four encoded values.
    """
    # Two-bit code per smoking status; anything unlisted falls back to [1, 0].
    smoking_codes = {
        'Never smoked': [0, 0],
        'Ex-smoker': [1, 1],
        'Currently smokes': [0, 1],
    }
    scaled_week = (df.Weeks.values[0] - 30) / 30
    sex_flag = 0 if df.Sex.values[0] == 'Male' else 1
    smoking_flags = smoking_codes.get(df.SmokingStatus.values[0], [1, 0])
    return np.array([scaled_week, sex_flag, *smoking_flags])

In [None]:
# Per-patient lookup tables built from the training frame:
#   TAB[patient]    -> tabular feature vector produced by get_tab
#   TARGET[patient] -> slope of a least-squares line fitted to FVC over Weeks
#   Person          -> array of patient ids in order of first appearance
TAB = {}
TARGET = {}
Person = []

# Iterating the array directly lets tqdm know the total and show a real bar.
for p in tqdm(train_df.Patient.unique()):
    sub = train_df.loc[train_df.Patient == p]
    fvc = sub.FVC.values
    weeks = sub.Weeks.values
    # Design matrix [weeks, 1], so the solution is (slope, intercept).
    c = np.vstack([weeks, np.ones(len(weeks))]).T
    # rcond is passed explicitly so the call does not depend on (or warn
    # about) the NumPy-version-specific default cutoff.
    a, _intercept = np.linalg.lstsq(c, fvc, rcond=None)[0]

    TARGET[p] = a
    TAB[p] = get_tab(sub)
    Person.append(p)

Person = np.array(Person)

### Read dicom image

In [None]:
def get_img(path):
    """Load a DICOM file and return its pixel data as a 512x512 array.

    The raw pixel array is divided by 2**11 before being resized.

    Args:
        path: Location of the DICOM file handed to ``pydicom.dcmread``.

    Returns:
        The scaled pixel array resized to 512 by 512 with ``cv2.resize``.
    """
    dicom = pydicom.dcmread(path)
    scaled_pixels = dicom.pixel_array / 2**11
    target_size = (512, 512)
    return cv2.resize(scaled_pixels, target_size)

### Dataset class

In [None]:
class Dataset:
    """Map-style dataset pairing one randomly chosen image with tabular features.

    Each item corresponds to one row of ``df``. The row's ``Patient`` value
    selects the directory ``path / folder / patient``; one file from that
    directory is picked at random (global NumPy RNG) and loaded with
    ``get_img``.

    Args:
        path: Root directory of the data (a ``pathlib.Path``).
        df: Frame with a ``Patient`` column; it is indexed with ``.loc[idx]``,
            so its index must contain the integer positions that are requested.
        tabular: Mapping from patient id to a NumPy feature vector.
        targets: Mapping from patient id to the regression target; only read
            when ``mode == 'train'``.
        mode: ``'train'`` to return ``((img, tab), target)``; any other value
            returns ``(img, tab)`` only.
        folder: Sub-directory of ``path`` that holds the patient directories.
    """

    def __init__(self, path, df, tabular, targets, mode , folder = 'train' ):
        self.df = df
        self.tabular = tabular
        self.targets = targets
        self.folder = folder
        self.mode = mode
        self.path = path
        self.transform = transforms.Compose([
            transforms.ToTensor()
        ])

    def __len__(self):
        """Return the number of rows in the underlying frame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            ``((img, tab), target)`` in ``'train'`` mode, otherwise
            ``(img, tab)``.

        Raises:
            Exception: Any error raised while listing, reading or converting
                the sample is re-raised after the patient id and the chosen
                file index have been printed.
        """
        row = self.df.loc[idx, :]
        pid = row['Patient']
        # Directory that holds all image files of this patient.
        record = self.path / self.folder / pid
        # Stays None when the failure happens before a file has been chosen,
        # so the diagnostic print below can never fail itself.
        img_id = None
        try:
            # List the directory once; sorting makes the index -> file mapping
            # independent of the file system's listing order, so a seeded run
            # picks the same files every time.
            files = sorted(record.iterdir())
            img_id = np.random.choice(len(files))

            img = get_img(files[img_id])
            img = self.transform(img)
            tab = torch.from_numpy(self.tabular[pid]).float()
            if self.mode == 'train':
                target = torch.tensor(self.targets[pid])
                return (img, tab), target
            return (img, tab)
        except Exception as e:
            print(e)
            print(pid, img_id)
            # Returning None here would only surface later as an obscure
            # unpacking error inside the collate function; fail at the source.
            raise

In [None]:
class Custom(Dataset):
    """Dataset variant that always loads the file named ``1.dcm`` per patient.

    Construction and ``__len__`` are inherited from ``Dataset``, which takes
    the same arguments and stores the same attributes; only the way the image
    file is chosen differs.
    """

    # File name read from every patient directory.
    IMAGE_NAME = "1.dcm"

    def __getitem__(self, idx):
        """Load one sample from the fixed image file of the row's patient.

        Returns:
            ``((img, tab), target)`` in ``'train'`` mode, otherwise
            ``(img, tab)``.

        Raises:
            Exception: Any error raised while reading or converting the
                sample is re-raised after the patient id and the file path
                have been printed.
        """
        row = self.df.loc[idx, :]
        pid = row['Patient']
        # Path to the single image used for this patient.
        record = self.path / self.folder / pid / self.IMAGE_NAME
        try:
            img = get_img(record)
            img = self.transform(img)
            tab = torch.from_numpy(self.tabular[pid]).float()
            if self.mode == 'train':
                target = torch.tensor(self.targets[pid])
                return (img, tab), target
            return (img, tab)
        except Exception as e:
            print(e)
            # The previous handler printed an undefined variable and so
            # failed itself; report the file that could not be loaded.
            print(pid, record)
            raise

In [None]:
def collate_fn(b):
    """Batch ``((img, tab), target)`` samples into stacked float tensors.

    Args:
        b: Sequence of ``((img, tab), target)`` samples.

    Returns:
        ``((images, tabular), targets)`` where each element is the stack of
        the corresponding sample entries, converted to float.
    """
    inputs, targets = zip(*b)
    # zip(*inputs) yields the image group first, then the tabular group.
    images, tabular = (torch.stack(group).float() for group in zip(*inputs))
    labels = torch.stack(targets).float()
    return (images, tabular), labels

### Model Architecture

In [None]:
# Maps an EfficientNet variant name to the checkpoint file that OSIC_Model
# loads into the backbone. Only the variants listed here can be used.
pretrained_model = {
    'efficientnet-b0': '../input/efficientnet-pytorch/efficientnet-b0-08094119.pth',
    'efficientnet-b3': '../input/efficientnet-pytorch/efficientnet-b3-c8376fa2.pth'
}

In [None]:
class OSIC_Model(nn.Module):
    """EfficientNet image branch fused with a small MLP over tabular features.

    A strided convolution turns the single-channel input into three channels
    for the EfficientNet backbone, whose classifier is replaced by a
    500-unit linear layer. The tabular branch maps 4 features to 250 units.
    Both branches are concatenated and reduced to one output value.

    Args:
        eff_name: Key into ``pretrained_model`` (e.g. ``'efficientnet-b3'``);
            selects both the architecture and the checkpoint to load.
    """

    def __init__(self, eff_name='efficientnet-b0'):
        super().__init__()
        self.input = nn.Conv2d(1, 3, kernel_size=3, padding=1, stride=2)
        self.bn = nn.BatchNorm2d(3)
        self.model = EfficientNet.from_name(eff_name)
        # Load on the CPU so the checkpoint can be read on machines without
        # a GPU; the caller moves the whole model to its device afterwards.
        self.model.load_state_dict(
            torch.load(pretrained_model[eff_name], map_location='cpu')
        )
        # Take the head's input width from the backbone itself: it differs
        # between variants, so a hard-coded width only fits one of them.
        self.model._fc = nn.Linear(self.model._fc.in_features, 500, bias=True)
        self.meta = nn.Sequential(nn.Linear(4, 500),
                                  nn.BatchNorm1d(500),
                                  nn.ReLU(),
                                  nn.Dropout(p=0.2),
                                  nn.Linear(500, 250),
                                  nn.BatchNorm1d(250),
                                  nn.ReLU(),
                                  nn.Dropout(p=0.2))
        self.output = nn.Linear(500 + 250, 1)
        self.relu = nn.ReLU()

    def forward(self, x, tab):
        """Return one prediction per sample.

        Args:
            x: Image batch with a single channel (consumed by a
                ``Conv2d(1, 3, ...)`` layer).
            tab: Tabular batch whose last dimension is 4.

        Returns:
            Tensor whose last dimension is 1.
        """
        x = self.relu(self.bn(self.input(x)))
        x = self.model(x)
        tab = self.meta(tab)
        x = torch.cat([x, tab], dim=1)
        return self.output(x)

### K-fold splits

In [None]:
from sklearn.model_selection import KFold

def get_split_idxs(n_folds=5):
    """Split the global ``Person`` array into K folds.

    Args:
        n_folds: Number of folds passed to ``KFold``.

    Returns:
        A list with one ``(train_idx, valid_idx)`` tuple of index arrays per
        fold, in the order ``KFold.split`` yields them.
    """
    splitter = KFold(n_splits=n_folds)
    return [(train_idx, valid_idx) for train_idx, valid_idx in splitter.split(Person)]

In [None]:
splits = get_split_idxs(n_folds=config.FOLDS)

In [None]:
def train_loop(model, dl, opt, sched, device, loss_fn):
    """Run one training pass over ``dl``.

    Args:
        model: Module called as ``model(imgs, tabs)``.
        dl: Iterable yielding ``((imgs, tabs), y)`` batches.
        opt: Optimizer stepped once per batch.
        sched: Optional scheduler stepped once per batch; ``None`` disables it.
        device: Device the batch tensors are moved to.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar
            tensor.
    """
    model.train()
    for X, y in dl:
        imgs = X[0].to(device)
        tabs = X[1].to(device)
        y = y.to(device)

        opt.zero_grad()
        outputs = model(imgs, tabs)
        # Drop only the trailing dimension. A bare squeeze() would also
        # remove the batch dimension of a single-sample batch, leaving a 0-d
        # prediction that no longer matches the shape of the target.
        loss = loss_fn(outputs.squeeze(-1), y)
        loss.backward()
        opt.step()
        if sched is not None:
            sched.step()
            

def eval_loop(model, dl, device, loss_fn):
    """Evaluate ``model`` on ``dl`` without tracking gradients.

    Args:
        model: Module called as ``model(imgs, tabs)``.
        dl: Iterable yielding ``((imgs, tabs), y)`` batches.
        device: Device the batch tensors are moved to.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar
            tensor.

    Returns:
        ``(final_outputs, final_loss)``: the model outputs of all batches as
        one nested list, and one loss value per batch.
    """
    model.eval()
    final_outputs = []
    final_loss = []
    with torch.no_grad():
        for X, y in dl:
            imgs = X[0].to(device)
            tabs = X[1].to(device)
            y = y.to(device)

            outputs = model(imgs, tabs)
            # Drop only the trailing dimension so a single-sample batch keeps
            # the same shape as its target (a bare squeeze() makes it 0-d).
            loss = loss_fn(outputs.squeeze(-1), y)

            final_outputs.extend(outputs.cpu().numpy().tolist())
            final_loss.append(loss.item())

    return final_outputs, final_loss

In [None]:
from functools import partial

def apply_mod(m,f):
    """Call ``f`` on ``m`` and on every descendant module, parents first.

    Args:
        m: Root module; its ``children()`` are visited recursively.
        f: Callable invoked with each visited module.
    """
    # Explicit stack instead of recursion; children are pushed in reverse so
    # they are popped, and therefore visited, in their original order.
    pending = [m]
    while pending:
        module = pending.pop()
        f(module)
        pending.extend(reversed(list(module.children())))

def set_grad(m,b):
    """Set ``requires_grad`` to ``b`` on the parameters of a weighted module.

    ``nn.Linear`` and ``nn.BatchNorm2d`` modules are left untouched, as are
    modules without a ``weight`` attribute.

    Args:
        m: Module to update.
        b: Value passed to ``requires_grad_`` for every parameter of ``m``.
    """
    is_exempt = isinstance(m, (nn.Linear, nn.BatchNorm2d))
    if is_exempt or not hasattr(m, 'weight'):
        return
    for param in m.parameters():
        param.requires_grad_(b)



In [None]:
# One freshly initialised model per cross-validation fold, keyed by fold index.
models = {fold: OSIC_Model(config.model_type) for fold in range(config.FOLDS)}

In [None]:
# Switch off gradients inside each fold's EfficientNet backbone; set_grad
# itself decides which module types are left trainable.
freeze = partial(set_grad, b=False)
for fold_model in models.values():
    apply_mod(fold_model.model, freeze)

### View some training Images

In [None]:
# Small loader over the rows of the first 21 patients, used only for the
# image preview in the next cell.
train = train_df[train_df['Patient'].isin(Person[:21])].reset_index(drop=True)
train_ds = Dataset(path, train, TAB, TARGET, mode='train')
train_dl = DataLoader(
    train_ds,
    batch_size=config.TRAIN_BS,
    shuffle=True,
    collate_fn=collate_fn,
)

In [None]:
# Show up to rows * columns images from one training batch.
fig = plt.figure(figsize=(8, 8))
columns = 4
rows = 4

# One batch is enough for a preview; iterating the whole loader just to keep
# its last batch would read an image from disk for every row.
X, y = next(iter(train_dl))

# Never index past the end of the batch when it holds fewer images than
# there are grid cells.
n_shown = min(rows * columns, len(X[0]))
for j in range(n_shown):
    # Channel-first tensor -> channel-last array, then grey -> RGB for imshow.
    img = np.array(X[0][j].permute(1, 2, 0))
    img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    fig.add_subplot(rows, columns, j + 1)
    plt.imshow(img)
plt.show()

## Training

In [None]:
history = []

In [None]:
# Train one model per fold and record its validation loss after every epoch.
for i, (train_idx, valid_idx) in enumerate(splits):
    print(f"===================Fold : {i} ================")

    # The folds are defined over patients; select the rows of each side.
    train = train_df.loc[train_df['Patient'].isin(Person[train_idx])].reset_index(drop=True)
    valid = train_df.loc[train_df['Patient'].isin(Person[valid_idx])].reset_index(drop=True)

    train_ds = Dataset(path, train, TAB, TARGET, mode='train')
    train_dl = torch.utils.data.DataLoader(
        dataset=train_ds,
        batch_size=config.TRAIN_BS,
        shuffle=True,
        collate_fn=collate_fn
    )

    # mode='train' here as well so that validation batches carry targets.
    valid_ds = Dataset(path, valid, TAB, TARGET, mode='train')
    valid_dl = torch.utils.data.DataLoader(
        dataset=valid_ds,
        batch_size=config.VALID_BS,
        shuffle=False,
        collate_fn=collate_fn
    )

    model = models[i]
    model.to(config.DEVICE)
    lr = 1e-3

    optimizer = Adam(model.parameters(), lr=lr, weight_decay=0.1)
    # OneCycleLR is stepped once per batch inside train_loop and rewrites the
    # learning rate on every step. A ReduceLROnPlateau stacked on the same
    # optimizer therefore had its reductions overwritten by the next batch,
    # so OneCycleLR is kept as the single scheduler.
    scheduler = OneCycleLR(optimizer,
                           max_lr=lr,
                           epochs=config.EPOCHS,
                           steps_per_epoch=len(train_dl)
                           )
    losses = []
    for epoch in range(config.EPOCHS):
        print(f"=================EPOCHS {epoch+1}================")
        train_loop(model, train_dl, optimizer, scheduler, config.DEVICE, config.loss_fn)
        _, valid_losses = eval_loop(model, valid_dl, config.DEVICE, config.loss_fn)
        # Mean of the per-batch validation losses.
        total_loss = np.array(valid_losses).mean()
        losses.append(total_loss)
        print("Loss ::\t", total_loss)

    # Release the GPU before the next fold's model is moved onto it.
    model.to('cpu')
    history.append(losses)
    
    
        

In [None]:
# Plot the validation loss per epoch for every fold. The x axis is derived
# from the recorded history, so the plot works for any number of folds and
# epochs rather than assuming exactly two folds of five epochs.
for fold_number, fold_losses in enumerate(history, start=1):
    epochs_axis = range(1, len(fold_losses) + 1)
    # The marker keeps a single-epoch run visible (a one-point line is not).
    plt.plot(epochs_axis, fold_losses, marker='o', label=f'Fold{fold_number}')
plt.xlabel('Epochs - EfficientNet')
plt.ylabel('Mean L1 Loss for gradient of the curve ')
plt.legend()

In [None]:
# Persist each fold's weights as fold_<index>.pth in the working directory.
for fold in models:
    fold_weights = models[fold].state_dict()
    torch.save(fold_weights, f'fold_{fold}.pth')

### Prediction & Submission

In [None]:
test_df = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/test.csv')
sub = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/sample_submission.csv')

In [None]:
test_df=test_df.loc[:1,:]

In [None]:
print(test_df)


In [None]:
# Expand the first test patient into one row per week from -12 to 133.
# NOTE(review): only the first row of test_df is expanded (range(1)); later
# cells assume that many rows, so widening this needs them changed together.
test_data = []
for i in range(1):
    patient = test_df['Patient'][i]
    for j in range(-12, 134):
        test_data.append([
            patient,
            j,
            test_df['Age'][i],
            test_df['Sex'][i],
            test_df['SmokingStatus'][i],
            test_df['FVC'][i],
            test_df['Percent'][i],
            # The key must be "<patient id>_<week>"; building it from the
            # string form of a whole row made it unusable as a lookup key.
            f'{patient}_{j}',
        ])

test_data = pd.DataFrame(test_data, columns=['Patient','Weeks','Age','Sex','SmokingStatus','FVC','Percent','Patient_Week'])

In [None]:
test_data.head(150)

In [None]:
# Tabular feature vector per test patient, plus the list of those patients.
TAB_test = {}

Person_test = []

for p in tqdm(test_data.Patient.unique()):
    # A dedicated name keeps the `sub` frame loaded from the sample
    # submission from being overwritten by this loop.
    patient_rows = test_data.loc[test_data.Patient == p]

    # NOTE(review): get_tab reads the first row's Weeks value, which for the
    # expanded test grid is its first week - confirm that is the intended
    # week feature at inference time.
    TAB_test[p] = get_tab(patient_rows)
    Person_test.append(p)

Person_test = np.array(Person_test)

In [None]:
#TAB_proj = {}

#Person_proj = []

#for i, p in tqdm(enumerate(proj_data.Patient.unique())):
    #sub = proj_data.loc[proj_data.Patient == p]

    #weeks = sub.Weeks.values
    #c = np.vstack([weeks, np.ones(len(weeks))]).T

    #TAB_proj[p] = get_tab(sub)
    #Person_proj.append(p)

#Person_proj = np.array(Person_proj)

In [None]:
def collate_fn_test(b):
    """Batch ``(img, tab)`` samples into a pair of stacked float tensors.

    Args:
        b: Sequence of ``(img, tab)`` samples.

    Returns:
        ``(images, tabular)``, each the stack of the corresponding sample
        entries converted to float.
    """
    # zip(*b) yields the image group first, then the tabular group.
    images, tabular = (torch.stack(group).float() for group in zip(*b))
    return (images, tabular)

In [None]:
# Loader over the expanded test grid. Targets are not read in 'test' mode,
# so an empty mapping is passed instead of overwriting the training TARGET.
test_ds = Custom(path, test_data, TAB_test, {}, mode='test')
test_dl = torch.utils.data.DataLoader(
    dataset=test_ds,
    batch_size=128,
    # Predictions are matched to test_data rows by position later on, so the
    # loader must keep the rows in their original order.
    shuffle=False,
    collate_fn=collate_fn_test
)

In [None]:
print(len(test_data))

In [None]:
# Sum the predictions of every fold model; the next cell divides by the
# number of models. The accumulator is sized from the data rather than
# assuming a fixed number of rows.
avg_predictions = np.zeros((len(test_data), 1))

for i in range(len(models)):
    model = models[i]
    model = model.to(config.DEVICE)
    # map_location lets weights saved on one device be restored on another.
    model.load_state_dict(torch.load(f'./fold_{i}.pth', map_location=config.DEVICE))
    model.eval()

    predictions = []
    with torch.no_grad():
        for X in test_dl:
            imgs = X[0].to(config.DEVICE)
            tabs = X[1].to(config.DEVICE)

            pred = model(imgs, tabs)

            predictions.extend(pred.cpu().numpy().tolist())
    # One single-element row per sample, matching the accumulator's shape.
    avg_predictions += np.asarray(predictions)

In [None]:
predictions = avg_predictions / len(models)

In [None]:
# Turn each predicted slope into an FVC value, a confidence value and a
# percent value for every row of the expanded test grid.
fvc = []
conf = []
percent = []
for i, p in enumerate(test_data['Patient']):
    # Baseline measurement of this patient from the original test frame,
    # taken as scalars so every derived value is a plain number and not a
    # one-element Series.
    is_patient = (test_df.Patient == p).values
    base_week = test_df.Weeks.values[is_patient][0]
    base_fvc = test_df.FVC.values[is_patient][0]
    base_percent = test_df.Percent.values[is_patient][0]

    slope = predictions[i][0]
    week = test_data['Weeks'][i]

    # FVC that corresponds to 100 percent for this patient.
    good_fvc = base_fvc * 100 / base_percent
    # Linear extrapolation from the baseline week along the predicted slope.
    B_test = slope * base_week
    cur_fvc = slope * week + test_data['FVC'][i] - B_test

    fvc.append(cur_fvc)
    # The Percent value plus |slope| times the distance from the baseline week.
    conf.append(test_data['Percent'][i] + abs(slope) * abs(base_week - week))
    percent.append((cur_fvc * 100) / good_fvc)

In [None]:

submission = test_data[['Patient_Week']]


In [None]:
sub = pd.read_csv('../input/osic-pulmonary-fibrosis-progression/sample_submission.csv')
sub.head()

In [None]:
# Build a Patient_Week -> [FVC, percent] lookup plus parallel column lists
# for the result table.
subm = {}
ids = []
week = []
fvc_1 = []
percent_1 = []
our_result = pd.DataFrame()
for i, patient_week in enumerate(submission['Patient_Week']):
    fvc_value = float(fvc[i])
    percent_value = float(percent[i])
    subm[patient_week] = [fvc_value, percent_value]
    # Split on the last underscore only, so the week part is always the
    # final field; the id is not stored in a variable named `id`, which
    # would shadow the builtin.
    patient_id, week_label = patient_week.rsplit("_", 1)
    ids.append(patient_id)
    week.append(week_label)
    fvc_1.append(fvc_value)
    percent_1.append(percent_value)

In [None]:
# Assemble the per-week results into one table (column order: ID, FVC,
# percent, week).
our_result = pd.DataFrame({
    'ID': ids,
    'FVC': fvc_1,
    'percent': percent_1,
    'week': week,
})

In [None]:
our_result.to_csv('our_result.csv', index=False)

In [None]:
# Copy the predicted FVC into the submission frame, matched on Patient_Week.
# The column is assigned in one step: element-wise `sub['FVC'][i] = ...` is
# chained indexing, which pandas may apply to a temporary copy instead of
# `sub`. A Patient_Week without a prediction still raises KeyError.
sub['FVC'] = pd.Series(
    [float(subm[patient_week][0]) for patient_week in sub['Patient_Week']],
    index=sub.index,
    dtype=float,
)

In [None]:
sub.head()

In [None]:
sub.to_csv('result.csv', index=False)

# Regression Model

## Data Inference

## NeuralNet model

## Training model

## Generating submission

## Ensemble (Simple Blend)