In [None]:
!pip install timm

In [None]:
!git clone https://github.com/lessw2020/Ranger-Deep-Learning-Optimizer.git

In [None]:
import sys
sys.path.append('./Ranger-Deep-Learning-Optimizer')

In [None]:
import math
import os
import random
import numpy as np
import pandas as pd
import cv2
from collections import defaultdict
from tqdm import tqdm

import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score

import albumentations as A
from albumentations.pytorch import ToTensorV2

import torch
from torch import nn
from torch.utils import data
from torch.utils.data import Dataset,DataLoader
import torch.nn.functional as F
from torch import Tensor
from torch.optim.optimizer import Optimizer, required

from transformers import AdamW
from transformers import get_linear_schedule_with_warmup,get_cosine_schedule_with_warmup
from transformers import get_cosine_with_hard_restarts_schedule_with_warmup,get_constant_schedule_with_warmup

import timm
from ranger.ranger2020 import Ranger

# Config

In [None]:
# Global seed passed to seed_everything() at the start of a run.
SEED = 1000
# Prefer the GPU, but fall back to CPU so the notebook still runs end-to-end
# (slowly) on a machine without CUDA instead of failing at model.to(device).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Side length (pixels) that both transform pipelines resize images to.
SIZE = 384

# Directory that contains the competition's `train/` folder of .npy files.
ROOT_DIR = '../input/seti-breakthrough-listen'

#### DATALOADER #####
TRAIN_BATCH_SIZE = 16
VALID_BATCH_SIZE = 32
NUM_WORKERS=4

### GENERAL ######
EPOCHS = 20
# LR is handed to the optimizer and MAX_LR to OneCycleLR in run().
# NOTE(review): OneCycleLR derives its own starting LR from max_lr/div_factor,
# so LR is presumably overridden once the scheduler is attached - confirm.
LR = 5.0e-06
MAX_LR = 1e-3

##### 
# NOTE(review): not referenced by any cell visible in this notebook; run()
# always builds a OneCycleLR scheduler.
SCHEDULER = 'linear'

##### Model Params ######
# Keyword arguments forwarded verbatim to enetv2(**model_params).
model_params = dict(
    backbone='tf_efficientnet_b3_ns',
    in_channels=1,
    out_dim=1,
    pretrained=True
)

# Utils

In [None]:
def seed_everything(seed):
    """Seed every RNG used by the notebook and put cuDNN in deterministic mode.

    Args:
        seed: integer seed applied to Python's ``random``, ``PYTHONHASHSEED``,
            NumPy and PyTorch (CPU and all CUDA devices).
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Also covers multi-GPU setups; silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and pick a different algorithm from
    # run to run, which defeats the deterministic flag above. It must be off
    # for the seeding to give reproducible results.
    torch.backends.cudnn.benchmark = False

In [None]:
class AverageMeter(object):
    """Keeps the most recent value and a weighted running mean of all values.

    Attributes:
        val: value passed to the latest ``update`` call.
        sum: weighted sum of every value seen since the last ``reset``.
        count: total weight (e.g. number of samples) seen since ``reset``.
        avg: ``sum / count`` after the latest ``update``.
    """

    _FIELDS = ("val", "avg", "sum", "count")

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero every tracked statistic."""
        for field in self._FIELDS:
            setattr(self, field, 0)

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running mean."""
        self.val = val
        weighted = val * n
        self.sum += weighted
        self.count += n
        self.avg = self.sum / self.count

In [None]:
class RocAucMeter(object):
    """Accumulates targets and logits and keeps a running ROC-AUC.

    The buffers start empty, so the reported score is computed on real
    samples only. Until both classes have been observed the AUC is undefined
    and ``score`` keeps its previous value (``0`` right after ``reset``).

    Attributes:
        y_true: 1-D integer array of every target seen since ``reset``.
        y_pred: 1-D array of sigmoid probabilities aligned with ``y_true``.
        score: ROC-AUC over the accumulated samples.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Discard all accumulated targets/predictions and zero the score."""
        self.y_true = np.array([], dtype=int)
        self.y_pred = np.array([], dtype=float)
        self.score = 0

    def update(self, y_true, y_pred):
        """Append one batch and refresh the running score.

        Args:
            y_true: tensor of binary targets.
            y_pred: tensor of raw logits; a sigmoid is applied here.
        """
        y_true = y_true.detach().cpu().numpy().astype(int)
        y_pred = y_pred.sigmoid().detach().cpu().numpy()

        self.y_true = np.hstack((self.y_true, y_true))
        self.y_pred = np.hstack((self.y_pred, y_pred))
        # roc_auc_score raises ValueError when only one class is present, so
        # only score once both classes have been seen. This replaces the
        # former trick of pre-seeding the buffers with two fake samples,
        # which leaked into every reported AUC.
        if np.unique(self.y_true).size > 1:
            self.score = roc_auc_score(self.y_true, self.y_pred)

    @property
    def avg(self):
        """Current ROC-AUC (named ``avg`` to mirror ``AverageMeter``)."""
        return self.score

# Augmentations

In [None]:
def get_train_transform(size=SIZE):
    """Build the albumentations pipeline applied to training images.

    Random flips, shift/scale/rotate, brightness/contrast and cutout are each
    applied with their own probability; the image is then resized to
    ``size`` x ``size`` and converted to a tensor.

    Args:
        size: output height and width in pixels.

    Returns:
        An ``A.Compose`` instance.
    """
    random_augs = [
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2, rotate_limit=90, p=0.5),
        A.RandomBrightnessContrast(p=0.25),
        A.Cutout(p=0.3),
    ]
    # Resizing happens after the random augmentations, as the final image op.
    finalize = [
        A.Resize(size, size, always_apply=True),
        ToTensorV2(),
    ]
    return A.Compose(random_augs + finalize)
def get_valid_transform(size=SIZE):
    """Build the deterministic pipeline applied to validation images.

    Args:
        size: output height and width in pixels.

    Returns:
        An ``A.Compose`` that only resizes to ``size`` x ``size`` and converts
        the image to a tensor.
    """
    steps = [
        A.Resize(size, size, always_apply=True),
        ToTensorV2(),
    ]
    return A.Compose(steps)

# Dataset

In [None]:
class SetiDataset(torch.utils.data.Dataset):
    """Dataset yielding ``(image, target)`` pairs from cadence ``.npy`` files.

    Each row of ``df`` must expose an ``id`` and a ``target``; the file is
    read from ``{ROOT_DIR}/train/{first char of id}/{id}.npy``.

    Args:
        df: DataFrame with ``id`` and ``target`` columns.
        transform: callable invoked as ``transform(image=img)["image"]``.
        read_type: ``'selected'`` to keep only ``selected_dims`` of the
            cadence; any other value reads the whole cadence.
        selected_dims: indices along the first axis of the stored array to
            keep when ``read_type == 'selected'``. ``None`` means
            ``DEFAULT_SELECTED_DIMS``.
    """

    # Used when a selection is requested but none is given. Kept as an
    # immutable tuple and copied to a list before indexing.
    DEFAULT_SELECTED_DIMS = (0, 2, 4)

    def __init__(self,df,transform,read_type='all',selected_dims=None):
        self.df = df
        self.transform = transform
        self.read_type = read_type
        self.selected_dims = selected_dims

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        """Return ``(transformed image, float32 target tensor)`` for one row."""
        row = self.df.iloc[index]

        id_ = row.id
        path = f"{ROOT_DIR}/train/{id_[0]}/{id_}.npy"
        label = row.target

        if self.read_type == 'selected':
            img = self.read_selected_cadence(path,self.selected_dims)
        else:
            img = self.read_all_cadence(path)

        img = self.transform(image=img)["image"]

        return img,torch.tensor(label,dtype=torch.float)

    @staticmethod
    def _to_image(cadence):
        """Stack the snippets of a cadence into one single-channel image.

        For an input of shape ``(k, h, w)`` the result has shape
        ``(w, k * h, 1)`` and dtype float32.
        """
        img = np.vstack(cadence)  # (k, h, w) -> (k * h, w)
        img = img.transpose(1, 0)  # -> (w, k * h)
        return img.astype("f")[..., np.newaxis]  # -> (w, k * h, 1)

    def read_all_cadence(self, path):
        """Read a cadence file and reshape every snippet into one image.

        The original comments give ``(6, 273, 256)`` as the stored shape,
        which yields a ``(256, 1638, 1)`` image.
        """
        return self._to_image(np.load(path))

    def read_selected_cadence(self, path ,selected_dims=None):
        """Read a cadence file and reshape only the selected snippets.

        Args:
            path: location of the ``.npy`` file.
            selected_dims: index (list) applied to the first axis of the
                stored array; ``None`` selects ``DEFAULT_SELECTED_DIMS``.
        """
        if selected_dims is None:
            # Indexing with None would add an axis instead of selecting
            # snippets, which broke the later transpose.
            selected_dims = list(self.DEFAULT_SELECTED_DIMS)
        return self._to_image(np.load(path)[selected_dims])

In [None]:
def dataset_checker(dataset, index=9):
    """Print the shape and label of one sample and show its image.

    Args:
        dataset: indexable object returning ``(image tensor, label)``; the
            image is permuted from channel-first to channel-last for plotting.
        index: which sample to inspect. Defaults to 9, the index that was
            previously hard-coded, so datasets with fewer than ten items can
            now be checked by passing a smaller index.
    """
    img,label = dataset[index]
    print(img.shape)
    print(label)
    img = img.permute(1,2,0).detach().numpy()
    # Two axes are created but only the first is drawn on, as before.
    fig, ax = plt.subplots(figsize=(16, 8),  nrows=1, ncols=2)
    ax[0].imshow(img)

In [None]:
# df = pd.read_csv('../input/seti-breakthrough-listen/train_labels.csv')
# d = SetiDataset(df,transform=get_train_transform(),read_type='selected',selected_dims=[0,2,4])
# dataset_checker(d)

# del d,df

In [None]:
def get_loader(fold):
    """Build the training and validation loaders for one cross-validation fold.

    Rows of the split CSV whose ``fold`` column equals ``fold`` form the
    validation set; all remaining rows form the training set. Both datasets
    read the cadence snippets ``[0, 2, 4]``.

    Args:
        fold: fold id held out for validation.

    Returns:
        ``(train_loader, valid_loader)``.
    """
    df = pd.read_csv('../input/seti-splits-old-new/5folds_split_new.csv')

    train = df[df['fold']!=fold]
    valid = df[df['fold']==fold]

    train_dataset = SetiDataset(
        train,
        transform=get_train_transform(),
        read_type='selected',
        selected_dims=[0,2,4]
    )

    valid_dataset = SetiDataset(
        valid,
        transform=get_valid_transform(),
        read_type='selected',
        selected_dims=[0,2,4]
    )

    train_loader = DataLoader(
        train_dataset,
        batch_size  = TRAIN_BATCH_SIZE,
        # DataLoader defaults to shuffle=False. Without this the model saw the
        # samples in the same CSV order every epoch.
        shuffle     = True,
        drop_last   = False,
        num_workers = NUM_WORKERS,
        pin_memory  = True,
    )

    # Validation order does not affect the metrics, so it stays sequential.
    valid_loader = DataLoader(
        valid_dataset,
        batch_size  = VALID_BATCH_SIZE,
        drop_last   = False,
        num_workers = NUM_WORKERS,
        pin_memory  = True,
    )

    return train_loader,valid_loader

# Model

In [None]:
class enetv2(nn.Module):
    """timm backbone preceded by a small conv stem and followed by a linear head.

    Three 3x3 conv + batch-norm + ReLU6 layers expand the input from
    ``in_channels`` to 36 channels. The backbone's ``conv_stem`` weight is
    tiled so that it accepts those 36 channels. ``blocks[5]`` of the backbone
    is replaced by an identity and ``blocks[6]`` by a 1x1 conv + batch-norm +
    ReLU6 that maps the output of ``blocks[4]`` to the width expected by
    ``conv_head``. The backbone classifier is removed in favour of ``myfc``.

    Args:
        backbone: timm model name. NOTE(review): the block surgery assumes the
            model exposes ``conv_stem``, ``blocks[4][2].conv_pwl``,
            ``conv_head`` and ``classifier`` - verify before changing it.
        in_channels: number of channels of the input images.
        out_dim: number of output logits.
        pretrained: whether timm should load pretrained weights.
    """

    def __init__(self, backbone,in_channels,out_dim,pretrained=True):
        super(enetv2, self).__init__()
        stem_channels = 36  # width fed to the backbone by conv3

        self.conv1 = nn.Conv2d(in_channels, 6, 3, stride=1, padding=1, bias=False)
        self.conv2 = nn.Conv2d(6, 12, 3, stride=1, padding=1, bias=False)
        self.conv3 = nn.Conv2d(12, stem_channels, 3, stride=1, padding=1, bias=False)
        self.mybn1 = nn.BatchNorm2d(6)
        self.mybn2 = nn.BatchNorm2d(12)
        self.mybn3 = nn.BatchNorm2d(stem_channels)

        # The backbone always receives conv3's output, never the raw input, so
        # its stem must not depend on `in_channels`. Build it single-channel
        # and tile the stem weight to `stem_channels`. Previously it was built
        # with in_chans=in_channels, so the tiled weight only matched the
        # 36-channel input when in_channels == 1. Results for in_channels == 1
        # are unchanged.
        self.enet = timm.create_model(backbone, pretrained=pretrained,in_chans=1)
        self.enet.conv_stem.weight = nn.Parameter(
            self.enet.conv_stem.weight.repeat(1, stem_channels, 1, 1)
        )

        self.dropout = nn.Dropout(0.5)
        self.enet.blocks[5] = nn.Identity()
        self.enet.blocks[6] = nn.Sequential(
            nn.Conv2d(self.enet.blocks[4][2].conv_pwl.out_channels, self.enet.conv_head.in_channels, 1),
            nn.BatchNorm2d(self.enet.conv_head.in_channels),
            nn.ReLU6(),
        )
        # Read the feature width before the classifier is replaced.
        self.myfc = nn.Linear(self.enet.classifier.in_features, out_dim)
        self.enet.classifier = nn.Identity()

    def extract(self, x):
        """Return the backbone features for a batch of images."""
        x = F.relu6(self.mybn1(self.conv1(x)))
        x = F.relu6(self.mybn2(self.conv2(x)))
        x = F.relu6(self.mybn3(self.conv3(x)))
        x = self.enet(x)
        return x

    def forward(self, x):
        """Return raw logits of shape ``(batch, out_dim)``."""
        x = self.extract(x)
        x = self.myfc(self.dropout(x))
        return x

# Training Function

In [None]:
def train_fn(dataloader,model,criterion,optimizer,device,scheduler,epoch):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        dataloader: yields ``(images, targets)`` batches.
        model: network returning one logit per sample, shape ``(batch, 1)``.
        criterion: loss called as ``criterion(logits, targets.view(-1, 1))``.
        optimizer: optimizer stepped once per batch.
        device: device the batches are moved to.
        scheduler: optional LR scheduler, stepped once per batch when given.
        epoch: epoch number, only shown in the progress bar.

    Returns:
        The ``AverageMeter`` holding the sample-weighted training loss
        (the meter itself, not its ``avg``).
    """
    model.train()
    loss_meter = AverageMeter()
    auc_meter = RocAucMeter()

    progress = tqdm(enumerate(dataloader), total=len(dataloader))
    for _, batch in progress:
        images, targets = batch[0], batch[1]
        n_samples = images.shape[0]

        images = images.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        logits = model(images)
        loss = criterion(logits, targets.view(-1, 1))

        loss.backward()
        optimizer.step()

        loss_meter.update(loss.detach().item(), n_samples)
        auc_meter.update(targets, logits.squeeze(-1))
        # The LR shown is the one used for this step; the scheduler advances
        # only afterwards.
        progress.set_postfix(
            Train_Loss=loss_meter.avg,
            Train_AUC=auc_meter.avg,
            Epoch=epoch,
            LR=optimizer.param_groups[0]['lr'],
        )

        if scheduler is not None:
            scheduler.step()

    return loss_meter

In [None]:
def evaluate(dataloader,model,criterion,device,epoch):
    """Score ``model`` on ``dataloader`` without tracking gradients.

    Args:
        dataloader: yields ``(images, labels)`` batches.
        model: network returning one logit per sample, shape ``(batch, 1)``.
        criterion: loss called as ``criterion(logits, labels.view(-1, 1))``.
        device: device the batches are moved to.
        epoch: epoch number, only shown in the progress bar.

    Returns:
        ``(mean loss, ROC-AUC)`` over the whole loader.
    """
    model.eval()
    loss_meter = AverageMeter()
    auc_meter = RocAucMeter()

    progress = tqdm(enumerate(dataloader), total=len(dataloader))
    with torch.no_grad():
        for _, batch in progress:
            images, labels = batch[0], batch[1]
            n_samples = images.shape[0]

            images = images.to(device)
            labels = labels.to(device)

            logits = model(images)
            loss = criterion(logits, labels.view(-1, 1))

            loss_meter.update(loss.detach().item(), n_samples)
            auc_meter.update(labels, logits.squeeze(-1))

            progress.set_postfix(
                Valid_Loss=loss_meter.avg,
                Valid_AUC=auc_meter.avg,
                Epoch=epoch,
            )

    return loss_meter.avg, auc_meter.avg

# Engine

In [None]:
def run(fold, checkpoint_path=None):
    """Train and validate one fold, checkpointing the best-AUC weights.

    Args:
        fold: fold id held out for validation (forwarded to ``get_loader``).
        checkpoint_path: where to save the best ``state_dict``. ``None`` keeps
            the historical name
            ``model_{backbone}_IMG_SIZE_{SIZE}.bin``, which does not contain
            the fold. Pass a fold-specific path when training several folds,
            otherwise each run overwrites the previous checkpoint.

    Returns:
        The best validation ROC-AUC reached over all epochs.
    """
    seed_everything(SEED)

    train_loader,valid_loader = get_loader(fold)

    # Re-seed so model initialisation starts from the same RNG state.
    seed_everything(SEED)

    # Defining the model for this fold
    model = enetv2(**model_params)
    model.to(device)

    # Defining the criterion
    criterion = nn.BCEWithLogitsLoss()
    criterion.to(device)

    optimizer = Ranger(model.parameters(), lr= LR,weight_decay=1.0e-02)

    # Defining the LR scheduler; train_fn steps it once per batch, so the
    # cycle spans EPOCHS * len(train_loader) steps.
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer,
        steps_per_epoch = len(train_loader),
        epochs=EPOCHS,
        max_lr=MAX_LR,
        pct_start= 0.1,
        anneal_strategy = 'cos',
        div_factor = 1.0e+3,
        final_div_factor= 1.0e+3
    )

    if checkpoint_path is None:
        checkpoint_path = f"model_{model_params['backbone']}_IMG_SIZE_{SIZE}.bin"

    # THE ENGINE LOOP
    best_auc = 0
    for epoch in range(EPOCHS):
        train_fn(train_loader, model,criterion, optimizer, device,scheduler=scheduler,epoch=epoch)
        _, valid_auc = evaluate(valid_loader, model, criterion,device,epoch=epoch)

        if valid_auc > best_auc:
            best_auc = valid_auc
            torch.save(model.state_dict(), checkpoint_path)
            print('best model found for epoch {}'.format(epoch))

    # Previously discarded; returning it lets callers compare folds.
    return best_auc

In [None]:
# Train with fold 0 held out: rows whose `fold` column equals 0 are used for
# validation, all other rows for training.
run(fold=0)