In [None]:
# Library
# ====================================================
import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')


import pandas as pd
import glob
import numpy as np
import matplotlib.pyplot as plt
import cv2
from sklearn.model_selection import train_test_split
from sklearn import metrics
from tqdm.notebook import tqdm

import albumentations
from sklearn import model_selection


import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader, Dataset

import timm
import sklearn 

import gc

In [None]:
#https://www.kaggle.com/abhishek/step-1-create-folds

def create_folds(data, num_splits, seed=None):
    """Return a shuffled copy of ``data`` with a stratified ``kfold`` column.

    The ``target`` column is cut into ``floor(1 + log2(n))`` equal-width bins
    (Sturges' rule) and the bin index, not the raw target, is what the
    stratified split balances across folds.

    Args:
        data: DataFrame containing a numeric ``target`` column.
        num_splits: number of folds to create.
        seed: optional random state for the row shuffle; pass an int to make
            the fold assignment reproducible. ``None`` keeps the previous
            non-deterministic behaviour.

    Returns:
        A new DataFrame (rows shuffled, index reset) with an integer ``kfold``
        column in ``[0, num_splits)``. The input frame is not modified.

    Raises:
        ValueError: if ``data`` has no rows.
    """
    if len(data) == 0:
        raise ValueError("create_folds() needs at least one row")

    # Shuffle first: sample() returns a new frame, so adding columns below
    # never touches the caller's DataFrame.
    data = data.sample(frac=1, random_state=seed).reset_index(drop=True)
    data["kfold"] = -1

    # number of bins by Sturges' rule (floored)
    num_bins = int(np.floor(1 + np.log2(len(data))))

    # integer bin code per row; kept outside the frame so nothing has to be dropped
    bins = pd.cut(data["target"], bins=num_bins, labels=False)

    # rows are already shuffled, so the splitter itself does not need to shuffle
    kf = model_selection.StratifiedKFold(n_splits=num_splits)

    # the index was reset above, so positional indices from split() are valid labels
    for f, (_, v_) in enumerate(kf.split(X=data, y=bins.values)):
        data.loc[v_, 'kfold'] = f

    return data


# Read the label table and attach the on-disk location of every sample;
# files live in sub-folders named after the first character of the id.
df_train = pd.read_csv('../input/seti-breakthrough-listen/train_labels.csv')
df_train['img_path'] = [
    f'../input/seti-breakthrough-listen/train/{x[0]}/{x}.npy'
    for x in df_train['id']
]

# 5-fold stratified assignment
df_train_fold = create_folds(df_train, num_splits=5)

# the fold-annotated frame is what the rest of the notebook uses
del df_train

# class counts per fold, then the annotated frame itself
print(df_train_fold.groupby(['kfold', 'target']).size())
df_train_fold


In [None]:
# The sample submission lists every test id; derive each sample's .npy path
# (sub-folder = first character of the id).
df_test = pd.read_csv('../input/seti-breakthrough-listen/sample_submission.csv')
df_test['img_path'] = [
    f'../input/seti-breakthrough-listen/test/{x[0]}/{x}.npy'
    for x in df_test['id']
]

df_test

In [None]:
# ====================================================
# CFG
# ====================================================
class CFG:
    """Hyper-parameters and runtime settings shared by the whole notebook."""

    # ---- model ----
    # model_name = 'resnet50'
    model_name = 'nfnet_l0'
    input_size = 224     # side length inputs are resized to in Net.forward
    output_size = 256    # width of the last layer of Net.head

    # ---- data loading ----
    batch_size = 75
    num_workers = 8

    # ---- schedule length ----
    num_epochs = 15        # length the LR schedule is built for
    stop_num_epochs = 10   # number of epochs the training loop actually runs
    seed = 42

    # ---- learning-rate schedule (warm-up, then cosine decay) ----
    warmup_epochs = 3
    warmup_lr = 0
    base_lr = 0.0085
    final_lr = 0.5e-7

    # ---- hardware ----
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# model_names = timm.list_models(pretrained=True)
# model_names

# print("Available Vision Transformer Models: ")
# timm.list_models("vit*")

In [None]:
import torch

    
class LR_Scheduler(object):
    """Per-iteration learning-rate schedule: linear warm-up, then cosine decay.

    The full schedule is precomputed as one array with
    ``iter_per_epoch * num_epochs`` entries; every call to :meth:`step`
    writes the next entry into the optimizer's parameter groups.

    Args:
        optimizer: object exposing ``param_groups`` (a list of dicts).
        warmup_epochs: number of epochs spent ramping linearly.
        warmup_lr: learning rate at the first warm-up iteration.
        num_epochs: total number of epochs the schedule covers.
        base_lr: learning rate reached at the end of warm-up.
        final_lr: value the cosine decay heads towards.
        iter_per_epoch: optimizer steps per epoch.
        constant_predictor_lr: when True, groups whose ``name`` is
            ``'predictor'`` are pinned to ``base_lr`` instead of following
            the schedule.
    """

    def __init__(self, optimizer, warmup_epochs, warmup_lr, num_epochs, base_lr, final_lr, iter_per_epoch, constant_predictor_lr=False):
        self.base_lr = base_lr
        self.constant_predictor_lr = constant_predictor_lr

        warmup_iter = iter_per_epoch * warmup_epochs
        warmup_lr_schedule = np.linspace(warmup_lr, base_lr, warmup_iter)

        decay_iter = iter_per_epoch * (num_epochs - warmup_epochs)
        cosine_lr_schedule = final_lr + 0.5 * (base_lr - final_lr) * (
            1 + np.cos(np.pi * np.arange(decay_iter) / decay_iter)
        )

        self.lr_schedule = np.concatenate((warmup_lr_schedule, cosine_lr_schedule))
        self.optimizer = optimizer
        self.iter = 0
        self.current_lr = 0

    def step(self):
        """Apply the next scheduled learning rate and return it.

        Once the schedule is exhausted the last value is held instead of
        raising ``IndexError``.
        """
        # clamp so extra steps past the planned horizon keep the final value
        position = min(self.iter, len(self.lr_schedule) - 1)
        lr = self.lr_schedule[position]

        for param_group in self.optimizer.param_groups:
            # .get(): groups created without a 'name' key simply follow the schedule
            if self.constant_predictor_lr and param_group.get('name') == 'predictor':
                param_group['lr'] = self.base_lr
            else:
                param_group['lr'] = lr

        self.iter += 1
        self.current_lr = lr
        return lr

    def get_lr(self):
        """Return the learning rate applied by the most recent :meth:`step`."""
        return self.current_lr

        

class SETIDataset:
    """Map-style dataset that loads one ``.npy`` array and its label per index.

    Args:
        image_paths: sequence of paths to ``.npy`` files.
        targets: sequence of numeric labels, aligned with ``image_paths``.
        augmentations: stored on the instance only.
            NOTE(review): ``__getitem__`` does not apply it - confirm whether
            augmentation was meant to be wired in.
    """

    def __init__(self, image_paths, targets,  augmentations=None):
        # Plain lists make __getitem__ strictly positional; indexing a pandas
        # Series with [] is label-based and fails when the index is not 0..n-1.
        self.image_paths = list(image_paths)
        self.targets = list(targets)
        self.augmentations = augmentations

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, item):
        # cast straight to float32 (the dtype of the returned tensor) instead of
        # going through a float64 intermediate
        image = np.load(self.image_paths[item]).astype(np.float32)
        targets = self.targets[item]

        return {
            "image": torch.tensor(image, dtype=torch.float),
            "label": torch.tensor(targets, dtype=torch.float),
        }
    

# Display sample Dataset

In [None]:
# Build loaders for one fold at notebook level (the train loader is iterated
# by the sample-visualisation cell further down).
fold = 0
fold_Batch = CFG.batch_size

# held-out fold -> validation, everything else -> training
df_valid = df_train_fold[df_train_fold.kfold == fold].reset_index(drop=True)
df_train = df_train_fold[df_train_fold.kfold != fold].reset_index(drop=True)

# inverse class frequency: every class carries the same total sampling weight
Dict_Target_counts = df_train['target'].value_counts().to_dict()
df_train['weight_sample'] = df_train['target'].map(lambda t: 1 / Dict_Target_counts[t])

train_dataset = SETIDataset(
    image_paths=df_train['img_path'],
    targets=df_train['target'],
)
train_sampler = torch.utils.data.WeightedRandomSampler(
    df_train['weight_sample'].values,
    len(train_dataset),
    replacement=True,
)
train_loader = DataLoader(
    train_dataset,
    batch_size=CFG.batch_size,
    sampler=train_sampler,
    num_workers=0,
    pin_memory=True,
)

# despite the name, this loader serves the validation split of the fold
test_dataset = SETIDataset(
    image_paths=df_valid['img_path'],
    targets=df_valid['target'],
)
test_loader = DataLoader(
    test_dataset,
    batch_size=CFG.batch_size,
    num_workers=CFG.num_workers,
    shuffle=False,
    pin_memory=True,
)


In [None]:

def Plot_scheduler(Datalength = 100):
    """Plot the learning rate reached at the end of every epoch.

    A throw-away linear layer and Adam optimizer are created only so the
    scheduler has parameter groups to write into; nothing is trained.

    Args:
        Datalength: number of scheduler steps simulated per epoch.
    """
    dummy_net = nn.Sequential(nn.Linear(10, 10))
    dummy_opt = torch.optim.Adam(dummy_net.parameters(), lr=0.00001)
    schedule = LR_Scheduler(
        dummy_opt,
        CFG.warmup_epochs,
        CFG.warmup_lr,
        CFG.num_epochs,
        CFG.base_lr,
        CFG.final_lr,
        Datalength,
    )

    plt.figure(figsize=(8, 5))

    # advance one epoch's worth of steps, then record where the LR ended up
    epoch_end_lrs = []
    for _epoch in range(CFG.num_epochs):
        for _step in range(Datalength):
            latest = schedule.step()
        epoch_end_lrs.append(latest)

    plt.plot(epoch_end_lrs)
    plt.title('Scheduler lr')
    plt.xlabel('Epoch')
    plt.show()


Plot_scheduler(Datalength = 100)

In [None]:

from matplotlib import pyplot as plt

# Visualise the first training batch: one figure per sample (at most 8),
# each showing six slices of the sample side by side.
for data in train_loader:
    Batch_image = data['image']
    Batch_label = data['label']

    # slicing replaces the manual index counter / break
    for image, label in zip(Batch_image[:8], Batch_label[:8]):
        plt.figure(figsize=(16, 10))
        # assumes every sample has at least 6 slices along its first axis - TODO confirm
        for i in range(6):
            plt.subplot(1, 6, i + 1)
            plt.imshow(image[i])
            if i == 0:
                # .item() shows the plain number instead of a tensor repr
                plt.title(f'label = {label.item()}')
            plt.axis('off')
        plt.show()
    break  # only the first batch is inspected

# Model

In [None]:
from collections import OrderedDict


from torch.optim.optimizer import Optimizer 
    
class LARS_simclr(Optimizer):
    """LARS-style optimizer with momentum, following the SimCLR/BYOL convention
    of excluding biases and batch-norm parameters from layer adaptation and
    weight decay.

    Args:
        named_modules: iterable of ``(name, module)`` pairs, e.g.
            ``model.named_modules()``.
        lr: global learning rate.
        momentum: momentum coefficient applied to the stored velocity.
        trust_coef: multiplier of ``||w|| / ||g||`` in the layer-wise trust ratio.
        weight_decay: L2 coefficient added to the gradient of 'base' parameters.
        exclude_bias_from_adaption: when True, biases and batch-norm parameters
            go to a separate 'exclude' group that gets neither weight decay nor
            layer adaptation; when False everything is placed in 'base'.
    """

    def __init__(self,
                 named_modules,
                 lr,
                 momentum=0.95,
                 trust_coef=1e-3,
                 weight_decay=1.5e-6,
                 exclude_bias_from_adaption=True):
        defaults = dict(momentum=momentum,
                        lr=lr,
                        weight_decay=weight_decay,
                        trust_coef=trust_coef)
        parameters = self.exclude_from_model(named_modules, exclude_bias_from_adaption)
        super(LARS_simclr, self).__init__(parameters, defaults)

    @torch.no_grad()
    def step(self, closure=None):
        """Perform one optimization step.

        Args:
            closure: optional callable that re-evaluates the model and returns
                the loss (standard ``torch.optim`` convention).

        Returns:
            The closure's return value, or ``None`` when no closure is given.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            weight_decay = group['weight_decay']
            momentum = group['momentum']
            lr = group['lr']
            trust_coef = group['trust_coef']
            apply_decay = self._use_weight_decay(group)
            adapt_layer = self._do_layer_adaptation(group)

            for p in group['params']:
                if p.grad is None:
                    continue

                # build the decayed gradient out of place so p.grad is not
                # modified (repeated steps would otherwise stack the decay term)
                grad = p.grad
                if apply_decay:
                    grad = grad + weight_decay * p

                # layer-wise trust ratio acts as a local learning-rate multiplier
                trust_ratio = 1.0
                if adapt_layer:
                    w_norm = torch.norm(p, p=2)
                    g_norm = torch.norm(grad, p=2)
                    if w_norm > 0 and g_norm > 0:
                        trust_ratio = trust_coef * w_norm / g_norm

                param_state = self.state[p]
                velocity = param_state.get('velocity', 0)
                next_v = momentum * velocity + lr * trust_ratio * grad
                # persist the velocity; without this the momentum term is always zero
                param_state['velocity'] = next_v
                p.sub_(next_v)

        return loss

    def _use_weight_decay(self, group):
        """Weight decay applies to every group except the one named 'exclude'."""
        return group['name'] != 'exclude'

    def _do_layer_adaptation(self, group):
        """Layer adaptation applies to every group except the one named 'exclude'."""
        return group['name'] != 'exclude'

    def exclude_from_model(self, named_modules, exclude_bias_from_adaption=True):
        """Split module parameters into 'base' and 'exclude' parameter groups.

        Batch-norm parameters and parameters named ``bias`` go to 'exclude';
        parameters named ``weight`` go to 'base'.

        NOTE(review): parameters with any other name (held directly by a
        module) are placed in neither group and are therefore not optimized -
        confirm this is intended for the backbone in use.
        """
        base = []
        exclude = []
        for _, module in named_modules:
            # recurse=False: each module contributes only the parameters it owns;
            # children are visited on their own by named_modules
            own_params = module.named_parameters(recurse=False)
            if type(module) in [nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d]:
                exclude.extend(param for _, param in own_params)
                continue
            for param_name, param in own_params:
                if param_name == 'bias':
                    exclude.append(param)
                elif param_name == 'weight':
                    base.append(param)

        if exclude_bias_from_adaption == True:
            return [
                {'name': 'base', 'params': base},
                {'name': 'exclude', 'params': exclude},
            ]
        return [{'name': 'base', 'params': base + exclude}]
    


    
class Identity(nn.Module):
    """Pass-through layer: ``forward`` hands back its argument unchanged.

    Used to replace a backbone's final classification layer so the backbone
    outputs features instead of logits.
    """

    def forward(self, x):
        return x
    
  
    
class Net(nn.Module):
    """Binary classifier that runs a shared timm backbone over two 3-channel
    slices of the input and classifies the concatenated features.

    Args:
        cfg: configuration object providing ``model_name``, ``input_size`` and
            ``output_size``.
        use_pretrain: forwarded to ``timm.create_model(pretrained=...)``.
    """

    def __init__(self, cfg, use_pretrain = False):
        super(Net, self).__init__()
        self.cfg = cfg

        # backbone; its final classification layer is swapped for Identity so
        # it returns features
        self.model = timm.create_model(self.cfg.model_name, pretrained=use_pretrain)

        if self.cfg.model_name == 'nfnet_l0':
            in_feature = self.model.head.fc.in_features
            self.model.head.fc = Identity()
        else:
            # presumably a ResNet-style model exposing `.fc` - verify for other backbones
            in_feature = self.model.fc.in_features
            self.model.fc = Identity()

        # projection head over the two concatenated feature vectors (hence 2 * in_feature)
        self.head = nn.Sequential(OrderedDict([
            ('fc1', nn.Linear(2 * in_feature, 2048)),
            ('bb1', nn.BatchNorm1d(2048)),
            ('added_relu1', nn.ReLU(inplace=True)),
            ('fc2', nn.Linear(2048, 1024)),
            ('bb2', nn.BatchNorm1d(1024)),
            ('added_relu2', nn.ReLU(inplace=True)),
            ('fc3', nn.Linear(1024, 512)),
            ('bb3', nn.BatchNorm1d(512)),
            ('added_relu3', nn.ReLU(inplace=True)),
            ('fc_final', nn.Linear(512, self.cfg.output_size)),
        ]))

        # single-logit classifier on top of the embedding
        self.classify = nn.Sequential(OrderedDict([
            ('bb1', nn.BatchNorm1d(self.cfg.output_size)),
            ('fc1', nn.Linear(self.cfg.output_size, 1)),
        ]))

    def forward(self, x):
        """Return one logit per sample.

        Channels 0-2 and 3-5 of ``x`` are resized independently and passed
        through the same backbone.
        assumes x is (batch, >=6, H, W) - TODO confirm against the dataset.
        """
        # read the size from the instance's own config rather than the global CFG,
        # so a Net built with a different cfg resizes consistently
        size = (self.cfg.input_size, self.cfg.input_size)
        X1 = F.interpolate(x[:, 0:3], size)
        X2 = F.interpolate(x[:, 3:6], size)

        x1, x2 = self.model(X1), self.model(X2)
        out = torch.cat([x1, x2], -1)
        out = self.head(out)
        out = self.classify(out)
        return out
    
    


    
# model = Net(CFG, use_pretrain=True).to(CFG.device)
# model.eval()
# input  = torch.randn((5,6,CFG.input_size,CFG.input_size)).to(CFG.device)
# output = model(input)
# output.shape

# model

In [None]:

def train(model, fold=0, epoch=None):
    """Train ``model`` for one epoch on all folds except ``fold``.

    Samples are drawn with replacement using inverse-class-frequency weights.
    The optimizer and LR scheduler are created on the first call and cached on
    the model (``model._train_state``), so repeated calls continue the
    warm-up/cosine schedule and keep Adam's moment estimates instead of
    restarting both every epoch.

    Args:
        model: network to train; moved to ``CFG.device``.
        fold: index of the held-out fold.
        epoch: epoch number shown in the progress bar. When omitted, a
            notebook-level ``epoch`` variable is used if one exists.

    Returns:
        The trained model.
    """
    df_train = df_train_fold[df_train_fold.kfold != fold].reset_index(drop=True)
    Dict_Target_counts = df_train['target'].value_counts().to_dict()
    df_train['weight_sample'] = df_train['target'].apply(lambda x: 1 / Dict_Target_counts[x])

    train_dataset = SETIDataset(image_paths=df_train['img_path'], targets=df_train['target'])
    train_sampler = torch.utils.data.WeightedRandomSampler(
        df_train['weight_sample'].values, len(train_dataset), replacement=True
    )
    train_loader = DataLoader(
        train_dataset,
        batch_size=CFG.batch_size,
        sampler=train_sampler,
        num_workers=CFG.num_workers,
        pin_memory=True,
    )

    criterion = nn.BCEWithLogitsLoss()
    model = model.to(CFG.device)
    model.train()

    # Optimizer + scheduler live across calls; rebuilding them each epoch would
    # reset the schedule to the start of warm-up and discard optimizer state.
    train_state = getattr(model, '_train_state', None)
    if train_state is None:
        optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
        #optimizer = LARS_simclr(model.named_modules(), weight_decay= 0, lr=1e-4)
        lr_scheduler = LR_Scheduler(
            optimizer,
            CFG.warmup_epochs,
            CFG.warmup_lr,
            CFG.num_epochs,
            CFG.base_lr,
            CFG.final_lr,
            len(train_loader),
        )
        train_state = {'optimizer': optimizer, 'lr_scheduler': lr_scheduler}
        model._train_state = train_state
    optimizer = train_state['optimizer']
    lr_scheduler = train_state['lr_scheduler']

    # the parameter shadows the notebook-level loop variable, hence globals()
    shown_epoch = epoch if epoch is not None else globals().get('epoch', '?')
    local_progress = tqdm(train_loader, desc=f'Training Epoch = {shown_epoch}/{ CFG.num_epochs }', disable= False)
    data_dict = {}
    max_grad_norm = 10

    for sample_batched in local_progress:
        Batch_image = sample_batched['image'].to(CFG.device, dtype=torch.float, non_blocking=True)
        Batch_label = sample_batched['label'].to(CFG.device, dtype=torch.float, non_blocking=True).view(-1, 1)
        optimizer.zero_grad()

        y_preds = model(Batch_image)
        loss = criterion(y_preds, Batch_label)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()
        # hold the last LR if training runs longer than the planned schedule
        if lr_scheduler.iter < len(lr_scheduler.lr_schedule):
            lr_scheduler.step()

        probs = torch.sigmoid(y_preds).detach().cpu().numpy().ravel()
        targets = Batch_label.detach().cpu().numpy().ravel()

        acc = sklearn.metrics.accuracy_score(targets, probs.round())
        # AUC is a ranking metric: score it on probabilities, and skip batches
        # that contain a single class (roc_auc_score raises on those)
        if np.unique(targets).size > 1:
            roc_auc = sklearn.metrics.roc_auc_score(targets, probs)
        else:
            roc_auc = float('nan')

        data_dict.update({'train loss': loss.item(), 'acc': acc, 'roc_auc': roc_auc, 'lr': lr_scheduler.get_lr()})
        local_progress.set_postfix(data_dict)
    return model


            
def eval(model, fold=0, epoch=None):
    """Evaluate ``model`` on the held-out ``fold``.

    Args:
        model: network to evaluate; moved to ``CFG.device`` and put in eval mode.
        fold: index of the validation fold.
        epoch: epoch number shown in the progress bar. When omitted, a
            notebook-level ``epoch`` variable is used if one exists.

    Returns:
        ``(val_acc, val_roc_auc)``: accuracy of predictions thresholded at 0.5
        and ROC-AUC of the predicted probabilities over the whole fold.
    """
    df_valid = df_train_fold[df_train_fold.kfold == fold].reset_index(drop=True)
    test_dataset = SETIDataset(image_paths=df_valid['img_path'], targets=df_valid['target'])
    test_loader = DataLoader(
        test_dataset,
        batch_size=CFG.batch_size,
        num_workers=CFG.num_workers,
        shuffle=False,
        pin_memory=True,
    )
    model = model.to(CFG.device)
    model.eval()

    # the parameter shadows the notebook-level loop variable, hence globals()
    shown_epoch = epoch if epoch is not None else globals().get('epoch', '?')
    local_progress = tqdm(test_loader, desc=f'Val Epoch = {shown_epoch}/{ CFG.num_epochs }', disable= False)

    criterion = nn.BCEWithLogitsLoss()
    data_dict = {}
    final_targets = []
    final_probs = []

    with torch.no_grad():
        for sample_batched in local_progress:
            Batch_image = sample_batched['image'].to(CFG.device, dtype=torch.float, non_blocking=True)
            Batch_label = sample_batched['label'].to(CFG.device, dtype=torch.float, non_blocking=True).view(-1, 1)
            y_preds = model(Batch_image)
            loss = criterion(y_preds, Batch_label)

            probs = torch.sigmoid(y_preds).cpu().numpy().ravel()
            targets = Batch_label.cpu().numpy().ravel()

            acc = sklearn.metrics.accuracy_score(targets, probs.round())

            final_targets.extend(targets.tolist())
            final_probs.extend(probs.tolist())
            data_dict.update({'val loss': loss.item(), 'acc': acc})
            local_progress.set_postfix(data_dict)

    final_targets = np.asarray(final_targets)
    final_probs = np.asarray(final_probs)

    val_acc = sklearn.metrics.accuracy_score(final_targets, final_probs.round())
    # score AUC on probabilities; rounding first collapses the ranking to two
    # values and distorts the metric used for model selection
    val_roc_auc = sklearn.metrics.roc_auc_score(final_targets, final_probs)

    return val_acc, val_roc_auc
            
                




        
        



In [None]:
# Train on fold 0 and keep the checkpoint with the best validation ROC-AUC.
model = Net(CFG, use_pretrain=True)
# best_model_name = "model.pth"
# model.load_state_dict(torch.load(best_model_name))

global_progress = tqdm(range(0, CFG.stop_num_epochs), desc='Epoch')

# running best; -inf guarantees the first finite score is accepted
best_roc, best_acc, best_epoch = -np.inf, -np.inf, -np.inf
best_model_name = None

for epoch in global_progress:
    model = train(model, fold=0)
    val_acc, val_roc_auc = eval(model, fold=0)
    print(f"Epoch = {epoch}  val_roc_auc = {val_roc_auc:.3f} val_acc = {val_acc:.3f}")

    # nothing to save unless this epoch strictly improves the best ROC-AUC
    if not (val_roc_auc > best_roc):
        continue

    best_roc, best_acc, best_epoch = val_roc_auc, val_acc, epoch
    best_model_name = "model.pth"
    print(f"Save_best_model : val_roc_auc = {val_roc_auc:.3f} val_acc = {val_acc:.3f}")
    torch.save(model.state_dict(), best_model_name)
    

In [None]:
# Summary of the best epoch and the checkpoint it was saved to
print(
    f'The best ROC: {best_roc} The best acc {best_acc} was achieved on epoch: {best_epoch}.',
    f'The Best saved model is: {best_model_name}',
    sep='\n',
)

In [None]:
# Drop the notebook's reference to the trained model, run the garbage
# collector, then ask PyTorch to release its cached CUDA memory.
del model
gc.collect() 
torch.cuda.empty_cache()

# Prediction

In [None]:
# Rebuild the network and restore the best checkpoint for inference.
# Pretrained weights are not requested: load_state_dict replaces the
# parameters anyway, so fetching them would be wasted work.
model = Net(CFG, use_pretrain=False)
# map_location lets a checkpoint saved from GPU tensors load on whatever
# device CFG.device resolves to
model.load_state_dict(torch.load(best_model_name, map_location=CFG.device))
model = model.to(CFG.device)
model.eval();  # trailing ';' suppresses the module repr in the cell output

In [None]:
# Run the restored model over the test set and collect sigmoid probabilities.
df_test             = pd.read_csv('../input/seti-breakthrough-listen/sample_submission.csv')
df_test['img_path'] = df_test['id'].apply(lambda x: f'../input/seti-breakthrough-listen/test/{x[0]}/{x}.npy')

# the `target` column of the sample submission is passed only because the
# dataset requires labels; the labels are not used during inference
test_dataset = SETIDataset(image_paths=df_test['img_path'], targets=df_test['target'])
test_loader  = DataLoader(
    test_dataset,
    batch_size=CFG.batch_size,
    num_workers=CFG.num_workers,
    shuffle=False,
    pin_memory=True,
)

# fixed label: this cell must not depend on the training loop's `epoch` variable
local_progress = tqdm(test_loader, desc='Inference', disable=False)

# gather per-batch arrays and stack once (stacking inside the loop re-copies
# everything collected so far on every batch)
batch_preds = []
with torch.no_grad():
    for sample_batched in local_progress:
        Batch_image = sample_batched['image'].to(CFG.device, dtype=torch.float, non_blocking=True)
        output      = model(Batch_image)
        batch_preds.append(torch.sigmoid(output).cpu().numpy())

# one row per test sample; None when the loader produced no batches
temp_preds = np.vstack(batch_preds) if batch_preds else None
predicted_labels = temp_preds


In [None]:
# Assemble the submission: test ids alongside the predicted probabilities
sub_df = pd.DataFrame({'id': df_test['id']})
sub_df['target'] = predicted_labels

In [None]:
# Preview the first rows of the submission frame
sub_df.head()

In [None]:
# Display the submission frame
sub_df

In [None]:
# Write the submission file without the DataFrame index column
sub_df.to_csv('submission.csv', index=False)