In [1]:
import sys
sys.path.append('../scripts')
import os

import yaml
import torch
from argparse import Namespace
from tqdm.notebook import tqdm

from runner import Runner
from metrics import LWLRAP


Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .


In [2]:
from transformers import (
    AdamW,
    get_linear_schedule_with_warmup,
)

In [3]:
import numpy as np

In [4]:

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cpu')

In [5]:
# Read the training configuration file into a plain dict named `args`.
# NOTE(review): the path is absolute and machine-specific; consider making it configurable.
# NOTE(review): yaml.FullLoader is fine for a trusted local config; prefer yaml.safe_load
# if this file could ever come from an untrusted source.
with open('/home/jupyter/rfcx_submission/config/training_config.yaml') as f:
    args = yaml.load(f, Loader=yaml.FullLoader)

In [6]:
# Notebook-level overrides applied on top of the values read from the YAML file.
args.update({
    'epochs': 10,
    'batch_size': 2,
    'gradient_accumulation_steps': 1,
    'warmup_steps': 0,
})
# Directory that this run's checkpoints are written to.
save_path = 'result/run1_no_aug'
args

{'batch_size': 2,
 'ckpt_path': None,
 'csv_path': '/home/jupyter/rfcx_submission/notebooks/train_tp.csv',
 'data_dir': '/home/jupyter/rfcx/data/train',
 'transformer_config_path': '/home/jupyter/rfcx_submission/config/upstream_config.yaml',
 'transformer_weights_path': '/home/jupyter/rfcx/rfcx/model_weights/mockingjay_mel80_no_delta_cmvn_run4/states-2000.ckpt',
 'training': {'upstream': False, 'downstream': True},
 'epochs': 10,
 'gradient_accumulation_steps': 1,
 'warmup_steps': 0}

In [7]:
# Create the checkpoint directory up front (no error if it already exists) so that
# save_ckpt can write into it during training.
os.makedirs(save_path, exist_ok=True)

In [8]:
# Convert the config dict into a Namespace so later cells can use attribute access
# (args.epochs, args.warmup_steps, ...).
# The isinstance guard keeps this cell idempotent: `args` is rebound from dict to
# Namespace here, so without it a second run would raise
# "TypeError: argument after ** must be a mapping".
if isinstance(args, dict):
    args = Namespace(**args)

In [9]:
# Build the project Runner from the selected device and the Namespace config.
runner= Runner(device, args)

In [10]:
# Set up the data loaders. Later cells iterate runner.train_dataloader and
# runner.eval_dataloader -- presumably both are created by this call; confirm in scripts/runner.py.
runner.set_data_loader()

training: number of docs : 1
evaluation: number of docs : 1


In [11]:
# Build the model. Later cells use runner.model and its .upstream / .downstream
# sub-modules -- presumably created by this call; confirm in scripts/runner.py.
runner.set_model()

  "num_layers={}".format(dropout, num_layers))


In [12]:
# Sanity check of the 'upstream' training flag: when it is False the training loop
# switches runner.model.upstream to eval mode.
runner.args.training['upstream']

False

In [27]:
def save_ckpt(model, optimizer, save_path,args, i):
    """Serialise a training checkpoint to ``{save_path}/best_model_{i}.pt``.

    Args:
        model: object exposing ``upstream`` and ``downstream`` sub-modules.
        optimizer: optimizer whose ``state_dict()`` is stored with the weights.
        save_path: existing directory the checkpoint file is written to.
        args: configuration object; stored as a dict via ``vars(args)``.
        i: epoch index, used in the log message and in the file name.
    """
    print(f'saving model at epoch {i}')
    checkpoint_file = f'{save_path}/best_model_{i}.pt'

    # Collect everything needed to resume or evaluate into one dict.
    all_states = {}
    all_states['upstream'] = model.upstream.state_dict()
    all_states['downstream'] = model.downstream.state_dict()
    all_states['optimizer'] = optimizer.state_dict()
    all_states['config'] = vars(args)

    torch.save(all_states, checkpoint_file)

In [13]:
class WeightedFocalLoss(torch.nn.Module):
    """Alpha-weighted focal loss on probabilities for binary / multi-label targets.

    Each element's binary cross-entropy is scaled by ``(1 - pt) ** gamma`` and by a
    class weight: ``alpha`` where the target is 0 and ``1 - alpha`` where it is 1.

    Args:
        alpha: weight applied to elements whose target is 0 (``1 - alpha`` is
            applied to elements whose target is 1).
        gamma: focusing exponent applied to ``(1 - pt)``.
    """
    def __init__(self, alpha=.25, gamma=1):
        super(WeightedFocalLoss, self).__init__()
        # Created without forcing a device so the loss can be constructed on CPU-only
        # machines; it is moved to the inputs' device inside forward().
        self.alpha = torch.tensor([alpha, 1-alpha])
        self.gamma = gamma

    def forward(self, inputs, targets):
        """Return the mean focal loss.

        Args:
            inputs: probabilities in [0, 1], same shape as ``targets``.
            targets: float tensor of 0/1 labels.
        """
        BCE_loss = torch.nn.functional.binary_cross_entropy(inputs, targets, reduction='none')

        # Look up the per-element class weight; indexing with the integer targets
        # yields a tensor of exactly the targets' shape, whatever its rank.
        alpha = self.alpha.to(inputs.device)
        at = alpha[targets.long()]

        # pt is the probability the model assigned to the true class.
        pt = torch.exp(-BCE_loss)
        F_loss = at * (1 - pt) ** self.gamma * BCE_loss
        return F_loss.mean()
    
    
class FocalLoss(torch.nn.Module):
    """Focal loss on probabilities with a single global scale factor.

    Computes ``alpha * (1 - pt) ** gamma * BCE`` element-wise and returns the mean.

    Args:
        alpha: constant multiplier applied to every element of the loss.
        gamma: focusing exponent applied to ``(1 - pt)``.
    """
    def __init__(self, alpha=5, gamma=2):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, inputs, targets):
        """Return the mean focal loss for probabilities ``inputs`` and 0/1 ``targets``."""
        # reduction='none' replaces the deprecated reduce=False argument.
        BCE_loss = torch.nn.functional.binary_cross_entropy(inputs, targets, reduction='none')
        # pt is the probability the model assigned to the true class.
        pt = torch.exp(-BCE_loss)
        F_loss = self.alpha * (1 - pt) ** self.gamma * BCE_loss
        return torch.mean(F_loss)

# num_birds = 24
# pos_weights = torch.ones(num_birds)
# pos_weights = pos_weights * 8   

# Loss used by the training loop: binary cross-entropy over the model's outputs.
# The focal-loss classes defined above are not instantiated in the visible cells.
loss_function = torch.nn.BCELoss()

In [14]:
# Total number of scheduler steps: updates per epoch times number of epochs.
# NOTE(review): the per-epoch count uses floor division, while the training loop also
# steps on the last batch of an epoch; the two only agree when the number of batches
# is a multiple of gradient_accumulation_steps.
t_total = (len(runner.train_dataloader) // args.gradient_accumulation_steps) * args.epochs

print(t_total)

10


In [15]:

    
# Only the downstream parameters are handed to the optimizer.
optimizer = AdamW(
    params=runner.model.downstream.parameters(),
    lr=0.01,
)

# Learning-rate schedule driven by the warmup setting and the total step count.
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=args.warmup_steps,
    num_training_steps=t_total,
)

In [16]:
# plist = [
#          {'params': runner.model.upstream.parameters(), 'lr': 1e-4,},
#          {'params':runner.model.downstream.parameters(), 'lr': 1e-2}
#          ]

# optimizer = torch.optim.Adam(plist, lr=0.001 )


# optimizer = torch.optim.Adam(runner.model.downstream.parameters(), lr=0.01 )


In [17]:
# lr_finder = LRFinder(runner.model, optimizer, loss_function, device=device)
# lr_finder.range_test(runner.train_dataloader, val_loader=runner.eval_dataloader, end_lr=1, num_iter=100, step_mode="linear")
# lr_finder.plot(log_lr=False)
# lr_finder.reset()

In [18]:
#scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)

In [19]:
# Short alias read by the training loop: the loss is divided by this value and the
# optimizer-step condition is computed from it.
accumulation_steps= args.gradient_accumulation_steps

In [None]:
# Train for args.epochs epochs, validate after each one, and checkpoint whenever the
# mean validation LWLRAP improves.
best_lwrap = 0
n_train_batches = len(runner.train_dataloader)

for i in range(args.epochs):

    train_loss = []
    train_lwlrap = []

    # Whole model in train mode, then put the upstream back in eval mode when it is
    # not being trained.
    runner.model.train()
    if not runner.args.training['upstream']:
        runner.model.upstream.eval()

    runner.model.downstream.train()

    pbar = tqdm(enumerate(runner.train_dataloader), total=n_train_batches)

    runner.model.zero_grad()
    for batch, (data, target) in pbar:

        # Use the device selected at the top of the notebook instead of a hard-coded
        # .cpu()/.cuda(), so the same loop runs on either.
        # Assumes the Runner placed the model on this same `device` -- confirm in scripts/runner.py.
        target = target.to(device)

        output = runner.model([d.to(device) for d in data])
        loss = loss_function(output, target)

        # Detach before computing the metric: it must not take part in autograd.
        _score = LWLRAP(output.detach().cpu(), target.cpu())

        train_lwlrap.append(_score)
        train_loss.append(loss.item())  # logged before scaling for accumulation

        pbar.set_description(f"loss : {round(loss.item(), 3)} score: {round(_score,3)}")

        # Scale so the accumulated gradient is an average over the accumulated batches.
        loss = loss / accumulation_steps
        loss.backward()

        # Step every `accumulation_steps` batches and on the last batch of the epoch.
        # This must use the batch index; the epoch index `i` was used here by mistake.
        if (batch + 1) % accumulation_steps == 0 or (batch + 1) == n_train_batches:
            optimizer.step()
            scheduler.step()
            runner.model.zero_grad()

    # Report the learning rate of the last parameter group.
    lr = optimizer.param_groups[-1]['lr']
    print(f'Epoch : {i}  training end. LR: {round(lr, 3)}  Loss: {round (np.mean(train_loss) , 3)}  lwrlrap_score: {round(np.mean(train_lwlrap), 3)}')

    with torch.no_grad():
        val_loss = []
        val_lwlrap = []

        runner.model.eval()
        for batch, (data, target) in tqdm(enumerate(runner.eval_dataloader), total=len(runner.eval_dataloader)):

            target = target.to(device)

            output = runner.model([d.to(device) for d in data])
            loss = loss_function(output, target)

            val_loss.append(loss.item())

            _score = LWLRAP(output.cpu(), target.cpu())
            val_lwlrap.append(_score)

        mean_val_lwlrap = np.mean(val_lwlrap)
        print(f'Valid Loss: {np.mean(val_loss)}  lwrlrap_score: {mean_val_lwlrap}')

    # Keep a checkpoint for every epoch that improves the validation score.
    if mean_val_lwlrap > best_lwrap:
        save_ckpt(runner.model, optimizer, save_path, args, i)
        best_lwrap = mean_val_lwlrap


# Always save the final state as well.
# NOTE(review): save_ckpt names the file best_model_{i}.pt, so this final checkpoint is
# labelled "best" even when the last epoch did not improve the validation score.
save_ckpt(runner.model, optimizer, save_path, args, i)