In [1]:
import config
import dataset

asasas


In [4]:



"""Train and test engines
"""

from tqdm import tqdm
import torch
import torch.nn as nn
import numpy as np

def loss_fn(outputs, targets):
    return nn.BCEWithLogitsLoss()(outputs, targets.view(-1, 1))

def train_fn(data_loader, model, optimizer, scheduler, device):

    model.train()
    total_train_loss = 0

    final_outputs = []
    final_targets = []

    for idx, data in tqdm(enumerate(data_loader), total = len(data_loader)):

        ids = data['ids']
        mask = data['mask']
        target = data['target']

        ids = ids.to(device, dtype = torch.long)
        mask = mask.to(device, dtype = torch.long)
        target = target.to(device, dtype = torch.float)
        
        optimizer.zero_grad()
        outputs = model(
            ids, 
            token_type_ids = None, 
            attention_mask = mask,
            return_dict=True
        )
        
        logits = outputs.logits
        loss = loss_fn(logits, target)

        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        loss.backward()

        optimizer.step()
        scheduler.step()

        total_train_loss += loss.item()


        final_targets.extend(target.cpu().detach().tolist())
        final_outputs.extend(torch.sigmoid(logits).cpu().detach().flatten().tolist())

        # print(len(final_targets), len(final_outputs))
    
    return np.array(final_outputs), final_targets, total_train_loss / len(data_loader)


def eval_fn(data_loader, model, device):

    model.eval()
    total_val_loss = 0

    final_outputs = []
    final_targets = []

    with torch.no_grad():
        for idx, data in tqdm(enumerate(data_loader), total = len(data_loader)):
            ids = data['ids']
            mask = data['mask']
            target = data['target']

            ids = ids.to(device, dtype = torch.long)
            mask = mask.to(device, dtype = torch.long)
            target = target.to(device, dtype = torch.float)
            
            outputs = model(
                ids, 
                token_type_ids = None, 
                attention_mask = mask, 
                return_dict=True
            )
            
            logits = outputs.logits
            loss = loss_fn(logits, target)

            total_val_loss += loss.item()
            
            final_targets.extend(target.cpu().detach().tolist())
            final_outputs.extend(torch.sigmoid(logits).cpu().detach().flatten().tolist())

    return np.array(final_outputs), final_targets, total_val_loss / len(data_loader)



In [5]:

import pandas as pd
from sklearn import model_selection
import torch
import torch.nn as nn

import wandb
import numpy as np
from sklearn import metrics
from transformers import AdamW
from transformers import get_linear_schedule_with_warmup

wandb.init()



df = pd.read_csv(config.TRAINING_FILE, delimiter = '\t', header = None, names=['sentence_source', 'label', 'label_notes', 'sentence'])

df_train, df_valid = model_selection.train_test_split(
    df,
    test_size = 0.2,
    random_state = 42,
    stratify = df.label.values
)

# Drop indices
df_train = df_train.reset_index(drop=True)
df_valid = df_valid.reset_index(drop=True)

train_dataset = dataset.BERTDataset(
    sentence = df_train.sentence.values,
    target = df_train.label.values
)

train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size = config.TRAIN_BATCH_SIZE,
    sampler = torch.utils.data.RandomSampler(train_dataset)
)

valid_dataset = dataset.BERTDataset(
    sentence = df_valid.sentence.values,
    target = df_valid.label.values
)

valid_dataloader = torch.utils.data.DataLoader(
    valid_dataset,
    batch_size = config.VALID_BATCH_SIZE,
    sampler = torch.utils.data.SequentialSampler(valid_dataset)
)

model = config.MODEL.to(config.DEVICE)

param_list = list(model.named_parameters()) # list of model parameters
no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] # bias of hidden layer and layer norm are not decayed
optimizer_parameters = [ # removing the no_decay params
    {'params': [param for name, param in param_list if not any(nd in name for nd in no_decay)], 'weight_deacy': 0.001}, # without no_decay params
    {'params': [param for name, param in param_list if any(nd in name for nd in no_decay)], 'weight_deacy': 0.} # only no_decay params
]

num_train_steps = int((len(df_train) / config.TRAIN_BATCH_SIZE) * config.EPOCHS)

optimizer = AdamW(optimizer_parameters, lr = 3e-5, eps = 1e-8)
scheduler = get_linear_schedule_with_warmup( # gets called in train_fn as this scheduler is independant on valid loss
    optimizer,
    num_warmup_steps = 0,
    num_training_steps = num_train_steps
)

num_device = torch.cuda.device_count()
device_ids = list(range(num_device))
if len(device_ids) > 1:
    model = nn.DataParallel(model, device_ids=device_ids) # distributed training

best_accuracy = 0

for epoch in range(config.EPOCHS):

    train_outputs, train_targets, train_loss = train_fn(train_dataloader, model, optimizer, scheduler, config.DEVICE)
    valid_outputs, valid_targets, valid_loss = eval_fn(valid_dataloader, model, config.DEVICE)

    wandb.log({'Train loss': train_loss, 'Epoch': epoch})
    wandb.log({'Valid loss': valid_loss, 'Epoch': epoch})

    tr = train_outputs[train_outputs >= 0.5]

    # train_accuracy = metrics.accuracy_score(train_targets, train_outputs)












    # train_mcc = metrics.matthews_corrcoef(train_targets, train_outputs)
    # print(f"Train Accuracy Score: {train_accuracy}")
    # wandb.log({'Train Accuracy Score': train_accuracy, 'Epoch': epoch})
    # print(f"Train MCC Score: {train_mcc}")
    # wandb.log({'Train MCC Score': train_mcc, 'Epoch': epoch})

    # valid_outputs =valid_outputs[valid_outputs >= 0.5]

    # accuracy = metrics.accuracy_score(valid_targets, valid_outputs)
    # valid_mcc = metrics.matthews_corrcoef(valid_targets, valid_outputs)
    # print(f"Valid Accuracy Score: {accuracy}")
    # wandb.log({'Valid Accuracy Score': accuracy, 'Epoch': epoch})
    # print(f"Valid MCC Score: {valid_mcc}")
    # wandb.log({'Valid MCC Score': valid_mcc, 'Epoch': epoch})
    break




VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Train loss,0.48635
Epoch,0.0
_step,1.0
_runtime,220.0
_timestamp,1635411821.0
Valid loss,0.44647


0,1
Train loss,▁
Epoch,▁▁
_step,▁█
_runtime,▁▁
_timestamp,▁▁
Valid loss,▁


wandb: wandb version 0.12.6 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade


100%|██████████| 428/428 [03:16<00:00,  2.18it/s]
100%|██████████| 107/107 [00:13<00:00,  7.91it/s]


array([ True, False,  True, ...,  True, False,  True])