In [11]:
import torch
import torch.nn as nn

from transformers import AutoTokenizer, AutoModel

from transformers import get_cosine_schedule_with_warmup

from tqdm.auto import tqdm
from sklearn.metrics import f1_score
from torch.optim import AdamW

import random

import numpy as np
import pandas as pd

import os

In [12]:
from train import train_epoch, eval_epoch
from model import MegaSiameseModel
from dataset import TopG_Dataset, topg_collate

In [37]:
def single_model(model, 
                     dataset, 
                     loss_function,
                     collate_fn,
                     device=torch.device("cuda"),
                     random_state: int=69, 
                     shuffle=True,
                     epochs: int=15, 
                     lr: float=1e-6,
                     batch_size: int=32,
                     start_epoch=0,
                     ):
    random.seed(random_state),
    np.random.seed(random_state)
    torch.manual_seed(random_state)
    torch.cuda.manual_seed_all(random_state)
    
    loss_function.to(device)
    model.to(device)
    optimizer = AdamW(
        model.parameters(),
#        lr = lr, # args.learning_rate - default is 5e-5, our notebook had 2e-5
    )
    data_loader = torch.utils.data.DataLoader(
                    dataset,
                    batch_size=batch_size,
                    shuffle=shuffle,
                     collate_fn=collate_fn
    )
    
    total_steps = len(data_loader) * epochs 

    scheduler = get_cosine_schedule_with_warmup(optimizer, 
                                            num_warmup_steps = 0, # Default value in run_glue.py
                                            num_training_steps = total_steps)

    for epoch_i in range(0, epochs):
        if epoch_i >= start_epoch:
            train_metrics = train_epoch(model, data_loader, loss_function, optimizer, scheduler, device)
            print("EPOCH", epoch_i)
            print(train_metrics)
            #eval_metrics = eval_epoch(model, eval_loader, loss_function, device)
            #print(eval_metrics)

In [38]:
DATA = "data/"
train = pd.read_csv(f"{DATA}train.csv")

In [39]:

tokenizer = AutoTokenizer.from_pretrained("cointegrated/rubert-tiny2")

#model = AutoModelForPreTraining.from_pretrained("cointegrated/rubert-tiny2")
bert1 = AutoModel.from_pretrained("cointegrated/rubert-tiny2")
bert2 = AutoModel.from_pretrained("cointegrated/rubert-tiny2")
model = MegaSiameseModel(bert1, bert2, 2)

Some weights of the model checkpoint at cointegrated/rubert-tiny2 were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at cointegrated/rubert-tiny2 were not used when initializing BertModel: ['cls.

In [40]:
dataset = TopG_Dataset(train, tokenizer, 200, 60)

In [None]:
single_model(model = model,
             dataset=dataset, 
             loss_function=nn.CrossEntropyLoss(), 
             collate_fn=topg_collate,
             device=torch.device("cuda"),
                     random_state=69,
                     shuffle=True)

  0%|          | 0/193 [00:00<?, ?it/s]

EPOCH 0
{'Train Loss': 0.5796950814637496, 'Train Accuracy': 0.7210209965705872, 'Train F1*100': 70.7865168539326}


  0%|          | 0/193 [00:00<?, ?it/s]

EPOCH 1
{'Train Loss': 0.5507272705821793, 'Train Accuracy': 0.752560555934906, 'Train F1*100': 74.54849498327759}


  0%|          | 0/193 [00:00<?, ?it/s]

EPOCH 2
{'Train Loss': 0.5712387172669326, 'Train Accuracy': 0.7382539510726929, 'Train F1*100': 73.30901856763926}


  0%|          | 0/193 [00:00<?, ?it/s]

EPOCH 3
{'Train Loss': 0.5707265342455454, 'Train Accuracy': 0.7293123006820679, 'Train F1*100': 71.26833477135462}


  0%|          | 0/193 [00:00<?, ?it/s]

EPOCH 4
{'Train Loss': 0.5630969778861407, 'Train Accuracy': 0.7312632203102112, 'Train F1*100': 70.8208296557811}


  0%|          | 0/193 [00:00<?, ?it/s]

EPOCH 5
{'Train Loss': 0.5823116112558335, 'Train Accuracy': 0.7205332517623901, 'Train F1*100': 67.41232227488152}


  0%|          | 0/193 [00:00<?, ?it/s]

EPOCH 6
{'Train Loss': 0.5387461676498769, 'Train Accuracy': 0.7623150944709778, 'Train F1*100': 76.95460277427492}


  0%|          | 0/193 [00:00<?, ?it/s]