# Beating Text-Graph-17 with only Text

Current plan is following

- preprocess text into **q-a connection prediction** (question + question entities [SEP] answer + answer entities (+ linear. graph))
- finetune bert-like model (bigger=better) with some cool LoRA (this one needs to be tuned too)
- abuse augmentations for upsampling minor "correct" label examples

In [2]:
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import torch
import os
import random
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import transformers
from transformers import AutoTokenizer, AutoModel, T5ForConditionalGeneration
import torch.optim as optim
from sklearn.metrics import precision_score, f1_score, recall_score
from tqdm import tqdm

In [3]:
SEED = 42

torch.manual_seed(SEED)
torch.random.manual_seed(SEED)
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.cuda.random.manual_seed(SEED)
torch.cuda.random.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True

torch.backends.cuda.matmul.allow_tf32 = True

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

## Data Preproc

In [4]:
train_path = '../../data/tsv/train.tsv'
test_path = '../../data/tsv/test.tsv'

from data.dataset import TextGraphDataset

## Model prep and finetuning

In [5]:
# %pip install accelerate bitsandbytes

In [6]:
# Load model directly
# model_name = "whaleloops/phrase-bert"
# model_name = "sentence-transformers/all-MiniLM-L6-v2"
model_name = "DeepPavlov/t5-wikidata5M-with-neighbors"

DTYPE = torch.float32

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Not loading in 8 bit when fp16=True or bf16=True for training
pretrained_bert = AutoModel.from_pretrained(
    model_name,
    device_map='auto',
    low_cpu_mem_usage=True,
    offload_state_dict=True,
#     load_in_8bit=True,
#     torch_dtype=DTYPE,  
)

Some weights of the model checkpoint at DeepPavlov/t5-wikidata5M-with-neighbors were not used when initializing T5Model: ['lm_head.weight']
- This IS expected if you are initializing T5Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing T5Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [7]:
class QuestionClassifier(nn.Module):
    def __init__(self, pretrained_bert, encoder_only=False):
        super().__init__()
        
        self.encoder_only = encoder_only
        if encoder_only:
            self.bert_backbone = pretrained_bert.encoder
        else:
            self.bert_backbone = pretrained_bert
            
        self.hidden_size = pretrained_bert.config.hidden_size
        self.head = nn.Sequential(
            nn.Linear(self.hidden_size, self.hidden_size // 2),
            nn.ELU(),
            nn.Linear(self.hidden_size // 2, 1)
        )
        self.head.to(DTYPE)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None):
        # outputs = self.bert_backbone(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids) # for bert, minilm
        if self.encoder_only:
            outputs = self.bert_backbone(input_ids=input_ids, attention_mask=attention_mask)
        else:
            outputs = self.bert_backbone(input_ids=input_ids, attention_mask=attention_mask, decoder_input_ids=input_ids)
        last_hidden_state = outputs.last_hidden_state  # Access the last hidden states
        pooled_output = last_hidden_state[:, -1, :]  # Take the eos token representation because it is Enc-Decoder, T5
        logits = self.head(pooled_output)
        return logits
    
model = QuestionClassifier(
    pretrained_bert,
    encoder_only=False
).to(DEVICE)

for p in model.bert_backbone.parameters():
    p.requires_grad = False

In [8]:
model

QuestionClassifier(
  (bert_backbone): T5Model(
    (shared): Embedding(32128, 512)
    (encoder): T5Stack(
      (embed_tokens): Embedding(32128, 512)
      (block): ModuleList(
        (0): T5Block(
          (layer): ModuleList(
            (0): T5LayerSelfAttention(
              (SelfAttention): T5Attention(
                (q): Linear(in_features=512, out_features=512, bias=False)
                (k): Linear(in_features=512, out_features=512, bias=False)
                (v): Linear(in_features=512, out_features=512, bias=False)
                (o): Linear(in_features=512, out_features=512, bias=False)
                (relative_attention_bias): Embedding(32, 8)
              )
              (layer_norm): T5LayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (1): T5LayerFF(
              (DenseReluDense): T5DenseActDense(
                (wi): Linear(in_features=512, out_features=2048, bias=False)
                (wo): Linear(in_features=204

In [9]:
# %pip install peft -q

In [10]:
from peft import LoraConfig, LoraModel

LORA_RANK=16 # 16 default
LORA_ALPHA=64. # 32
LORA_DROPOUT=1e-1

config = LoraConfig(
    task_type="SEQ_CLS",
    r=LORA_RANK,
    lora_alpha=LORA_ALPHA,
#     target_modules=["query", "value"], # minilm
    target_modules=["q", "v"], # T5 ["q", "v", "k", "o"]
    lora_dropout=LORA_DROPOUT,
    use_rslora=True,
)

lora_model = LoraModel(model, config, "default")

for p in lora_model.head.parameters():
    p.requires_grad = True

In [11]:
def get_trainable_params(model: nn.Module):
    params = []
    for name, p in model.named_parameters():
        if p.requires_grad:
            params.append(p)
    return params

trainable_params = get_trainable_params(lora_model)
len(trainable_params)

76

In [12]:
q = 0
for i in lora_model.parameters():
    q+=(i.requires_grad)
q

76

### Functions for training

In [15]:
def train_epoch(model, loader, optimizer, loss_fn, scaler):
    model.train()

    avg_loss = 0.

    predictions = []
    true_labels = []
    
    for i, batch in tqdm(enumerate(loader), total=len(loader)):

        optimizer.zero_grad()

        input_ids = batch["input_ids"].to(DEVICE)
        # token_type_ids = batch["token_type_ids"].to(DEVICE) # not for T5
        attention_mask = batch["attention_mask"].to(DEVICE)
        labels = batch["labels"].to(DEVICE).float()
        #logits = model(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids).squeeze()
        
        # https://pytorch.org/docs/stable/notes/amp_examples.html#typical-mixed-precision-training
        with torch.autocast(device_type='cuda', dtype=torch.float16):
            logits = model(input_ids=input_ids, attention_mask=attention_mask).squeeze() # for T5
            loss = loss_fn(logits, labels)
        #loss.backward()
        #optimizer.step()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        avg_loss += loss.item()
        with torch.no_grad():
            preds = F.sigmoid(logits).detach().cpu().numpy()
            preds = (preds > 0.5) * 1
            y_true = labels.detach().cpu().numpy()
            
            predictions += preds.tolist()
            true_labels += y_true.tolist()
    
    avg_loss /= len(loader) + 1
    f1 = f1_score(true_labels, predictions)
    precision = precision_score(true_labels, predictions)
    recall = recall_score(true_labels, predictions)    
    
    return avg_loss, f1, precision, recall


@torch.no_grad()
def eval_epoch(model, loader, loss_fn):
    model.eval()

    avg_loss = 0.
    predictions, true_labels = [], []

    for i, batch in tqdm(enumerate(loader), total=len(loader)):
        input_ids = batch["input_ids"].to(DEVICE)
        attention_mask = batch["attention_mask"].to(DEVICE)
        #token_type_ids = batch["token_type_ids"].to(DEVICE)
        labels = batch["labels"].to(DEVICE).float()
        
        # logits = model(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids).squeeze()
        logits = model(input_ids=input_ids, attention_mask=attention_mask).squeeze() # for T5
        loss = loss_fn(logits, labels)
        
        avg_loss += loss.item()
        preds = F.sigmoid(logits).detach().cpu().numpy()
        preds = (preds > 0.5) * 1
        y_true = labels.detach().cpu().numpy()
        predictions += preds.tolist()
        true_labels += y_true.tolist()

    avg_loss /= len(loader)
    f1 = f1_score(true_labels, predictions)
    precision = precision_score(true_labels, predictions)
    recall = recall_score(true_labels, predictions)

    return avg_loss, f1, precision, recall


def train(model, train_loader, val_loader, optimizer, loss_fn, epochs=10):
    scaler = torch.cuda.amp.GradScaler()
    
    best_f1_val = 0
    for e in range(epochs):
        loss, f1, prec, rec = train_epoch(model, train_loader, optimizer, loss_fn, scaler=scaler)
        print(f"Train epoch {e + 1} - loss: {loss:.3f}, f1: {f1:.3f}, precision: {prec:.3f}, recall: {rec:.3f}")
        
        loss, f1, prec, rec = eval_epoch(model, val_loader, loss_fn)
        print(f"Eval epoch {e + 1} - loss: {loss:.3f}, f1: {f1:.3f}, precision: {prec:.3f}, recall: {rec:.3f}")
        
        if f1 > best_f1_val:
            best_f1_val = f1
            torch.save(model.state_dict(), "T5DP--lora-fixed_oversampling-includegraphs-BEST.pth")    
        
        torch.save(model.state_dict(), "T5DP--lora-fixed_oversampling-includegraphs-LAST.pth")    

## Training, evaluation and submit

In [16]:
def split_train_dev_test(df: pd.DataFrame):
        all_questions = list(df["question"].unique())
        num_questions = len(all_questions)
        random.shuffle(all_questions)

        train_dev_ratio = 0.8
        train_ratio = 0.9
        num_train_dev_questions = int(num_questions * train_dev_ratio)
        train_dev_questions = all_questions[:num_train_dev_questions]
        test_questions = set(all_questions[num_train_dev_questions:])
        
        num_train_questions = int(len(train_dev_questions) * train_ratio)
        train_questions = set(train_dev_questions[:num_train_questions])
        dev_questions = set(train_dev_questions[num_train_questions:])

        train_df = df[df["question"].isin(train_questions)]
        dev_df = df[df["question"].isin(dev_questions)]
        test_df = df[df["question"].isin(test_questions)]

        return train_df, dev_df, test_df
    
def split_train_dev(df: pd.DataFrame):
        all_questions = list(df["question"].unique())
        num_questions = len(all_questions)
        random.shuffle(all_questions)

        train_dev_ratio = 0.8
        num_train_questions = int(num_questions * train_dev_ratio)
        train_questions = set(all_questions[:num_train_questions])
        dev_questions = set(all_questions[num_train_questions:])
        
        train_df = df[df["question"].isin(train_questions)]
        dev_df = df[df["question"].isin(dev_questions)]

        return train_df, dev_df

In [17]:
BATCH_SIZE=32
MAX_LENGTH=256
EPOCHS=30
LR = 8e-5

INCLUDE_GRAPH = True

from sklearn.utils.class_weight import compute_sample_weight
from torch.utils.data import WeightedRandomSampler

# Loading dataframe for making splits
df = pd.read_csv(train_path, sep='\t')
df["label"] = df["correct"].astype(np.float32)
# df_train, df_dev, df_test = split_train_dev_test(df)
df_train, df_dev = split_train_dev(df)

train_ds = TextGraphDataset(tokenizer, MAX_LENGTH, train_path=train_path, test_path=test_path, 
                            split='train',
                            df_split=df_train, 
                            include_graph=INCLUDE_GRAPH,
                            is_T5=True,
                           )
dev_ds = TextGraphDataset(tokenizer, MAX_LENGTH, train_path=train_path, test_path=test_path,
                          split='val',
                          df_split=df_dev,
                          include_graph=INCLUDE_GRAPH,
                          is_T5=True,
                         )                         
# test_ds = TextGraphDataset(tokenizer, MAX_LENGTH, train_path=train_path, test_path=test_path,
#                            split='test',
#                            df_split=df_test, 
#                            include_graph=INCLUDE_GRAPH,
#                            is_T5=True,
#                           )

weights = compute_sample_weight('balanced', train_ds.labels)
sampler = WeightedRandomSampler(weights, len(weights)) # we will oversample correct answers :)

CONFIG_DATALOADER = {"num_workers":4, "pin_memory":True}
train_loader = DataLoader(
    train_ds, 
    batch_size=BATCH_SIZE, 
    sampler=sampler, # does not allow to use shuffle
#     shuffle=True, 
    **CONFIG_DATALOADER
)
dev_loader = DataLoader(dev_ds, batch_size=BATCH_SIZE, shuffle=False, drop_last=False, **CONFIG_DATALOADER)
# test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False, drop_last=False, **CONFIG_DATALOADER)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.df["graph"] = self.df["graph"].apply(eval)


In [23]:
tokenizer.decode(dev_ds[0]['input_ids'], skip_special_tokens=False)

'predict[SEP] Whose is the oldest MLB player to hit a home run?[SEP] Yogi Berra league Major League Baseball[SEP] Yogi Berra</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><p

In [24]:
loss_fn = torch.nn.BCEWithLogitsLoss()
# optimizer = optim.AdamW(params=trainable_params, lr=LR)
optimizer = transformers.optimization.Adafactor(
    params=lora_model.parameters(), 
    lr=LR, 
    scale_parameter=False, 
    relative_step=False,
    warmup_init=False,
)

In [25]:
import gc
torch.cuda.empty_cache()
gc.collect()

20

## PQlet run - with linearized graph

### T5 from DeepPavlov

In [27]:
%%time 

train(
    lora_model,
    train_loader,
    dev_loader,
    optimizer,
    loss_fn,
    epochs=EPOCHS
)

# torch.save(model.state_dict(), "phrase_bert-lora-fixed_oversampling-pqlet.pth")
# torch.save(model.state_dict(), "all-MiniLM-L6-v2--lora-fixed_oversampling-pqlet-includegraphs.pth")
# torch.save(lora_model.state_dict(), "T5DP--lora-fixed_oversampling-pqlet-includegraphs.pth")

100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:58<00:00,  3.16it/s]


Train epoch 1 - loss: 0.335, f1: 0.834, precision: 0.800, recall: 0.870


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:36<00:00,  6.41it/s]


Eval epoch 1 - loss: 0.353, f1: 0.439, precision: 0.295, recall: 0.858


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:59<00:00,  3.14it/s]


Train epoch 2 - loss: 0.285, f1: 0.862, precision: 0.827, recall: 0.900


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.56it/s]


Eval epoch 2 - loss: 0.305, f1: 0.479, precision: 0.345, recall: 0.784


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 3 - loss: 0.260, f1: 0.880, precision: 0.846, recall: 0.916


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.59it/s]


Eval epoch 3 - loss: 0.345, f1: 0.490, precision: 0.350, recall: 0.817


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 4 - loss: 0.234, f1: 0.895, precision: 0.863, recall: 0.930


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.58it/s]


Eval epoch 4 - loss: 0.323, f1: 0.494, precision: 0.357, recall: 0.805


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 5 - loss: 0.219, f1: 0.906, precision: 0.874, recall: 0.940


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.61it/s]


Eval epoch 5 - loss: 0.309, f1: 0.509, precision: 0.387, recall: 0.744


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 6 - loss: 0.204, f1: 0.913, precision: 0.882, recall: 0.946


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.59it/s]


Eval epoch 6 - loss: 0.380, f1: 0.475, precision: 0.333, recall: 0.831


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 7 - loss: 0.187, f1: 0.921, precision: 0.892, recall: 0.952


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.59it/s]


Eval epoch 7 - loss: 0.325, f1: 0.528, precision: 0.406, recall: 0.753


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 8 - loss: 0.176, f1: 0.928, precision: 0.901, recall: 0.958


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.60it/s]


Eval epoch 8 - loss: 0.370, f1: 0.521, precision: 0.392, recall: 0.774


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 9 - loss: 0.169, f1: 0.931, precision: 0.905, recall: 0.960


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.58it/s]


Eval epoch 9 - loss: 0.314, f1: 0.549, precision: 0.449, recall: 0.707


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 10 - loss: 0.156, f1: 0.936, precision: 0.913, recall: 0.961


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.57it/s]


Eval epoch 10 - loss: 0.342, f1: 0.548, precision: 0.443, recall: 0.716


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 11 - loss: 0.148, f1: 0.942, precision: 0.919, recall: 0.967


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.58it/s]


Eval epoch 11 - loss: 0.377, f1: 0.542, precision: 0.422, recall: 0.757


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 12 - loss: 0.142, f1: 0.944, precision: 0.923, recall: 0.967


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.60it/s]


Eval epoch 12 - loss: 0.354, f1: 0.548, precision: 0.449, recall: 0.703


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 13 - loss: 0.137, f1: 0.946, precision: 0.927, recall: 0.967


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.58it/s]


Eval epoch 13 - loss: 0.362, f1: 0.539, precision: 0.420, recall: 0.751


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 14 - loss: 0.126, f1: 0.953, precision: 0.933, recall: 0.974


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.57it/s]


Eval epoch 14 - loss: 0.391, f1: 0.548, precision: 0.448, recall: 0.707


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 15 - loss: 0.125, f1: 0.953, precision: 0.934, recall: 0.972


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.57it/s]


Eval epoch 15 - loss: 0.356, f1: 0.547, precision: 0.438, recall: 0.728


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 16 - loss: 0.120, f1: 0.955, precision: 0.938, recall: 0.973


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.58it/s]


Eval epoch 16 - loss: 0.365, f1: 0.531, precision: 0.406, recall: 0.766


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 17 - loss: 0.111, f1: 0.958, precision: 0.942, recall: 0.975


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.60it/s]


Eval epoch 17 - loss: 0.318, f1: 0.574, precision: 0.494, recall: 0.685


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 18 - loss: 0.113, f1: 0.957, precision: 0.940, recall: 0.975


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.61it/s]


Eval epoch 18 - loss: 0.390, f1: 0.557, precision: 0.458, recall: 0.711


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 19 - loss: 0.111, f1: 0.959, precision: 0.943, recall: 0.977


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.58it/s]


Eval epoch 19 - loss: 0.368, f1: 0.563, precision: 0.472, recall: 0.698


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 20 - loss: 0.104, f1: 0.962, precision: 0.946, recall: 0.978


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.57it/s]


Eval epoch 20 - loss: 0.427, f1: 0.556, precision: 0.455, recall: 0.716


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 21 - loss: 0.107, f1: 0.960, precision: 0.944, recall: 0.977


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.58it/s]


Eval epoch 21 - loss: 0.352, f1: 0.571, precision: 0.485, recall: 0.695


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 22 - loss: 0.099, f1: 0.964, precision: 0.949, recall: 0.979


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.59it/s]


Eval epoch 22 - loss: 0.424, f1: 0.558, precision: 0.460, recall: 0.707


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 23 - loss: 0.092, f1: 0.967, precision: 0.954, recall: 0.980


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.59it/s]


Eval epoch 23 - loss: 0.382, f1: 0.562, precision: 0.471, recall: 0.698


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 24 - loss: 0.095, f1: 0.965, precision: 0.951, recall: 0.979


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.57it/s]


Eval epoch 24 - loss: 0.391, f1: 0.583, precision: 0.521, recall: 0.662


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 25 - loss: 0.093, f1: 0.967, precision: 0.954, recall: 0.981


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.58it/s]


Eval epoch 25 - loss: 0.404, f1: 0.573, precision: 0.483, recall: 0.703


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 26 - loss: 0.095, f1: 0.965, precision: 0.952, recall: 0.979


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.59it/s]


Eval epoch 26 - loss: 0.405, f1: 0.585, precision: 0.513, recall: 0.681


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 27 - loss: 0.087, f1: 0.969, precision: 0.957, recall: 0.981


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.58it/s]


Eval epoch 27 - loss: 0.432, f1: 0.565, precision: 0.474, recall: 0.699


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 28 - loss: 0.086, f1: 0.970, precision: 0.957, recall: 0.983


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.57it/s]


Eval epoch 28 - loss: 0.395, f1: 0.583, precision: 0.515, recall: 0.672


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 29 - loss: 0.081, f1: 0.971, precision: 0.959, recall: 0.983


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.61it/s]


Eval epoch 29 - loss: 0.442, f1: 0.584, precision: 0.538, recall: 0.637


100%|████████████████████████████████████████████████████████████████████████████████| 943/943 [04:54<00:00,  3.20it/s]


Train epoch 30 - loss: 0.086, f1: 0.969, precision: 0.957, recall: 0.982


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:35<00:00,  6.59it/s]


Eval epoch 30 - loss: 0.459, f1: 0.571, precision: 0.488, recall: 0.689
CPU times: total: 2h 45min 36s
Wall time: 2h 52min 33s


In [28]:
@torch.no_grad()
def make_submit_predictions(model, tokenizer, include_graph, filename='test_result_1.tsv'):
    model.eval()
    eval_ds = TextGraphDataset(tokenizer, max_length=MAX_LENGTH,  train_path=train_path, test_path=test_path,
                               split='eval', include_graph=include_graph)
    preds = []
    for idx, data in tqdm(enumerate(eval_ds)):
        input_ids = data["input_ids"].to(DEVICE).unsqueeze(0)
        attention_mask = data["attention_mask"].to(DEVICE).unsqueeze(0)
        token_type_ids = data["token_type_ids"].to(DEVICE).unsqueeze(0)
        
        logit = model(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids).squeeze()
        pred = (logit.detach().cpu().numpy() > 0) * 1
        preds.append(pred)

    df = eval_ds.df
    df['prediction'] = preds
    df['prediction'] = df['prediction'].astype(int)
    df[["sample_id", "prediction"]].to_csv(filename, sep='\t', index=False)

@torch.no_grad()
def make_submit_predictions_ranked(model, tokenizer, include_graph, filename='test_result_2.tsv', is_t5=False):
    """based of Vika's idea - select all candidate answers for questions, select one with max prob"""
    model.eval()
    eval_ds = TextGraphDataset(tokenizer, max_length=MAX_LENGTH,  train_path=train_path, test_path=test_path,
                               split='eval', 
                               df_split=None,
                               include_graph=include_graph)
    eval_df = eval_ds.df
    eval_df["correct"] = False

    for question in tqdm(eval_df['question'].unique()):
        ids = eval_df.index[eval_df['question'] == question].tolist()
        
        logits = []
        for idx in ids:
            data = eval_ds[idx]
            input_ids = data["input_ids"].to(DEVICE).unsqueeze(0)
            attention_mask = data["attention_mask"].to(DEVICE).unsqueeze(0)
            if is_t5:
                logit = model(input_ids=input_ids, attention_mask=attention_mask,).squeeze()
            else:
                token_type_ids = data["token_type_ids"].to(DEVICE).unsqueeze(0)
                logit = model(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids).squeeze()
            
            
            logits.append(logit.detach().cpu().item())

        right_ans_id = ids[np.argmax(logits)]
        eval_df.loc[right_ans_id, 'correct'] = True

    eval_df['prediction'] = eval_df['correct']
    eval_df['prediction'] = eval_df['prediction'].astype(int)
    eval_df[["sample_id", "prediction"]].to_csv(filename, sep='\t', index=False)

In [29]:
make_submit_predictions_ranked(
    lora_model,
    tokenizer,
    include_graph=INCLUDE_GRAPH,
    filename="T5_includegraphs_v3.csv",
    is_t5=True
)

100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [02:14<00:00,  7.44it/s]
