## Instalação de bibliotecas

In [None]:
# Mount Google Drive at /content/gdrive; the Drive paths used later in this
# notebook all live under this mount point.
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [None]:
!pip install transformers -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m50.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m96.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.2/199.2 KB[0m [31m23.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import random
import torch
import torch.nn.functional as F
import numpy as np

Definição da semente para aleatoriedade, para fins de reprodução do treinamento.

In [None]:
# Seed the Python, NumPy and PyTorch random number generators with the same fixed value.
random.seed(123)
np.random.seed(123)
torch.manual_seed(123)

<torch._C.Generator at 0x7fd4f01cf730>

## Download e processamento das bases (treinamento e teste)

In [None]:
!wget https://storage.googleapis.com/unicamp-dl/ia368dd_2023s1/msmarco/msmarco_triples.train.tiny.tsv

--2023-03-12 22:05:34--  https://storage.googleapis.com/unicamp-dl/ia368dd_2023s1/msmarco/msmarco_triples.train.tiny.tsv
Resolving storage.googleapis.com (storage.googleapis.com)... 142.250.157.128, 64.233.189.128, 108.177.97.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|142.250.157.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 8076179 (7.7M) [text/tab-separated-values]
Saving to: ‘msmarco_triples.train.tiny.tsv’


2023-03-12 22:05:34 (122 MB/s) - ‘msmarco_triples.train.tiny.tsv’ saved [8076179/8076179]



In [None]:
import pandas as pd

In [None]:
# The TSV has no header row (header=None), so the columns are labelled 0, 1 and 2.
df = pd.read_csv('msmarco_triples.train.tiny.tsv', sep='\t', header=None)

In [None]:
df.head()

Unnamed: 0,0,1,2
0,is a little caffeine ok during pregnancy,We donât know a lot about the effects of caf...,It is generally safe for pregnant women to eat...
1,what fruit is native to australia,Passiflora herbertiana. A rare passion fruit n...,"The kola nut is the fruit of the kola tree, a ..."
2,how large is the canadian military,The Canadian Armed Forces. 1 The first large-...,The Canadian Physician Health Institute (CPHI)...
3,types of fruit trees,Cherry. Cherry trees are found throughout the ...,"The kola nut is the fruit of the kola tree, a ..."
4,how many calories a day are lost breastfeeding,"Not only is breastfeeding better for the baby,...","However, you still need some niacin each day; ..."


In [None]:
# Build one labelled (query, passage) frame per class straight from the triple
# columns: column 1 supplies the passages labelled 1 and column 2 the passages
# labelled 0, both paired with the query in column 0.
# Each frame is constructed in a single call. The previous row-by-row
# DataFrame.append loop copied the whole frame on every iteration (quadratic
# time) and DataFrame.append no longer exists in pandas 2.x.
df_pos = pd.DataFrame({"query": df[0], "passage": df[1], "score": 1}).reset_index(drop=True)
df_neg = pd.DataFrame({"query": df[0], "passage": df[2], "score": 0}).reset_index(drop=True)
    

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the positive pairs for validation (fixed random_state).
pos_features = df_pos.drop(columns="score")
pos_labels = df_pos["score"]
X_train_pos, X_val_pos, Y_train_pos, Y_val_pos = train_test_split(
    pos_features, pos_labels, test_size=0.2, random_state=42
)

# Split the negative pairs with exactly the same settings.
neg_features = df_neg.drop(columns="score")
neg_labels = df_neg["score"]
X_train_neg, X_val_neg, Y_train_neg, Y_val_neg = train_test_split(
    neg_features, neg_labels, test_size=0.2, random_state=42
)

In [None]:
# Stack the positive rows on top of the negative rows for each split and
# renumber the result from 0 so features and labels stay aligned by position.
X_train = pd.concat((X_train_pos, X_train_neg), ignore_index=True)
Y_train = pd.concat((Y_train_pos, Y_train_neg), ignore_index=True)
X_val = pd.concat((X_val_pos, X_val_neg), ignore_index=True)
Y_val = pd.concat((Y_val_pos, Y_val_neg), ignore_index=True)

## Finetuning

Preparação dos dados, com tokenização e construção do dataset e dataloaders.

In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# MiniLM (a small BERT model, about 5x faster):
# https://huggingface.co/nreimers/MiniLM-L6-H384-uncased, considered because it
# offers a good trade-off between quality and speed.
# This one apparently overfit with 20 training epochs: although it reached a
# validation accuracy of 0.938, it led to NDCG@10=0.427 (softmax scores).
#model_name = "nreimers/MiniLM-L6-H384-uncased"

# This one, with 5 epochs of fine-tuning, reaches validation accuracy=0.937 and NDCG@10=0.621.
model_name = 'microsoft/MiniLM-L12-H384-uncased' 

tokenizer = AutoTokenizer.from_pretrained(model_name)

Downloading (…)okenizer_config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [None]:
from statistics import mean, stdev

# Token-count statistics over the first 1,000 training passages.
sample_passages = list(X_train["passage"])[:1_000]
sample_encoding = tokenizer(sample_passages)
lengths = list(map(len, sample_encoding['input_ids']))
print(f'Mean length in tokens: {mean(lengths):0.2f}')
print(f'Stdev length in tokens: {stdev(lengths):0.2f}')

Mean length in tokens: 80.45
Stdev length in tokens: 31.20


In [None]:
# Token limit passed as max_length (together with truncation=True) to the tokenizer calls below.
max_length = 512

In [None]:
def _encode(texts):
    """Tokenize a list of texts, truncating each one to at most max_length tokens."""
    return tokenizer(texts, max_length=max_length, truncation=True)


# Pull the raw query and passage strings out of each split.
train_queries, train_passages = (list(X_train[column]) for column in ("query", "passage"))
val_queries, val_passages = (list(X_val[column]) for column in ("query", "passage"))

# Queries and passages are encoded independently of each other, so the
# truncation limit applies to each text on its own.
train_queries_tokenized = _encode(train_queries)
train_passages_tokenized = _encode(train_passages)
val_queries_tokenized = _encode(val_queries)
val_passages_tokenized = _encode(val_passages)

In [None]:
from torch.utils import data

class Dataset(data.Dataset):
    """Map-style dataset that joins a tokenized query with its tokenized passage.

    Each item is the query encoding followed by the passage encoding, exactly
    as the tokenizer produced them; nothing is stripped or inserted between
    the two parts.

    Args:
        queries: Tokenizer output for the queries. Must support
            ``['input_ids']`` and ``['attention_mask']`` lookups that return
            one list per example.
        passages: Tokenizer output for the passages, aligned with ``queries``.
        targets: One label per example (list, array or pandas Series), in the
            same order as ``queries``.
        max_length: Optional cap on the length of the joined sequence. With
            the default ``None`` the full concatenation is returned.
    """

    def __init__(self, queries, passages, targets, max_length=None):
        self.queries = queries
        self.passages = passages
        # Store the labels as a plain list so __getitem__ indexes by position.
        # Indexing a pandas Series with [] is label-based and raises (or picks
        # the wrong row) when its index is not exactly 0..n-1.
        self.targets = list(targets)
        self.max_length = max_length

    def __len__(self):
        return len(self.queries['input_ids'])

    def __getitem__(self, idx):
        input_ids = self.queries['input_ids'][idx] + self.passages['input_ids'][idx]
        attention_mask = self.queries['attention_mask'][idx] + self.passages['attention_mask'][idx]
        if self.max_length is not None:
            # Each part was truncated on its own, so the concatenation can
            # still be longer than the requested limit.
            input_ids = input_ids[:self.max_length]
            attention_mask = attention_mask[:self.max_length]
        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'labels': int(self.targets[idx]),
        }



In [None]:
# Build the training dataset and smoke-test a few items: non-empty token ids,
# non-empty attention mask, and an integer label.
dataset_train = Dataset(train_queries_tokenized, train_passages_tokenized, Y_train)
assert len(dataset_train[0]['input_ids']) > 0
assert len(dataset_train[1]['attention_mask']) > 0
# isinstance is the idiomatic type check (PEP 8 discourages type(x) == T).
assert isinstance(dataset_train[2]['labels'], int)

In [None]:
# Validation counterpart of dataset_train, built from the validation encodings and labels.
dataset_valid = Dataset(val_queries_tokenized, val_passages_tokenized, Y_val)

In [None]:
from transformers import BatchEncoding

def collate_fn(batch):
  """Pad a list of dataset items with ``tokenizer.pad`` and return them as PyTorch tensors.

  Pad tokens are added to the items that are shorter than the others so the
  batch can be stacked; the result is wrapped in a ``BatchEncoding``.
  """
  padded = tokenizer.pad(batch, return_tensors='pt')
  return BatchEncoding(padded)



Confirmando se os datasets de treinamento e validação gerados são sempre os mesmos.

In [None]:
X_train.head()

Unnamed: 0,query,passage
0,"how far is cumberland md from washington, dc","Driving distance from Washington, DC to Cumber..."
1,average cost to move and set up mobile home,"1 On average, a short mobile home move that is..."
2,how big can siberian huskies get,male husky can get 23 feet long and females ca...
3,how long can you hold someone's attention,"So much so, that as you approach the pulpit, t..."
4,hcpcs code for medication management,"Inpatient telehealth pharmacologic management,..."


In [None]:
X_val.head()

Unnamed: 0,query,passage
0,where to get corso riggs,Background. Always cheerful and eternally opti...
1,what rivers flow into the wabash river,The Wabash River rises south of Grand Lake abo...
2,where is fiji islands,1 The Fiji Islands are an archipelago of over ...
3,how long do nipple piercings take to heal,"Typically, nipple piercings take anywhere from..."
4,what is a pole pocket banner,Pole pockets on banners are something that can...


In [None]:
# Both loaders share the batch size and the padding collate function; only the
# training loader shuffles.
loader_options = dict(batch_size=32, collate_fn=collate_fn)
dataloader_train = data.DataLoader(dataset_train, shuffle=True, **loader_options)
dataloader_valid = data.DataLoader(dataset_valid, shuffle=False, **loader_options)

# Sanity-check the shape of the first training batch only.
for batch in dataloader_train:
    n_rows, n_tokens = batch['input_ids'].shape[:2]
    assert n_rows <= dataloader_train.batch_size
    assert n_tokens <= max_length
    break

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


### Training Loop

In [None]:
def evaluate(model, dataloader, set_name):
    """Print the mean batch loss and the accuracy of ``model`` over ``dataloader``.

    The model is switched to eval mode and gradients are disabled. The loss is
    the mean of the per-batch losses; accuracy is the number of argmax
    predictions equal to the labels divided by ``len(dataloader.dataset)``.

    Args:
        model: Model whose output exposes ``.loss`` and ``.logits``.
        dataloader: Loader yielding batches that include a ``'labels'`` entry.
        set_name: Label used for the progress bar and in the printed line.

    Returns:
        None. The metrics are only printed.
    """
    model.eval()
    batch_losses = []
    n_correct = 0
    progress = tqdm(dataloader, mininterval=0.5, desc=set_name, disable=False)
    with torch.no_grad():
        for batch in progress:
            outputs = model(**batch.to(device))
            batch_losses.append(outputs.loss.cpu().item())
            predicted = outputs.logits.argmax(dim=1)
            n_correct += (predicted == batch['labels']).sum().item()

    avg_loss = mean(batch_losses)
    accuracy = n_correct / len(dataloader.dataset)
    print(f'{set_name} loss: {avg_loss:0.3f}; {set_name} accuracy: {accuracy:0.3f}')

In [None]:
from torch import nn
from torch import optim
from tqdm.auto import tqdm
from transformers import get_linear_schedule_with_warmup

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
def train(model_name, epochs = 5):
  """Fine-tune a freshly loaded sequence-classification model and return it.

  Loads ``model_name`` with ``AutoModelForSequenceClassification``, trains it
  on the module-level ``dataloader_train`` using AdamW (lr=5e-5) and a linear
  schedule whose warm-up covers the first 10% of the steps. ``evaluate`` is
  called on ``dataloader_valid`` once before training and after every epoch.

  Args:
    model_name: Identifier or path accepted by ``from_pretrained``.
    epochs: Number of passes over ``dataloader_train``.

  Returns:
    The trained model, placed on ``device``.
  """
  model = AutoModelForSequenceClassification.from_pretrained(model_name)
  model = model.to(device)
  print('Parameters', model.num_parameters())

  optimizer = optim.AdamW(model.parameters(), lr=5e-5)
  total_steps = len(dataloader_train) * epochs
  warmup_steps = int(total_steps * 0.1)
  scheduler = get_linear_schedule_with_warmup(optimizer, warmup_steps, total_steps)

  # Baseline validation metrics before any weight update.
  evaluate(model=model, dataloader=dataloader_valid, set_name='Valid')

  for epoch_number in tqdm(range(1, epochs + 1), desc='Epochs'):
    model.train()
    epoch_losses = []
    for batch in tqdm(dataloader_train, mininterval=0.5, desc='Train', disable=False):
      optimizer.zero_grad()
      loss = model(**batch.to(device)).loss
      loss.backward()
      optimizer.step()
      # The schedule advances once per optimizer step, not once per epoch.
      scheduler.step()
      epoch_losses.append(loss.cpu().item())

    print(f'Epoch: {epoch_number} Training loss: {mean(epoch_losses):0.2f}')
    evaluate(model=model, dataloader=dataloader_valid, set_name='Valid')

  return model

In [None]:
model_name

'microsoft/MiniLM-L12-H384-uncased'

In [None]:
# Google Drive folder where the fine-tuned models and the tokenizer are saved.
MODELS_PATH = '/content/gdrive/MyDrive/Unicamp-aula-3'

### Primeiro experimento

In [None]:
# Fine-tune for 5 epochs, then save the model and the tokenizer under MODELS_PATH.
model = train(model_name, 5)
# NOTE: this rebinds the global model_name with '/' replaced by '_'. Later cells
# that build paths from model_name read the rewritten value, and re-running this
# cell would pass the rewritten name to train().
model_name = model_name.replace('/','_')
model.save_pretrained(f'{MODELS_PATH}/models_ranker_{model_name}')
tokenizer.save_pretrained(f'{MODELS_PATH}/tokenizer_ranker')

Downloading pytorch_model.bin:   0%|          | 0.00/133M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/MiniLM-L12-H384-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Parameters 33360770


Valid:   0%|          | 0/138 [00:00<?, ?it/s]

Valid loss: 0.694; Valid accuracy: 0.500


Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Train:   0%|          | 0/550 [00:00<?, ?it/s]

Epoch: 1 Training loss: 0.42


Valid:   0%|          | 0/138 [00:00<?, ?it/s]

Valid loss: 0.236; Valid accuracy: 0.910


Train:   0%|          | 0/550 [00:00<?, ?it/s]

Epoch: 2 Training loss: 0.20


Valid:   0%|          | 0/138 [00:00<?, ?it/s]

Valid loss: 0.208; Valid accuracy: 0.919


Train:   0%|          | 0/550 [00:00<?, ?it/s]

Epoch: 3 Training loss: 0.12


Valid:   0%|          | 0/138 [00:00<?, ?it/s]

Valid loss: 0.208; Valid accuracy: 0.932


Train:   0%|          | 0/550 [00:00<?, ?it/s]

Epoch: 4 Training loss: 0.07


Valid:   0%|          | 0/138 [00:00<?, ?it/s]

Valid loss: 0.224; Valid accuracy: 0.934


Train:   0%|          | 0/550 [00:00<?, ?it/s]

Epoch: 5 Training loss: 0.04


Valid:   0%|          | 0/138 [00:00<?, ?it/s]

Valid loss: 0.247; Valid accuracy: 0.937


('/content/gdrive/MyDrive/Unicamp-aula-3/tokenizer_ranker/tokenizer_config.json',
 '/content/gdrive/MyDrive/Unicamp-aula-3/tokenizer_ranker/special_tokens_map.json',
 '/content/gdrive/MyDrive/Unicamp-aula-3/tokenizer_ranker/vocab.txt',
 '/content/gdrive/MyDrive/Unicamp-aula-3/tokenizer_ranker/added_tokens.json',
 '/content/gdrive/MyDrive/Unicamp-aula-3/tokenizer_ranker/tokenizer.json')

## Etapa de reranqueamento (estágio 2)

In [None]:
# NOTE(review): main_path is not referenced anywhere else in the visible part of this notebook.
main_path = '/content/gdrive/MyDrive/Unicamp-aula-2/'

In [None]:
# Identifiers and file locations for the dl20 topics and qrels.
# NOTE(review): in the visible notebook only qrels_eval is used later (in the trec_eval commands).
topics = 'dl20'
qrels_eval = 'dl20-passage'
qrels_eval_file = 'pyserini/tools/topics-and-qrels/qrels.dl20-passage.txt'
topics_file = 'pyserini/tools/topics-and-qrels/topics.dl20.txt'

In [None]:
# Base folder for the inputs (topics, run file, token caches) and for the generated run files.
# Keep the trailing '/': paths are built by plain string concatenation.
root_dir = '/content/gdrive/MyDrive/Unicamp-aula-3/'

In [None]:
# Load the query ids and texts into memory.
df_queries = pd.read_csv(f'{root_dir}topics.dl20.small.tsv', sep='\t', header=None)

In [None]:
# Load the passage collection (a TSV without a header row) into memory.
collection_path = '/content/gdrive/MyDrive/Unicamp-aula-2/collections/msmarco-passage/collection.tsv'
df_passages = pd.read_csv(collection_path, sep='\t', header=None)

In [None]:
# Open question: it may not be necessary to reload the tokenizer that was used
# during fine-tuning, since it presumably was not modified.
tokenizer = AutoTokenizer.from_pretrained(f'{MODELS_PATH}/tokenizer_ranker')

In [None]:
import os
import pickle

# Cache files holding the tokenized test queries and passages.
queries_cache_path = f"{root_dir}tok_queries_test.pickle"
passages_cache_path = f"{root_dir}tok_passages_test.pickle"


def _load_cache(path, message):
  """Return the object pickled at ``path``, or None when the file does not exist."""
  # Without this guard a fresh run fails with FileNotFoundError before the
  # tokenization fallback further down gets a chance to create the cache.
  if not os.path.exists(path):
    return None
  # NOTE(security): pickle.load can execute arbitrary code. Only load cache
  # files that this notebook wrote itself.
  with open(path, "rb") as f:
    print(message)
    return pickle.load(f)


def _texts_for(frame, ids):
  """Return the column-1 text for every id in ``ids``, matching ids against column 0.

  The frame is filtered once and turned into a dict, instead of scanning the
  whole frame once per id. When an id occurs on several rows the first row
  wins. Raises KeyError if an id is not present in the frame.
  """
  wanted = {int(i) for i in ids}
  subset = frame[frame.iloc[:, 0].isin(wanted)]
  first_text = {}
  for key, text in zip(subset.iloc[:, 0], subset.iloc[:, 1]):
    first_text.setdefault(key, text)
  return [first_text[int(i)] for i in ids]


tokenized_queries = _load_cache(queries_cache_path, "Carregando queries de teste...")
tokenized_passages = _load_cache(passages_cache_path, "Carregando passagens de teste...")

query_ids = []
queries = []
passage_ids = []
passages = []

# Each run-file line is tab-separated: field 0 is the query id, field 1 the passage id.
with open(f'{root_dir}runs/run.dl20.bm25tuned.txt') as f:
  for line in f:
    if not line.strip():
      continue  # tolerate blank lines (e.g. a trailing newline)
    fields = line.strip().split('\t')
    query_ids.append(fields[0])
    passage_ids.append(fields[1])

# Tokenize and cache whatever could not be loaded from disk.
if not tokenized_queries:
  queries = _texts_for(df_queries, query_ids)
  tokenized_queries = tokenizer(queries, max_length=max_length, truncation=True)

  with open(queries_cache_path, 'wb') as f:
    pickle.dump(tokenized_queries, f)

if not tokenized_passages:
  passages = _texts_for(df_passages, passage_ids)
  tokenized_passages = tokenizer(passages, max_length=max_length, truncation=True)

  with open(passages_cache_path, 'wb') as f:
    pickle.dump(tokenized_passages, f)
      

Carregando queries de teste...
Carregando passagens de teste...


In [None]:
# The target values are irrelevant here because they are not used to compute
# accuracy, so every example is given the label 1 (relevant).
dataset_test = Dataset(tokenized_queries, tokenized_passages, [1]*len(tokenized_queries['input_ids']))

In [None]:
# shuffle=False keeps the run-file order, so scores line up by position with query_ids/passage_ids.
dataloader_test = data.DataLoader(dataset_test, batch_size=32, shuffle=False, collate_fn=collate_fn)


In [None]:
def evaluate_test_dataset(model, dataloader, set_name, use_logits=False):
    """Score every example in ``dataloader`` and return the class-1 scores as a list.

    Args:
        model: Model whose output exposes ``.logits`` with one column per class.
        dataloader: Loader yielding the batches to score, in the order the
            scores should be returned.
        set_name: Label shown on the progress bar.
        use_logits: When True, return the raw class-1 logit. When False
            (default), return the class-1 value after a softmax over the classes.

    Returns:
        A flat list of floats, one per example.
    """
    model.eval()
    scores = []
    with torch.no_grad():
        for batch in tqdm(dataloader, mininterval=0.5, desc=set_name, disable=False):
            logits = model(**batch.to(device)).logits
            # Raw logits, or logits normalized by softmax (the default).
            class_scores = logits if use_logits else torch.softmax(logits, dim=1)
            scores.extend(class_scores[:, 1].tolist())
    return scores

In [None]:
def evaluate_ndcg_10(scores, model_name, eval_desc, run_query_ids=None, run_passage_ids=None, output_dir=None):
  """Write a reranked run file in TREC format and return its path.

  Despite its name, this function does not compute NDCG@10; it only produces
  the run file that is later handed to trec_eval.

  Consecutive entries that share a query id form one group. Each group is
  sorted by descending score (ties keep their input order) and written with
  ranks that restart at 1 for every query.

  Args:
    scores: One score per (query, passage) pair, aligned by position with the ids.
    model_name: Used in the file name and as the run tag.
    eval_desc: Suffix that distinguishes run files of the same model.
    run_query_ids: Query id per pair. Defaults to the module-level ``query_ids``.
    run_passage_ids: Passage id per pair. Defaults to the module-level ``passage_ids``.
    output_dir: Directory prefix (with trailing separator) for the run file.
      Defaults to the module-level ``root_dir``.

  Returns:
    Path of the written run file.

  Raises:
    ValueError: If scores and ids do not all have the same length.
  """
  from itertools import groupby

  if run_query_ids is None:
    run_query_ids = query_ids
  if run_passage_ids is None:
    run_passage_ids = passage_ids
  if output_dir is None:
    output_dir = root_dir

  if not (len(scores) == len(run_query_ids) == len(run_passage_ids)):
    raise ValueError(
      f'Length mismatch: {len(scores)} scores, {len(run_query_ids)} query ids, '
      f'{len(run_passage_ids)} passage ids'
    )

  # zip() yields a one-shot iterator, so it is materialized into a list here.
  # Iterating a bare zip object a second time silently produces nothing, which
  # is presumably what made an earlier zip-based version look empty.
  results = list(zip(run_query_ids, run_passage_ids, scores))

  trec_run_file = f"{output_dir}run.dl20.bert_reranked_{model_name}_{eval_desc}.trec"
  with open(trec_run_file, "w") as f:
    # groupby splits on every change of query id, i.e. it groups consecutive entries.
    for _, group in groupby(results, key=lambda item: item[0]):
      ranked = sorted(group, key=lambda item: item[2], reverse=True)
      # The TREC rank column is per query, so numbering restarts at 1 for each group.
      for rank, (query_id, passage_id, score) in enumerate(ranked, start=1):
        f.write(f'{query_id}\tQ0\t{passage_id}\t{rank}\t{score}\tbert_reranked_{model_name}\n')

  return trec_run_file
  

### Primeira avaliação

In [None]:
# Alternative path without the model-name suffix, disabled:
#model = AutoModelForSequenceClassification.from_pretrained(f'{MODELS_PATH}/models_ranker').to(device)
# Reload the fine-tuned weights. model_name must already hold the '_' form so the
# path matches the folder written by save_pretrained.
model = AutoModelForSequenceClassification.from_pretrained(f'{MODELS_PATH}/models_ranker_{model_name}').to(device)

In [None]:
# Score the test pairs twice: first with softmax-normalized class-1 scores, then
# with raw class-1 logits. Each call runs a full pass over dataloader_test.
softmax_scores = evaluate_test_dataset(model=model, dataloader=dataloader_test, set_name='Test')
logit_scores = evaluate_test_dataset(model=model, dataloader=dataloader_test, set_name='Test', use_logits=True)

Test:   0%|          | 0/1688 [00:00<?, ?it/s]

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Test:   0%|          | 0/1688 [00:00<?, ?it/s]

In [None]:
# Both score lists must contain exactly 54000 entries, the expected number of scored pairs.
assert len(softmax_scores) == len(logit_scores) == 54000

Compara a avaliação do NDCG@10 utilizando softmax versus logits brutos.  É esperado que a avaliação softmax dê melhor resultado...

In [None]:
# Write one TREC run file per scoring variant; the returned paths are passed to trec_eval below.
trec_file_softmax = evaluate_ndcg_10(softmax_scores, model_name, "softmax")
trec_file_logits = evaluate_ndcg_10(logit_scores, model_name, "logits")

In [None]:
!chmod 755 {root_dir}/pyserini/tools/eval/trec_eval.9.0.4/trec_eval

In [None]:
!/content/gdrive/MyDrive/Unicamp-aula-3/pyserini/tools/eval/trec_eval.9.0.4/trec_eval -c -m map -m ndcg_cut.10 -l 2 \
   {root_dir}collections/msmarco-passage/qrels.{qrels_eval}.trec {trec_file_softmax}


map                   	all	0.4340
ndcg_cut_10           	all	0.6210


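... porém os logits levaram a uma ligeira vantagem para a métrica.  Cabem experimentos adicionais (ex.: teste de hipóteses) para checar esse dado.  Possível explicação: inconsistências no dataset? Observação: com duas classes, o softmax da classe 1 equivale a sigmoid(logit_1 − logit_0), enquanto o logit bruto usa apenas logit_1; portanto as duas ordenações podem diferir legitimamente, e a diferença observada (0.6210 vs. 0.6223) é pequena.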

In [None]:
!/content/gdrive/MyDrive/Unicamp-aula-3/pyserini/tools/eval/trec_eval.9.0.4/trec_eval -c -m map -m ndcg_cut.10 -l 2 \
   {root_dir}collections/msmarco-passage/qrels.{qrels_eval}.trec {trec_file_logits}

map                   	all	0.4338
ndcg_cut_10           	all	0.6223


### Reproduzindo o experimento mais 4 vezes para checar média/variância

In [None]:
# Restore the identifier with '/', because the first experiment rebound model_name to the '_' form.
model_name = 'microsoft/MiniLM-L12-H384-uncased' 

In [45]:
import subprocess

def run(command):
  """Run a shell command that prints trec_eval metrics and return its NDCG@10 value.

  The command's standard output is echoed and then scanned for the line whose
  first field is ``ndcg_cut_10``; the last field of that line is returned. The
  metric is looked up by name, so the result does not depend on how many other
  metrics are printed or in which order.

  Args:
    command: Shell command line, executed with ``shell=True``. It must come
      from trusted code, because the shell interprets it verbatim.

  Returns:
    The NDCG@10 value as a float.

  Raises:
    ValueError: If the output contains no ``ndcg_cut_10`` line.
  """
  completed = subprocess.run(command, shell=True, stdout=subprocess.PIPE)
  output = completed.stdout.decode()
  print(output)
  for line in output.splitlines():
    fields = line.split()
    if fields and fields[0] == 'ndcg_cut_10':
      return float(fields[-1])
  raise ValueError(f'ndcg_cut_10 not found in the output of: {command}')

#ndcgs_softmax = [0.6210]
#ndcgs_logit = [0.6223]

# Repeat fine-tuning and scoring, saving each model and each pair of run files
# under a name suffixed with the repetition index i.
# NOTE(review): range(1,4) yields three repetitions (i = 1, 2, 3).
for i in range(1,4):
  # Reset to the identifier with '/' on every pass, because it is rebound to the '_' form below.
  model_name = 'microsoft/MiniLM-L12-H384-uncased' 
  model = train(model_name, 5)
  model_name = model_name.replace('/','_')
  model.save_pretrained(f'{MODELS_PATH}/models_ranker_{model_name}_{i}')
  # The tokenizer is written to the same folder on every pass.
  tokenizer.save_pretrained(f'{MODELS_PATH}/tokenizer_ranker')

  softmax_scores = evaluate_test_dataset(model=model, dataloader=dataloader_test, set_name='Test')
  logit_scores = evaluate_test_dataset(model=model, dataloader=dataloader_test, set_name='Test', use_logits=True)

  trec_file_softmax = evaluate_ndcg_10(softmax_scores, model_name, f"softmax_{i}")
  trec_file_logits = evaluate_ndcg_10(logit_scores, model_name, f"logits_{i}")

  # Disabled: run trec_eval on each run file and collect the NDCG@10 values.
  #command = f"""/content/gdrive/MyDrive/Unicamp-aula-3/pyserini/tools/eval/trec_eval.9.0.4/trec_eval -c -m map -m ndcg_cut.10 -l 2 
   # {root_dir}collections/msmarco-passage/qrels.{qrels_eval}.trec {trec_file_softmax}"""
  #ndcg_softmax = run(command)
  #ndcgs_softmax.append(ndcg_softmax)

  #command = f"""/content/gdrive/MyDrive/Unicamp-aula-3/pyserini/tools/eval/trec_eval.9.0.4/trec_eval -c -m map -m ndcg_cut.10 -l 2 \
   #{root_dir}collections/msmarco-passage/qrels.{qrels_eval}.trec {trec_file_logits}"""
  #ndcg_logit = run(command)
  #ndcgs_logit.append(ndcg_logit)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/MiniLM-L12-H384-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Parameters 33360770


Valid:   0%|          | 0/138 [00:00<?, ?it/s]

Valid loss: 0.693; Valid accuracy: 0.500


Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Train:   0%|          | 0/550 [00:00<?, ?it/s]

Epoch: 1 Training loss: 0.41


Valid:   0%|          | 0/138 [00:00<?, ?it/s]

Valid loss: 0.274; Valid accuracy: 0.895


Train:   0%|          | 0/550 [00:00<?, ?it/s]

Epoch: 2 Training loss: 0.20


Valid:   0%|          | 0/138 [00:00<?, ?it/s]

Valid loss: 0.216; Valid accuracy: 0.920


Train:   0%|          | 0/550 [00:00<?, ?it/s]

Epoch: 3 Training loss: 0.12


Valid:   0%|          | 0/138 [00:00<?, ?it/s]

Valid loss: 0.239; Valid accuracy: 0.917


Train:   0%|          | 0/550 [00:00<?, ?it/s]

Epoch: 4 Training loss: 0.07


Valid:   0%|          | 0/138 [00:00<?, ?it/s]

Valid loss: 0.237; Valid accuracy: 0.932


Train:   0%|          | 0/550 [00:00<?, ?it/s]

Epoch: 5 Training loss: 0.04


Valid:   0%|          | 0/138 [00:00<?, ?it/s]

Valid loss: 0.261; Valid accuracy: 0.935


Test:   0%|          | 0/1688 [00:00<?, ?it/s]

Test:   0%|          | 0/1688 [00:00<?, ?it/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/MiniLM-L12-H384-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Parameters 33360770


Valid:   0%|          | 0/138 [00:00<?, ?it/s]

Valid loss: 0.693; Valid accuracy: 0.468


Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Train:   0%|          | 0/550 [00:00<?, ?it/s]

Epoch: 1 Training loss: 0.40


Valid:   0%|          | 0/138 [00:00<?, ?it/s]

Valid loss: 0.241; Valid accuracy: 0.906


Train:   0%|          | 0/550 [00:00<?, ?it/s]

Epoch: 2 Training loss: 0.20


Valid:   0%|          | 0/138 [00:00<?, ?it/s]

Valid loss: 0.215; Valid accuracy: 0.920


Train:   0%|          | 0/550 [00:00<?, ?it/s]

Epoch: 3 Training loss: 0.12


Valid:   0%|          | 0/138 [00:00<?, ?it/s]

Valid loss: 0.210; Valid accuracy: 0.934


Train:   0%|          | 0/550 [00:00<?, ?it/s]

Epoch: 4 Training loss: 0.07


Valid:   0%|          | 0/138 [00:00<?, ?it/s]

Valid loss: 0.234; Valid accuracy: 0.930


Train:   0%|          | 0/550 [00:00<?, ?it/s]

Epoch: 5 Training loss: 0.04


Valid:   0%|          | 0/138 [00:00<?, ?it/s]

Valid loss: 0.239; Valid accuracy: 0.936


Test:   0%|          | 0/1688 [00:00<?, ?it/s]

Test:   0%|          | 0/1688 [00:00<?, ?it/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/MiniLM-L12-H384-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Parameters 33360770


Valid:   0%|          | 0/138 [00:00<?, ?it/s]

Valid loss: 0.693; Valid accuracy: 0.507


Epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Train:   0%|          | 0/550 [00:00<?, ?it/s]

Epoch: 1 Training loss: 0.40


Valid:   0%|          | 0/138 [00:00<?, ?it/s]

Valid loss: 0.241; Valid accuracy: 0.909


Train:   0%|          | 0/550 [00:00<?, ?it/s]

Epoch: 2 Training loss: 0.21


Valid:   0%|          | 0/138 [00:00<?, ?it/s]

Valid loss: 0.243; Valid accuracy: 0.916


Train:   0%|          | 0/550 [00:00<?, ?it/s]

Epoch: 3 Training loss: 0.12


Valid:   0%|          | 0/138 [00:00<?, ?it/s]

Valid loss: 0.209; Valid accuracy: 0.927


Train:   0%|          | 0/550 [00:00<?, ?it/s]

Epoch: 4 Training loss: 0.07


Valid:   0%|          | 0/138 [00:00<?, ?it/s]

Valid loss: 0.230; Valid accuracy: 0.929


Train:   0%|          | 0/550 [00:00<?, ?it/s]

Epoch: 5 Training loss: 0.04


Valid:   0%|          | 0/138 [00:00<?, ?it/s]

Valid loss: 0.252; Valid accuracy: 0.930


Test:   0%|          | 0/1688 [00:00<?, ?it/s]

Test:   0%|          | 0/1688 [00:00<?, ?it/s]