### 1. INSTALL NECESSARY MODULES

In [1]:
!pip install transformers



In [2]:
!pip install ipywidgets




### 2. IMPORTS

In [3]:
#import required libraries
import pandas as pd
from transformers import (
    DPRContextEncoder,
    DPRQuestionEncoder,
    DPRContextEncoderTokenizer,
    DPRQuestionEncoderTokenizer,
)
import torch.nn.functional as F
import torch
import json
import gc
from tqdm import tqdm

In [4]:
def memory_stats():
    """Print CUDA memory usage in MiB.

    Two lines are printed: the value of ``torch.cuda.memory_allocated()`` and
    then the value of ``torch.cuda.memory_reserved()``, each divided by 1024**2.
    """
    bytes_per_mib = 1024**2
    print(torch.cuda.memory_allocated()/bytes_per_mib)
    # memory_reserved() is the current name of the deprecated memory_cached().
    print(torch.cuda.memory_reserved()/bytes_per_mib)

In [5]:
def clear_memory():
    """Run the garbage collector, empty the CUDA cache, then print memory usage."""
    # The steps run strictly in this order: collect, release cache, report.
    for step in (gc.collect, torch.cuda.empty_cache, memory_stats):
        step()

In [6]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Manual overrides, kept for reference:
# device = torch.device("cpu")
# device = xm.xla_device()

In [7]:
# Display the device that was selected above.
device

device(type='cuda')

In [8]:
# Base directory: data files are read from {DIR}/data and models are saved under {DIR}/models.
DIR = "./"

In [9]:
#import required libraries
import torch
from torch.utils.data import DataLoader, Dataset
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F

### 3. TRAINING

#### 3.1 CUSTOM DATASET CLASS -- CONVERT THE JSON DATA INTO THE FORMAT REQUIRED BY THE MODEL

In [10]:
class CustomDataset(Dataset):
    """Dataset of (job description, resume) pairs tokenized on access.

    ``data`` is read positionally through its ``"job_description"`` and
    ``"resume"`` columns. Job descriptions go through ``question_tokenizer``
    and resumes through ``context_tokenizer``.
    """

    # Every text is padded or truncated to exactly 512 tokens.
    _TOKENIZER_KWARGS = dict(
        return_tensors="pt", padding="max_length", max_length=512, truncation=True
    )

    def __init__(self, data, question_tokenizer, context_tokenizer):
        self.data = data
        self.question_tokenizer = question_tokenizer
        self.context_tokenizer = context_tokenizer

    def __len__(self):
        """Return the number of rows in ``data``."""
        return len(self.data)

    @classmethod
    def _encode(cls, tokenizer, text):
        """Tokenize ``text`` and return flattened ``(input_ids, attention_mask)``."""
        encoded = tokenizer(text, **cls._TOKENIZER_KWARGS)
        ids = torch.flatten(encoded["input_ids"])
        mask = torch.flatten(encoded["attention_mask"])
        return ids, mask

    def __getitem__(self, index):
        """Return ``(question_ids, question_mask, context_ids, context_mask)`` for row ``index``."""
        job_description = self.data["job_description"].iloc[index]
        resume = self.data["resume"].iloc[index]

        question_ids, question_mask = self._encode(self.question_tokenizer, job_description)
        context_ids, context_mask = self._encode(self.context_tokenizer, resume)
        return (question_ids, question_mask, context_ids, context_mask)

#### 3.2 DEFINE CLASS HANDLING MODEL INITIALIZATION, FORWARD PASS, AND SAVING BEST MODEL

In [11]:
class Model(nn.Module):
    """Bi-encoder holding a question encoder and a context encoder.

    Each encoder is called as ``encoder(input_ids, attention_mask)`` and must
    return an object exposing a ``pooler_output`` attribute.
    """

    def __init__(self, question_encoder, context_encoder):
        super().__init__()
        self.question_encoder = question_encoder
        self.context_encoder = context_encoder

    @staticmethod
    def _device_of(encoder, fallback):
        """Return the device of ``encoder``'s first parameter, or ``fallback`` if it has none."""
        first_param = next(encoder.parameters(), None)
        return fallback if first_param is None else first_param.device

    def forward(self, question_ids, question_att_mask, context_ids, context_att_mask):
        """Encode a batch of questions and contexts.

        Inputs are moved to the device of the encoder that consumes them, so
        the module does not depend on any global ``device`` variable.

        Returns:
            Tuple ``(question_output, context_output)`` holding the
            ``pooler_output`` of each encoder.
        """
        question_device = self._device_of(self.question_encoder, question_ids.device)
        context_device = self._device_of(self.context_encoder, context_ids.device)

        question_output = self.question_encoder(
            question_ids.to(question_device), question_att_mask.to(question_device)
        ).pooler_output
        context_output = self.context_encoder(
            context_ids.to(context_device), context_att_mask.to(context_device)
        ).pooler_output
        return question_output, context_output

    def save(self, path):
        """Save both encoders with ``save_pretrained`` into sub-directories of ``path``."""
        self.question_encoder.save_pretrained(path + "/finetune_question_encoder")
        self.context_encoder.save_pretrained(path + "/finetune_context_encoder")

#### 3.3 DEFINE TRAIN FUNCTION

In [12]:
def train(
    model,
    model_path,
    optimizer,
    criterion,
    train_loader,
    n_epochs,
    eval_fn
):
    """Train ``model`` and keep the checkpoint with the best evaluation score.

    Args:
        model: Module called as ``model(question_ids, question_att_mask,
            context_ids, context_att_mask)`` and returning
            ``(question_output, context_output)``; must provide ``save(path)``.
        model_path: Path passed to ``model.save`` whenever the score improves.
        optimizer: Optimizer over the model's parameters.
        criterion: Callable ``criterion(question_output, context_output)``
            returning a scalar loss tensor.
        train_loader: Iterable yielding 4-tuples of batched tensors.
        n_epochs: Number of passes over ``train_loader``.
        eval_fn: Callable ``eval_fn(model)`` returning a number; higher is better.
    """
    best_accuracy = -1.0

    for epoch in range(n_epochs):
        # Sum of batch losses weighted by batch size; reset every epoch.
        train_loss = 0.0

        model.train()  # eval_fn may have switched the model to eval mode
        # Wrap the loader in tqdm exactly once; wrapping the bar again prints
        # a second, duplicate progress bar.
        with tqdm(train_loader, unit="batch", position=0, leave=True) as tepoch:
            tepoch.set_description(f"Epoch {epoch + 1}")
            for question_ids, question_att_mask, context_ids, context_att_mask in tepoch:
                # clear the gradients of all optimized variables
                optimizer.zero_grad()
                # forward pass through both encoders
                question_output, context_output = model(
                    question_ids, question_att_mask, context_ids, context_att_mask
                )
                batch_loss = criterion(question_output, context_output)
                # backward pass and a single optimization step
                batch_loss.backward()
                optimizer.step()
                # the progress bar shows the running (cumulative) epoch loss
                train_loss += batch_loss.item() * question_ids.size(0)
                tepoch.set_postfix(loss=train_loss)

        accuracy = eval_fn(model)

        # The best model is selected by the score returned from eval_fn.
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            model.save(model_path)

#### 3.4 DEFINE LOSS FUNCTION

In [13]:
def loss(question_output, context_output):
    """In-batch negative log-likelihood between questions and contexts.

    Row ``i`` of ``question_output`` is scored against every row of
    ``context_output`` by dot product; the context at the same index ``i`` is
    treated as the correct one, and all other rows serve as negatives.

    Args:
        question_output: 2-D tensor of question embeddings.
        context_output: 2-D tensor of context embeddings with the same shape.

    Returns:
        Scalar tensor: mean over rows of ``-log_softmax(scores)[i, i]``.
    """
    scores = torch.matmul(question_output, context_output.T)
    # log_softmax stays finite when the correct score is far below the row
    # maximum, where log(softmax(...) + eps) would saturate and lose gradient.
    log_probs = F.log_softmax(scores, dim=1)
    return -torch.diagonal(log_probs).mean()

#### 3.5 DEFINE EVALUATE FUNCTION TO EVALUATE THE MODEL ON VALIDATION DATA AND TRACK TRAINING PROGRESS

In [14]:
def evaluate(model):
    """Score ``model`` on the validation file by top-5 retrieval precision.

    Reads ``{DIR}/data/valid_data.json``, a list of records with the keys
    ``"label"``, ``"description"``, ``"pos"`` and ``"neg"``. For each record the
    description is encoded with the question encoder, the ``neg + pos``
    passages with the context encoder, and the passages are ranked by cosine
    similarity. A retrieved passage counts as correct when it is in ``pos``.

    Returns:
        Fraction of retrieved passages that are positives, over all records
        (0.0 when nothing was retrieved).
    """
    model.eval()
    question_encoder = model.question_encoder
    context_encoder = model.context_encoder
    with open(f"{DIR}/data/valid_data.json", "r") as f:
        valid_data = json.load(f)

    question_tokenizer = DPRQuestionEncoderTokenizer.from_pretrained(
        "facebook/dpr-question_encoder-single-nq-base"
    )
    context_tokenizer = DPRContextEncoderTokenizer.from_pretrained(
        "facebook/dpr-ctx_encoder-single-nq-base"
    )
    total_num_correct = 0
    total_num_retrieved = 0
    # Inference only: without no_grad() every forward pass would keep its
    # autograd graph alive, wasting memory and time.
    with torch.no_grad():
        for data in valid_data:
            label = data["label"]
            job_description = data["description"]
            pos = data["pos"]
            neg = data["neg"]
            candidates = neg + pos
            if not candidates:
                # Nothing to rank for this record.
                continue

            # Tokenize the question and the candidate passages
            tokenized_question = question_tokenizer(
                job_description,
                return_tensors="pt",
                padding="max_length",
                max_length=512,
                truncation=True,
            )
            tokenized_context = context_tokenizer(
                candidates,
                return_tensors="pt",
                padding="max_length",
                max_length=512,
                truncation=True,
            )

            # Encode the question and the candidate passages
            question_output = question_encoder(
                tokenized_question["input_ids"].to(device),
                tokenized_question["attention_mask"].to(device),
            ).pooler_output
            context_output = context_encoder(
                tokenized_context["input_ids"].to(device),
                tokenized_context["attention_mask"].to(device),
            ).pooler_output

            scores = F.cosine_similarity(question_output, context_output)
            # topk raises when k exceeds the number of candidates.
            top_k = min(5, len(candidates))
            _, indices = torch.topk(scores, top_k)
            relevant_passages = [candidates[i] for i in indices.tolist()]

            num_correct = sum(1 for passage in relevant_passages if passage in pos)
            total_num_correct += num_correct
            total_num_retrieved += len(relevant_passages)
            print(f"Accuracy ({label}): {num_correct}/{len(relevant_passages)}")
    print(f"Total accuracy: {total_num_correct}/{total_num_retrieved}")
    if total_num_retrieved == 0:
        return 0.0
    return total_num_correct / total_num_retrieved
     

#### 3.6 INITIALIZATIONS

In [15]:
# Load the training pairs from {DIR}/data/data.json.
df_train = pd.read_json(f"{DIR}/data/data.json")
# One tokenizer per encoder: job descriptions use the question tokenizer,
# resumes use the context tokenizer (see CustomDataset).
question_tokenizer = DPRQuestionEncoderTokenizer.from_pretrained(
    "facebook/dpr-question_encoder-single-nq-base"
)
context_tokenizer = DPRContextEncoderTokenizer.from_pretrained(
    "facebook/dpr-ctx_encoder-single-nq-base"
)
train_dataset = CustomDataset(df_train, question_tokenizer, context_tokenizer)
# Shuffled batches of 16 (job description, resume) pairs.
train_dataloader = DataLoader(
    train_dataset, batch_size=16, shuffle=True,
)

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizer'.


In [16]:
# Load the pretrained DPR encoders and move them to the selected device.
question_encoder = DPRQuestionEncoder.from_pretrained(
    "facebook/dpr-question_encoder-single-nq-base"
).to(device)

context_encoder = DPRContextEncoder.from_pretrained(
    "facebook/dpr-ctx_encoder-single-nq-base"
).to(device)

# Both encoders are optimized jointly by a single Adam optimizer (lr = 1e-6).
model = Model(question_encoder, context_encoder).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.000001)
# The loss and evaluation callables handed to train().
criterion = loss
eval_fn = evaluate

Some weights of the model checkpoint at facebook/dpr-question_encoder-single-nq-base were not used when initializing DPRQuestionEncoder: ['question_encoder.bert_model.pooler.dense.bias', 'question_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRQuestionEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DPRQuestionEncoder from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at facebook/dpr-ctx_encoder-single-nq-base were not used when initializing DPRContextEncoder: ['ctx_encoder.bert_model.pooler.dense.bias', 'ctx_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRContextEncoder from the

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

#### 3.7 CALLING TRAIN FUNCTION

In [None]:
# Train for 12 epochs; the model is saved under {DIR}/models whenever the evaluation score improves.
train(model, f"{DIR}/models", optimizer, criterion, train_dataloader, 12, eval_fn)

Epoch 1:   0%|          | 0/62 [00:00<?, ?batch/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

Epoch 1: 100%|██████████| 62/62 [31:20<00:00, 30.33s/batch, loss=626]
100%|██████████| 62/62 [31:20<00:00, 30.33s/it]
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizer'.


Accuracy (Security_Analyst): 1/5
Accuracy (Systems_Administrator): 4/5
Accuracy (Project_manager): 1/5
Accuracy (Database_Administrator): 4/5
Accuracy (Software_Developer): 5/5
Accuracy (Front_End_Developer): 2/5
Accuracy (Web_Developer): 3/5
Accuracy (Java_Developer): 2/5
Accuracy (Network_Administrator): 1/5
Accuracy (Python_Developer): 4/5
Accuracy (Security_Analyst): 3/5
Accuracy (Systems_Administrator): 3/5
Accuracy (Project_manager): 3/5
Accuracy (Database_Administrator): 4/5
Accuracy (Software_Developer): 4/5
Accuracy (Front_End_Developer): 4/5
Accuracy (Web_Developer): 1/5
Accuracy (Java_Developer): 2/5
Accuracy (Network_Administrator): 1/5
Accuracy (Python_Developer): 5/5
Accuracy (Security_Analyst): 2/5
Accuracy (Systems_Administrator): 3/5
Accuracy (Project_manager): 3/5
Accuracy (Database_Administrator): 2/5
Accuracy (Software_Developer): 3/5
Accuracy (Front_End_Developer): 2/5
Accuracy (Web_Developer): 2/5
Accuracy (Java_Developer): 3/5
Accuracy (Network_Administrator): 4/

Epoch 2: 100%|██████████| 62/62 [21:53<00:00, 21.19s/batch, loss=272] 
100%|██████████| 62/62 [21:53<00:00, 21.19s/it]
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizer'.


Accuracy (Security_Analyst): 2/5
Accuracy (Systems_Administrator): 4/5
Accuracy (Project_manager): 1/5
Accuracy (Database_Administrator): 4/5
Accuracy (Software_Developer): 5/5
Accuracy (Front_End_Developer): 2/5
Accuracy (Web_Developer): 3/5
Accuracy (Java_Developer): 2/5
Accuracy (Network_Administrator): 2/5
Accuracy (Python_Developer): 4/5
Accuracy (Security_Analyst): 3/5
Accuracy (Systems_Administrator): 4/5
Accuracy (Project_manager): 3/5
Accuracy (Database_Administrator): 4/5
Accuracy (Software_Developer): 4/5
Accuracy (Front_End_Developer): 3/5
Accuracy (Web_Developer): 1/5
Accuracy (Java_Developer): 2/5
Accuracy (Network_Administrator): 2/5
Accuracy (Python_Developer): 4/5
Accuracy (Security_Analyst): 2/5
Accuracy (Systems_Administrator): 3/5
Accuracy (Project_manager): 3/5
Accuracy (Database_Administrator): 2/5
Accuracy (Software_Developer): 3/5
Accuracy (Front_End_Developer): 2/5
Accuracy (Web_Developer): 3/5
Accuracy (Java_Developer): 3/5
Accuracy (Network_Administrator): 3/

Epoch 3: 100%|██████████| 62/62 [22:14<00:00, 21.52s/batch, loss=186] 
100%|██████████| 62/62 [22:14<00:00, 21.52s/it]
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizer'.


Accuracy (Security_Analyst): 1/5
Accuracy (Systems_Administrator): 4/5
Accuracy (Project_manager): 1/5
Accuracy (Database_Administrator): 4/5
Accuracy (Software_Developer): 5/5
Accuracy (Front_End_Developer): 2/5
Accuracy (Web_Developer): 3/5
Accuracy (Java_Developer): 1/5
Accuracy (Network_Administrator): 2/5
Accuracy (Python_Developer): 4/5
Accuracy (Security_Analyst): 4/5
Accuracy (Systems_Administrator): 4/5
Accuracy (Project_manager): 3/5
Accuracy (Database_Administrator): 4/5
Accuracy (Software_Developer): 4/5
Accuracy (Front_End_Developer): 4/5
Accuracy (Web_Developer): 1/5
Accuracy (Java_Developer): 2/5
Accuracy (Network_Administrator): 2/5
Accuracy (Python_Developer): 4/5
Accuracy (Security_Analyst): 2/5
Accuracy (Systems_Administrator): 3/5
Accuracy (Project_manager): 3/5
Accuracy (Database_Administrator): 2/5
Accuracy (Software_Developer): 3/5
Accuracy (Front_End_Developer): 3/5
Accuracy (Web_Developer): 2/5
Accuracy (Java_Developer): 3/5
Accuracy (Network_Administrator): 3/

Epoch 4: 100%|██████████| 62/62 [21:46<00:00, 21.07s/batch, loss=144] 
100%|██████████| 62/62 [21:46<00:00, 21.07s/it]
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizer'.


Accuracy (Security_Analyst): 2/5
Accuracy (Systems_Administrator): 4/5
Accuracy (Project_manager): 1/5
Accuracy (Database_Administrator): 4/5
Accuracy (Software_Developer): 5/5
Accuracy (Front_End_Developer): 2/5
Accuracy (Web_Developer): 3/5
Accuracy (Java_Developer): 2/5
Accuracy (Network_Administrator): 2/5
Accuracy (Python_Developer): 5/5
Accuracy (Security_Analyst): 3/5
Accuracy (Systems_Administrator): 4/5
Accuracy (Project_manager): 3/5
Accuracy (Database_Administrator): 4/5
Accuracy (Software_Developer): 4/5
Accuracy (Front_End_Developer): 4/5
Accuracy (Web_Developer): 1/5
Accuracy (Java_Developer): 2/5
Accuracy (Network_Administrator): 2/5
Accuracy (Python_Developer): 4/5
Accuracy (Security_Analyst): 2/5
Accuracy (Systems_Administrator): 3/5
Accuracy (Project_manager): 3/5
Accuracy (Database_Administrator): 2/5
Accuracy (Software_Developer): 4/5
Accuracy (Front_End_Developer): 3/5
Accuracy (Web_Developer): 3/5
Accuracy (Java_Developer): 4/5
Accuracy (Network_Administrator): 3/

Epoch 5: 100%|██████████| 62/62 [21:23<00:00, 20.70s/batch, loss=112] 
100%|██████████| 62/62 [21:23<00:00, 20.70s/it]
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizer'.


Accuracy (Security_Analyst): 2/5
Accuracy (Systems_Administrator): 4/5
Accuracy (Project_manager): 2/5
Accuracy (Database_Administrator): 5/5
Accuracy (Software_Developer): 5/5
Accuracy (Front_End_Developer): 2/5
Accuracy (Web_Developer): 3/5
Accuracy (Java_Developer): 2/5
Accuracy (Network_Administrator): 2/5
Accuracy (Python_Developer): 5/5
Accuracy (Security_Analyst): 3/5
Accuracy (Systems_Administrator): 4/5
Accuracy (Project_manager): 2/5
Accuracy (Database_Administrator): 4/5
Accuracy (Software_Developer): 4/5
Accuracy (Front_End_Developer): 4/5
Accuracy (Web_Developer): 1/5
Accuracy (Java_Developer): 2/5
Accuracy (Network_Administrator): 2/5
Accuracy (Python_Developer): 5/5
Accuracy (Security_Analyst): 2/5
Accuracy (Systems_Administrator): 3/5
Accuracy (Project_manager): 3/5
Accuracy (Database_Administrator): 2/5
Accuracy (Software_Developer): 4/5
Accuracy (Front_End_Developer): 3/5
Accuracy (Web_Developer): 3/5
Accuracy (Java_Developer): 4/5
Accuracy (Network_Administrator): 3/

Epoch 6: 100%|██████████| 62/62 [29:01<00:00, 28.09s/batch, loss=91.4]
100%|██████████| 62/62 [29:01<00:00, 28.09s/it]
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizer'.


Accuracy (Security_Analyst): 1/5
Accuracy (Systems_Administrator): 4/5
Accuracy (Project_manager): 2/5
Accuracy (Database_Administrator): 5/5
Accuracy (Software_Developer): 5/5
Accuracy (Front_End_Developer): 2/5
Accuracy (Web_Developer): 3/5
Accuracy (Java_Developer): 2/5
Accuracy (Network_Administrator): 2/5
Accuracy (Python_Developer): 5/5
Accuracy (Security_Analyst): 3/5
Accuracy (Systems_Administrator): 4/5
Accuracy (Project_manager): 3/5
Accuracy (Database_Administrator): 4/5
Accuracy (Software_Developer): 4/5
Accuracy (Front_End_Developer): 4/5
Accuracy (Web_Developer): 1/5
Accuracy (Java_Developer): 2/5
Accuracy (Network_Administrator): 1/5
Accuracy (Python_Developer): 5/5
Accuracy (Security_Analyst): 2/5
Accuracy (Systems_Administrator): 3/5
Accuracy (Project_manager): 3/5
Accuracy (Database_Administrator): 3/5
Accuracy (Software_Developer): 4/5
Accuracy (Front_End_Developer): 3/5
Accuracy (Web_Developer): 3/5
Accuracy (Java_Developer): 4/5
Accuracy (Network_Administrator): 3/

Epoch 7: 100%|██████████| 62/62 [28:41<00:00, 27.76s/batch, loss=77.2]
100%|██████████| 62/62 [28:41<00:00, 27.76s/it]
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizer'.


Accuracy (Security_Analyst): 1/5
Accuracy (Systems_Administrator): 4/5
Accuracy (Project_manager): 2/5
Accuracy (Database_Administrator): 5/5
Accuracy (Software_Developer): 5/5
Accuracy (Front_End_Developer): 2/5
Accuracy (Web_Developer): 3/5
Accuracy (Java_Developer): 2/5
Accuracy (Network_Administrator): 2/5
Accuracy (Python_Developer): 5/5
Accuracy (Security_Analyst): 3/5
Accuracy (Systems_Administrator): 4/5
Accuracy (Project_manager): 2/5
Accuracy (Database_Administrator): 4/5
Accuracy (Software_Developer): 4/5
Accuracy (Front_End_Developer): 4/5
Accuracy (Web_Developer): 1/5
Accuracy (Java_Developer): 2/5
Accuracy (Network_Administrator): 1/5
Accuracy (Python_Developer): 4/5
Accuracy (Security_Analyst): 2/5
Accuracy (Systems_Administrator): 3/5
Accuracy (Project_manager): 3/5
Accuracy (Database_Administrator): 2/5
Accuracy (Software_Developer): 4/5
Accuracy (Front_End_Developer): 3/5
Accuracy (Web_Developer): 3/5
Accuracy (Java_Developer): 3/5
Accuracy (Network_Administrator): 3/

Epoch 8: 100%|██████████| 62/62 [32:03<00:00, 31.02s/batch, loss=81]  
100%|██████████| 62/62 [32:03<00:00, 31.02s/it]
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizer'.


Accuracy (Security_Analyst): 2/5
Accuracy (Systems_Administrator): 4/5
Accuracy (Project_manager): 2/5
Accuracy (Database_Administrator): 5/5
Accuracy (Software_Developer): 5/5
Accuracy (Front_End_Developer): 3/5
Accuracy (Web_Developer): 4/5
Accuracy (Java_Developer): 2/5
Accuracy (Network_Administrator): 2/5
Accuracy (Python_Developer): 5/5
Accuracy (Security_Analyst): 3/5
Accuracy (Systems_Administrator): 4/5
Accuracy (Project_manager): 2/5
Accuracy (Database_Administrator): 4/5
Accuracy (Software_Developer): 4/5
Accuracy (Front_End_Developer): 4/5
Accuracy (Web_Developer): 1/5
Accuracy (Java_Developer): 2/5
Accuracy (Network_Administrator): 1/5
Accuracy (Python_Developer): 4/5
Accuracy (Security_Analyst): 2/5
Accuracy (Systems_Administrator): 3/5
Accuracy (Project_manager): 3/5
Accuracy (Database_Administrator): 2/5
Accuracy (Software_Developer): 4/5
Accuracy (Front_End_Developer): 3/5
Accuracy (Web_Developer): 3/5
Accuracy (Java_Developer): 4/5
Accuracy (Network_Administrator): 3/

Epoch 9: 100%|██████████| 62/62 [28:31<00:00, 27.60s/batch, loss=44.6]
100%|██████████| 62/62 [28:31<00:00, 27.60s/it]
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizer'.


Accuracy (Security_Analyst): 2/5
Accuracy (Systems_Administrator): 4/5
Accuracy (Project_manager): 2/5
Accuracy (Database_Administrator): 5/5
Accuracy (Software_Developer): 5/5
Accuracy (Front_End_Developer): 3/5
Accuracy (Web_Developer): 3/5
Accuracy (Java_Developer): 2/5
Accuracy (Network_Administrator): 2/5
Accuracy (Python_Developer): 5/5
Accuracy (Security_Analyst): 3/5
Accuracy (Systems_Administrator): 4/5
Accuracy (Project_manager): 2/5
Accuracy (Database_Administrator): 4/5
Accuracy (Software_Developer): 4/5
Accuracy (Front_End_Developer): 4/5
Accuracy (Web_Developer): 1/5
Accuracy (Java_Developer): 2/5
Accuracy (Network_Administrator): 2/5
Accuracy (Python_Developer): 5/5
Accuracy (Security_Analyst): 2/5
Accuracy (Systems_Administrator): 3/5
Accuracy (Project_manager): 3/5
Accuracy (Database_Administrator): 2/5
Accuracy (Software_Developer): 4/5
Accuracy (Front_End_Developer): 3/5
Accuracy (Web_Developer): 3/5
Accuracy (Java_Developer): 4/5
Accuracy (Network_Administrator): 3/

Epoch 10: 100%|██████████| 62/62 [27:48<00:00, 26.91s/batch, loss=41]  
100%|██████████| 62/62 [27:48<00:00, 26.91s/it]
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizer'.


Accuracy (Security_Analyst): 2/5
Accuracy (Systems_Administrator): 4/5
Accuracy (Project_manager): 1/5
Accuracy (Database_Administrator): 5/5
Accuracy (Software_Developer): 5/5
Accuracy (Front_End_Developer): 3/5
Accuracy (Web_Developer): 3/5
Accuracy (Java_Developer): 2/5
Accuracy (Network_Administrator): 2/5
Accuracy (Python_Developer): 5/5
Accuracy (Security_Analyst): 3/5
Accuracy (Systems_Administrator): 4/5
Accuracy (Project_manager): 2/5
Accuracy (Database_Administrator): 4/5
Accuracy (Software_Developer): 4/5
Accuracy (Front_End_Developer): 4/5
Accuracy (Web_Developer): 1/5
Accuracy (Java_Developer): 2/5
Accuracy (Network_Administrator): 1/5
Accuracy (Python_Developer): 5/5
Accuracy (Security_Analyst): 2/5
Accuracy (Systems_Administrator): 4/5
Accuracy (Project_manager): 3/5
Accuracy (Database_Administrator): 2/5
Accuracy (Software_Developer): 4/5
Accuracy (Front_End_Developer): 3/5
Accuracy (Web_Developer): 3/5
Accuracy (Java_Developer): 4/5
Accuracy (Network_Administrator): 3/

Epoch 11: 100%|██████████| 62/62 [28:53<00:00, 27.96s/batch, loss=52.4]
100%|██████████| 62/62 [28:53<00:00, 27.96s/it]
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizer'.


Accuracy (Security_Analyst): 1/5
Accuracy (Systems_Administrator): 4/5
Accuracy (Project_manager): 2/5
Accuracy (Database_Administrator): 5/5
Accuracy (Software_Developer): 5/5
Accuracy (Front_End_Developer): 3/5
Accuracy (Web_Developer): 3/5
Accuracy (Java_Developer): 2/5
Accuracy (Network_Administrator): 1/5
Accuracy (Python_Developer): 5/5
Accuracy (Security_Analyst): 3/5
Accuracy (Systems_Administrator): 4/5
Accuracy (Project_manager): 2/5
Accuracy (Database_Administrator): 4/5
Accuracy (Software_Developer): 4/5
Accuracy (Front_End_Developer): 4/5
Accuracy (Web_Developer): 1/5
Accuracy (Java_Developer): 2/5
Accuracy (Network_Administrator): 1/5
Accuracy (Python_Developer): 5/5
Accuracy (Security_Analyst): 2/5
Accuracy (Systems_Administrator): 4/5
Accuracy (Project_manager): 3/5
Accuracy (Database_Administrator): 2/5
Accuracy (Software_Developer): 4/5
Accuracy (Front_End_Developer): 3/5
Accuracy (Web_Developer): 3/5
Accuracy (Java_Developer): 4/5
Accuracy (Network_Administrator): 3/

Epoch 12: 100%|██████████| 62/62 [46:00<00:00, 44.53s/batch, loss=38.6]
100%|██████████| 62/62 [46:00<00:00, 44.53s/it]
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizer'.


Accuracy (Security_Analyst): 1/5
Accuracy (Systems_Administrator): 4/5
Accuracy (Project_manager): 2/5
Accuracy (Database_Administrator): 5/5
Accuracy (Software_Developer): 5/5
Accuracy (Front_End_Developer): 3/5
Accuracy (Web_Developer): 3/5
Accuracy (Java_Developer): 2/5
Accuracy (Network_Administrator): 2/5
Accuracy (Python_Developer): 5/5
Accuracy (Security_Analyst): 3/5
Accuracy (Systems_Administrator): 4/5
Accuracy (Project_manager): 2/5
Accuracy (Database_Administrator): 4/5
Accuracy (Software_Developer): 4/5
Accuracy (Front_End_Developer): 3/5
Accuracy (Web_Developer): 1/5
Accuracy (Java_Developer): 2/5
Accuracy (Network_Administrator): 1/5
Accuracy (Python_Developer): 4/5
Accuracy (Security_Analyst): 2/5
Accuracy (Systems_Administrator): 4/5
Accuracy (Project_manager): 3/5
Accuracy (Database_Administrator): 2/5
Accuracy (Software_Developer): 4/5
Accuracy (Front_End_Developer): 3/5
Accuracy (Web_Developer): 3/5
Accuracy (Java_Developer): 4/5
Accuracy (Network_Administrator): 3/

In [19]:
# Collect garbage first: tensors released by the collector can only be
# returned from the CUDA cache if empty_cache() runs afterwards.
n_unreachable = gc.collect()
torch.cuda.empty_cache()
# Display the number of unreachable objects found by the collector.
n_unreachable

45