In [17]:
import torch
import json
from torch.utils.data import DataLoader, Dataset
from transformers import BertForQuestionAnswering, BertTokenizer

In [18]:
class QAInputExample:
    def __init__(self, question, context, answer=None):
        self.question = question
        self.context = context
        self.answer = answer

class QADataset(Dataset):
    def __init__(self, file_path):
        self.data = self.load_data(file_path)
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        example = self.data[idx]
        question = example.question
        context = example.context
        answer = example.answer

        # Tokenize the question and context
        encoded_inputs = self.tokenizer.encode_plus(
            question,
            context,
            add_special_tokens=True,
            truncation=True,
            padding='max_length',
            max_length=256,
            return_tensors='pt'
        )
        
        return {
            'input_ids': encoded_inputs['input_ids'].squeeze(),
            'attention_mask': encoded_inputs['attention_mask'].squeeze(),
            'start_positions': torch.tensor(answer['start']),
            'end_positions': torch.tensor(answer['end'])
        }
    
    def load_data(self, file_path):
        with open(file_path, 'r') as file:
            data = json.load(file)
        
        examples = []
        for item in data:
            question = item['question']
            context = item['context']
            answer = item['answer']
            example = QAInputExample(question, context, answer)
            examples.append(example)
        
        return examples


In [31]:
# Define the model architecture
model = BertForQuestionAnswering.from_pretrained('bert-base-uncased')

# Define your training parameters
batch_size = 16
num_epochs = 5
learning_rate = 2e-3

# Create a DataLoader for batching and shuffling the data
dataset = QADataset('../data/questionanswer.json')
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Set device (GPU if available, else CPU)
device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')

# Move the model to the device
model = model.to(device)

# Set the optimizer and loss function
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
criterion = torch.nn.CrossEntropyLoss()

# Training loop
model.train()
for epoch in range(num_epochs):
    total_loss = 0
    for batch in dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        start_positions = batch['start_positions'].to(device)
        end_positions = batch['end_positions'].to(device)
        
        # Zero the gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(input_ids, attention_mask=attention_mask, start_positions=start_positions, end_positions=end_positions)
        loss = outputs.loss
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    average_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{num_epochs} - Average Loss: {average_loss:.4f}")

# Save the trained model
model.save_pretrained('../models/question_answer')

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForQuestionAnswering: ['cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-base-uncased a

Epoch 1/5 - Average Loss: 5.4969
Epoch 2/5 - Average Loss: 5.5679
Epoch 3/5 - Average Loss: 5.6137
Epoch 4/5 - Average Loss: 5.5431
Epoch 5/5 - Average Loss: 5.5391


In [32]:
model = BertForQuestionAnswering.from_pretrained('../models/question_answer')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

context = "Dear Hiring Manager,\n\nThank you for considering my application. I hold a Bachelor's degree in Computer Science from XYZ University. During my studies, I gained a solid foundation in computer programming, algorithms, and software engineering principles. I also actively participated in various coding competitions and hackathons, which honed my problem-solving skills and ability to work under pressure."
question = "What is your educational background?"

encoded_input = tokenizer.encode_plus(question, context, add_special_tokens=True, truncation=True, max_length=256, return_tensors="pt")
input_ids = encoded_input["input_ids"]
attention_mask = encoded_input["attention_mask"]

outputs = model(input_ids, attention_mask=attention_mask)
start_scores = outputs.start_logits
end_scores = outputs.end_logits

all_tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
start_index = torch.argmax(start_scores)
end_index = torch.argmax(end_scores) + 1
answer_tokens = all_tokens[start_index:end_index]

# Remove special tokens ([CLS] and [SEP]) from answer tokens
# answer_tokens = [token for token in answer_tokens if token not in tokenizer.all_special_tokens]

answer = tokenizer.convert_tokens_to_string(answer_tokens)

print("Answer:", answer)


Answer: ##z university . during my studies , i gained a solid foundation in computer programming , algorithms , and software engineering principles . i also actively participated in
