In [1]:
import pandas as pd
import torch
import torch.nn as nn
from transformers import BertTokenizer, BertForQuestionAnswering, AdamW
from torch.utils.data import DataLoader, Dataset

  from .autonotebook import tqdm as notebook_tqdm
2023-07-19 22:57:07.403977: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Step 1: Data Loading and Preprocessing
class MohlerDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_length):
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        question = self.data['question'][index]
        answer = self.data['student_answer'][index]
        scores = self.data[['score_me', 'score_other', 'score_avg']].iloc[index].values

        # Tokenize the input and encode the target answer
        inputs = self.tokenizer.encode_plus(question, answer, add_special_tokens=True, max_length=self.max_length, return_tensors='pt', padding='max_length', truncation=True)

        input_ids = inputs['input_ids'][0]
        attention_mask = inputs['attention_mask'][0]

        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'scores': torch.tensor(scores, dtype=torch.float32)
        }

# Step 2: Load the Mohler Dataset
df = pd.read_csv('mohler_dataset.csv')

# Step 3: Load BERT Model and Tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForQuestionAnswering.from_pretrained('bert-base-uncased')

# Step 4: Hyperparameters and Configurations
batch_size = 16
max_length = 256
num_epochs = 5
learning_rate = 2e-5

# Step 5: Prepare DataLoader
dataset = MohlerDataset(df, tokenizer, max_length)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Step 6: Fine-tuning the Model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

optimizer = AdamW(model.parameters(), lr=learning_rate)

loss_function = nn.MSELoss()

for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for batch in data_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        scores = batch['scores'].to(device)

        optimizer.zero_grad()

        outputs = model(input_ids, attention_mask=attention_mask)
        predicted_scores = outputs.logits.squeeze(1)

        loss = loss_function(predicted_scores, scores)
        loss.backward()

        optimizer.step()

        total_loss += loss.item()

    average_loss = total_loss / len(data_loader)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {average_loss:.4f}")

# Step 7: Evaluation (Optional)
# If you have a separate evaluation dataset, you can evaluate the model's performance here.

# Step 8: Save the Fine-Tuned Model
model.save_pretrained("fine_tuned_bert_mohler")

# Step 9: Deployment
# You can use the fine-tuned model for question-answering and answer scoring in your application.


Downloading (…)solve/main/vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 526kB/s]
Downloading (…)okenizer_config.json: 100%|██████████| 28.0/28.0 [00:00<00:00, 152kB/s]
Downloading (…)lve/main/config.json: 100%|██████████| 570/570 [00:00<00:00, 3.40MB/s]
Downloading model.safetensors: 100%|██████████| 440M/440M [01:59<00:00, 3.70MB/s] 
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForQuestionAnswering: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initial