In [7]:
# Runtime setup: on Google Colab, mount Drive and switch into the project folder;
# anywhere else, keep the current working directory.

import os

try:
    # Importing google.colab fails with ImportError outside Colab, which routes
    # execution to the "Local" branch below.
    from google.colab import drive

    drive.mount('/content/drive')

    # The project root is the first directory (walking down from the current
    # one) that contains the marker file; stay in "." when no marker is found.
    os.chdir(
        next(
            (
                dirpath
                for dirpath, _, filenames in os.walk(".")
                if "dsait4090_project_location" in filenames
            ),
            ".",
        )
    )
    print(f'Google Colab: {os.getcwd()}')
except ImportError:
    print(f'Local: {os.getcwd()}')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Google Colab: /content/drive/Othercomputers/macbook_pro/fact-checking


In [8]:
%load_ext autoreload
%autoreload

from src.common import get_device, read_json, DATA_PATH
from src.quantemp_processor import QuantempProcessor, QT_VERACITY_LABELS
from src.models.gpt2 import Gpt2Tokenizer, Gpt2Classifier
from src.classification_training import ClassificationTraining

import torch
import logging
from torch import nn

# Optional debugging switches, left disabled. They rely on `os`, which is
# imported in the Colab setup cell, and should be set before any tensor work:
#   - PYTORCH_MPS_HIGH_WATERMARK_RATIO="0.0" removes PyTorch's upper bound on
#     MPS (Apple Silicon) memory allocations.
#   - CUDA_LAUNCH_BLOCKING="1" makes CUDA kernel launches synchronous, so an
#     error is reported at the call that caused it.
# os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"
# os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

# Configure the root logger at ERROR level so lower-severity records do not
# clutter the cell output.
logging.basicConfig(level=logging.ERROR)

# Select the compute device through the project helper.
# NOTE(review): the recorded run printed "CUDA: 1, use NVIDIA A100-SXM4-40GB"
# after this cell, presumably from get_device() -- confirm in src.common.
device = get_device()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
CUDA: 1, use NVIDIA A100-SXM4-40GB


In [9]:
# Read the QuanTemp train and validation claim files from DATA_PATH/raw_data.
train_data = read_json(f"{DATA_PATH}/raw_data/train_claims_quantemp.json")
val_data = read_json(f"{DATA_PATH}/raw_data/val_claims_quantemp.json")

# Optional cap on the number of examples per split, for quick smoke runs.
# Leave as None to use the full datasets; set to an int (e.g. 200) to keep
# only the first LIMIT entries of each split.
LIMIT = None

if LIMIT is not None:
    train_data = train_data[:LIMIT]
    val_data = val_data[:LIMIT]

In [10]:
%autoreload

tokenizer = Gpt2Tokenizer()
data_processor = QuantempProcessor(tokenizer, claim_decomposition=False)

train_dataset = data_processor.transform(train_data)
val_dataset = data_processor.transform(val_data)

  0%|          | 0/9935 [00:00<?, ?it/s]

  0%|          | 0/3084 [00:00<?, ?it/s]

In [11]:
%autoreload

model = Gpt2Classifier("gpt2", len(QT_VERACITY_LABELS), mlp_dim=1024, dropout=0.1).to(device)

In [23]:
%autoreload

training = ClassificationTraining(
    model_name="gpt2/basic",
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    model=model,
    optimizer=torch.optim.AdamW(model.parameters(), lr=2e-3, eps=1e-8),
    loss_function=nn.CrossEntropyLoss(),
    batch_size=16,
    early_stopping_patience=10,
    device=device,
    random_state=42
)

training.train(epochs=30)

Starting new training

EPOCH 1


train:   0%|          | 0/621 [00:00<?, ?it/s]

eval:   0%|          | 0/193 [00:00<?, ?it/s]

    train accuracy: 0.580
     eval accuracy: 0.582

    avg train loss: 0.981
     avg eval loss: 0.969

Saved checkpoint to models/gpt2/basic/01_acc_582

EPOCH 2


train:   0%|          | 0/621 [00:00<?, ?it/s]

eval:   0%|          | 0/193 [00:00<?, ?it/s]

    train accuracy: 0.580
     eval accuracy: 0.582

    avg train loss: 0.986
     avg eval loss: 0.971

Saved checkpoint to models/gpt2/basic/02_acc_582
Early stopping counter: 1/10

EPOCH 3


train:   0%|          | 0/621 [00:00<?, ?it/s]

eval:   0%|          | 0/193 [00:00<?, ?it/s]

    train accuracy: 0.581
     eval accuracy: 0.582

    avg train loss: 0.973
     avg eval loss: 0.977

Saved checkpoint to models/gpt2/basic/03_acc_582
Early stopping counter: 2/10

EPOCH 4


train:   0%|          | 0/621 [00:00<?, ?it/s]

eval:   0%|          | 0/193 [00:00<?, ?it/s]

    train accuracy: 0.581
     eval accuracy: 0.582

    avg train loss: 0.970
     avg eval loss: 0.974

Saved checkpoint to models/gpt2/basic/04_acc_582
Early stopping counter: 3/10

EPOCH 5


train:   0%|          | 0/621 [00:00<?, ?it/s]

KeyboardInterrupt: 