In [1]:
import sys
sys.path.append('../structural-probes/')
import torch
import numpy as np
from pytorch_lightning import Trainer
from pytorch_lightning.loggers.comet import CometLogger
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

from utils.setup_runs import parse_args, get_default_args, get_comet_key
from finetune_bert_module import SST_Test

import os
import random

INFO:transformers.file_utils:PyTorch version 1.5.0 available.
INFO:transformers.file_utils:TensorFlow version 2.1.0 available.


In [2]:
# Overrides handed to get_default_args, which returns the (hparams, args) pair
# used by every later cell.
# NOTE(review): the recorded run of this notebook ends in a CUDA out-of-memory
# error on a ~6 GiB GPU; batch_size=48 may be too large for that card - confirm.
desired_hparams = dict(lr=7e-3, batch_size=48)
desired_params = dict()
hparams, args = get_default_args(desired_hparams, desired_params)

In [3]:
# Request one GPU from the Trainer only when args.device is exactly the plain
# 'cuda' device; in every other case zero GPUs are requested.
# NOTE(review): an indexed device such as 'cuda:0' does not compare equal to
# torch.device('cuda') - confirm args.device is never indexed.
num_gpus = 1 if args.device == torch.device('cuda') else 0

# Debug mode forces the log level to "debug".
if args.debug:
    args.log_level = "debug"

In [4]:
# Seed every random number generator in use with the same configured value so
# that repeated runs start from the same random state.
random.seed(hparams.seed)        # Python stdlib RNG
np.random.seed(hparams.seed)     # NumPy global RNG
torch.manual_seed(hparams.seed)  # PyTorch RNG

In [5]:
# ---------------------------------------------------------------
# Comet experiment logger
# ---------------------------------------------------------------

# The API key comes from the project helper rather than being hardcoded here.
comet_key = get_comet_key()
comet_logger = CometLogger(
    api_key=comet_key,
    workspace="mykobob",
    project_name="structural-probes-extension",
    experiment_name=args.run_name,
)

# Record the hyperparameters, then name and tag the underlying experiment.
comet_logger.log_hyperparams(hparams)
comet_logger.experiment.set_name(args.run_name)
comet_logger.experiment.add_tag("sst-tests")

INFO:lightning:CometLogger will be initialized in online mode
COMET INFO: Experiment is live on comet.ml https://www.comet.ml/mykobob/structural-probes-extension/1d8f1b6ff76341259210aa48cb1fff5c



In [6]:
# ---------------------------------------------------------------
# Checkpointing callback
# ---------------------------------------------------------------

# Checkpoints for this run are written under lightning_logs/<run_name>/checkpoints.
checkpoint_dir = os.path.join("lightning_logs", args.run_name, "checkpoints")

# Keep the args.num_saved_models best checkpoints, ranked by lowest val_loss.
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_dir,
    save_top_k=args.num_saved_models,
    verbose=True,
    monitor="val_loss",
    mode="min",
)

In [7]:
# Early stopping is optional: a truthy args.early_stopping enables it and is
# also used as the patience value; otherwise no callback is created (None).
early_stopping = (
    EarlyStopping(
        monitor='val_loss',
        min_delta=0.00,
        patience=args.early_stopping,
        verbose=False,
        mode='min',
    )
    if args.early_stopping
    else None
)

In [8]:
# ---------------------------------------------------------------
# Model construction
# ---------------------------------------------------------------

# Flip to True to load a saved checkpoint instead of building a fresh model.
testing = False

if not testing:
    # Fresh model built from the parsed arguments, moved to the target device.
    model = SST_Test(args, hparams).to(args.device)
else:
    # Restore weights from a previously saved checkpoint.
    model = SST_Test.load_from_metrics(
        weights_path='lightning_logs/regression_training/_ckpt_epoch_19',
        map_location=None,
    )

comet_logger.experiment.add_tag("SST")

INFO:transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-vocab.txt from cache at /home/mli/.cache/torch/transformers/cee054f6aafe5e2cf816d2228704e326446785f940f5451a5b26033516a4ac3d.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1
INFO:transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-vocab.txt from cache at /home/mli/.cache/torch/transformers/cee054f6aafe5e2cf816d2228704e326446785f940f5451a5b26033516a4ac3d.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1
INFO:transformers.configuration_utils:loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-config.json from cache at /home/mli/.cache/torch/transformers/90deb4d9dd705272dc4b3db1364d759d551d72a9f70a91f60e3a1f5e278b985d.9019d8d0ae95e32b896211ae7ae130d7c36bb19ccf35c90a9e51923309458f70
INFO:transformers.configuration_utils:Model config BertC

In [9]:
###############
# Create Trainer with specified attributes
###############

# `max_epochs` replaces the deprecated `max_nb_epochs` keyword; every other
# argument is passed exactly as before.
trainer = Trainer(
    # In debug mode, run only a quick smoke test instead of full training.
    fast_dev_run=args.debug,
    max_epochs=hparams.epochs,
    gpus=num_gpus,
    # Fractions of the train / validation data to use, taken from hparams.
    train_percent_check=hparams.train_pct,
    val_percent_check=hparams.val_pct,
    checkpoint_callback=checkpoint_callback,
    # None when early stopping is not configured.
    early_stop_callback=early_stopping,
    logger=comet_logger,
)

INFO:lightning:GPU available: True, used: True
INFO:lightning:CUDA_VISIBLE_DEVICES: [0]


In [10]:
# Train by default; when the `testing` flag is set, run evaluation on the
# loaded model instead.
if not testing:
    trainer.fit(model)
else:
    trainer.test(model)

INFO:lightning:
    | Name                                                  | Type                          | Params
----------------------------------------------------------------------------------------------------
0   | bert                                                  | BertForSequenceClassification | 333 M 
1   | bert.bert                                             | BertModel                     | 333 M 
2   | bert.bert.embeddings                                  | BertEmbeddings                | 30 M  
3   | bert.bert.embeddings.word_embeddings                  | Embedding                     | 29 M  
4   | bert.bert.embeddings.position_embeddings              | Embedding                     | 524 K 
5   | bert.bert.embeddings.token_type_embeddings            | Embedding                     | 2 K   
6   | bert.bert.embeddings.LayerNorm                        | LayerNorm                     | 2 K   
7   | bert.bert.embeddings.dropout                          | Dropout      

Validation dataset has 1101 examples
INFO:finetune_bert_module:Validation dataset has 1101 examples


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…

Training dataset has 8544 examples
INFO:finetune_bert_module:Training dataset has 8544 examples
Validation dataset has 1101 examples
INFO:finetune_bert_module:Validation dataset has 1101 examples


Val loss: 0.068


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

RuntimeError: CUDA out of memory. Tried to allocate 184.00 MiB (GPU 0; 5.93 GiB total capacity; 4.44 GiB already allocated; 122.75 MiB free; 4.47 GiB reserved in total by PyTorch)

In [None]:
# Directory that receives the fine-tuned BERT weights for this run.
model_path = f'../best_models/{args.run_name}'

# Create the directory together with any missing parents. An already existing
# directory is fine (exist_ok=True); any other failure (e.g. permissions, or
# the path being a regular file) is raised here instead of being silently
# swallowed and only surfacing later inside save_pretrained.
os.makedirs(model_path, exist_ok=True)

model.bert.save_pretrained(model_path)