This notebook walks through an example of training a model for the translation task by fine-tuning BERT. Before running anything, connect to a GPU runtime.


Open this notebook in Colab: https://colab.research.google.com/github/noncuro/BERThoven/blob/master/Sample%20Notebook%20using%20BERT.ipynb

In [0]:
# Fetch the BERThoven repository and make the clone the working directory
# (presumably so that `bert_lib`, imported below, can be found -- confirm).
# Run once per fresh runtime: the clone target is a relative path, so
# re-running this cell from inside the clone would clone into a nested folder.
!git clone https://github.com/noncuro/BERThoven.git
%cd BERThoven

In [0]:
# Run this only once

!wget https://competitions.codalab.org/my/datasets/download/c748d2c0-d6be-4e36-9f12-ca0e88819c4d -O files.zip
!unzip files.zip
!pip install transformers

In [0]:
from bert_lib import * 
from tqdm import tqdm_notebook as tqdm
import transformers
from transformers import (AdamW,
                          get_linear_schedule_with_warmup,get_constant_schedule_with_warmup)
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import torch
from torch import nn
import torch.nn.functional as F

import scipy
import time
import sklearn
from sklearn import preprocessing, pipeline

HBox(children=(IntProgress(value=0, description='Downloading', max=569, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='Downloading', max=995526, style=ProgressStyle(description_wid…




In [0]:
# Set to False to explicitly opt out of the GPU and run on the CPU instead.
USE_GPU = True

if not USE_GPU:
    # Explicit opt-out: honour it instead of raising, otherwise the flag
    # could never be switched off without hitting the error below.
    print("USE_GPU is False: running on the CPU, which is likely to be much slower.")
    device = torch.device('cpu')
elif torch.cuda.is_available():
    device = torch.device('cuda')
else:
    # A GPU was requested but CUDA is not available: fail early rather than
    # silently falling back to the CPU.
    raise RuntimeError("We reaaaaaally recommend you use a GPU...")

In [0]:
# Load the train/dev split and the test set through the bert_lib helpers.
# NOTE(review): 1/8 is presumably the fraction held out for dev -- confirm
# against import_train_dev.
train_df, dev_df = import_train_dev(1/8)
test_df = import_file("test")

# Pipeline applied to the scores. Further preprocessing steps can be passed
# to make_pipeline as additional arguments.
score_preprocessor = pipeline.make_pipeline(preprocessing.MinMaxScaler())

# Data loaders over the original, unmodified dataset. All three share the
# same batch size and score preprocessor.
shared_loader_args = dict(batch_size=32, preprocessor=score_preprocessor)

# The training loader is created first and is the only one given fit=True
# (presumably this fits the preprocessor on the training scores -- confirm).
data_loader_train = get_data_loader(train_df, fit=True, **shared_loader_args)
data_loader_dev = get_data_loader(dev_df, **shared_loader_args)
data_loader_test = get_data_loader(test_df, test=True, **shared_loader_args)

In [0]:
# Keep only the training rows whose `src` and `mt` columns differ.
rows_differ = train_df["src"].ne(train_df["mt"])
train_df_without_sames = train_df[rows_differ]

# First and third quartile of the scores, taken over the full training
# frame (i.e. before the filter above).
q1, q3 = (train_df["scores"].quantile(q) for q in (0.25, 0.75))


def _below_first_quartile(score):
    """True for scores strictly below q1."""
    return score < q1


def _above_third_quartile(score):
    """True for scores strictly above q3."""
    return score > q3


# Create a data loader over a dataset that upsamples the low- and
# high-scoring rows selected by the two predicates.
# NOTE(review): how each predicate is used is decided inside
# augment_dataset -- confirm there.
train_df_upsampling = augment_dataset(
    train_df_without_sames,
    _below_first_quartile,
    _above_third_quartile,
)
data_loader_train_upsampled = get_data_loader(
    train_df_upsampling,
    batch_size=32,
    preprocessor=score_preprocessor,
)

In [0]:

# A dataloader that removes outliers:
# iqr = q3-q1
# train_df_downsampling = train_df_without_sames[train_df_without_sames.scores>q1-3*iqr]
# data_loader_train_downsampled = get_data_loader(train_df_downsampling, batch_size=32,preprocessor=score_preprocessor)

In [0]:
# A data loader with added masks, built from the training rows whose
# `src` and `mt` differ.
data_loader_train_masked = get_data_loader_masked(
    train_df_without_sames,
    batch_size=32,
    preprocessor=score_preprocessor,
)

In [0]:
model = BERThoven(cls=True, dropout=False, concat_outputs=False)

# Get a baseline for the model's metrics before any training.
check_accuracy(data_loader_dev, model, device=device, preprocessor=score_preprocessor)

# Fraction of the scheduled steps used for the learning-rate warm-up.
warmup_proportion = 0.1

# Smooth L1 switches between L1 and MSE at delta.
delta = 0.1


def loss_function(x, y):
    """Smooth L1 loss with the switch point moved from 1 to `delta`.

    Both arguments are divided by `delta` before the loss is taken, so the
    loss's switch at an absolute difference of 1 corresponds to a
    difference of `delta` between `x` and `y`; the result is then
    multiplied by `delta`.
    """
    return F.smooth_l1_loss(x / delta, y / delta) * delta


# The schedule length is the combined length of the three loaders that are
# trained on (one epoch each) later in this cell.
training_steps = sum(
    len(loader)
    for loader in (
        data_loader_train_upsampled,
        data_loader_train_masked,
        data_loader_train,
    )
)
warmup_steps = int(training_steps * warmup_proportion)

optimizer = AdamW(
    model.parameters(),
    lr=2e-5,
    eps=1e-8,
    correct_bias=True,
    weight_decay=0.1,
)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_steps,
    num_training_steps=training_steps,
)

def train(data_loader, epochs):
    """Run `train_part` on `data_loader` with this notebook's shared setup.

    The notebook-level `model`, `optimizer`, `scheduler`, `loss_function`,
    `score_preprocessor` and `device` are used, with `data_loader_dev` as
    the validation loader.

    Args:
        data_loader: loader to train on.
        epochs: forwarded to `train_part` as `epochs`.

    Returns:
        Whatever `train_part` returns when called with `return_losses=True`
        and `return_metrics=False`.
    """
    return train_part(
        model,
        data_loader,
        optimizer,
        scheduler,
        val_loader=data_loader_dev,
        epochs=epochs,
        val_every=3,
        return_losses=True,
        preprocessor=score_preprocessor,
        print_every=60,
        loss_function=loss_function,
        return_metrics=False,
        device=device,
    )

# Train for 1 epoch on each data loader, in this order:
# upsampled data, then masked data, then the original data.
for stage_loader in (
    data_loader_train_upsampled,
    data_loader_train_masked,
    data_loader_train,
):
    train(stage_loader, epochs=1)

In [0]:
# Get the labels for the test set from the model.
scores = get_test_labels(
    data_loader_test,
    model,
    device,
    preprocessor=score_preprocessor,
)

# Save the labels to a file.
writeScores(scores)