# Prose Kaleidoscopes - BERT Experiment


In [1]:
# Imports
import copy
import os
import re
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# If Jupyter complains that torch is an unknown module, try:
# conda install pytorch torchvision -c pytorch
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

# Transformers
from transformers import BertTokenizer, BertForSequenceClassification, BertConfig
from transformers import BertModel

%matplotlib inline

# Own files
import dataset_io

# These params should never change
NUM_EPOCHS = 4  # passed as `epochs` to initialize_model() and train_bert()
SEED = 42  # passed to set_seed() at the start of run_and_eval()
BATCH_SIZE = 8  # batch size of every DataLoader built by create_dataloader()
LEARNING_RATE = 4e-5  # optimizer learning rate (see initialize_model)
NUM_WARMUP_STEPS = 100  # warm-up steps of the linear learning-rate schedule
MAX_SEQ_LEN = 512  # length every text is truncated/padded to when tokenized

# Constants
# Dataset identifiers compared against `dataset_type` throughout the notebook.
DATASET_TYPE_IMDB = "imdb"
DATASET_TYPE_AMAZON = "amazon"
DATASET_TYPE_SST2 = "sst2"

# Experiment parameters.
IS_FINE_TUNING = False
# FREEZE_BERT is forwarded to BertClassifier(freeze_bert=...), which disables
# gradients on the BERT encoder when it is True.
# NOTE(review): FREEZE_BERT mirrors IS_FINE_TUNING, so with
# IS_FINE_TUNING = False the encoder is NOT frozen and its weights are updated
# during training. BertClassifier documents freeze_bert=False as "fine-tune",
# so confirm whether this was meant to be `not IS_FINE_TUNING`.
FREEZE_BERT = IS_FINE_TUNING


In [2]:
# Tokenizer shared by every call to preprocessing_for_bert().
BERT_TOKENIZER = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

# Pick the device the model and every batch are moved to: the CPU when CUDA
# is unavailable, otherwise the default CUDA device.
if not torch.cuda.is_available():
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")
else:
    device = torch.device("cuda")
    print(f'There are {torch.cuda.device_count()} GPU(s) available.')
    print('Device name:', torch.cuda.get_device_name(0))

No GPU available, using the CPU instead.


In [3]:
def normalize_list_labels(lst, dataset_type):
    """Normalize a plain list of raw labels for the given dataset type.

    @param    lst: iterable of raw labels.
    @param    dataset_type (str): one of the DATASET_TYPE_* constants.
    @return   For IMDB, a list with 0 for "negative" and 1 for anything else.
              For Amazon, a list where each rating `v` becomes (v - 1) / 5.
              For every other dataset type, `lst` itself, unchanged.
    """
    if dataset_type == DATASET_TYPE_IMDB:
        # Labels are either "positive" or "negative"
        return [int(v != "negative") for v in lst]

    if dataset_type == DATASET_TYPE_AMAZON:
        # Labels are in the range [1, 5]
        step = 1.0 / 5
        return [(v - 1) * step for v in lst]

    # SST-2 are 0 (negative) or 1 (positive).
    return lst

def normalize_labels(df, label_field):
    """Return the label column of `df` as zero-based integer class ids.

    The dataset is recognised from the label values themselves:

    * IMDB: every label is "positive" or "negative" -> 0 (negative) / 1 (positive).
    * Amazon: every label is a rating in 1..5 and at least one rating is
      greater than 1 -> rating - 1, i.e. classes 0..4.
    * Anything else (e.g. SST-2, already 0/1) is returned unchanged.

    Unlike a check on "exactly N distinct values", this also works for small
    splits in which a sentiment or a star rating happens to be absent. A column
    that only contains the value 1 is ambiguous (SST-2 positives vs. Amazon
    one-star reviews) and is left unchanged.

    @param    df (pd.DataFrame): frame holding the label column.
    @param    label_field (str): name of the label column.
    @return   np.ndarray with one entry per row of `df`.
    """
    labels = getattr(df, label_field).to_numpy()
    if len(labels) == 0:
        # Nothing to inspect; avoid indexing into an empty array.
        return labels

    distinct = set(labels.tolist())

    # Fix IMDB labels: 0 is neg, 1 is pos.
    if distinct <= {"positive", "negative"}:
        return (labels == "positive").astype(int)

    # Fix Amazon labels: shift ratings 1..5 down to classes 0..4.
    if distinct <= {1, 2, 3, 4, 5} and max(distinct) > 1:
        return labels - 1

    return labels

# Create a function to tokenize a set of texts
def preprocessing_for_bert(data):
    """Perform required preprocessing steps for pretrained BERT.

    Every text is tokenized with the module-level BERT_TOKENIZER, wrapped in
    `[CLS]` / `[SEP]`, and truncated or padded to exactly MAX_SEQ_LEN tokens.

    @param    data (np.array): Array of texts to be processed.
    @return   input_ids (torch.Tensor): Tensor of token ids to be fed to a model.
    @return   attention_masks (torch.Tensor): Tensor of indices specifying which
                  tokens should be attended to by the model.
    """
    max_len = MAX_SEQ_LEN

    # Create empty lists to store outputs
    input_ids = []
    attention_masks = []

    # For every sentence...
    for sent in data:
        # `encode_plus` will:
        #    (1) Tokenize the sentence
        #    (2) Add the `[CLS]` and `[SEP]` token to the start and end
        #    (3) Truncate/Pad sentence to max length
        #    (4) Map tokens to their IDs
        #    (5) Create attention mask
        #    (6) Return a dictionary of outputs
        encoded_sent = BERT_TOKENIZER.encode_plus(
            text=sent,
            add_special_tokens=True,        # Add `[CLS]` and `[SEP]`
            max_length=max_len,             # Max length to truncate/pad
            truncation=True,
            # `padding='max_length'` is the supported spelling of the
            # deprecated `pad_to_max_length=True`.
            padding='max_length',           # Pad sentence to max length
            return_attention_mask=True      # Return attention mask
            )

        # Add the outputs to the lists
        input_ids.append(encoded_sent.get('input_ids'))
        attention_masks.append(encoded_sent.get('attention_mask'))

    # Convert lists to tensors. The dtype is pinned so that the result is an
    # integer tensor even when `data` is empty.
    input_ids = torch.tensor(input_ids, dtype=torch.long)
    attention_masks = torch.tensor(attention_masks, dtype=torch.long)

    return input_ids, attention_masks

def create_dataloader(inputs, masks, labels, is_val=False):
    """Bundle token ids, attention masks and labels into a batched DataLoader.

    @param    inputs, masks, labels (torch.Tensor): tensors wrapped together in
                  a TensorDataset.
    @param    is_val (bool): when True the data is visited in order
                  (SequentialSampler); otherwise it is shuffled (RandomSampler).
    @return   (dataset, sampler, dataloader); batches hold BATCH_SIZE examples.
    """
    dataset = TensorDataset(inputs, masks, labels)
    if is_val:
        sampler = SequentialSampler(dataset)
    else:
        sampler = RandomSampler(dataset)
    loader = DataLoader(dataset, sampler=sampler, batch_size=BATCH_SIZE)
    return dataset, sampler, loader


## Create Models

In [4]:
%%time
import torch
import torch.nn as nn

# Create the BertClassifier class
class BertClassifier(nn.Module):
    """Bert Model for Classification Tasks.

    A pretrained `bert-base-uncased` encoder followed by a small feed-forward
    head that maps the `[CLS]` representation to `num_classes` raw scores.
    """
    def __init__(self, num_classes, freeze_bert=False):
        """
        @param    num_classes (int): number of output classes of the classifier head
        @param    freeze_bert (bool): Set `False` to fine-tune the BERT model;
                      `True` disables gradients on every BERT parameter so only
                      the classifier head is trained
        """
        super(BertClassifier, self).__init__()
        # Specify hidden size of BERT, hidden size of our classifier, and number of labels
        D_in, H, D_out = 768, 50, num_classes

        # Instantiate BERT model
        self.bert = BertModel.from_pretrained('bert-base-uncased')

        if torch.cuda.is_available():
            self.bert.cuda()

        # Instantiate the feed-forward classifier head
        self.classifier = nn.Sequential(
            nn.Linear(D_in, H),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(H, D_out)
        )
        # Kept only so existing references to `model.sig` keep working; it is
        # intentionally no longer applied in forward() (see the note there).
        self.sig = torch.sigmoid

        # Freeze the BERT model
        if freeze_bert:
            for param in self.bert.parameters():
                param.requires_grad = False

    def forward(self, input_ids, attention_mask):
        """
        Feed input to BERT and the classifier to compute logits.
        @param    input_ids (torch.Tensor): an input tensor with shape (batch_size,
                      max_length)
        @param    attention_mask (torch.Tensor): a tensor that hold attention mask
                      information with shape (batch_size, max_length)
        @return   logits (torch.Tensor): an output tensor with shape (batch_size,
                      num_labels) holding raw, unnormalized class scores
        """
        # Feed input to BERT
        outputs = self.bert(input_ids=input_ids,
                            attention_mask=attention_mask)

        # Extract the last hidden state of the token `[CLS]` for classification task
        last_hidden_state_cls = outputs[0][:, 0, :]

        # Feed input to classifier to compute logits.
        # The raw scores are returned as-is: nn.CrossEntropyLoss (training) and
        # F.softmax (prediction) both expect unnormalized logits. Squashing them
        # through a sigmoid first confines every score to (0, 1), which caps how
        # confident the softmax can become and puts a floor under the loss.
        logits = self.classifier(last_hidden_state_cls)

        return logits


CPU times: user 30 µs, sys: 1 µs, total: 31 µs
Wall time: 33.1 µs


In [6]:
# Optimizer and Learning Rate Scheduler

from transformers import AdamW, get_linear_schedule_with_warmup

def initialize_model(num_classes, train_dataloader, epochs=4):
    """Initialize the Bert Classifier, the optimizer and the learning rate scheduler.

    @param    num_classes (int): number of classes of the classifier head.
    @param    train_dataloader: training DataLoader; only its length (number of
                  batches per epoch) is used here.
    @param    epochs (int): number of epochs the schedule should span.
    @return   (model, optimizer, scheduler)
    """
    # Build the classifier and move it to the selected device.
    model = BertClassifier(num_classes, freeze_bert=FREEZE_BERT)
    model.to(device)

    # AdamW over all model parameters.
    optimizer = AdamW(model.parameters(), lr=LEARNING_RATE, eps=1e-8)

    # One scheduler step is taken per batch, so the schedule spans
    # (batches per epoch) * epochs steps.
    steps_per_epoch = len(train_dataloader)
    total_steps = steps_per_epoch * epochs
    print("len(train): {0}, epochs: {1}, total_steps: {2}".format(steps_per_epoch, epochs, total_steps))

    # Linear warm-up followed by linear decay.
    # NOTE(review): NUM_WARMUP_STEPS is 100 while the logged run reports
    # total_steps: 28, so training ends before warm-up does. Confirm this
    # is intended.
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=NUM_WARMUP_STEPS,
        num_training_steps=total_steps,
    )
    return model, optimizer, scheduler


In [7]:
# Train and Evaluate
import random
import time

# Specify loss function
# Shared by train_bert() and evaluate(): both call it with the model output
# for a batch and that batch's label tensor.
loss_fn = nn.CrossEntropyLoss()

def set_seed(seed_value=42):
    """Seed Python's `random`, NumPy and PyTorch (CPU and all CUDA devices)
    with the same value for reproducibility.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_with in seeders:
        seed_with(seed_value)

def train_bert(model, train_dataloader, optimizer, scheduler, val_dataloader=None, epochs=4, evaluation=False):
    """Train the BertClassifier model.

    Runs `epochs` passes over `train_dataloader`. A progress row is printed
    every 20 batches and after the last batch of each epoch. When `evaluation`
    is True, `evaluate(model, val_dataloader)` is run after every epoch and its
    loss/accuracy are printed next to the epoch's average training loss.
    """
    table_header = f"{'Epoch':^7} | {'Batch':^7} | {'Train Loss':^12} | {'Val Loss':^10} | {'Val Acc':^9} | {'Elapsed':^9}"
    table_rule = "-"*70

    print("Start training...\n")
    for epoch_i in range(epochs):
        epoch_no = epoch_i + 1

        # ---------------------------- Training ----------------------------
        print(table_header)
        print(table_rule)

        # Timers: one for the whole epoch, one for the current reporting window.
        epoch_started = time.time()
        window_started = time.time()

        # Loss accumulators: whole epoch vs. current reporting window.
        epoch_loss = 0
        window_loss = 0
        window_batches = 0

        model.train()
        final_step = len(train_dataloader) - 1

        for step, batch in enumerate(train_dataloader):
            window_batches += 1

            # Move the batch to the training device.
            b_input_ids, b_attn_mask, b_labels = [t.to(device) for t in batch]

            # Forward pass on fresh gradients.
            model.zero_grad()
            logits = model(b_input_ids, b_attn_mask)

            loss = loss_fn(logits, b_labels)
            loss_value = loss.item()
            window_loss += loss_value
            epoch_loss += loss_value

            # Backward pass; gradient norm is clipped to 1.0 to prevent
            # "exploding gradients".
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            # Update parameters, then the learning rate.
            optimizer.step()
            scheduler.step()

            # Report every 20 batches (but not at batch 0) and at the last batch.
            should_report = (step == final_step) or (step != 0 and step % 20 == 0)
            if should_report:
                window_elapsed = time.time() - window_started
                print(f"{epoch_no:^7} | {step:^7} | {window_loss / window_batches:^12.6f} | {'-':^10} | {'-':^9} | {window_elapsed:^9.2f}")

                # Start a new reporting window.
                window_loss = 0
                window_batches = 0
                window_started = time.time()

        # Average loss over every batch of the epoch.
        avg_train_loss = epoch_loss / len(train_dataloader)

        print(table_rule)

        # --------------------------- Evaluation ---------------------------
        if evaluation == True:
            val_loss, val_accuracy = evaluate(model, val_dataloader)
            epoch_elapsed = time.time() - epoch_started

            print(f"{epoch_no:^7} | {'-':^7} | {avg_train_loss:^12.6f} | {val_loss:^10.6f} | {val_accuracy:^9.2f} | {epoch_elapsed:^9.2f}")
            print(table_rule)
        print("\n")

    print("Training complete!")


def evaluate(model, val_dataloader):
    """After the completion of each training epoch, measure the model's performance
    on our validation set.

    Loss and accuracy are averaged over *examples*, not over batches, so a
    final batch that is smaller than the others does not get extra weight.

    @param    model: module called as `model(input_ids, attention_mask)`.
    @param    val_dataloader: iterable of (input_ids, attention_mask, labels) batches.
    @return   (val_loss, val_accuracy): mean loss per example and accuracy in
              percent. Both are NaN when the dataloader yields no examples.
    """
    # Put the model into the evaluation mode. The dropout layers are disabled during
    # the test time.
    model.eval()

    # Running totals over the whole validation set
    loss_sum = 0.0
    correct = 0
    seen = 0

    # For each batch in our validation set...
    for batch in val_dataloader:
        # Load batch to GPU
        b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)

        # Compute logits and loss without tracking gradients
        with torch.no_grad():
            logits = model(b_input_ids, b_attn_mask)
            loss = loss_fn(logits, b_labels)

        # loss_fn returns the mean loss of the batch, so scale it back up by the
        # batch size before accumulating.
        batch_size = b_labels.size(0)
        loss_sum += loss.item() * batch_size

        # Get the predictions and count the correct ones
        preds = torch.argmax(logits, dim=1).flatten()
        correct += (preds == b_labels).sum().item()
        seen += batch_size

    if seen == 0:
        return float("nan"), float("nan")

    # Compute the average accuracy and loss over the validation set.
    val_loss = loss_sum / seen
    val_accuracy = 100.0 * correct / seen

    return val_loss, val_accuracy

## Evaluation Methods

In [8]:
import torch.nn.functional as F
import sys

def bert_predict(model, test_dataloader):
    """Perform a forward pass on the trained BERT model to predict probabilities
    on the test set.

    A one-line text progress bar is rewritten on stdout every 10 batches.

    @return   np.ndarray of softmax probabilities, one row per test example.
    """
    # Evaluation mode: the dropout layers are disabled.
    model.eval()

    bar_width = 70
    total_batches = len(test_dataloader)
    collected = []

    for batch_idx, batch in enumerate(test_dataloader):
        if batch_idx % 10 == 0:
            filled = int(bar_width * batch_idx / total_batches)
            progress_line = "Predicting batch {0} / {1} [{2}{3}]\r".format(
                batch_idx, total_batches, "=" * filled, "." * (bar_width - filled))
            sys.stdout.write(progress_line)
            sys.stdout.flush()

        # Move the batch to the device; only ids and mask are needed here.
        on_device = tuple(t.to(device) for t in batch)
        b_input_ids, b_attn_mask = on_device[:2]

        with torch.no_grad():
            collected.append(model(b_input_ids, b_attn_mask))

    # Stack the per-batch outputs and turn them into probabilities.
    stacked = torch.cat(collected, dim=0)
    return F.softmax(stacked, dim=1).cpu().numpy()

## Metric Methods

In [9]:
from sklearn.metrics import accuracy_score, roc_curve, auc

def get_roc_metrics(probs, y_true, num_classes, dataset_type):
    """Compute ROC/AUC and accuracy for a set of predictions.

    Two-class datasets (anything but Amazon):
        `probs` is a 1-D sequence of scores and `y_true` the matching labels.
        Returns a dict with keys, in this order: 'fpr', 'tpr', 'roc_auc',
        'accuracy'. A score counts as class 1 when it is >= 1 / num_classes.

    Amazon (multi-class):
        `probs` has one row per example and one column per class. `y_true` is
        either the class ids or their one-hot encoding. Returns a dict with
        'accuracy' plus 'fpr_<k>', 'tpr_<k>' and 'roc_auc_<k>' for every class
        index k and for the "micro" and "macro" averages.

    @param    probs: predicted scores (see above).
    @param    y_true: true labels (see above).
    @param    num_classes (int): number of classes.
    @param    dataset_type (str): one of the DATASET_TYPE_* constants.
    """
    # np.array() copies, so nothing below can modify the caller's data.
    preds = np.array(probs)

    if dataset_type != DATASET_TYPE_AMAZON:
        fpr, tpr, _ = roc_curve(y_true, preds)
        roc_auc = auc(fpr, tpr)

        # Get accuracy over the test set
        y_pred = np.where(preds >= 1.0 / num_classes, 1, 0)
        accuracy = accuracy_score(y_true, y_pred)
        # Key order matters: evaluate_roc_twoclass unpacks the values positionally.
        return {
            'fpr' : fpr,
            'tpr' : tpr,
            'roc_auc' : roc_auc,
            'accuracy' : accuracy
        }

    # One-hot encode the truth. If a class never occurs the encoding has too
    # few columns, which would make the per-class indexing below fail, so pad
    # it to one column per class id 0..num_classes-1.
    truth = pd.get_dummies(y_true)
    if truth.shape[1] != num_classes:
        truth = truth.reindex(columns=range(num_classes), fill_value=0)
    y_true = truth.to_numpy().astype(int)

    fpr = dict()
    tpr = dict()
    roc_auc = dict()

    # Per-class (one-vs-rest) curves.
    for i in range(num_classes):
        fpr[i], tpr[i], _ = roc_curve(y_true[:, i], preds[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Micro average: pool every (example, class) decision into one curve.
    fpr["micro"], tpr["micro"], _ = roc_curve(y_true.ravel(), preds.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    # Macro average: interpolate every class curve onto the union of all
    # false-positive rates and take the mean.
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(num_classes)]))
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(num_classes):
        mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
    mean_tpr /= num_classes
    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

    # Mark the highest-scoring class of every row with 1 and compare the
    # resulting indicator matrix against the one-hot truth.
    row_maxes = preds.max(axis=1).reshape(-1, 1)
    y_pred = np.where(preds == row_maxes, 1, 0)
    accuracy = accuracy_score(y_true, y_pred)

    metrics = {'accuracy' : accuracy}
    for k in fpr.keys():
        metrics["fpr_{0}".format(k)] = fpr[k]
        metrics["tpr_{0}".format(k)] = tpr[k]
        metrics["roc_auc_{0}".format(k)] = roc_auc[k]
    return metrics
    
def evaluate_roc_twoclass(probs, y_true, num_classes, dataset_type):
    """
    - Print AUC and accuracy on the test set
    - Plot ROC
    @params    probs (np.array): an array of predicted probabilities with shape (len(y_true), 2)
    @params    y_true (np.array): an array of the true values with shape (len(y_true),)
    """
    # Reduce every probability row to the index of its largest entry.
    # NOTE(review): the ROC curve is therefore built from hard class labels,
    # not from the positive-class probability. Confirm this is intended.
    hard_labels = list(map(np.argmax, probs))

    # get_roc_metrics returns fpr, tpr, roc_auc, accuracy in exactly this order.
    fpr, tpr, roc_auc, accuracy = get_roc_metrics(
        hard_labels, y_true, num_classes, dataset_type).values()

    print(f'AUC: {roc_auc:.4f}')
    print(f'Accuracy: {accuracy*100:.4f}%')

    # Plot ROC AUC
    plt.title('Receiver Operating Characteristic')
    plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)
    plt.legend(loc = 'lower right')
    plt.plot([0, 1], [0, 1],'r--')  # chance diagonal
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.show()
    
def evaluate_roc_multiclass(probs, y_true, num_classes, dataset_type):
    """Print the AUCs and accuracy of a multi-class prediction and plot its ROC curves.

    Draws the micro- and macro-averaged curves plus one curve per class.

    @param    probs: predicted class probabilities, one row per example.
    @param    y_true: true labels, forwarded unchanged to get_roc_metrics.
    @param    num_classes (int): number of classes.
    @param    dataset_type (str): one of the DATASET_TYPE_* constants.
    """
    metrics = get_roc_metrics(probs, y_true, num_classes, dataset_type)
    ks = ["macro", "micro"] + list(range(num_classes))
    fpr = { k : metrics["fpr_{0}".format(k)] for k in ks }
    tpr = { k : metrics["tpr_{0}".format(k)] for k in ks }
    roc_auc = { k : metrics["roc_auc_{0}".format(k)] for k in ks }
    accuracy = metrics['accuracy']

    # One "name: value" entry per average / class.
    print('AUC: ' + ', '.join('{0}: {1:.4f}'.format(k, roc_auc[k]) for k in ks))
    print(f'Accuracy: {accuracy*100:.4f}%')

    # Plot ROC AUC
    plt.title('Receiver Operating Characteristic')
    # Colours are given via `color=` only; combining them with a 'b' format
    # string defines the colour twice.
    plt.plot(fpr["micro"], tpr["micro"], color = 'navy',
             label = 'Micro-avg AUC = %0.2f' % roc_auc["micro"])
    plt.plot(fpr["macro"], tpr["macro"], color = 'darkviolet',
             label = 'Macro-avg AUC = %0.2f' % roc_auc["macro"])
    colors = ['orange', 'forestgreen', 'cornflowerblue', 'darkgoldenrod', 'tomato', 'dodgerblue']
    lw = 2
    for i in range(num_classes):
        # Cycle through the palette so classes beyond its length are still drawn.
        plt.plot(fpr[i], tpr[i], color=colors[i % len(colors)], lw=lw,
                 label='AUC of class {0} = {1:0.2f}'.format(i, roc_auc[i]))
    plt.legend(loc = 'lower right')
    plt.plot([0, 1], [0, 1],'r--')  # chance diagonal
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.show()

def evaluate_roc(probs, y_true, num_classes, dataset_type):
    """Dispatch to the ROC report that matches the dataset.

    Amazon gets the multi-class report, fed with the one-hot encoded labels;
    every other dataset gets the two-class report, fed with the labels as given.
    """
    one_hot_truth = pd.get_dummies(y_true)
    if dataset_type != DATASET_TYPE_AMAZON:
        evaluate_roc_twoclass(probs, y_true, num_classes, dataset_type)
        return
    evaluate_roc_multiclass(probs, one_hot_truth, num_classes, dataset_type)

## Prediction and More Evaluation Methods

In [18]:
# Evaluation and Metrics
from sklearn.metrics import classification_report, confusion_matrix, multilabel_confusion_matrix
from sklearn.metrics import accuracy_score, balanced_accuracy_score, cohen_kappa_score, roc_auc_score


def get_pred(value, num_classes):
    """Map a score to a class index by cutting [0, 1) into `num_classes` equal buckets.

    Returns the index of the first bucket whose upper bound lies strictly above
    `value`; scores at or beyond the last bound fall into the last class.
    """
    bucket_width = 1.0 / num_classes
    first_match = (
        idx for idx in range(0, num_classes)
        if value < (idx + 1) * bucket_width
    )
    return next(first_match, num_classes - 1)

def run_and_eval(train_dataloader, dev_dataloader, test_dataloader, test_labels, num_classes, dataset_type):
    """Train a fresh classifier, report its ROC metrics and return test predictions.

    @return   Predicted class index for every test example: a list for Amazon,
              a np.array for the two-class datasets.
    """
    # Seed first so model initialisation and shuffling are reproducible.
    set_seed(SEED)

    model, optimizer, scheduler = initialize_model(
        num_classes, train_dataloader, epochs=NUM_EPOCHS)
    train_bert(
        model, train_dataloader, optimizer, scheduler,
        val_dataloader=dev_dataloader, epochs=NUM_EPOCHS, evaluation=True)

    # Compute predicted probabilities on the test set.
    test_probs = bert_predict(model, test_dataloader)
    evaluate_roc(test_probs, test_labels, num_classes, dataset_type)

    if dataset_type != DATASET_TYPE_AMAZON:
        # Two classes: bucket the probability of class 1.
        return np.array([get_pred(p[1], num_classes) for p in test_probs])

    # Amazon: flag each row's maximum, then take the first flagged column.
    row_maxes = test_probs.max(axis=1).reshape(-1, 1)
    is_row_max = np.where(test_probs == row_maxes, 1, 0)
    return [np.argmax(row) for row in is_row_max]
    

def get_metrics_report(train, test, test_preds, test_labels,
                       train_path, test_path, label_field,
                       dataset_type, expr_type, num_samples, run_id, report_destpath=None):
    """Build a Markdown metrics report for one run and optionally append it to a file.

    @param    train, test: frames whose `label_field` column supplies the
                  label distributions shown in the report.
    @param    test_preds: predicted class index per test example.
    @param    test_labels: true class index per test example.
    @param    train_path, test_path (str): paths echoed in the report.
    @param    label_field (str): name of the label column in `train` / `test`.
    @param    dataset_type, expr_type, num_samples, run_id: identify the run.
    @param    report_destpath (str): file the report is appended to; nothing is
                  written when it is None.
    @return   dict: sklearn's classification report (output_dict=True) extended
              with the keys "dataset", "expr", "num_samples" and "run_id".
    """
    metrics_report = classification_report(test_labels, test_preds, output_dict=True, digits=4)
    run_report = {
        "dataset" : dataset_type,
        "expr" : expr_type,
        "num_samples" : num_samples,
        "run_id" : run_id
    }
    metrics_report.update(run_report)

    report_str = "### BERT | Dataset: {0}, Expr: {1}, N{2}, R{3}\n".format(
        dataset_type, expr_type, num_samples, run_id)
    report_str += "```\n"
    report_str += "Train: {0}\nTest: {1}\n-------------------------------\n".format(
        train_path, test_path)

    if dataset_type != DATASET_TYPE_AMAZON:
        report_str += "AUC: {0:.4f}\n".format(roc_auc_score(test_labels, test_preds, multi_class='ovr'))
    # sklearn metrics take (y_true, y_pred). Balanced accuracy averages the
    # recall of each *true* class, so swapping the arguments changes the value.
    report_str += "Accuracy:\t{0:.4f}\t\tBalanced Acc: {1:.4f}\n".format(
        accuracy_score(test_labels, test_preds), balanced_accuracy_score(test_labels, test_preds))
    report_str += "Kappa:\t{0}\n".format(cohen_kappa_score(test_labels, test_preds))
    report_str += classification_report(test_labels, test_preds, digits=4)
    report_str += "\n"
    report_str += "Train distribution:\t{0}\nTest distribution:\t{1}\n".format(
        getattr(train, label_field).value_counts().to_dict(),
        getattr(test, label_field).value_counts().to_dict())

    labels = [0, 1] if dataset_type != DATASET_TYPE_AMAZON else [0, 1, 2, 3, 4]
    # One 2x2 matrix per label, laid out as [[TN, FP], [FN, TP]].
    cm = multilabel_confusion_matrix(test_labels, test_preds, labels=labels)
    report_str += "Confusion matrix:\n\t{0}".format(
        "\n\t".join(["Label {0}: TP {1}, FP {2}, TN {3}, FN {4}".format(
            labels[i], cm[i][1][1], cm[i][0][1], cm[i][0][0], cm[i][1][0]) for i in range(len(cm))]))
    report_str += "\n```\n\n"

    if report_destpath is not None:
        # The context manager closes the file even if the write fails.
        with open(report_destpath, 'a') as file_obj:
            file_obj.write(report_str)

    return metrics_report

## Experiment Parameters

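Enable exactly one dataset block in the cell below. A block is active when both of its delimiter lines read `#"""` and disabled when they read `"""`; the Amazon Reviews block is the one currently active.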

In [19]:
def get_train_path(expr_type, num_samples, run_id, dataset_filename, file_ext):
    """Return the path of the training file for one experiment run.

    Layout: expr_data/<expr_type>/<num_samples>/r<run_id>/
            expr_<expr_type>_n<num_samples>_r<run_id>_<dataset_filename>.<file_ext>
    """
    run_dir = f"expr_data/{expr_type}/{num_samples}/r{run_id}"
    file_name = f"expr_{expr_type}_n{num_samples}_r{run_id}_{dataset_filename}.{file_ext}"
    return f"{run_dir}/{file_name}"

# Dataset selection. Each block below defines the same set of names
# (DATASET_FILENAME, DATASET_TYPE, FILE_EXT, LABEL_FIELD, TEXT_FIELD, DEV_PATH,
# TEST_PATH). A block wrapped in bare `"""` lines is an unused string literal
# and therefore inactive; a block whose delimiter lines are commented out as
# `#"""` is executed. Keep exactly one block active.
"""
# SST-2
DATASET_FILENAME = "sst2_train"
DATASET_TYPE = "sst2"
FILE_EXT = "tsv"
LABEL_FIELD = "label"
TEXT_FIELD = "sentence"
DEV_PATH = "expr_data/devtest/sst2_dev.tsv"
TEST_PATH = "expr_data/devtest/{0}_test.{1}".format(DATASET_TYPE, FILE_EXT)
"""

"""
# IMDB
DATASET_FILENAME = "imdb"
DATASET_TYPE = "imdb"
FILE_EXT = "csv"
LABEL_FIELD = "sentiment"
TEXT_FIELD = "review"
DEV_PATH = "expr_data/devtest/imdb_dev.csv"
TEST_PATH = "expr_data/devtest/{0}_test.{1}".format(DATASET_FILENAME, FILE_EXT)
"""

#"""
# Amazon Reviews (currently active)
DATASET_FILENAME = "amazon_reviews_digital_music"  # file-name stem of the train/test files
DATASET_TYPE = "amazon"  # matches DATASET_TYPE_AMAZON
FILE_EXT = "json"
LABEL_FIELD = "overall"  # column holding the label
TEXT_FIELD = "reviewText"  # column holding the text to classify
DEV_PATH = "expr_data/devtest/amazon_reviews_digital_music_dev.json"
TEST_PATH = "expr_data/devtest/{0}_test.{1}".format(DATASET_FILENAME, FILE_EXT)
#"""


# Dev split: load, normalize the labels, tokenize the texts and wrap everything
# in a sequential DataLoader. The number of classes is taken from the distinct
# raw labels of the dev split.
DEV = dataset_io.to_df(DEV_PATH)
_DEV_LABELS = normalize_labels(DEV, LABEL_FIELD)
NUM_CLASSES = len(set(getattr(DEV, LABEL_FIELD)))
DEV_INPUTS, DEV_MASKS = preprocessing_for_bert(getattr(DEV, TEXT_FIELD).to_numpy())
DEV_LABELS = torch.tensor(_DEV_LABELS, dtype=torch.long)
DEV_DATA, DEV_SAMPLER, DEV_DATALOADER = create_dataloader(
    DEV_INPUTS, DEV_MASKS, DEV_LABELS, is_val=True)

# Test split: same pipeline as the dev split.
TEST = dataset_io.to_df(TEST_PATH)
_TEST_LABELS = normalize_labels(TEST, LABEL_FIELD)
TEST_INPUTS, TEST_MASKS = preprocessing_for_bert(getattr(TEST, TEXT_FIELD).to_numpy())
TEST_LABELS = torch.tensor(_TEST_LABELS, dtype=torch.long)
TEST_DATA, TEST_SAMPLER, TEST_DATALOADER = create_dataloader(
    TEST_INPUTS, TEST_MASKS, TEST_LABELS, is_val=True)




### Change the number of samples here

In [20]:
# NUM_SAMPLES can be 10 or 50.
NUM_SAMPLES = 10

# All runs of one dataset / sample size share a single Markdown report file.
RESULTS_DIR = "results"
REPORT_RESULTS_PATH = "{0}/{1}_{2}_bert_report.md".format(RESULTS_DIR, DATASET_TYPE, NUM_SAMPLES)

# Create the results directory on first use.
if not os.path.exists(RESULTS_DIR):
    os.mkdir(RESULTS_DIR)

# Create an empty report if there is none yet; an existing one is kept and
# later runs append to it.
if os.path.exists(REPORT_RESULTS_PATH):
    print("Metrics report will be appended to {0}".format(REPORT_RESULTS_PATH))
else:
    with open(REPORT_RESULTS_PATH, 'a'):
        pass
    print("Metrics report will be written to {0}".format(REPORT_RESULTS_PATH))

Metrics report will be appended to results/amazon_10_bert_report.md


In [21]:
# Twelve runs (ids 0..11) for every experiment type.
RUN_IDS = list(range(12))
EXPR_TYPES = [
    "orig",
    "para", "para_tc", "para_editdist", "para_tc_editdist",
    "tc", "tc_editdist",
    "eda", "bt",
]

for expr_type in EXPR_TYPES:
    for run_id in RUN_IDS:
        # Load and tokenize this run's training file.
        train_path = get_train_path(expr_type, NUM_SAMPLES, run_id, DATASET_FILENAME, FILE_EXT)
        train = dataset_io.to_df(train_path)
        train_inputs, train_masks = preprocessing_for_bert(
            getattr(train, TEXT_FIELD).to_numpy())

        # Labels -> tensor -> shuffled training DataLoader.
        _train_labels = normalize_labels(train, LABEL_FIELD)
        train_labels = torch.tensor(_train_labels, dtype=torch.long)
        train_data, train_sampler, train_dataloader = create_dataloader(
            train_inputs, train_masks, train_labels)

        # Train and get metrics.
        print("\n\nRunning {0} experiment on {1}, N{2}, run #{3}".format(
            DATASET_TYPE, expr_type, NUM_SAMPLES, run_id))
        test_preds = run_and_eval(
            train_dataloader, DEV_DATALOADER, TEST_DATALOADER,
            _TEST_LABELS, NUM_CLASSES, DATASET_TYPE)

        # Append this run's report to the shared results file.
        get_metrics_report(
            train, TEST, test_preds, _TEST_LABELS,
            train_path, TEST_PATH, LABEL_FIELD,
            DATASET_TYPE, expr_type, NUM_SAMPLES, run_id, REPORT_RESULTS_PATH)

print("Finished. Please see the experiment metrics report at {0}".format(REPORT_RESULTS_PATH))





Running amazon experiment on orig, N10, run# 0


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


len(train): 7, epochs: 4, total_steps: 28
Start training...

 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------


KeyboardInterrupt: 