In [1]:
from lr.models.transformers.util import *
import logging
import os
import shutil
import torch
import numpy as np
import pandas as pd
from transformers import BertTokenizer
from transformers import BertForSequenceClassification
from time import time
from sklearn.model_selection import train_test_split

### Params

In [2]:
folder = "toy"

hyperparams = {"local_rank": -1,
               "max_seq_length": 200,
               "overwrite_cache": False,
               "num_train_epochs":1.0,
               "per_gpu_train_batch_size":32,
               "per_gpu_eval_batch_size":32,
               "gradient_accumulation_steps": 1,
               "learning_rate":5e-5,
               "weight_decay":0.0,
               "adam_epsilon": 1e-8,
               "max_grad_norm": 1.0,
               "max_steps": -1,
               "warmup_steps": 0,
               "save_steps": 10,
               "no_cuda":False,
               "n_gpu":1,
               "model_name_or_path":"bert",
               "output_dir":"bert",
               "random_state": 42,
               "fp16":False,
               "fp16_opt_level":"01",
               "device":"cpu",
               "verbose":True,
               "model_type": "bert",
               "train_cached_features_file": "data/{}/cached_train".format(folder),
               "dev_cached_features_file": "data/{}/cached_dev".format(folder),
               "train_to_eval_cached_features_file": "data/{}/cached_train_to_eval".format(folder),
               "dev_to_eval_cached_features_file": "data/{}/cached_dev_to_eval".format(folder)} 



set_seed(hyperparams["random_state"], hyperparams["n_gpu"])

## Set results dict

In [3]:
meta_results = {"moment":[],
                "type":[],
                "loss":[],
                "acc":[],
                "time":[]}

# df

In [4]:
train_path = "data/{}/train.csv".format(folder)

eval_sample_size = 100


df = pd.read_csv(train_path)


train_test_split
df_train, df_dev = train_test_split(df, test_size=0.2)
df_train_to_eval = df_train.sample(n=eval_sample_size)
df_dev_to_eval = df_dev.sample(n=eval_sample_size)

# examples

In [5]:
processor = NLIProcessor()
train_examples = processor.df2examples(df_train, "train")
train_examples_to_eval = processor.df2examples(df_train_to_eval, "train_to_eval")
dev_examples = processor.df2examples(df_dev, "dev")
dev_examples_to_eval = processor.df2examples(df_dev_to_eval, "dev_to_eval")

all_examples = [train_examples, train_examples_to_eval,
                dev_examples, dev_examples_to_eval]

## Creating features

In [6]:
# paths
tail = "_{}".format(hyperparams["max_seq_length"])
train_cached_features_file = hyperparams["train_cached_features_file"] + tail
dev_cached_features_file = hyperparams["dev_cached_features_file"] + tail
train_to_eval_cached_features_file = hyperparams["train_to_eval_cached_features_file"] + tail
dev_to_eval_cached_features_file = hyperparams["dev_to_eval_cached_features_file"] + tail

paths = [train_cached_features_file,
         train_to_eval_cached_features_file,
         dev_cached_features_file,
         dev_to_eval_cached_features_file]

# loading tokenizers
pretrained_weights = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(pretrained_weights)
label_map = processor.get_label_map()
max_seq_length = hyperparams["max_seq_length"]

# # creating features

for example_set, path in zip(all_examples, paths):
    if not os.path.exists(path):
        features = convert_examples_to_features(examples=example_set,
                                                tokenizer=tokenizer,
                                                label_map=label_map,
                                                max_length=max_seq_length)
        torch.save(features, path)
        

# loading datasets

In [7]:
train_dataset = features2dataset(train_cached_features_file)
train_dataset_to_eval = features2dataset(train_to_eval_cached_features_file)
dev_dataset = features2dataset(dev_cached_features_file)
dev_dataset_to_eval = features2dataset(dev_to_eval_cached_features_file)

## Loading Model

In [8]:
model = BertForSequenceClassification.from_pretrained(pretrained_weights, num_labels = 3)

### Eval before training

#### train

In [9]:
train_loss, train_results = evaluate(train_dataset_to_eval, hyperparams, model)
train_acc = (train_results.prediction==train_results.label).mean()

meta_results["moment"].append("before")
meta_results["type"].append("train")
meta_results["loss"].append(train_loss)
meta_results["acc"].append(train_acc)
meta_results["time"].append(np.nan)

Evaluating: 100%|██████████| 4/4 [00:12<00:00,  3.03s/it]


#### Dev 

In [10]:
dev_loss, results = evaluate(dev_dataset_to_eval, hyperparams, model)
dev_acc = (results.prediction==results.label).mean()


meta_results["moment"].append("before")
meta_results["type"].append("dev")
meta_results["loss"].append(dev_loss)
meta_results["acc"].append(dev_acc)
meta_results["time"].append(np.nan)

Evaluating: 100%|██████████| 4/4 [00:12<00:00,  3.05s/it]


# Train

In [11]:
init = time()
global_step, tr_loss = train(train_dataset, model, tokenizer, hyperparams)
train_time = time() - init

Epoch:   0%|          | 0/1 [00:00<?, ?it/s]
Iteration:   0%|          | 0/20 [00:00<?, ?it/s][A
Iteration:   5%|▌         | 1/20 [00:12<03:57, 12.51s/it][A
Iteration:  10%|█         | 2/20 [00:24<03:44, 12.50s/it][A
Iteration:  15%|█▌        | 3/20 [00:37<03:31, 12.46s/it][A
Iteration:  20%|██        | 4/20 [00:49<03:18, 12.44s/it][A
Iteration:  25%|██▌       | 5/20 [01:02<03:05, 12.40s/it][A
Iteration:  30%|███       | 6/20 [01:14<02:53, 12.37s/it][A
Iteration:  35%|███▌      | 7/20 [01:26<02:40, 12.37s/it][A
Iteration:  40%|████      | 8/20 [01:38<02:28, 12.34s/it][A
Iteration:  45%|████▌     | 9/20 [01:51<02:15, 12.34s/it][A
Iteration:  50%|█████     | 10/20 [02:10<02:24, 14.40s/it][A
Iteration:  55%|█████▌    | 11/20 [02:22<02:03, 13.76s/it][A
Iteration:  60%|██████    | 12/20 [02:35<01:46, 13.32s/it][A
Iteration:  65%|██████▌   | 13/20 [02:47<01:31, 13.02s/it][A
Iteration:  70%|███████   | 14/20 [02:59<01:16, 12.81s/it][A
Iteration:  75%|███████▌  | 15/20 [03:12<01

### Eval After training

#### train

In [12]:
train_loss, train_results = evaluate(train_dataset_to_eval, hyperparams, model)
train_acc = (train_results.prediction==train_results.label).mean()

meta_results["moment"].append("after")
meta_results["type"].append("train")
meta_results["loss"].append(train_loss)
meta_results["acc"].append(train_acc)
meta_results["time"].append(train_time)

Evaluating: 100%|██████████| 4/4 [00:11<00:00,  2.91s/it]


#### dev

In [13]:
dev_loss, results = evaluate(dev_dataset_to_eval, hyperparams, model)
dev_acc = (results.prediction==results.label).mean()

meta_results["moment"].append("after")
meta_results["type"].append("dev")
meta_results["loss"].append(dev_loss)
meta_results["acc"].append(dev_acc)
meta_results["time"].append(train_time)

Evaluating: 100%|██████████| 4/4 [00:11<00:00,  2.93s/it]


## Save results

In [14]:
meta_results = pd.DataFrame(meta_results)
meta_results.to_csv("meta.csv",index=False)

In [15]:
meta_results

Unnamed: 0,moment,type,loss,acc,time
0,before,train,1.332663,0.3,
1,before,dev,1.26188,0.29,
2,after,train,1.051574,0.55,260.497859
3,after,dev,1.101638,0.27,260.497859
