In [16]:
from lr.models.transformers.util import *
import logging
import os
import shutil
import torch
import numpy as np
import pandas as pd
from transformers import BertTokenizer
from transformers import BertForSequenceClassification
from time import time
from sklearn.model_selection import train_test_split

### Params

In [2]:
folder = "snli"

hyperparams = {"local_rank": -1,
               "max_seq_length": 200,
               "overwrite_cache": False,
               "num_train_epochs":3.0,
               "per_gpu_train_batch_size":32,
               "per_gpu_eval_batch_size":32,
               "gradient_accumulation_steps": 1,
               "learning_rate":5e-5,
               "weight_decay":0.0,
               "adam_epsilon": 1e-8,
               "max_grad_norm": 1.0,
               "max_steps": -1,
               "warmup_steps": 0,
               "save_steps": 500,
               "no_cuda":False,
               "n_gpu":1,
               "model_name_or_path":"bert",
               "output_dir":"bert",
               "random_state": 42,
               "fp16":False,
               "fp16_opt_level":"01",
               "device":"cpu",
               "verbose":True,
               "model_type": "bert",
               "train_cached_features_file": "data/{}/base_train_".format(folder),
               "dev_cached_features_file": "data/{}/base_dev_".format(folder)} 


# Apply the configured seed through the project's `set_seed` helper (brought in by
# the star import at the top of the notebook). Which RNGs it seeds, and how it uses
# the GPU count, is not visible here — TODO confirm against lr.models.transformers.util.
set_seed(hyperparams["random_state"], hyperparams["n_gpu"])

## Set results dict

In [3]:
# Column-oriented accumulator for the results table: every evaluation cell
# appends exactly one value to each of these lists, so they stay row-aligned.
meta_results = {
    column: [] for column in ("moment", "type", "loss", "acc", "time")
}

# Load data and create the train/dev split

In [4]:
# Read the dataset's training CSV and hold out 10% of its rows as a dev split.
train_path = "data/{}/train.csv".format(folder)

df = pd.read_csv(train_path)

# Pass the configured seed explicitly so the split is reproducible on its own,
# independent of how much global RNG state earlier cells have consumed.
# (The stray bare `train_test_split` expression that used to sit here was a no-op
# and has been removed.)
df_train, df_dev = train_test_split(df,
                                    test_size=0.1,
                                    random_state=hyperparams["random_state"])

# examples

In [5]:
# Convert each split's DataFrame into example objects with the project's
# NLIProcessor; the second argument tags the split the examples belong to.
processor = NLIProcessor()
train_examples, dev_examples = (
    processor.df2examples(frame, split_name)
    for frame, split_name in ((df_train, "train"), (df_dev, "dev"))
)

# features

In [6]:
# Tokenizer and label mapping shared by both splits.
pretrained_weights = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(pretrained_weights)
label_map = processor.get_label_map()
max_seq_length = hyperparams["max_seq_length"]

# Destinations for the serialized features, taken from the run configuration.
train_cached_features_file = hyperparams["train_cached_features_file"]
dev_cached_features_file = hyperparams["dev_cached_features_file"]

# Both splits are converted with identical settings; only the examples differ.
_feature_kwargs = dict(tokenizer=tokenizer,
                       label_map=label_map,
                       max_length=max_seq_length)

train_features = convert_examples_to_features(examples=train_examples,
                                              **_feature_kwargs)
dev_features = convert_examples_to_features(examples=dev_examples,
                                            **_feature_kwargs)

# Persist the features; the dataset-building cell reads them back from these paths.
torch.save(train_features, train_cached_features_file)
torch.save(dev_features, dev_cached_features_file)

# dataset

In [7]:
# Build the train and dev datasets from the feature files saved above.
# The dev split is the one requested with evaluate=True.
train_dataset, dev_dataset = (
    features2dataset(cache_path, hyperparams, evaluate=is_eval)
    for cache_path, is_eval in ((train_cached_features_file, False),
                                (dev_cached_features_file, True))
)

## Loading Model

In [8]:
# Load the pretrained BERT weights into a sequence classifier configured
# for 3 output labels.
model = BertForSequenceClassification.from_pretrained(
    pretrained_weights,
    num_labels=3,
)

### Eval before training

#### train

In [9]:
# Baseline: score the model on the training split before any fine-tuning.
train_loss, train_results = evaluate(train_dataset, hyperparams, model)
train_acc = (train_results.prediction == train_results.label).mean()

# Record one row in the results table; there is no training time yet, hence NaN.
for _column, _value in (("moment", "before"),
                        ("type", "train"),
                        ("loss", train_loss),
                        ("acc", train_acc),
                        ("time", np.nan)):
    meta_results[_column].append(_value)

Evaluating: 100%|██████████| 25/25 [00:58<00:00,  2.36s/it]


#### dev

In [10]:
# Baseline: score the model on the dev split before any fine-tuning.
dev_loss, results = evaluate(dev_dataset, hyperparams, model)
dev_acc = (results.prediction == results.label).mean()

# Record one row in the results table; there is no training time yet, hence NaN.
for _column, _value in (("moment", "before"),
                        ("type", "dev"),
                        ("loss", dev_loss),
                        ("acc", dev_acc),
                        ("time", np.nan)):
    meta_results[_column].append(_value)

Evaluating: 100%|██████████| 7/7 [00:14<00:00,  2.11s/it]


# Train

In [11]:
# Fine-tune the model and keep the elapsed wall-clock seconds for the results table.
init = time()
_train_output = train(train_dataset, model, tokenizer, hyperparams)
train_time = time() - init

# `train` returns a pair, unpacked here as the step count and a training loss.
global_step, tr_loss = _train_output

Epoch:   0%|          | 0/1 [00:00<?, ?it/s]
Iteration:   0%|          | 0/25 [00:00<?, ?it/s][A
Iteration:   4%|▍         | 1/25 [00:07<03:10,  7.93s/it][A
Iteration:   8%|▊         | 2/25 [00:15<03:01,  7.91s/it][A
Iteration:  12%|█▏        | 3/25 [00:23<02:53,  7.86s/it][A
Iteration:  16%|█▌        | 4/25 [00:31<02:44,  7.84s/it][A
Iteration:  20%|██        | 5/25 [00:39<02:36,  7.81s/it][A
Iteration:  24%|██▍       | 6/25 [00:46<02:28,  7.81s/it][A
Iteration:  28%|██▊       | 7/25 [00:54<02:20,  7.80s/it][A
Iteration:  32%|███▏      | 8/25 [01:02<02:12,  7.79s/it][A
Iteration:  36%|███▌      | 9/25 [01:10<02:04,  7.79s/it][A
Iteration:  40%|████      | 10/25 [01:18<01:56,  7.79s/it][A
Iteration:  44%|████▍     | 11/25 [01:25<01:48,  7.78s/it][A
Iteration:  48%|████▊     | 12/25 [01:33<01:40,  7.77s/it][A
Iteration:  52%|█████▏    | 13/25 [01:41<01:33,  7.77s/it][A
Iteration:  56%|█████▌    | 14/25 [01:49<01:25,  7.76s/it][A
Iteration:  60%|██████    | 15/25 [01:56<01

### Eval After training

#### train

In [13]:
# Re-score the training split now that the model has been fine-tuned.
train_loss, train_results = evaluate(train_dataset, hyperparams, model)
train_acc = (train_results.prediction == train_results.label).mean()

# Record one row in the results table, including how long training took.
for _column, _value in (("moment", "after"),
                        ("type", "train"),
                        ("loss", train_loss),
                        ("acc", train_acc),
                        ("time", train_time)):
    meta_results[_column].append(_value)

Evaluating: 100%|██████████| 25/25 [00:58<00:00,  2.34s/it]


#### dev

In [14]:
# Re-score the dev split now that the model has been fine-tuned.
dev_loss, results = evaluate(dev_dataset, hyperparams, model)
dev_acc = (results.prediction == results.label).mean()

# Record one row in the results table, including how long training took.
for _column, _value in (("moment", "after"),
                        ("type", "dev"),
                        ("loss", dev_loss),
                        ("acc", dev_acc),
                        ("time", train_time)):
    meta_results[_column].append(_value)

Evaluating: 100%|██████████| 7/7 [00:14<00:00,  2.09s/it]


## Save results

In [15]:
# Turn the accumulated columns into a table and write it to meta.csv
# (without the index column) in the current working directory.
meta_results = pd.DataFrame(data=meta_results)
meta_results.to_csv("meta.csv", index=False)