In [1]:
from lr.models.transformers.processor import *
from lr.models.transformers.util import evaluate, train

import logging
import os
import shutil
import torch
import numpy as np
import pandas as pd
from transformers import BertTokenizer
from transformers import BertForSequenceClassification
from time import time
from sklearn.model_selection import train_test_split

### Params

In [2]:
# Name of the dataset sub-folder under `data/`; also used to build `base_path`.
folder = "snli"

# Single configuration dict handed to the processor, `evaluate` and `train`.
hyperparams = {"local_rank": -1,
               "max_seq_length": 200,
               "overwrite_cache": False,
               "num_train_epochs": 1.0,
               "per_gpu_train_batch_size": 32,
               "per_gpu_eval_batch_size": 32,
               "gradient_accumulation_steps": 1,
               "learning_rate": 5e-5,
               "weight_decay": 0.0,
               "adam_epsilon": 1e-8,
               "max_grad_norm": 1.0,
               "max_steps": 300,
               "warmup_steps": 0,
               "save_steps": 50,
               "no_cuda": False,
               "n_gpu": 1,
               "model_name_or_path": "bert",
               "output_dir": "bert",
               "random_state": 42,
               "fp16": False,
               # Apex AMP optimisation levels are spelled with a capital letter O
               # ("O0".."O3"); the previous value "01" (digit zero) is not a valid
               # level. Presumably only read when "fp16" is True -- verify in `train`.
               "fp16_opt_level": "O1",
               # NOTE(review): "device" is "cpu" while "no_cuda" is False and
               # "n_gpu" is 1 -- confirm which of these `train`/`evaluate` honour.
               "device": "cpu",
               "verbose": True,
               "model_type": "bert",
               "pad_on_left": False,
               "pad_token": 0,
               "pad_token_segment_id": 0,
               "mask_padding_with_zero": True,
               # Path prefix built from the dataset folder.
               "base_path": "data/{}/cached_".format(folder)}

# Loading the tokenizer

In [3]:
# Seed first, using the configured random state and GPU count.
set_seed(hyperparams["random_state"], hyperparams["n_gpu"])

# Name of the pretrained weights; reused further down when the model is loaded.
pretrained_weights = 'bert-base-uncased'

# Build the tokenizer once and expose it both through the shared config dict
# and as a module-level name.
hyperparams["tokenizer"] = tokenizer = BertTokenizer.from_pretrained(pretrained_weights)

## Set results dict

In [4]:
# Column-oriented accumulator: one independent, initially empty list per
# result column. Evaluation cells append one value to every column.
meta_results = {column: [] for column in ("moment", "type", "loss", "acc", "time")}

# Loading and splitting the data

In [5]:
# Read the training CSV of the selected dataset folder, split it 80/20 into
# train/dev frames, and draw a fixed-size sample from each for quick evaluation.
train_path = "data/{}/train.csv".format(folder)
# NOTE(review): `train_test_split` below is called without `random_state`, so
# the split presumably relies on `set_seed` seeding the global RNG -- confirm.
set_seed(hyperparams["random_state"], hyperparams["n_gpu"])

# Number of rows sampled from each split for the before/after evaluations.
eval_sample_size = 1000


df = pd.read_csv(train_path)


# test_size=0.2 -> 20% of the rows go to the dev frame.
df_train, df_dev = train_test_split(df, test_size=0.2)
# The evaluation samples are drawn with an explicit, configured random state.
df_train_to_eval = df_train.sample(n=eval_sample_size, random_state=hyperparams["random_state"])
df_dev_to_eval = df_dev.sample(n=eval_sample_size,random_state=hyperparams["random_state"])

# Reproducibility guard: the first three index labels of the training split
# must match these hard-coded values, otherwise the split has changed.
assert np.all(df_train.index[:3] == [305830, 486798, 386780])

## Creating features

In [6]:
processor = NLIProcessor(hyperparams)
init = time()

# Run `df2features` once per (frame, mode) pair, in the order train,
# train_to_eval, dev, dev_to_eval, and bind each return value to its name.
(train_cached_features_file,
 train_to_eval_cached_features_file,
 dev_cached_features_file,
 dev_to_eval_cached_features_file) = [
    processor.df2features(df=frame, n_cores=8, mode=mode)
    for frame, mode in ((df_train, "train"),
                        (df_train_to_eval, "train_to_eval"),
                        (df_dev, "dev"),
                        (df_dev_to_eval, "dev_to_eval"))
]

# Elapsed wall-clock time, reported in minutes.
p_time = time() - init
print("total time = {:.3f}".format(p_time / 60))

total time = 2.804


## Loading Datasets

In [7]:
init = time()

# Convert every feature object into a dataset, preserving the original order
# (train, train sample, dev, dev sample).
train_dataset, train_dataset_to_eval, dev_dataset, dev_dataset_to_eval = map(
    features2dataset,
    (train_cached_features_file,
     train_to_eval_cached_features_file,
     dev_cached_features_file,
     dev_to_eval_cached_features_file),
)

# Elapsed wall-clock time, reported in minutes.
p_time = time() - init
print("total time = {:.3f}".format(p_time / 60))

total time = 0.271


In [8]:
# Report the size of each dataset, one number per line, in the order:
# train, train sample, dev, dev sample.
print(*map(len, (train_dataset,
                 train_dataset_to_eval,
                 dev_dataset,
                 dev_dataset_to_eval)),
      sep="\n")

439494
1000
109867
1000


## Loading Model

In [9]:
# Start from the same pretrained weights as the tokenizer and configure the
# sequence-classification model for three labels.
model = BertForSequenceClassification.from_pretrained(
    pretrained_weights,
    num_labels=3,
)

### Eval before training

#### Train sample

In [10]:
# Score the model on the training sample; this cell sits before the training cell.
train_loss, train_results = evaluate(train_dataset_to_eval, hyperparams, model)
train_acc = (train_results.prediction == train_results.label).mean()

# Alignment check: the labels reported by `evaluate` must equal the labels of
# the sampled frame (NaN rows dropped, label-filtered, index reset) once they
# are translated through the processor's label map.
lmap = processor.get_label_map()
filtered = filter_df_by_label(df_train_to_eval.dropna()).reset_index(drop=True)
assert np.all(filtered.label.map(lambda name: lmap[name]) == train_results.label)

# Record one row: moment, split type, loss, accuracy, and no training time yet.
for column, value in {"moment": "before",
                      "type": "train",
                      "loss": train_loss,
                      "acc": train_acc,
                      "time": np.nan}.items():
    meta_results[column].append(value)

Evaluating: 100%|██████████| 32/32 [01:57<00:00,  3.67s/it]


#### Dev sample

In [11]:
# Score the model on the dev sample; this cell sits before the training cell.
dev_loss, results = evaluate(dev_dataset_to_eval, hyperparams, model)
dev_acc = (results.prediction == results.label).mean()

# Alignment check: the labels reported by `evaluate` must equal the labels of
# the sampled dev frame (NaN rows dropped, label-filtered, index reset) once
# they are translated through the label map `lmap`.
filtered = filter_df_by_label(df_dev_to_eval.dropna()).reset_index(drop=True)
assert np.all(filtered.label.map(lambda name: lmap[name]) == results.label)

# Record one row: moment, split type, loss, accuracy, and no training time yet.
for column, value in {"moment": "before",
                      "type": "dev",
                      "loss": dev_loss,
                      "acc": dev_acc,
                      "time": np.nan}.items():
    meta_results[column].append(value)

Evaluating: 100%|██████████| 32/32 [01:57<00:00,  3.67s/it]


# Train

In [12]:
# Fine-tune `model` on the full training dataset and measure how long it takes.
# `train_time` is the wall-clock duration in seconds; the evaluation cells that
# follow store it in the "time" column of `meta_results`.
# NOTE(review): presumably `train` stops after hyperparams["max_steps"] steps
# rather than after a full epoch -- confirm in `lr.models.transformers.util`.
init = time()
global_step, tr_loss = train(train_dataset, model, tokenizer, hyperparams)
train_time = time() - init

Epoch:   0%|          | 0/1 [00:00<?, ?it/s]
Iteration:   0%|          | 0/13735 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/13735 [00:12<48:02:45, 12.59s/it][A
Iteration:   0%|          | 2/13735 [00:25<47:58:13, 12.58s/it][A
Iteration:   0%|          | 3/13735 [00:37<47:50:03, 12.54s/it][A
Iteration:   0%|          | 4/13735 [00:49<47:38:54, 12.49s/it][A
Iteration:   0%|          | 5/13735 [01:02<47:27:44, 12.44s/it][A
Iteration:   0%|          | 6/13735 [01:14<47:20:12, 12.41s/it][A
Iteration:   0%|          | 7/13735 [01:26<47:13:53, 12.39s/it][A
Iteration:   0%|          | 8/13735 [01:39<47:15:21, 12.39s/it][A
Iteration:   0%|          | 9/13735 [01:51<47:10:01, 12.37s/it][A
Iteration:   0%|          | 10/13735 [02:04<47:14:09, 12.39s/it][A
Iteration:   0%|          | 11/13735 [02:16<47:14:45, 12.39s/it][A
Iteration:   0%|          | 12/13735 [02:28<47:10:14, 12.37s/it][A
Iteration:   0%|          | 13/13735 [02:41<47:08:12, 12.37s/it][A
Iteration:   0%|        

### Eval after training

#### Train sample

In [13]:
# Score the model on the training sample again, now that the training cell has run.
train_loss, train_results = evaluate(train_dataset_to_eval, hyperparams, model)
train_acc = (train_results.prediction == train_results.label).mean()

# Alignment check: the labels reported by `evaluate` must equal the labels of
# the sampled frame (NaN rows dropped, label-filtered, index reset) once they
# are translated through the label map `lmap`.
filtered = filter_df_by_label(df_train_to_eval.dropna()).reset_index(drop=True)
assert np.all(filtered.label.map(lambda name: lmap[name]) == train_results.label)

# Record one row: moment, split type, loss, accuracy, and the training duration.
for column, value in {"moment": "after",
                      "type": "train",
                      "loss": train_loss,
                      "acc": train_acc,
                      "time": train_time}.items():
    meta_results[column].append(value)

Evaluating: 100%|██████████| 32/32 [01:57<00:00,  3.68s/it]


#### Dev sample

In [14]:
# Score the model on the dev sample again, now that the training cell has run.
dev_loss, results = evaluate(dev_dataset_to_eval, hyperparams, model)
dev_acc = (results.prediction == results.label).mean()

# Alignment check: the labels reported by `evaluate` must equal the labels of
# the sampled dev frame (NaN rows dropped, label-filtered, index reset) once
# they are translated through the label map `lmap`.
filtered = filter_df_by_label(df_dev_to_eval.dropna()).reset_index(drop=True)
assert np.all(filtered.label.map(lambda name: lmap[name]) == results.label)

# Record one row: moment, split type, loss, accuracy, and the training duration.
for column, value in {"moment": "after",
                      "type": "dev",
                      "loss": dev_loss,
                      "acc": dev_acc,
                      "time": train_time}.items():
    meta_results[column].append(value)

Evaluating: 100%|██████████| 32/32 [01:57<00:00,  3.68s/it]


## Results

In [15]:
# Turn the accumulated result columns into a table, save it as "meta.csv"
# (without the index column) and display it as the cell output.
# NOTE(review): from here on `meta_results` is a DataFrame, no longer the dict
# of lists that the evaluation cells append to; re-create the dict before
# re-running any of those cells.
meta_results = pd.DataFrame(meta_results)
meta_results.to_csv("meta.csv",index=False)
meta_results

Unnamed: 0,moment,type,loss,acc,time
0,before,train,1.233214,0.33,
1,before,dev,1.240603,0.325,
2,after,train,0.598539,0.768,3719.075275
3,after,dev,0.5581,0.78,3719.075275


In [None]:
# df_eval = pd.read_csv("bert/eval_log.csv")
# df_eval.set_index(["step"])[["train_acc","dev_acc"]].plot();

### Loading a saved checkpoint

In [None]:
# Load a model from a checkpoint directory and score it on the dev sample.
# `BertForSequenceClassification` is already imported in the first cell, so it
# is not imported again here.
# NOTE(review): "save_steps" is 50 in the config cell -- confirm that a
# `checkpoint-2` directory is actually written under the "bert" output folder.
# NOTE(review): this rebinds `model`, `dev_loss`, `results` and `dev_acc`, so
# the fine-tuned in-memory model and its dev metrics are replaced after it runs.
checkpoint_dir = 'bert/checkpoint-2/'
model = BertForSequenceClassification.from_pretrained(checkpoint_dir)

dev_loss, results = evaluate(dev_dataset_to_eval, hyperparams, model)
dev_acc = (results.prediction == results.label).mean()
print(dev_acc)