In [1]:
import os
import numpy as np
import pandas as pd
from time import time
from lr.models.transformers.BertWrapper import BertWrapper
from lr.models.transformers.processor import clean_df
from lr.training.util import filter_df_by_label
from tqdm import tqdm

In [2]:
## Variables

In [3]:
# Get data
# --- Run configuration -------------------------------------------------------
folder = "snli"
output_dir_name = "hyperparams/bert_base_snli"
random_state = 52
n_cores = 2
verbose = True

# Both splits are read from data/<folder>/.
train_path = "data/{}/train_sample.csv".format(folder)
dev_path = "data/{}/dev.csv".format(folder)

# --- Load the raw splits -----------------------------------------------------
train = pd.read_csv(train_path)
dev = pd.read_csv(dev_path)

# --- Clean each split (clean_df is given n_cores) ----------------------------
if verbose:
    print("clean train")
train = clean_df(train, n_cores=n_cores)

if verbose:
    print("clean dev")
dev = clean_df(dev, n_cores=n_cores)

# --- Report the resulting sizes ----------------------------------------------
if verbose:
    print("train.shape", train.shape)
    print("dev.shape", dev.shape)



clean train
clean dev
train.shape (49914, 4)
dev.shape (9842, 4)


In [4]:
# Get hyperparams
# Baseline configuration handed to BertWrapper. Later cells take a copy of this
# dict before building a model, so it is never modified by a run.
basic_hyperparams = {"local_rank": -1,
                     "max_seq_length": 200,
                     "overwrite_cache": False,
                     "num_train_epochs": 1.0,
                     "per_gpu_train_batch_size": 32,
                     "per_gpu_eval_batch_size": 50,
                     "gradient_accumulation_steps": 1,
                     "learning_rate": 5e-5,
                     "weight_decay": 0.0,
                     "adam_epsilon": 1e-8,
                     "max_grad_norm": 1.0,
                     # NOTE(review): -1 was the alternative value kept next to this
                     # entry; presumably it removes the step cap -- confirm in
                     # BertWrapper before switching back for a full run.
                     "max_steps": 50,
                     "warmup_steps": 0,
                     "save_steps": 8580,
                     "no_cuda": False,
                     "n_gpu": 1,
                     "data_set_name": folder,
                     "transformation_name": None,
                     "number_of_simulations": 1000,
                     "rho": 0.0,
                     "model_name_or_path": "bert",
                     "output_dir": "zero",
                     # Both seeds follow the notebook-level random_state.
                     "random_state": random_state,
                     "dgp_seed": random_state,
                     "fp16": False,
                     # Letter "O" followed by the digit one. The earlier value "01"
                     # (zero-one) is not a mixed-precision level name; the mistake
                     # stays hidden while fp16 is False and would only surface once
                     # fp16 is switched on. Assumes BertWrapper forwards this to
                     # Apex amp, whose levels are "O0".."O3" -- confirm.
                     "fp16_opt_level": "O1",
                     # NOTE(review): "device" is "cpu" while "no_cuda" is False and
                     # "n_gpu" is 1 -- confirm which of these BertWrapper honours.
                     "device": "cpu",
                     "verbose": True,
                     "model_type": "bert",
                     "pad_on_left": False,
                     "pad_token": 0,
                     "n_cores": n_cores,
                     "eval_sample_size": 200,
                     "pad_token_segment_id": 0,
                     "mask_padding_with_zero": True,
                     # Prefix for cache files; the evaluation log shows it expanded
                     # to data/snli/cached_dev_200.
                     "base_path": "data/{}/cached_".format(folder),
                     "pretrained_weights": "bert-base-uncased"}

In [5]:
# Train a single model with the baseline configuration and time the fit.
# Work on a copy so basic_hyperparams itself is left untouched.
hyperparams = basic_hyperparams.copy()
model = BertWrapper(hyperparams)
# Only the fit call is timed; model construction above is excluded.
init = time()
model.fit(train)
train_time = time() - init  # elapsed wall-clock seconds

Epoch:   0%|          | 0/1 [00:00<?, ?it/s]
Iteration:   0%|          | 0/1554 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/1554 [00:12<5:25:39, 12.58s/it][A
Iteration:   0%|          | 2/1554 [00:25<5:25:21, 12.58s/it][A
Iteration:   0%|          | 3/1554 [00:37<5:24:19, 12.55s/it][A
Iteration:   0%|          | 4/1554 [00:50<5:23:17, 12.51s/it][A
Iteration:   0%|          | 5/1554 [01:02<5:22:19, 12.48s/it][A
Iteration:   0%|          | 6/1554 [01:14<5:21:29, 12.46s/it][A
Iteration:   0%|          | 7/1554 [01:27<5:21:22, 12.46s/it][A
Iteration:   1%|          | 8/1554 [01:39<5:21:01, 12.46s/it][A
Iteration:   1%|          | 9/1554 [01:52<5:20:31, 12.45s/it][A
Iteration:   1%|          | 10/1554 [02:04<5:19:59, 12.43s/it][A
Iteration:   1%|          | 11/1554 [02:17<5:19:55, 12.44s/it][A
Iteration:   1%|          | 12/1554 [02:29<5:19:22, 12.43s/it][A
Iteration:   1%|          | 13/1554 [02:41<5:19:23, 12.44s/it][A
Iteration:   1%|          | 14/1554 [02:54<5:19:12,

In [6]:
# Score the fitted model on the dev split.
result = model.get_results(dev, mode="dev")
# Mean of the `indicator` field. Presumably a per-example correctness flag,
# which would make this the dev accuracy -- confirm against get_results.
acc = result.indicator.mean()

eval path =  data/snli/cached_dev_200


Evaluating: 100%|██████████| 197/197 [19:12<00:00,  5.85s/it]


In [19]:
# Total number of configurations in `choices`: the fixed baseline plus
# (n_iter - 1) randomly drawn ones.
n_iter = 2

# Fixed baseline configuration; always the first entry of `choices`.
choice_0 = {"num_train_epochs": 1.0,
            "max_seq_length": 200,
            "learning_rate": 5e-5,
            "weight_decay": 0.0,
            "adam_epsilon": 1e-8,
            "max_grad_norm": 1.0}

# Candidate values for each searched hyperparameter.
# NOTE(review): np.linspace spaces values linearly, so nine of the ten
# learning_rate candidates are above 0.1 -- confirm this range is intended.
param_grid = {"max_seq_length": range(50, 210, 10),
              "num_train_epochs": np.linspace(1, 2.5, 10),
              "learning_rate": np.linspace(0.00005, 1, 10),
              "weight_decay": np.linspace(0, 0.1, 10),
              "adam_epsilon": np.linspace(1e-8, 0.1, 10),
              "max_grad_norm": np.linspace(0.00005, 1, 10)}

# Seed the global NumPy generator so the drawn configurations are repeatable.
np.random.seed(random_state)

# One independent draw per hyperparameter, in param_grid order, for each of the
# remaining (n_iter - 1) configurations.
choices = [choice_0]
choices.extend(
    {name: np.random.choice(candidates)
     for name, candidates in param_grid.items()}
    for _ in range(n_iter - 1)
)

In [7]:

#     param_grid = {"max_seq_length": range(50, 210, 10),
#                   "num_train_epochs": np.linspace(1, 2.5, 10),
#                   "learning_rate": np.linspace(0.00005, 1, 10),
#                   "weight_decay": np.linspace(0, 0.1, 10),
#                   "adam_epsilon": np.linspace(1e-8, 0.1, 10),
#                   "max_grad_norm": np.linspace(0.00005, 1, 10)}

#     np.random.seed(random_state)
#     choices = []

#     for i in range(n_iter):
#         hyper_choice = {}
#         for k in param_grid:
#             hyper_choice[k] = np.random.choice(param_grid[k])
#         choices.append(hyper_choice)

#     # Search

#     all_accs = []
#     all_train_times = []
#     init_search = time()

#     for hyper_choice in tqdm(choices):
#         hyperparams = basic_hyperparams.copy()
#         hyperparams.update(hyper_choice)
#         model = BertWrapper(hyperparams)
#         init = time()
#         model.fit(train)
#         train_time = time() - init
#         result = model.get_results(dev, mode="dev")
#         acc = result.indicator.mean()
#         all_accs.append(acc)
#         all_train_times.append(train_time)
#         clean_folder(folder)

#     search_time = time() - init_search
#     search_time = search_time / 3600

#     # Store Results

#     best_id = np.argmax(all_accs)
#     best_score = all_accs[best_id]
#     param_df = pd.DataFrame(choices[best_id], index=[0])

#     dict_ = {"search_random_state": [random_state],
#              "number_of_search_trails": [n_iter],
#              "expected_val_score": [np.mean(all_accs)],
#              "best_val_score": [best_score],
#              "mean_fit_time": [np.mean(all_train_times) / 3600],
#              "search_time": [search_time]}
#     search_results = pd.DataFrame(dict_)
#     search_results = pd.merge(
#         search_results,
#         param_df,
#         left_index=True,
#         right_index=True)
#     path = output_dir_name + "/search_{}.csv".format(random_state)
#     clean_folder_log(output_dir_name)
#     search_results.to_csv(path, index=False)


# if __name__ == '__main__':

#     parser = argparse.ArgumentParser()

#     parser.add_argument('random_state',
#                         type=int,
#                         help='random_state')

#     parser.add_argument('n_iter',
#                         type=int,
#                         help='number of iterations')

#     parser.add_argument('n_cores',
#                         type=int,
#                         help='number of cores')
#     args = parser.parse_args()

#     folder = "snli"
#     train_path = "data/{}/train_sample.csv".format(folder)
#     dev_path = "data/{}/dev.csv".format(folder)
#     output_dir_name = "hyperparams/bert_base_snli"

#     search(train_path=train_path,
#            dev_path=dev_path,
#            random_state=args.random_state,
#            folder=folder,
#            n_iter=args.n_iter,
#            n_cores=args.n_cores,
#            output_dir_name=output_dir_name,
#            verbose=True)


In [21]:
# Check the "{:.1%}" format spec: the value is shown as a percentage with one
# decimal place, so 0.3 renders as "30.0%".
print("***** acc = {:.1%} *****\n".format(0.3))

***** acc = 30.0% *****

