In [1]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from lr.models.transformers.processor import clean_df
from lr.models.transformers.BertWrapper import BertWrapper
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from time import time
import shutil
import os
import argparse

In [2]:
# Configuration: everything a reader might want to tune lives in this cell.

# Name of the data sub-folder under data/; it is interpolated into the
# train CSV path and the cache base path below.
folder = "snli"  # switch to "toy" for the small dataset

n_cores = 8          # forwarded to clean_df and to the model hyperparameters
rho = 0
random_state = None  # None -> the search loop draws a fresh seed per trial
dgp_seed = None
n_inter = 50         # number of random-search trials

# Directory name handed to the model as "output_dir"; the best parameters
# found by the search are also written there.
output_dir_name = "bert_base_snli_search"

# Baseline hyperparameters handed to BertWrapper. The search loop copies this
# dict for every trial and overrides the keys listed in param_grid
# (max_seq_length, num_train_epochs, learning_rate, weight_decay,
# adam_epsilon, max_grad_norm), so the values given for those keys here act
# only as defaults.
basic_hyperparams = {
    # --- training schedule / optimisation ---
    "local_rank": -1,
    "max_seq_length": 10,
    "overwrite_cache": False,
    "num_train_epochs": 1.0,
    "per_gpu_train_batch_size": 32,
    "per_gpu_eval_batch_size": 50,
    "gradient_accumulation_steps": 1,
    "learning_rate": 5e-5,
    "weight_decay": 0.0,
    "adam_epsilon": 1e-8,
    "max_grad_norm": 1.0,
    "max_steps": -1,
    "warmup_steps": 0,
    "save_steps": 8580,
    # --- hardware ---
    # NOTE(review): "no_cuda" is False and "n_gpu" is 1 while "device" below
    # is "cpu" -- confirm which of these BertWrapper actually honours.
    "no_cuda": False,
    "n_gpu": 1,
    # --- experiment bookkeeping ---
    "data_set_name": folder,
    "transformation_name": None,
    "number_of_simulations": 1000,
    "rho": rho,
    "model_name_or_path": "bert",
    "output_dir": output_dir_name,
    "random_state": random_state,
    "dgp_seed": dgp_seed,
    # --- mixed precision ---
    "fp16": False,
    # Apex AMP optimisation levels are spelled with the capital letter O
    # ("O0".."O3"), not the digit zero; "01" would be rejected as soon as
    # fp16 is switched on.
    "fp16_opt_level": "O1",
    "device": "cpu",
    "verbose": True,
    # --- model / tokenisation ---
    "model_type": "bert",
    "pad_on_left": False,
    "pad_token": 0,
    "n_cores": n_cores,
    "eval_sample_size": 200,
    "pad_token_segment_id": 0,
    "mask_padding_with_zero": True,
    "base_path": "data/{}/cached_".format(folder),
    "pretrained_weights": "bert-base-uncased",
}

In [3]:
# Load the training CSV for the selected data folder, subsample it, and split
# the subsample into a training part and a held-out dev part (80% / 20%).
train = pd.read_csv("data/{}/train.csv".format(folder))

# DataFrame.sample raises ValueError when asked for more rows than the frame
# holds (sampling is without replacement), so cap the request at the number
# of available rows. This keeps the cell usable on small data folders.
sample_size = min(10000, len(train))

# random_state is the notebook-level setting from the configuration cell;
# while it is None both calls keep drawing a fresh random subset, and setting
# it to an integer makes the subsample and the split repeatable.
train = train.sample(sample_size, random_state=random_state)
train, dev_o = train_test_split(train, test_size=0.2, random_state=random_state)

# Both parts go through the project's clean_df preprocessing.
print("clean train")
train = clean_df(train, n_cores=n_cores)

print("clean dev")
dev_o = clean_df(dev_o, n_cores=n_cores)

print("train.shape", train.shape)
print("dev.shape", dev_o.shape)

clean train
clean dev
train.shape (640, 4)
dev.shape (160, 4)


In [4]:
# Candidate values for the random search. The search loop draws one value per
# key, uniformly at random, for every trial; key order is kept as-is because
# it determines the order in which random draws are consumed.
# NOTE(review): learning_rate and max_grad_norm are spread linearly up to 1
# and adam_epsilon up to 0.1 -- confirm these ranges are intended.
param_grid = dict(
    max_seq_length=range(50, 210, 10),
    num_train_epochs=np.linspace(1, 2.5, num=10),
    learning_rate=np.linspace(5e-5, 1, num=100),
    weight_decay=np.linspace(0, 0.1, num=100),
    adam_epsilon=np.linspace(1e-8, 0.1, num=100),
    max_grad_norm=np.linspace(5e-5, 1, num=100),
)

In [6]:
# Random search: run n_inter independent trials. Each trial copies the
# baseline hyperparameters, overrides every key listed in param_grid with a
# uniformly drawn candidate, fits a BertWrapper on `train`, and scores it on
# `dev_o`. The three lists below are index-aligned (one entry per trial).
all_hypers = []  # hyperparameter dict used by each trial
all_times = []   # wall-clock fit time of each trial, in seconds
all_accs = []    # mean of the `indicator` column of the dev results

for i in range(n_inter):
    hyperparams = basic_hyperparams.copy()
    for param, candidates in param_grid.items():
        # np.random.choice returns a NumPy scalar; .item() converts it to the
        # matching native Python type so the stored hyperparameters hold
        # plain int/float values.
        hyperparams[param] = np.random.choice(candidates).item()
    if hyperparams["random_state"] is None:
        # Draw a per-trial seed from [1, 2333233). randint samples the same
        # range as choice(range(...)) without first building a
        # multi-million-element array on every trial.
        hyperparams["random_state"] = int(np.random.randint(1, 2333233))
    model = BertWrapper(hyperparams)
    init = time()
    model.fit(train)
    train_time = time() - init
    result = model.get_results(dev_o, mode="dev")
    # NOTE(review): `indicator` is presumably a per-example correctness
    # flag, making this mean the dev accuracy -- confirm in BertWrapper.
    acc = result.indicator.mean()
    all_hypers.append(hyperparams)
    all_times.append(train_time)
    all_accs.append(acc)
    # Drop the loop-local names before the next trial; the hyperparameter
    # dict itself stays alive through all_hypers.
    del hyperparams, model

Epoch:   0%|          | 0/1 [00:00<?, ?it/s]
Iteration:   0%|          | 0/14 [00:00<?, ?it/s][A
Iteration:   7%|▋         | 1/14 [00:03<00:51,  3.98s/it][A
Iteration:  14%|█▍        | 2/14 [00:07<00:47,  3.96s/it][A
Iteration:  21%|██▏       | 3/14 [00:11<00:43,  3.95s/it][A
Iteration:  29%|██▊       | 4/14 [00:15<00:39,  3.95s/it][A
Iteration:  36%|███▌      | 5/14 [00:19<00:35,  3.95s/it][A
Iteration:  43%|████▎     | 6/14 [00:23<00:31,  3.94s/it][A
Iteration:  50%|█████     | 7/14 [00:27<00:27,  3.95s/it][A
Iteration:  57%|█████▋    | 8/14 [00:31<00:23,  3.95s/it][A
Iteration:  64%|██████▍   | 9/14 [00:35<00:19,  3.95s/it][A
Iteration:  71%|███████▏  | 10/14 [00:39<00:15,  3.94s/it][A
Iteration:  79%|███████▊  | 11/14 [00:43<00:11,  3.95s/it][A
Iteration:  86%|████████▌ | 12/14 [00:47<00:07,  3.95s/it][A
Iteration:  93%|█████████▎| 13/14 [00:51<00:03,  3.95s/it][A
Iteration: 100%|██████████| 14/14 [00:54<00:00,  3.89s/it][A
Epoch: 100%|██████████| 1/1 [00:54<00:00, 5

eval path =  data/toy/cached_dev_60


Evaluating: 100%|██████████| 4/4 [00:05<00:00,  1.33s/it]
Epoch:   0%|          | 0/2 [00:00<?, ?it/s]
Iteration:   0%|          | 0/14 [00:00<?, ?it/s][A
Iteration:   7%|▋         | 1/14 [00:06<01:28,  6.82s/it][A
Iteration:  14%|█▍        | 2/14 [00:13<01:22,  6.84s/it][A
Iteration:  21%|██▏       | 3/14 [00:20<01:15,  6.85s/it][A
Iteration:  29%|██▊       | 4/14 [00:27<01:08,  6.85s/it][A
Iteration:  36%|███▌      | 5/14 [00:34<01:01,  6.85s/it][A
Iteration:  43%|████▎     | 6/14 [00:41<00:54,  6.86s/it][A
Iteration:  50%|█████     | 7/14 [00:48<00:48,  6.87s/it][A
Iteration:  57%|█████▋    | 8/14 [00:54<00:41,  6.87s/it][A
Iteration:  64%|██████▍   | 9/14 [01:01<00:34,  6.87s/it][A
Iteration:  71%|███████▏  | 10/14 [01:08<00:27,  6.87s/it][A
Iteration:  79%|███████▊  | 11/14 [01:15<00:20,  6.88s/it][A
Iteration:  86%|████████▌ | 12/14 [01:22<00:13,  6.88s/it][A
Iteration:  93%|█████████▎| 13/14 [01:29<00:06,  6.88s/it][A
Iteration: 100%|██████████| 14/14 [01:34<00:00,

eval path =  data/toy/cached_dev_110


Evaluating: 100%|██████████| 4/4 [00:09<00:00,  2.49s/it]


In [8]:
# Pick the trial with the highest dev score and write its hyperparameters,
# score, fit time and the number of trials to <output_dir_name>/params.txt.
# np.argmax returns the first index when several trials tie for the best.
i = np.argmax(all_accs)
best_assigment = all_hypers[i]
t_time = all_times[i]
best_score = all_accs[i]

# open(..., "w") does not create missing directories, so make sure the
# output folder exists before writing the summary into it.
os.makedirs(output_dir_name, exist_ok=True)

with open(os.path.join(output_dir_name, "params.txt"), "w") as params_file:
    # One "key = value" line per hyperparameter of the best trial.
    for key in best_assigment:
        params_file.write("{} = {}\n".format(key, best_assigment[key]))
    params_file.write("\nbest_acc = {:.1%}".format(best_score))
    params_file.write("\ntime = {:.1f} s".format(t_time))
    params_file.write("\nnumber of search trials = {}".format(n_inter))