In [1]:
import os
import numpy as np
import pandas as pd
from time import time
from lr.models.transformers.BertWrapper import BertWrapper
from lr.text_processing.util import pre_process_nli_df
from lr.training.util import get_ternary_label, filter_df_by_label
from lr.text_processing.transformations.wordnet import path_base_transformation
from lr.stats.h_testing import DGP
from lr.stats.h_testing import get_matched_results_transformers
from lr.stats.h_testing import get_paired_t_statistic
from lr.stats.h_testing import get_cochran_statistic
from lr.stats.h_testing import get_boots_series_under_H0
from lr.stats.h_testing import get_boot_paired_t_p_value
from lr.stats.h_testing import get_boot_cochran_p_value

In [2]:
## Variables

In [3]:
rho = 0.4
search_random_state =  52
dgp_random_state = 34
train_random_state = 56
boot_random_state = 562
n_cores = 2
folder = "snli"
number_of_simulations = 1000
verbose = True
save_steps = 7500

train_path = "data/snli/train.csv"
dev_path = "data/snli/dev.csv"
test_path = "data/snli/test.csv"

train_path_mod = "data/snli/train_p_h_syn_noun.csv"
dev_path_mod = "data/snli/dev_p_h_syn_noun.csv"
test_path_mod = "data/snli/test_p_h_syn_noun.csv"

search_path = "hyperparams/bert_base_snli/search_{}.csv".format(
    search_random_state)
assert os.path.exists(search_path)

transformation_name = "wordnet syn tranformation p and h"
output_raw_result = "raw_results/snli/bert_base/syn_p_h/rho_{:.2f}_results".format(
    rho)
output_raw_result = output_raw_result.replace(".", "p") + ".csv"
output_result = "results/snli/bert_base/syn_p_h/rho_{:.2f}_results".format(rho)
output_result = output_result.replace(".", "p") + ".csv"
output_dir = "results/snli/bert_base/syn_p_h/"

In [4]:
# Get data

In [5]:
init_test = time()

train = pd.read_csv(train_path)
dev = pd.read_csv(dev_path)
test = pd.read_csv(test_path)

train = filter_df_by_label(train.dropna()).reset_index(drop=True)
dev = filter_df_by_label(dev.dropna()).reset_index(drop=True)
test = filter_df_by_label(test.dropna()).reset_index(drop=True)

pre_process_nli_df(train)
pre_process_nli_df(dev)

In [6]:
# Get hyperarams

In [7]:
params_keys = ['num_train_epochs', "max_seq_length",
                "learning_rate", "weight_decay",
                "adam_epsilon", "max_grad_norm"]

hyperparams = {"local_rank": -1,
               "overwrite_cache": False,
               "per_gpu_train_batch_size": 32,
               "per_gpu_eval_batch_size": 50,
               "gradient_accumulation_steps": 1,
               "max_steps": 50, # debug
#                "max_steps": -1,
               "warmup_steps": 0,
               "save_steps": save_steps,
               "no_cuda": False,
               "n_gpu": 1,
               "data_set_name": folder,
               "transformation_name": transformation_name,
               "rho": rho,
               "model_name_or_path": "bert",
               "output_dir": output_dir,
               "fp16": False,
               "fp16_opt_level": "01",
               "device": "cpu",
               "verbose": True,
               "model_type": "bert",
               "pad_on_left": False,
               "pad_token": 0,
               "n_cores": n_cores,
               'eval_sample_size': 200,
               "pad_token_segment_id": 0,
               "mask_padding_with_zero": True,
               "base_path": "data/{}/cached_".format(folder),
               "pretrained_weights": 'bert-base-uncased',
               "number_of_simulations": number_of_simulations,
               "search_random_state": search_random_state,
               "dgp_random_state": dgp_random_state,
               "train_random_state": train_random_state,
               "random_state": train_random_state,
               "boot_random_state": boot_random_state,
               "output_raw_result": output_raw_result,
               "output_result": output_result}

search_results = pd.read_csv(search_path)

for k in params_keys:
    hyperparams[k] = search_results.loc[0, k]

In [8]:
# Set transformed version of the datasets

def train_trans(df): return path_base_transformation(df, train_path_mod)

def dev_trans(df): return path_base_transformation(df, dev_path_mod)

def test_trans(df): return path_base_transformation(df, test_path_mod)

In [9]:
# get_training_sample

train.loc[:, "o_index"] = train.index.values

dgp_train = DGP(data=train,
                transformation=train_trans,
                rho=rho)

train_ = dgp_train.sample_transform(random_state=dgp_random_state)

In [10]:
# Train 

model = BertWrapper(hyperparams)
_, _, train_time = model.fit(train_.sample(1000, random_state=10)) # debug
# _, _, train_time = model.fit(train_)

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
Iteration:   0%|          | 0/25 [00:00<?, ?it/s][A
Iteration:   4%|▍         | 1/25 [00:06<02:31,  6.31s/it][A
Iteration:   8%|▊         | 2/25 [00:12<02:25,  6.32s/it][A
Iteration:  12%|█▏        | 3/25 [00:18<02:18,  6.31s/it][A
Iteration:  16%|█▌        | 4/25 [00:25<02:12,  6.30s/it][A
Iteration:  20%|██        | 5/25 [00:31<02:05,  6.29s/it][A
Iteration:  24%|██▍       | 6/25 [00:37<01:59,  6.29s/it][A
Iteration:  28%|██▊       | 7/25 [00:44<01:53,  6.30s/it][A
Iteration:  32%|███▏      | 8/25 [00:50<01:46,  6.29s/it][A
Iteration:  36%|███▌      | 9/25 [00:56<01:40,  6.29s/it][A
Iteration:  40%|████      | 10/25 [01:02<01:34,  6.29s/it][A
Iteration:  44%|████▍     | 11/25 [01:09<01:28,  6.29s/it][A
Iteration:  48%|████▊     | 12/25 [01:15<01:22,  6.31s/it][A
Iteration:  52%|█████▏    | 13/25 [01:21<01:15,  6.30s/it][A
Iteration:  56%|█████▌    | 14/25 [01:28<01:09,  6.29s/it][A
Iteration:  60%|██████    | 15/25 [01:34<01

In [11]:
# Test set Eval 

In [12]:
test_t = test_trans(test)
test_results = model.get_results(test.iloc[:1000], mode="test")
test_t_results = model.get_results(test_t.iloc[:1000], mode="test_t")

Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]

eval path =  data/snli/cached_test_100


Evaluating: 100%|██████████| 20/20 [00:56<00:00,  2.83s/it]
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]

eval path =  data/snli/cached_test_t_100


Evaluating: 100%|██████████| 20/20 [00:56<00:00,  2.83s/it]


In [13]:
# Getting statistics

In [14]:
m_results = get_matched_results_transformers(test_results, test_t_results)

test_acc = m_results.A.mean()
transformed_test_acc = m_results.B.mean()
t_obs, acc_diff, test_size, standart_error = get_paired_t_statistic(
    m_results)
cochran_obs = get_cochran_statistic(m_results)

# get simulations
def get_paired_t(matched_results):
    t_obs, _, _, _ = get_paired_t_statistic(matched_results)
    return t_obs

paired_t_boots = get_boots_series_under_H0(m_results,
                                           get_paired_t,
                                           number_of_simulations,
                                           boot_random_state)

cochran_boots = get_boots_series_under_H0(m_results,
                                          get_cochran_statistic,
                                          number_of_simulations,
                                          boot_random_state)

paired_t_p_value = get_boot_paired_t_p_value(paired_t_boots, t_obs)

cochran_p_value = get_boot_cochran_p_value(cochran_boots, cochran_obs)

htest_time = time() - init_test

In [15]:
# Aggregate all results

dict_ = {"data": [hyperparams["data_set_name"]],
         "model": [hyperparams["model_name_or_path"]],
         "transformation": [hyperparams["transformation_name"]],
         "rho": [rho],
         "search_random_state": [hyperparams["search_random_state"]],
         "dgp_random_state": [dgp_random_state],
         "train_random_state": [hyperparams["train_random_state"]],
         "boot_random_state": [boot_random_state],
         "number_of_simulations": [number_of_simulations],
         "test_accuracy": [test_acc],
         "transformed_test_accuracy": [transformed_test_acc],
         "accuracy_difference": [acc_diff],
         "test_size": [test_size],
         "standart_error": [standart_error],
         "observable_paired_t_stats": [t_obs],
         "paired_t_p_value": [paired_t_p_value],
         "observable_cochran_stats": [cochran_obs],
         "cochran_p_value": [cochran_p_value],
         "training_time": [train_time / 3600],
         "test_time": [htest_time / 3600]}

test_results = pd.DataFrame(dict_)
m_results.to_csv(output_raw_result, index=False)
test_results.to_csv(output_result, index=False)
if verbose:
    print(output_raw_result)
    print(output_result)

raw_results/snli/bert_base/syn_p_h/rho_0p40_results.csv
results/snli/bert_base/syn_p_h/rho_0p40_results.csv


In [16]:
test_results

Unnamed: 0,data,model,transformation,rho,search_random_state,dgp_random_state,train_random_state,boot_random_state,number_of_simulations,test_accuracy,transformed_test_accuracy,accuracy_difference,test_size,standart_error,observable_paired_t_stats,paired_t_p_value,observable_cochran_stats,cochran_p_value,training_time,test_time
0,snli,bert,wordnet syn tranformation p and h,0.4,52,34,56,562,1000,0.636,0.607,0.029,1000,0.325821,2.814619,0.002,7.859813,0.002,0.089271,0.128227
