In [1]:
from lr.models.transformers.processor import clean_df
from lr.models.transformers.train_functions import set_seed
from lr.models.transformers.BertWrapper import BertWrapper
from lr.text_processing.transformations.wordnet import path_base_transformation

from lr.stats.h_testing import *
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from time import time
import shutil
import os

## Data

In [2]:
# Load each SNLI split, keep only its first 1000 rows, and pass the truncated
# frame through clean_df (called with n_cores=8).
train = clean_df(pd.read_csv("data/snli/train.csv").head(1000), n_cores=8)
dev_o = clean_df(pd.read_csv("data/snli/dev.csv").head(1000), n_cores=8)


## Transformations

In [3]:
# CSV paths forwarded to path_base_transformation, one per split.
train_path_mod = "data/snli/train_p_h_syn_noun.csv"
dev_path_mod = "data/snli/dev_p_h_syn_noun.csv"


def train_trans(df):
    """Return ``path_base_transformation(df, train_path_mod)``.

    The path is read from the module-level ``train_path_mod`` at call time.
    """
    transformed = path_base_transformation(df, train_path_mod)
    return transformed


def dev_trans(df):
    """Return ``path_base_transformation(df, dev_path_mod)``.

    The path is read from the module-level ``dev_path_mod`` at call time.
    """
    transformed = path_base_transformation(df, dev_path_mod)
    return transformed

## Transforming the validation DataFrame

In [4]:
# Apply the dev-split transformation to the cleaned dev frame. The result is
# later passed to h_test_transformer as df_dev_t, alongside the untransformed dev_o.
dev_t = dev_trans(dev_o)

## Hyperparameters

In [5]:
# Dataset folder name; reused for "data_set_name" and to build "base_path".
folder = "snli"

# Single configuration dict passed to h_test_transformer (and through it to the
# model wrapper). Keys are kept exactly as the downstream code expects them.
hyperparams = {"local_rank": -1,
               "max_seq_length": 200,
               "overwrite_cache": False,
               "num_train_epochs": 1.0,
               "per_gpu_train_batch_size": 32,
               "per_gpu_eval_batch_size": 50,
               "gradient_accumulation_steps": 1,
               "learning_rate": 5e-5,
               "weight_decay": 0.0,
               "adam_epsilon": 1e-8,
               "max_grad_norm": 1.0,
               # Deliberately tiny values: this notebook is a quick smoke run.
               "max_steps": 4,
               "warmup_steps": 0,
               "save_steps": 3,
               "no_cuda": False,
               "n_gpu": 1,
               "data_set_name": folder,
               # NOTE(review): the label contains typos ("sin", "tranformation") but is
               # kept verbatim because it is written into the results table.
               "transformation_name": "wordnet sin tranformation p and h",
               "rho": 0.7,
               "model_name_or_path": "bert",
               "output_dir": "bert_draft",
               "number_of_simulations": 1000,
               "random_state": 42,
               "dgp_seed": 123,
               "fp16": False,
               # AMP optimisation levels are spelled with the letter "O" (O0..O3);
               # the previous value "01" (digit zero) is not a valid level.
               "fp16_opt_level": "O1",
               "device": "cpu",
               "verbose": True,
               "model_type": "bert",
               "pad_on_left": False,
               "pad_token": 0,
               "n_cores": 7,
               'eval_sample_size': 100,
               "pad_token_segment_id": 0,
               "mask_padding_with_zero": True,
               "base_path": "data/{}/cached_".format(folder)}

## Sampling one training set from the DGP

In [6]:
# Seed with hyperparams["dgp_seed"] before sampling; the order matters because
# the draw below presumably depends on the RNG state set here.
# NOTE(review): the meaning of the second argument (0) is not visible in this
# notebook - confirm against set_seed's signature.
set_seed(hyperparams["dgp_seed"], 0)
# DGP is not imported by name; presumably it comes from the wildcard import of
# lr.stats.h_testing. It is built from the cleaned training frame, the
# training-split transformation, and rho.
dgp = DGP(train, train_trans, rho=hyperparams["rho"])
# train_ is the sampled/transformed training frame passed to h_test_transformer as df_train.
train_ = dgp.sample_transform()

## Testing

In [7]:
# Run the hypothesis test: train_ is the training frame, dev_o / dev_t are the
# original and transformed dev frames, and BertWrapper is the model wrapper.
# The returned table (displayed in the next cell) has the validation accuracies,
# the observed t statistic, and one boot_t_* column per simulation.
test_results = h_test_transformer(
    df_train=train_,
    df_dev=dev_o,
    df_dev_t=dev_t,
    ModelWrapper=BertWrapper,
    hyperparams=hyperparams,
)

Epoch:   0%|          | 0/1 [00:00<?, ?it/s]
Iteration:   0%|          | 0/29 [00:00<?, ?it/s][A
Iteration:   3%|▎         | 1/29 [00:12<05:48, 12.43s/it][A
Iteration:   7%|▋         | 2/29 [00:24<05:35, 12.42s/it][A

Evaluating:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Evaluating:  50%|█████     | 1/2 [00:05<00:05,  5.77s/it][A[A

Evaluating: 100%|██████████| 2/2 [00:11<00:00,  5.81s/it][A[A


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Evaluating:  50%|█████     | 1/2 [00:05<00:05,  5.81s/it][A[A

Evaluating: 100%|██████████| 2/2 [00:11<00:00,  5.81s/it][A[A

Iteration:  10%|█         | 3/29 [01:01<08:28, 19.55s/it][A
Iteration:  14%|█▍        | 4/29 [01:25<08:55, 21.44s/it][A
Epoch:   0%|          | 0/1 [01:25<?, ?it/s]
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]

eval path =  data/snli/cached_test_200


Evaluating: 100%|██████████| 20/20 [01:54<00:00,  5.73s/it]
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]

eval path =  data/snli/cached_test_t_200


Evaluating: 100%|██████████| 20/20 [01:54<00:00,  5.73s/it]


In [8]:
# Display the results table returned by h_test_transformer (bare last expression -> rich repr).
test_results

Unnamed: 0,data,model,transformation,rho,dgp_seed,random_state,number_of_simulations,validation_accuracy,transformed_validation_accuracy,observable_t_stats,...,boot_t_991,boot_t_992,boot_t_993,boot_t_994,boot_t_995,boot_t_996,boot_t_997,boot_t_998,boot_t_999,boot_t_1000
0,snli,bert,wordnet sin tranformation p and h,0.7,123,42,1000,0.347959,0.35,-0.471458,...,0.0,-0.471458,-0.894792,-0.774834,1.635219,-1.342875,0.0,2.693193,-0.5346,0.277361


In [12]:
# Run the project's test script via the IPython shell escape; its unittest output is shown below.
! bash test.sh

test_save (__main__.BasicLrTraining) ... ok
test_train_binary_BOW (__main__.BasicLrTraining) ... ok
test_train_ternary_Tfidf (__main__.BasicLrTraining) ... ok

----------------------------------------------------------------------
Ran 3 tests in 0.204s

OK
test_xgb_training (__main__.BasicXGBTraining) ... ok

----------------------------------------------------------------------
Ran 1 test in 1.528s

OK
test_syn_transformation (__main__.SynTrans) ... ok

----------------------------------------------------------------------
Ran 1 test in 3.667s

OK
test_random_seed (__main__.Htesting) ... ok

----------------------------------------------------------------------
Ran 1 test in 4.753s

OK
test_bert_training (__main__.BasicBertTraining) ... ok

----------------------------------------------------------------------
Ran 1 test in 153.030s

OK
test_bert_h_testing (__main__.BertHTesting) ... ok

----------------------------------------------------------------------
Ran 1 test in 143.853s

OK
