In [6]:
from lr.models.transformers.processor import clean_df
from lr.models.transformers.train_functions import set_seed
from lr.models.transformers.BertWrapper import BertWrapper
from lr.text_processing.transformations.wordnet import path_base_transformation

from lr.stats.h_testing import *
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from time import time
import shutil
import os

## Data

In [3]:
# Read the SNLI train/dev CSVs and keep only the first 1000 rows of each.
train = pd.read_csv("data/snli/train.csv").head(1000)
dev_o = pd.read_csv("data/snli/dev.csv").head(1000)

# Pass each subset through the project's clean_df helper (n_cores=8).
train, dev_o = clean_df(train, n_cores=8), clean_df(dev_o, n_cores=8)


## Transformations

In [19]:
# CSV paths handed to path_base_transformation for the train and dev splits.
train_path_mod = "data/snli/train_p_h_syn_noun.csv"
dev_path_mod = "data/snli/dev_p_h_syn_noun.csv"


def train_trans(df):
    """Return path_base_transformation(df, train_path_mod).

    The path is read from the module-level ``train_path_mod`` at call time.
    """
    return path_base_transformation(df, train_path_mod)


def dev_trans(df):
    """Return path_base_transformation(df, dev_path_mod).

    The path is read from the module-level ``dev_path_mod`` at call time.
    """
    return path_base_transformation(df, dev_path_mod)

## Transforming the validation DataFrame

In [7]:
# Transformed counterpart of the cleaned dev set; passed later as df_dev_t.
dev_t = dev_trans(df=dev_o)

## Hyperparams

In [23]:
# Dataset folder name; only used here to build "base_path" below.
folder = "snli"

# Configuration dict handed to h_test_transformer (and read by set_seed via
# "random_state"). Key meanings are defined by the project's BertWrapper /
# training code, which is not shown in this notebook.
hyperparams = {"local_rank": -1,
               "max_seq_length": 200,
               "overwrite_cache": False,
               "num_train_epochs": 1.0,
               "per_gpu_train_batch_size": 32,
               "per_gpu_eval_batch_size": 50,
               "gradient_accumulation_steps": 1,
               "learning_rate": 5e-5,
               "weight_decay": 0.0,
               "adam_epsilon": 1e-8,
               "max_grad_norm": 1.0,
               "max_steps": 10,
               "warmup_steps": 0,
               "save_steps": 3,
               # NOTE(review): "no_cuda": False and "n_gpu": 1 sit next to
               # "device": "cpu" below -- confirm which one the wrapper honours.
               "no_cuda": False,
               "n_gpu": 1,
               "model_name_or_path": "bert",
               "output_dir": "bert_draft",
               "random_state": 42,
               "fp16": False,
               # Apex AMP optimisation levels are spelled with the letter O
               # ("O0".."O3"); the previous value "01" used the digit zero and
               # would be rejected as soon as fp16 is switched on.
               "fp16_opt_level": "O1",
               "device": "cpu",
               "verbose": True,
               "model_type": "bert",
               "pad_on_left": False,
               "pad_token": 0,
               # NOTE(review): the data cell calls clean_df with n_cores=8,
               # while 7 is configured here -- confirm the mismatch is intended.
               "n_cores": 7,
               "eval_sample_size": 100,
               "pad_token_segment_id": 0,
               "mask_padding_with_zero": True,
               # Evaluates to "data/snli/cached_" for folder == "snli".
               "base_path": "data/{}/cached_".format(folder)}

## Sampling one training set with the DGP

In [21]:
# Call the project's set_seed with the configured random_state before sampling.
# NOTE(review): the second argument (0) is presumably a GPU count -- confirm.
set_seed(hyperparams["random_state"], 0)

# The original cell passed an undefined name `transformation`, which raises
# NameError on a fresh kernel (Restart & Run All). `train_trans`, defined in
# the "Transformations" section, is the train-side transformation and was
# otherwise unused.
dgp = DGP(train, train_trans, rho=0.3)

# One sample from the DGP; used below as df_train.
train_ = dgp.sample()

## Testing

In [24]:
# Run the project's h_test_transformer on the sampled training set, the
# original dev set and its transformed counterpart.
# NOTE(review): the displayed result has boot_t_1 ... boot_t_1000 columns,
# so S=1000 presumably sets the number of bootstrap statistics -- confirm.
test_results = h_test_transformer(
    df_train=train_,
    df_dev=dev_o,
    df_dev_t=dev_t,
    ModelWrapper=BertWrapper,
    hyperparams=hyperparams,
    S=1000,
)

Epoch:   0%|          | 0/1 [00:00<?, ?it/s]
Iteration:   0%|          | 0/29 [00:00<?, ?it/s][A
Iteration:   3%|▎         | 1/29 [00:12<05:48, 12.44s/it][A
Iteration:   7%|▋         | 2/29 [00:24<05:35, 12.42s/it][A

Evaluating:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Evaluating:  50%|█████     | 1/2 [00:05<00:05,  5.77s/it][A[A

Evaluating: 100%|██████████| 2/2 [00:11<00:00,  5.79s/it][A[A


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Evaluating:  50%|█████     | 1/2 [00:05<00:05,  5.82s/it][A[A

Evaluating: 100%|██████████| 2/2 [00:11<00:00,  5.82s/it][A[A

Iteration:  10%|█         | 3/29 [01:00<08:28, 19.54s/it][A
Iteration:  14%|█▍        | 4/29 [01:13<07:14, 17.38s/it][A
Iteration:  17%|█▋        | 5/29 [01:25<06:20, 15.87s/it][A

Evaluating:   0%|          | 0/2 [00:00<?, ?it/s][A[A

Evaluating:  50%|█████     | 1/2 [00:05<00:05,  5.78s/it][A[A

Evaluating: 100%|██████████| 2/2 [00:11<00:00,  5.80s/it][A[A


Evaluating:   0%|          | 0/2

eval path =  data/snli/cached_test_200


Evaluating: 100%|██████████| 20/20 [01:54<00:00,  5.74s/it]
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]

eval path =  data/snli/cached_test_t_200


Evaluating: 100%|██████████| 20/20 [01:54<00:00,  5.75s/it]


In [25]:
# Display the result returned by h_test_transformer.
test_results

Unnamed: 0,validation_accuracy,transformed_validation_accuracy,observable_t_stats,p_value,training_time,test_time,boot_t_1,boot_t_2,boot_t_3,boot_t_4,...,boot_t_991,boot_t_992,boot_t_993,boot_t_994,boot_t_995,boot_t_996,boot_t_997,boot_t_998,boot_t_999,boot_t_1000
0,0.346939,0.345918,0.218223,0.808,207.458956,450.076278,-1.14785,1.091753,-1.177529,-2.069247,...,-0.471458,0.208519,-0.277361,-0.208519,-0.242543,-0.894792,2.069247,0.0,0.943237,-0.727803


In [None]:
# NOTE(review): duplicate of the previous display cell and never executed
# (In [None]); candidate for removal.
test_results