In [1]:
from transformers import BertForSequenceClassification
from lr.models.transformers.processor import clean_df
from lr.text_processing.transformations.wordnet import path_base_transformation
from lr.stats.h_testing import h_test_transformer_trained_model
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from time import time
import shutil
import os
from lr.models.transformers.BertWrapper import BertWrapper


## Load model

In [2]:
# Checkpoint directory handed to BertWrapper.load() in the testing cell.
# NOTE(review): the directory name encodes dgp_seed_185 / random_state_27, while
# the testing cell labels its results with dgp_seed = 13 / random_state = 47 --
# confirm that evaluating this checkpoint under those labels is intended.
save_path = "bert_p_h_rho_1p00_dgp_seed_185_random_state_27/checkpoint-1500/"
# Direct loading alternative, kept for reference (the notebook uses BertWrapper instead):
# saved_model = BertForSequenceClassification.from_pretrained(save_path)

## Perform Testing

In [3]:
# Run configuration

folder = "snli"
result_folder = "results/snli/bert/sin_p_h/"
transformation_name = "wordnet sin tranformation p and h"
n_cores = 7
rho = 1.0
dgp_seed = 13
random_state = 47

# Run identifier built from rho and the two seeds; every "." is swapped for "p"
# so the identifier contains no dots (rho 1.00 -> "1p00").
name_template = "rho_{:.2f}_dgp_seed_{}_random_state_{}"
name = name_template.format(rho, dgp_seed, random_state).replace(".", "p")
output_dir_name = "_".join(["bert_p_h", name])

# Data: read the dev split for the configured dataset folder, then clean it.

dev_csv_path = "data/{}/dev.csv".format(folder)
dev_raw = pd.read_csv(dev_csv_path)

print("clean dev")
# dev_o is the cleaned, untransformed dev set used by the cells below.
dev_o = clean_df(dev_raw, n_cores=n_cores)

# Transformations

# CSV files passed as the second argument of path_base_transformation.
train_path_mod = "data/{}/train_p_h_syn_noun.csv".format(folder)
dev_path_mod = "data/{}/dev_p_h_syn_noun.csv".format(folder)


def train_trans(df):
    """Apply path_base_transformation to ``df`` using ``train_path_mod``."""
    return path_base_transformation(df, train_path_mod)


def dev_trans(df):
    """Apply path_base_transformation to ``df`` using ``dev_path_mod``."""
    return path_base_transformation(df, dev_path_mod)


print("transform dev")
# dev_t is the transformed counterpart of dev_o.
dev_t = dev_trans(dev_o)

# Hyperparams
#
# Settings dictionary handed to both BertWrapper and
# h_test_transformer_trained_model in the cell's final statements.
# NOTE(review): "device" is "cpu" while "no_cuda" is False and "n_gpu" is 1 --
# presumably the wrapper resolves which one wins; confirm against BertWrapper.

hyperparams = {"local_rank": -1,
               "max_seq_length": 200,
               "overwrite_cache": False,
               "num_train_epochs": 1.0,
               "per_gpu_train_batch_size": 32,
               "per_gpu_eval_batch_size": 50,
               "gradient_accumulation_steps": 1,
               "learning_rate": 5e-5,
               "weight_decay": 0.0,
               "adam_epsilon": 1e-8,
               "max_grad_norm": 1.0,
               "max_steps": 1500,
               "warmup_steps": 0,
               "save_steps": 250,
               "no_cuda": False,
               "n_gpu": 1,
               "data_set_name": folder,
               "transformation_name": transformation_name,
               "number_of_simulations": 1000,
               "rho": rho,
               "model_name_or_path": "bert",
               "output_dir": output_dir_name,
               "random_state": random_state,
               "dgp_seed": dgp_seed,
               "fp16": False,
               # Apex AMP levels are "O0".."O3" with a capital letter O; the
               # previous value "01" (digit zero) is not a valid level and
               # would be rejected as soon as fp16 is switched on.
               "fp16_opt_level": "O1",
               "device": "cpu",
               "verbose": True,
               "model_type": "bert",
               "pad_on_left": False,
               "pad_token": 0,
               "n_cores": n_cores,
               "eval_sample_size": 200,
               "pad_token_segment_id": 0,
               "mask_padding_with_zero": True,
               "base_path": "data/{}/cached_".format(folder)}


# Build the wrapper from the hyperparameters and load the checkpoint at save_path.
saved_model = BertWrapper(hyperparams)
saved_model.load(save_path)

print("testing")

# Run the test routine on the original (dev_o) and transformed (dev_t) dev sets
# with the loaded model; the returned frame is displayed further down.
test_results = h_test_transformer_trained_model(
    df_dev=dev_o,
    df_dev_t=dev_t,
    transformer=saved_model,
    hyperparams=hyperparams,
)

clean dev
transform dev
testing
eval path =  data/snli/cached_test_200


Evaluating: 100%|██████████| 197/197 [20:27<00:00,  6.23s/it]


eval path =  data/snli/cached_test_t_200


Evaluating: 100%|██████████| 197/197 [20:28<00:00,  6.24s/it]


In [14]:
from IPython.display import display, HTML 

def show_df(df):
    """Display ``df`` as an HTML table, omitting columns whose name contains "boot"."""
    visible_columns = []
    for column in df.columns:
        # str.find returns a negative index when the substring is absent.
        if column.find("boot") < 0:
            visible_columns.append(column)
    table_html = df[visible_columns].to_html()
    display(HTML(table_html))
    
# Timings typed in by hand rather than measured in this notebook.
# NOTE(review): units are not stated here, and the stored output of this cell
# shows 5.238566 / 5.939505, which differs from these literals -- the saved
# output looks stale; re-run or confirm which values are correct.
manual_timings = {"training_time": 5.233223, "test_time": 5.933914}
for column_name, elapsed in manual_timings.items():
    test_results.loc[0, column_name] = elapsed

show_df(test_results)

Unnamed: 0,data,model,transformation,rho,dgp_seed,random_state,number_of_simulations,validation_accuracy,transformed_validation_accuracy,accuracy_difference,test_size,standart_error,observable_t_stats,p_value,training_time,test_time
0,snli,bert,wordnet sin tranformation p and h,1.0,13,47,1000,0.837533,0.83103,0.006503,9842,0.257302,2.507234,0.004,5.238566,5.939505


In [18]:
# result_path  = "results/snli/bert/sin_p_h/new/" + name + ".csv"
# test_results.to_csv(result_path, index=False)