In [1]:
import json
import pprint

import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [2]:

# Single source of truth for the checkpoint id so the tokenizer and the model
# are always loaded from the same repository.
MODEL_NAME = "microsoft/tapex-large-finetuned-tabfact"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
# Put the model in evaluation mode; eval() returns the model, so its repr is
# echoed as this cell's output.
model.eval()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

You passed `num_labels=3` which is incompatible to the `id2label` map of length `2`.


pytorch_model.bin:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

BartForSequenceClassification(
  (model): BartModel(
    (shared): BartScaledWordEmbedding(50265, 1024, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): BartScaledWordEmbedding(50265, 1024, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 1024)
      (layers): ModuleList(
        (0-11): 12 x BartEncoderLayer(
          (self_attn): BartAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (fc2): Linear(in_features=4096, out_features=1024, bias=True)
       

In [3]:
# Show the checkpoint's class-index -> label-name mapping; tapex_predict
# compares the predicted label name against "Entailed".
print(model.config.id2label)


{0: 'Refused', 1: 'Entailed'}


In [10]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

# Move the model to the chosen device; the returned model is echoed as the
# cell output.
model.to(device)

Using device: cuda


BartForSequenceClassification(
  (model): BartModel(
    (shared): BartScaledWordEmbedding(50265, 1024, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): BartScaledWordEmbedding(50265, 1024, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 1024)
      (layers): ModuleList(
        (0-11): 12 x BartEncoderLayer(
          (self_attn): BartAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (fc2): Linear(in_features=4096, out_features=1024, bias=True)
       

In [11]:
def table_str_to_df(table_str):
    """Parse a flattened table string into a pandas DataFrame.

    Surrounding whitespace is stripped, rows are separated by newlines and
    cells within a row by "#". The first row supplies the column names and
    every remaining row becomes a data row. Cell values are left as strings.

    Args:
        table_str: The flattened table text.

    Returns:
        A DataFrame whose columns are the header cells.
    """
    lines = table_str.strip().split("\n")
    column_names = lines[0].split("#")
    body = [line.split("#") for line in lines[1:]]
    return pd.DataFrame(body, columns=column_names)

def tapex_predict(table_str, statement):
    """Predict whether `statement` is entailed by the table in `table_str`.

    The table string is parsed with `table_str_to_df`, encoded together with
    the statement by the module-level `tokenizer` (truncation enabled), moved
    to the module-level `device`, and scored by the module-level `model` with
    gradient tracking disabled.

    Args:
        table_str: Flattened table text accepted by `table_str_to_df`.
        statement: The claim to check against the table.

    Returns:
        1 if the highest-scoring class is named "Entailed" in
        `model.config.id2label`, otherwise 0.
    """
    inputs = tokenizer(
        table=table_str_to_df(table_str),
        query=statement,
        truncation=True,
        return_tensors="pt",
    )
    # Move every encoded tensor onto the device used for inference.
    inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

    with torch.no_grad():
        logits = model(**inputs).logits

    # .item() requires a single prediction, i.e. one (table, statement) pair per call.
    best_class = logits.argmax(dim=-1).item()
    return int(model.config.id2label[best_class] == "Entailed")

In [12]:
# Locations of the three evaluation splits.
dataset_path = "/content/test_examples_with_csv_flattened.json"
adversarial_dataset_path = "/content/test_examples_with_csv_adversarial_flattened.json"
paraphrased_dataset_path = "/content/test_examples_with_csv_paraphrased_flattened.json"

# Maps a split name to its file; the evaluation loop iterates in this order.
dataset_paths = dict(
    original=dataset_path,
    adversarial=adversarial_dataset_path,
    paraphrased=paraphrased_dataset_path,
)

# Cap on the number of examples evaluated per split; -1 to run all.
max_runs = -1

In [17]:
# One empty metrics dict per split, filled in by the evaluation loop.
results = {split: {} for split in ('original', 'adversarial', 'paraphrased')}

In [18]:
# Evaluate the classifier on every dataset split and record per-split metrics.
for mode, path in dataset_paths.items():
    # The context manager closes the file promptly (a bare open() leaks the
    # handle); JSON is read explicitly as UTF-8 instead of the locale default.
    with open(path, encoding="utf-8") as dataset_file:
        dataset = json.load(dataset_file)

    # A non-negative max_runs caps the number of examples per split; any
    # negative value (e.g. -1) evaluates the whole split.
    examples = dataset if max_runs < 0 else dataset[:max_runs]

    correct = 0
    total = 0

    for n, ex in enumerate(examples, start=1):
        pred = tapex_predict(ex["table"], ex["statement"])
        gold = ex["label"]

        correct += (pred == gold)
        total += 1

        # Progress is reported against the number of examples actually
        # scheduled, so a capped run shows a reachable denominator.
        if n % 100 == 0:
            print(f"Mode: {mode}, Done: {n}/{len(examples)}")

        if n % 1000 == 0:
            print(f"Accuracy: {correct/total*100:.2f}%")

    # Accuracy is undefined when nothing was evaluated (empty split or
    # max_runs == 0); report NaN instead of raising ZeroDivisionError.
    accuracy = correct / total * 100 if total else float("nan")

    results[mode] = {"correct": correct, "total": total, "accuracy": accuracy}

    print(f"Mode: {mode}, Final Accuracy: {accuracy:.2f}%")


Mode: original, Done: 100/12779
Mode: original, Done: 200/12779
Mode: original, Done: 300/12779
Mode: original, Done: 400/12779
Mode: original, Done: 500/12779
Mode: original, Done: 600/12779
Mode: original, Done: 700/12779
Mode: original, Done: 800/12779
Mode: original, Done: 900/12779
Mode: original, Done: 1000/12779
Accuracy: 76.30%
Mode: original, Done: 1100/12779
Mode: original, Done: 1200/12779
Mode: original, Done: 1300/12779
Mode: original, Done: 1400/12779
Mode: original, Done: 1500/12779
Mode: original, Done: 1600/12779
Mode: original, Done: 1700/12779
Mode: original, Done: 1800/12779
Mode: original, Done: 1900/12779
Mode: original, Done: 2000/12779
Accuracy: 77.90%
Mode: original, Done: 2100/12779
Mode: original, Done: 2200/12779
Mode: original, Done: 2300/12779
Mode: original, Done: 2400/12779
Mode: original, Done: 2500/12779
Mode: original, Done: 2600/12779
Mode: original, Done: 2700/12779
Mode: original, Done: 2800/12779
Mode: original, Done: 2900/12779
Mode: original, Do

In [19]:
# Raw dump of the per-split metrics; "accuracy" is stored as a percentage.
print(results)

{'original': {'correct': 10662, 'total': 12779, 'accuracy': 83.43375851005555}, 'adversarial': {'correct': 6615, 'total': 10928, 'accuracy': 60.53257686676427}, 'paraphrased': {'correct': 10063, 'total': 12779, 'accuracy': 78.74638078096878}}


In [20]:
# Pretty-print the per-split metrics (pprint sorts dict keys alphabetically by
# default, so the splits are not shown in evaluation order).
pprint.pprint(results)

{'adversarial': {'accuracy': 60.53257686676427,
                 'correct': 6615,
                 'total': 10928},
 'original': {'accuracy': 83.43375851005555, 'correct': 10662, 'total': 12779},
 'paraphrased': {'accuracy': 78.74638078096878,
                 'correct': 10063,
                 'total': 12779}}
