In [34]:
from tango import Workspace
workspace = Workspace.from_url("local:///root/workspace/read/temp/training")
result = workspace.step_result_for_run("seed_infotab_pipeline", "evaluation")

In [35]:
result["result"]

{'document_retrieval': {'accuracy': 0.8199999928474426,
  'recall': 0.8199999928474426,
  'precision': 0.25049999356269836,
  'mrr': 0.44466668367385864},
 'sentence_selection': {'accuracy': 0.0,
  'recall': 0.0,
  'precision': 0.0,
  'mrr': 0.0},
 'table_verification': {'accuracy': 0.6499999761581421,
  'f1': 0.1860465109348297,
  'precision': 0.23529411852359772,
  'recall': 0.1538461595773697}}

In [37]:
result["failed_cases"]["document_retrieval"][0]

[{'sentence': "Jujutsu didn't originate in North America.",
  'table': [{'title': ['Jujutsu'],
    'Also known as': ['Jiu-Jitsu',
     'Jiujitsu',
     'Ju-Jitsu',
     'Jujitsu',
     'Ju-Jutsu'],
    'Focus': ['Hybrid'],
    'Famous practitioners': ['Kano Jigoro',
     'Seishiro Okazaki',
     'Takeda Sokaku',
     'Morihei Ueshiba',
     'Wally Jay',
     'Gokor Chivichyan',
     'Gene LeBell',
     'Lee Hasdell',
     'Carlos Newton',
     'Benny Urquidez',
     'Tim Kennedy',
     'Shonie Carter',
     'Bill Underwood',
     'Harold Howard',
     'Michel van Rijt',
     'Moshe Feldenkrais',
     'Dong Jin Kim',
     'Mikinosuke Kawaishi',
     'Jean-Yves Theriault',
     'Alexis Davis',
     'Stephen Thompson',
     'Serge Mol',
     'Mikhail Zayats',
     'Matt Larsen'],
    'Parenthood': ['Various Japanese martial arts'],
    'Descendant arts': ['Judo',
     'Sambo',
     'Brazilian jiu-jitsu (via Judo)',
     'Aikido, Hapkido',
     'Catch Wrestling',
     'Bartitsu',
     'Ger

In [3]:
import torch
import json
from tango import Step
from tango.common.dataset_dict import DatasetDict
import pandas as pd
from transformers import TapasTokenizer

class TableDataset(torch.utils.data.Dataset):
    def __init__(self, df, tokenizer):
        self.df = df
        self.tokenizer = tokenizer

    def __getitem__(self, idx):
        ex_id = idx % 2
        idx = idx // 2
        item = self.df.iloc[idx]
        table = pd.DataFrame(json.loads(item["table"]))
        cells = zip(*item["highlighted_cells"])
        cells = [list(x) for x in cells]
        sub_table = table.iloc[cells[0], cells[1]].reset_index().astype(str)

        if ex_id == 0:
            encoding = self.tokenizer(
                table=sub_table,
                queries=item["positive"],
                padding="max_length",
                truncation=True,
                max_length=512,
                return_tensors="pt",
            )
            encoding["labels"] = torch.tensor([1])
        else:
            encoding = self.tokenizer(
                table=sub_table,
                queries=item["negative"],
                padding="max_length",
                truncation=True,
                max_length=512,
                return_tensors="pt",
            )
            encoding["labels"] = torch.tensor([0])

        encoding = {key: val[-1] for key, val in encoding.items()}
        return encoding

    def __len__(self):
        return len(self.df)

In [7]:
tokenizer = TapasTokenizer.from_pretrained("google/tapas-base", max_question_length=256)
torch.manual_seed(1)
dev_df = pd.read_json("../temp/seed/sent_selection/data/dev.jsonl", lines=True)

dev_dataset = TableDataset(dev_df, tokenizer)

In [17]:
from torch.utils.data import DataLoader
import accelerate

import evaluate
name2metrics = {
    "accuracy": evaluate.load("accuracy"),
    "precision": evaluate.load("precision"),
    "recall": evaluate.load("recall"),
    "f1": evaluate.load("f1"),
}

dataloader = DataLoader(dev_dataset, batch_size=8, shuffle=False)
accelerator = accelerate.Accelerator()

print(type(model))

model, dataloader = accelerator.prepare(model, dataloader)

for batch in dataloader:
    y_hat = model(**batch)
    preds = y_hat.logits.argmax(dim=1)
    for metric in name2metrics.values():
        metric.add_batch(predictions=preds, references=batch["labels"])



<class 'transformers.models.tapas.modeling_tapas.TapasForSequenceClassification'>


In [18]:
for name, metric in name2metrics.items():
    print(name, metric.compute())

accuracy {'accuracy': 0.9653325817361894}
precision {'precision': 0.97524467472654}
recall {'recall': 0.9549041713641488}
f1 {'f1': 0.9649672457989177}
