In [1]:
import os
import numpy as np
import pandas as pd
from simpletransformers.question_answering import QuestionAnsweringModel

In [2]:
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [3]:
cuda_device = 4
seed = '12'
model_name = 'bert'
output_name = 'indobert_squad'
model_path = 'outputs/idk-mrc/' + model_name + '/' + output_name + '/' + seed + '/best_model'
do_lower_case = True

In [4]:
train_args = {
    'learning_rate': 2e-5,
    'num_train_epochs': 2,
    'max_seq_length': 512,
    'max_query_length': 128,
    'doc_stride': 128,
    'overwrite_output_dir': True,
    'reprocess_input_data': True,
    'train_batch_size': 16,
    'fp16': True,
    'n_best_size': 20,
    'manual_seed': 42,
    'encoding': 'utf-8',
    'save_eval_checkpoints': False,
    'save_model_every_epoch': False,
    'save_steps': -1,
    'do_lower_case': do_lower_case,
    'adam_epsilon': 1e-8,
    'n_gpu': 1,
    'evaluate_during_training': True,
    'evaluate_during_training_steps': 2000,
    'early_stopping_metric': 'ans_f1',
    'early_stopping_metric_minimize': False,
    'no_cache': True
}
model = QuestionAnsweringModel(model_name, model_path, args=train_args, cuda_device=cuda_device)

In [5]:
def predict_pairs(question_context_pairs):
    to_predict = []
    q_idx = 0
    for q, c in question_context_pairs:
        to_predict.append({
            'context': c,
            'qas': [{
                'question': q,
                'id': str(q_idx)
            }]
        })
        q_idx += 1

    qa_answers, _ = model.predict(to_predict, n_best_size=1)

    preds = []
    for item in qa_answers:
        answer = item['answer'][0]
        if answer == 'empty':
            answer = ''
        preds.append(answer)

    return preds

In [6]:
from checklist.editor import Editor
from checklist.test_suite import TestSuite
from checklist.pred_wrapper import PredictorWrapper
from checklist.test_types import MFT

In [7]:
invert = lambda a: predict_pairs([(x[1], x[0]) for x in a])
new_pp = PredictorWrapper.wrap_predict(invert)

In [8]:
editor = Editor(language='indonesian')

In [9]:
editor.lexicons.keys()

dict_keys(['male', 'female', 'first_name', 'first_pronoun', 'last_name', 'country', 'nationality', 'city', 'religion', 'religion_adj', 'sexual_adj', 'sentiment', 'country_city', 'male_from', 'female_from', 'last_from'])

In [10]:
suite = TestSuite()

In [11]:
def format_squad_with_context(x, pred, conf, label=None, *args, **kwargs):
    c, q = x
    ret = 'C: %s\nQ: %s\n' % (c, q)
    if label is not None:
        ret += 'A: %s\n' % label
    ret += 'P: %s\n' % pred
    return ret

In [12]:
df_summary = pd.DataFrame(columns=['type', 'qtag', 'testcases', 'fails', 'fail_rate', 'example_fails'])

In [13]:
def get_label(test_obj, i):
    if test_obj.labels is None:
        label = None
    else:
        label = test_obj.labels if type(test_obj.labels) not in [list, np.array, np.ndarray] else test_obj.labels[i]
    return label

def get_example_fails(test_obj):
    fails = test_obj.fail_idxs()
    if fails.shape[0] == 0:
        return 'N/A'
    fails = np.random.choice(fails, min(fails.shape[0], 1), replace=False)
    for f in fails:
        d_idx = f if test_obj.run_idxs is None else test_obj.run_idxs[f]
        label = get_label(test_obj, d_idx)
        return format_squad_with_context(test_obj.data[d_idx], test_obj.results.preds[d_idx],
                    test_obj.results.confs[d_idx], label=label)

### Negation

In [14]:
verbs = [
    ('dibuat', 'membuat', 'pembuat'), ('ditulis', 'menulis', 'penulis'), ('dibangun', 'membangun', 'pembangun'),
    ('dikembangkan', 'mengembangkan', 'pengembang'), ('ditemukan', 'menemukan', 'penemu'), ('dirancang', 'merancang', 'perancang'),
    ('dibentuk', 'membentuk', 'pembentuk'), ('didirikan', 'mendirikan', 'pendiri'), ('diadakan', 'mengadakan', 'yang mengadakan'),
    ('dilaksanakan', 'melaksanakan', 'pelaksana')
]
negation = ['tidak', 'bukan']
print(len(verbs))

10


In [15]:
t = editor.template(
    (
        'Wikia {verbs[0]} oleh {first_name}.',
        'Siapa yang {negation} {verbs[1]} Wikia?'
    ),
    labels='',
    verbs=verbs,
    negation=negation,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Negation - in question (Who)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Negation - in question',
    'qtag': 'Who',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 773.26it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 317509.77it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [16]:
t = editor.template(
    (
        'Wikia {verbs[0]} oleh {first_name}.',
        'Apa yang {negation} {verbs[0]} {first_name}?'
    ),
    labels='',
    verbs=verbs,
    negation=negation,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Negation - in question (What)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Negation - in question',
    'qtag': 'What',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 748.81it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 341834.07it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [17]:
t = editor.template(
    (
        'Wikia {verbs[0]} pada 1990.',
        'Kapan Wikia belum {verbs[0]}?'
    ),
    labels='',
    verbs=verbs,
    negation=negation,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Negation - in question (When)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Negation - in question',
    'qtag': 'When',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 820.36it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 235900.11it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [18]:
t = editor.template(
    (
        'Wikia {verbs[0]} di {country}.',
        'Di mana Wikia {negation} {verbs[0]}?'
    ),
    labels='',
    verbs=verbs,
    negation=negation,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Negation - in question (Where)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Negation - in question',
    'qtag': 'Where',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 741.51it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 314415.59it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [19]:
t = editor.template(
    (
        'Wikia {verbs[0]} karena kebutuhan mendesak.',
        'Mengapa Wikia tidak {verbs[0]}?'
    ),
    labels='',
    verbs=verbs,
    negation=negation,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Negation - in question (Why)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Negation - in question',
    'qtag': 'Why',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 698.32it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 326404.98it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [20]:
t = editor.template(
    (
        'Wikia {verbs[0]} selama 3 tahun.',
        'Berapa lama Wikia tidak {verbs[0]}?'
    ),
    labels='',
    verbs=verbs,
    negation=negation,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Negation - in question (How)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Negation - in question',
    'qtag': 'How',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 726.07it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 375497.22it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [21]:
t = editor.template(
    (
        'Wikia {negation} {verbs[0]} oleh {first_name}.',
        'Siapa yang {verbs[1]} Wikia?'
    ),
    labels='',
    negation=negation,
    verbs=verbs,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Negation - in context (Who)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)
stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Negation - in context',
    'qtag': 'Who',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 724.73it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 243713.19it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [22]:
t = editor.template(
    (
        'Wikia {negation} {verbs[0]} oleh {first_name}.',
        'Apa yang {verbs[0]} {first_name}?'
    ),
    labels='',
    verbs=verbs,
    negation=negation,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Negation - in context (What)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Negation - in context',
    'qtag': 'What',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 719.63it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 408006.23it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [23]:
t = editor.template(
    (
        'Wikia belum {verbs[0]} pada 1990.',
        'Kapan Wikia {verbs[0]}?'
    ),
    labels='',
    verbs=verbs,
    negation=negation,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Negation - in context (When)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Negation - in context',
    'qtag': 'When',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 747.89it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 313475.64it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [24]:
t = editor.template(
    (
        'Wikia {negation} {verbs[0]} di {country}.',
        'Di mana Wikia {verbs[0]}?'
    ),
    labels='',
    verbs=verbs,
    negation=negation,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Negation - in context (Where)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Negation - in context',
    'qtag': 'Where',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 772.15it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 350987.78it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [25]:
t = editor.template(
    (
        'Wikia tidak {verbs[0]} karena kebutuhan mendesak.',
        'Mengapa Wikia {verbs[0]}?'
    ),
    labels='',
    verbs=verbs,
    negation=negation,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Negation - in context (Why)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Negation - in context',
    'qtag': 'Why',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 669.06it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 318232.47it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [26]:
t = editor.template(
    (
        'Wikia tidak {verbs[0]} selama 3 tahun.',
        'Berapa lama Wikia {verbs[0]}?'
    ),
    labels='',
    verbs=verbs,
    negation=negation,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Negation - in context (How)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Negation - in context',
    'qtag': 'How',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 759.53it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 351281.74it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


### Antonym

In [27]:
adjs = [
    ('terbesar', 'terkecil'), ('termahal', 'termurah'), ('terbaik', 'terburuk'), ('pertama', 'terakhir'),
    ('tertinggi', 'terendah'), ('berat', 'ringan'), ('paling awal', 'paling akhir'), ('mahal', 'murah')
]
nouns = [
    'hadiah', 'hukuman', 'kado', 'pekerjaan', 'bonus'
]
verbs = [
    ('lahir', 'meninggal'), ('masuk', 'keluar'), ('menikah', 'bercerai'), ('pergi', 'pulang'), ('bekerja', 'menganggur')
]

In [28]:
t = editor.template(
    [(
        '{first_name} mendapatkan {nouns} {adjs[0]}.',
        'Siapa yang mendapatkan {nouns} {adjs[1]}?'
    ),(
        '{first_name} mendapatkan {nouns} {adjs[1]}.',
        'Siapa yang mendapatkan {nouns} {adjs[0]}?'
    )],
    labels=['', ''],
    adjs=adjs,
    nouns=nouns,
    verbs=verbs,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Antonym (Who)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Antonym',
    'qtag': 'Who',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 200 examples


convert squad examples to features: 100%|██████████| 200/200 [00:00<00:00, 840.06it/s]
add example index and unique id: 100%|██████████| 200/200 [00:00<00:00, 396999.91it/s]


Running Prediction:   0%|          | 0/25 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [29]:
t = editor.template(
    [(
        'Wikia adalah organisasi {adjs[0]}.',
        'Apa organisasi {adjs[1]}?'
    ),(
        'Wikia adalah organisasi {adjs[1]}.',
        'Apa organisasi {adjs[0]}?'
    )],
    labels=['', ''],
    adjs=adjs,
    nouns=nouns,
    verbs=verbs,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Antonym (What)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Antonym',
    'qtag': 'What',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 200 examples


convert squad examples to features: 100%|██████████| 200/200 [00:00<00:00, 873.45it/s]
add example index and unique id: 100%|██████████| 200/200 [00:00<00:00, 499619.30it/s]


Running Prediction:   0%|          | 0/25 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [30]:
t = editor.template(
    [(
        'Organisasi {adjs[0]} didirikan tahun 2001.',
        'Kapan organisasi {adjs[1]} didirikan?'
    ),(
        'Organisasi {adjs[1]} didirikan tahun 2001.',
        'Kapan organisasi {adjs[0]} didirikan?'
    )],
    labels=['', ''],
    adjs=adjs,
    nouns=nouns,
    verbs=verbs,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Antonym (When)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Antonym',
    'qtag': 'When',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 200 examples


convert squad examples to features: 100%|██████████| 200/200 [00:00<00:00, 788.67it/s]
add example index and unique id: 100%|██████████| 200/200 [00:00<00:00, 644781.55it/s]


Running Prediction:   0%|          | 0/25 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [31]:
t = editor.template(
    [(
        'Organisasi {adjs[0]} didirikan di {country}.',
        'Di mana organisasi {adjs[1]} didirikan?'
    ),(
        'Organisasi {adjs[1]} didirikan di {country}.',
        'Di mana organisasi {adjs[0]} didirikan?'
    )],
    labels=['', ''],
    adjs=adjs,
    nouns=nouns,
    verbs=verbs,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Antonym (Where)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Antonym',
    'qtag': 'Where',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 200 examples


convert squad examples to features: 100%|██████████| 200/200 [00:00<00:00, 777.41it/s]
add example index and unique id: 100%|██████████| 200/200 [00:00<00:00, 380954.04it/s]


Running Prediction:   0%|          | 0/25 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [32]:
t = editor.template(
    [(
        'Organisasi {adjs[0]} didirikan karena kebutuhan.',
        'Mengapa organisasi {adjs[1]} didirikan?'
    ),(
        'Organisasi {adjs[1]} didirikan karena kebutuhan.',
        'Mengapa organisasi {adjs[0]} didirikan?'
    )],
    labels=['', ''],
    adjs=adjs,
    nouns=nouns,
    verbs=verbs,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Antonym (Why)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Antonym',
    'qtag': 'Why',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 200 examples


convert squad examples to features: 100%|██████████| 200/200 [00:00<00:00, 770.03it/s]
add example index and unique id: 100%|██████████| 200/200 [00:00<00:00, 417136.15it/s]


Running Prediction:   0%|          | 0/25 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [33]:
t = editor.template(
    [(
        'Organisasi {adjs[0]} dibekukan 2 kali.',
        'Berapa kali organisasi {adjs[1]} dibekukan?'
    ),(
        'Organisasi {adjs[1]} dibekukan 2 kali.',
        'Berapa kali organisasi {adjs[0]} dibekukan?'
    )],
    labels=['', ''],
    adjs=adjs,
    nouns=nouns,
    verbs=verbs,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Antonym (How)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Antonym',
    'qtag': 'How',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 200 examples


convert squad examples to features: 100%|██████████| 200/200 [00:00<00:00, 815.16it/s]
add example index and unique id: 100%|██████████| 200/200 [00:00<00:00, 401560.94it/s]


Running Prediction:   0%|          | 0/25 [00:00<?, ?it/s]

  df_summary = df_summary.append({


### Entity Swap

In [34]:
nouns_who = [
    'presiden', 'wakil presiden', 'perdana menteri', 'politikus', 'pahlawan', 'duta besar', 'perwakilan', 
    'menteri pendidikan', 'menteri luar negeri', 'menteri dalam negeri', 'menteri perdagangan', 'meteri keuangan',
    'mantan presiden', 'mantan wakil presiden', 'menteri olahraga', 'budayawan', 'menteri agama', 'ketua parlemen',
    'senat', 'penyanyi dari', 'penyanyi asal', 'model asal', 'pelukis asal', 'seniman asal', 'pebulu tangkis asal',
    'olahragawan asal', 'peneliti dari', 'guru besar asal', 'peraih nobel asal', 'petani dari', 'penari dari',
    'model kelahiran', 'model asal', 'model dari', 'tokoh agama dari', 'tokoh agama asal', 'artis asal',
    'selebriti kelahiran', 'selebriti asal', 'selebriti dari', 'sutradara asal', 'penulis asal', 'penulis asal',
    'tokoh asal', 'tokoh dari', 'pengusaha asal', 'pengusaha dari', 'ulama asal', 'pejabat asal', 'warga', 'atlet',
    'atlet asal', 'menteri asal', 'gubernur asal', 'walikota asal', 'penulis dari', 'perwakilan dari', 'wakil dari',
    'calon presiden', 'calon wakil presiden', 'kandidat dari'
]
nouns = [
    'mobil', 'kendaraan', 'motor', 'sepeda', 'ponsel', 'bus', 'truk', 'sepatu', 'jaket', 'pakaian',
    'komputer', 'keyboard', 'cat tembok', 'alat dapur', 'alat masak', 'jam tangan', 'film', 'lagu',
    'novel', 'buku', 'serial', 'pesawat', 'komik', 'game', 'software', 'mesin', 'drama',
    'jam', 'permainan', 'robot', 'sepeda', 'tekstil', 'aplikasi'
]

In [35]:
t = editor.template(
    (
        '{first_name} adalah {nouns_who} {country1}.',
        'Siapa {nouns_who} {country2}?'
    ),
    labels='',
    nouns_who=nouns_who,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Entity swap (Who)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Entity swap',
    'qtag': 'Who',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 836.04it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 260515.78it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [36]:
t = editor.template(
    (
        'Hibria adalah {nouns} produksi {country1}.',
        'Apa nama {nouns} produksi {country2}?'
    ),
    labels='',
    nouns=nouns,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Entity swap (What)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Entity swap',
    'qtag': 'What',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 99 examples


convert squad examples to features: 100%|██████████| 99/99 [00:00<00:00, 673.53it/s]
add example index and unique id: 100%|██████████| 99/99 [00:00<00:00, 301551.27it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [37]:
t = editor.template(
    (
        '{first_name1} lahir pada tahun 1995.',
        'Kapan {first_name2} lahir?'
    ),
    labels='',
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Entity swap (When)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Entity swap',
    'qtag': 'When',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 763.33it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 334474.00it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [38]:
t = editor.template(
    (
        '{city_skr} terletak di Korea Selatan.',
        'Di mana letak {city_nkr}?'
    ),
    labels='',
    nouns=nouns,
    city_skr=editor.lexicons.country_city['South_Korea'],
    city_nkr=editor.lexicons.country_city['North_Korea'],
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Entity swap (Where)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Entity swap',
    'qtag': 'Where',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 708.49it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 313475.64it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [39]:
t = editor.template(
    (
        '{first_name1} sukses karena rajin bekerja.',
        'Mengapa {first_name2} sukses?'
    ),
    labels='',
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Entity swap (Why)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Entity swap',
    'qtag': 'Why',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 762.19it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 413231.92it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [40]:
t = editor.template(
    (
        'Penduduk {country1} berjumlah {population} juta.',
        'Berapa jumlah penduduk {country2}?'
    ),
    labels='',
    population=[str(i) for i in range(10,80)],
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Entity swap (How)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Entity swap',
    'qtag': 'How',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 692.00it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 252516.80it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


### Question Tag Swap

In [41]:
verbs = [
    'lahir', 'meninggal', 'lulus', 'berperang', 'menikah', 'bersekolah', 'bertunangan'
]
verbs_who = [
    ('dibuat', 'membuat', 'pembuat'), ('ditulis', 'menulis', 'penulis'), ('dibangun', 'membangun', 'pembangun'),
    ('dikembangkan', 'mengembangkan', 'pengembang'), ('ditemukan', 'menemukan', 'penemu'), ('dirancang', 'merancang', 'perancang'),
    ('dibentuk', 'membentuk', 'pembentuk'), ('didirikan', 'mendirikan', 'pendiri'), ('diadakan', 'mengadakan', 'yang mengadakan'),
    ('dilaksanakan', 'melaksanakan', 'pelaksana')
]

In [42]:
t = editor.template(
    [(
        'Wikia {verbs_who[0]} pada Agustus 1990.',
        'Siapa yang {verbs_who[1]} Wikia?'
    ), (
        'Wikia {verbs_who[0]} di {country}.',
        'Siapa yang {verbs_who[1]} Wikia?'
    )],
    labels=['', ''],
    verbs_who=verbs_who,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Q tag (Who)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Q tag',
    'qtag': 'Who',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 200 examples


convert squad examples to features: 100%|██████████| 200/200 [00:00<00:00, 824.48it/s]
add example index and unique id: 100%|██████████| 200/200 [00:00<00:00, 344642.89it/s]


Running Prediction:   0%|          | 0/25 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [43]:
t = editor.template(
    (
        '{country} memiliki 3 jenis lembaga pemerintahan.',
        'Apa saja jenis lembaga pemerintahan {country}?'
    ),
    labels='',
    verbs_who=verbs_who,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Q tag (What)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Q tag',
    'qtag': 'What',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 644.92it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 274316.81it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [44]:
t = editor.template(
    (
        '{first_name} {verbs} di {country}.',
        'Kapan {first_name} {verbs}?'
    ),
    labels='',
    verbs=verbs,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Q tag (When)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Q tag',
    'qtag': 'When',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 746.84it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 287084.46it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [45]:
t = editor.template(
    (
        '{first_name} {verbs} pada tahun 1972.',
        'Di mana {first_name} {verbs}?'
    ),
    labels='',
    verbs=verbs,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Q tag (Where)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Q tag',
    'qtag': 'Where',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 714.38it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 317750.30it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [46]:
t = editor.template(
    [(
        'Wikia {verbs_who[0]} pada Agustus 1990.',
        'Mengapa Wikia {verbs_who[0]}?'
    ), (
        'Wikia {verbs_who[0]} di {country}.',
        'Mengapa Wikia {verbs_who[0]}?'
    )],
    labels=['', ''],
    verbs_who=verbs_who,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Q tag (Why)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Q tag',
    'qtag': 'Why',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 200 examples


convert squad examples to features: 100%|██████████| 200/200 [00:00<00:00, 840.17it/s]
add example index and unique id: 100%|██████████| 200/200 [00:00<00:00, 503215.84it/s]


Running Prediction:   0%|          | 0/25 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [47]:
t = editor.template(
    (
        'Wikia {verbs_who[0]} di {country}.',
        'Berapa lama Wikia {verbs_who[0]}?'
    ),
    labels='',
    verbs_who=verbs_who,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Q tag (How)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Q tag',
    'qtag': 'How',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 723.97it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 373823.89it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


### Unavailable Condition

In [48]:
nouns_who = [
    'presiden', 'wakil presiden', 'perdana menteri', 'politikus', 'pahlawan', 'duta besar', 'perwakilan', 
    'menteri pendidikan', 'menteri luar negeri', 'menteri dalam negeri', 'menteri perdagangan', 'meteri keuangan',
    'mantan presiden', 'mantan wakil presiden', 'menteri olahraga', 'budayawan', 'menteri agama', 'ketua parlemen',
    'senat', 'penyanyi dari', 'penyanyi asal', 'model asal', 'pelukis asal', 'seniman asal', 'pebulu tangkis asal',
    'olahragawan asal', 'peneliti dari', 'guru besar asal', 'peraih nobel asal', 'petani dari', 'penari dari',
    'model kelahiran', 'model asal', 'model dari', 'tokoh agama dari', 'tokoh agama asal', 'artis asal',
    'selebriti kelahiran', 'selebriti asal', 'selebriti dari', 'sutradara asal', 'penulis asal', 'penulis asal',
    'tokoh asal', 'tokoh dari', 'pengusaha asal', 'pengusaha dari', 'ulama asal', 'pejabat asal', 'warga', 'atlet',
    'atlet asal', 'menteri asal', 'gubernur asal', 'walikota asal', 'penulis dari', 'perwakilan dari', 'wakil dari',
    'calon presiden', 'calon wakil presiden', 'kandidat dari'
]
nouns = [
    'mobil', 'kendaraan', 'motor', 'sepeda', 'ponsel', 'bus', 'truk', 'sepatu', 'jaket', 'pakaian',
    'komputer', 'keyboard', 'cat tembok', 'alat dapur', 'alat masak', 'jam tangan', 'film', 'lagu',
    'novel', 'buku', 'serial', 'pesawat', 'komik', 'game', 'software', 'mesin', 'drama',
    'jam', 'permainan', 'robot', 'sepeda', 'tekstil', 'aplikasi'
]
conds_who = [
    'yang telah pensiun', 'paling terkenal', 'paling terpuruk', 'yang paling kaya raya', 'paling berprestasi',
    'yang kurang terkenal', 'yang kurang kaya raya', 'yang kurang berprestasi', 'yang baik', 'yang cerdas', 'yang jujur',
    'yang adil', 'yang beruntung', 'yang sederhana', 'yang kaya', 'yang terpercaya', 'yang cakap', 'yang peduli',
    'yang tegas', 'yang pintar', 'yang terkenal', 'yang terpuruk', 'yang berprestasi', 'yang telah menikah', 'yang telah meninggal'
]
conds = [
    'yang paling terkenal', 'yang kurang terkenal', 'yang terkenal', 'yang berkualitas', 'yang mahal', 'yang murah',
    'yang terfavorit', 'yang banyak disenangi', 'yang paling laku', 'yang kurang laku', 'yang kurang disenangi'
]

In [49]:
t = editor.template(
    (
        '{first_name} adalah {nouns_who} {country}.',
        'Siapa {nouns_who} {country} {conds_who}?'
    ),
    labels='',
    nouns_who=nouns_who,
    conds_who=conds_who,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Unavail (Who)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Unavail',
    'qtag': 'Who',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 658.28it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 206921.76it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [50]:
t = editor.template(
    (
        'Hibria adalah {nouns} produksi {country}.',
        'Apa {nouns} produksi {country} {conds}?'
    ),
    labels='',
    nouns=nouns,
    conds=conds,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Unavail (What)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Unavail',
    'qtag': 'What',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 734.95it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 392725.09it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [51]:
t = editor.template(
    (
        '{first_name} merilis {nouns} tahun 2000.',
        'Kapan {first_name} merilis {nouns} {conds}?'
    ),
    labels='',
    nouns=nouns,
    conds=conds,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Unavail (When)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Unavail',
    'qtag': 'When',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 650.16it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 189787.51it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [52]:
t = editor.template(
    (
        '{first_name} merilis {nouns} di {country}.',
        'Di mana {first_name} merilis {nouns} {conds}?'
    ),
    labels='',
    nouns=nouns,
    conds=conds,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Unavail (Where)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Unavail',
    'qtag': 'Where',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 641.69it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 325139.84it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [53]:
t = editor.template(
    (
        '{first_name} merilis {nouns} karena kebutuhan.',
        'Mengapa {first_name} merilis {nouns} {conds}?'
    ),
    labels='',
    nouns=nouns,
    conds=conds,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Unavail (Why)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Unavail',
    'qtag': 'Why',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 665.54it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 349234.30it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [54]:
t = editor.template(
    (
        '{first_name} merilis {nouns} sebanyak {num} kali.',
        'Berapa kali {first_name} merilis {nouns} {conds}?'
    ),
    labels='',
    nouns=nouns,
    conds=conds,
    num=list(range(1,10)),
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Unavail (How)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Unavail',
    'qtag': 'How',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 753.93it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 345779.39it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


### Other

In [55]:
nouns_who = [
    ('teman baik', 'teman kecil'), ('penggemar', 'teman'), ('sahabat', 'teman kecil'), ('penggemar', 'atasan'),
    ('murid', 'teman'), ('guru', 'ayah'), ('guru', 'bos'), ('karyawan', 'bawahan'), ('kolega', 'sahabat'),
    ('sepupu', 'adik'), ('sepupu', 'kakak'), ('ayah', 'adik'), ('ibu', 'adik'), ('ayah', 'kakak'), ('ayah', 'kakak'),
    ('kembaran', 'ibu'), ('kembaran', 'guru'), ('penggemar', 'bos'), ('teman baik', 'ayah'), ('teman baik', 'ibu'),
    ('teman baik', 'paman'), ('teman baik', 'bibi'), ('teman baik', 'kakak'), ('teman baik', 'saudara'), ('teman baik', 'adik'),
    ('pasangan', 'kolega'), ('rekan kerja', 'paman'), ('rekan kerja', 'ayah')
]
verbs = [
    ('lahir', 'lulus'), ('lahir', 'dibebaskan'), ('lahir', 'menikah'), ('lahir', 'bersekolah'), ('lahir', 'bekerja'),
    ('lahir', 'masuk kuliah'), ('lahir', 'dilantik'), ('lahir', 'naik jabatan'), ('meninggal', 'lulus'), 
    ('meninggal', 'dibebaskan'), ('meninggal', 'menikah'), ('meninggal', 'bersekolah'), ('meninggal', 'bekerja'),
    ('meninggal', 'masuk kuliah'), ('meninggal', 'dilantik'), ('meninggal', 'naik jabatan'), ('lulus', 'dibebaskan'), 
    ('lulus', 'menikah'), ('lulus', 'bekerja'), ('lulus', 'dilantik'), ('lulus', 'naik jabatan'), ('menikah', 'dibebaskan'),
    ('menikah', 'bersekolah'), ('menikah', 'bekerja'), ('menikah', 'masuk kuliah'), ('menikah', 'dilantik'), ('menikah', 'naik jabatan')
]
nouns = [
    ('buku', 'karya ilmiah'), ('lagu', 'karya ilmiah'), ('lukisan', 'karya ilmiah'), ('puisi', 'karya_ilmiah'),
    ('cerpen', 'karya ilmiah'), ('novel', 'karya ilmiah'), ('buku', 'lagu'), ('lukisan', 'lagu'), ('puisi', 'lagu'),
    ('cerpen', 'lagu'), ('novel', 'lagu'), ('lukisan', 'buku'), ('puisi', 'buku'), ('cerpen', 'buku'), ('puisi', 'cerpen'),
    ('puisi', 'novel')
]

In [56]:
t = editor.template(
    (
        '{first_name1} adalah {nouns_who[0]} {first_name2}.',
        'Siapa {nouns_who[1]} {first_name2}?'
    ),
    labels='',
    nouns_who=nouns_who,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Other (Who)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Other',
    'qtag': 'Who',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 97 examples


convert squad examples to features: 100%|██████████| 97/97 [00:00<00:00, 751.81it/s]
add example index and unique id: 100%|██████████| 97/97 [00:00<00:00, 546104.01it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [57]:
t = editor.template(
    (
        'Wikia adalah {nouns[0]} yang dibuat oleh {first_name}.',
        'Apa {nouns[1]} yang dibuat oleh {first_name}?'
    ),
    labels='',
    nouns=nouns,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Other (What)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Other',
    'qtag': 'What',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 582.45it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 177724.75it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [58]:
t = editor.template(
    (
        '{first_name} {verbs[0]} pada tahun 1980.',
        'Kapan {first_name} {verbs[1]}?'
    ),
    labels='',
    verbs=verbs,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Other (When)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Other',
    'qtag': 'When',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 687.53it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 304155.47it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [59]:
t = editor.template(
    (
        '{first_name} {verbs[0]} di {country}.',
        'Di mana {first_name} {verbs[1]}?'
    ),
    labels='',
    verbs=verbs,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Other (Where)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Other',
    'qtag': 'Where',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 734.69it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 344077.44it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [60]:
t = editor.template(
    (
        '{first_name} {verbs[0]} karena kebutuhan.',
        'Mengapa {first_name} {verbs[1]}?'
    ),
    labels='',
    verbs=verbs,
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Other (Why)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Other',
    'qtag': 'Why',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 702.23it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 362829.07it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [61]:
t = editor.template(
    (
        '{first_name} merilis {nouns[0]} sebanyak {num} kali.',
        'Berapa kali {first_name} merilis {nouns[1]}?'
    ),
    labels='',
    nouns=nouns,
    num=list(range(1,10)),
    remove_duplicates=True,
    nsamples=100,
    save=True
)
name = 'Other (How)'
test = MFT(**t, name=name, description='', capability='Unanswerability')
test.run(new_pp, overwrite=True)
suite.add(test)

stats = test.get_stats()
df_summary = df_summary.append({
    'type': 'Other',
    'qtag': 'How',
    'testcases': stats['testcases'],
    'fails': stats['fails'],
    'fail_rate': stats['fail_rate'],
    'example_fails': get_example_fails(test)
}, ignore_index=True)

Predicting 100 examples


convert squad examples to features: 100%|██████████| 100/100 [00:00<00:00, 819.81it/s]
add example index and unique id: 100%|██████████| 100/100 [00:00<00:00, 179014.26it/s]


Running Prediction:   0%|          | 0/13 [00:00<?, ?it/s]

  df_summary = df_summary.append({


In [62]:
model_path

'outputs/idk-mrc/bert/indobert_squad/12/best_model'

In [63]:
# suite.summary()

In [64]:
df_summary

Unnamed: 0,type,qtag,testcases,fails,fail_rate,example_fails
0,Negation - in question,Who,100,0,0.0,
1,Negation - in question,What,100,1,1.0,C: Wikia dilaksanakan oleh Ferry.\nQ: Apa yang...
2,Negation - in question,When,100,0,0.0,
3,Negation - in question,Where,100,0,0.0,
4,Negation - in question,Why,100,0,0.0,
5,Negation - in question,How,100,0,0.0,
6,Negation - in context,Who,100,28,28.0,C: Wikia tidak ditulis oleh Tiara.\nQ: Siapa y...
7,Negation - in context,What,100,0,0.0,
8,Negation - in context,When,100,100,100.0,C: Wikia belum dirancang pada 1990.\nQ: Kapan ...
9,Negation - in context,Where,100,66,66.0,C: Wikia bukan dirancang di Guyana.\nQ: Di man...


In [65]:
df_summary.to_csv('checklist_result/' + output_name + '_' + seed + '.csv', index=False)