In [1]:
!export CUDA_VISIBLE_DEVICES=2

In [2]:
import argparse
import glob
import logging
import os
import tqdm
import random

import numpy as np
import torch
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset
from torch.utils.data.distributed import DistributedSampler
from tqdm import tqdm, trange

from hans_processors import HansProcessor
from hans_processors import hans_convert_examples_to_features as convert_examples_to_features
from transformers import (
    WEIGHTS_NAME,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)

In [3]:
from evaluate import create_eval_loader, predict, evaluate

In [4]:

model_path = "checkpoints/finetuned/"
device = torch.device("cuda:2")
hans_data_dir = "../../data/hans/"

# Other settings
max_seq_length = 128
batch_size = 256
model_type = "bert"

In [5]:
tokenizer = AutoTokenizer.from_pretrained(model_path)


In [6]:
eval_dataloader, label_list = create_eval_loader(data_dir=hans_data_dir, 
                                                 max_seq_length=max_seq_length, 
                                                 model_type=model_type, 
                                                 model_name_or_path=model_path, 
                                                 tokenizer=tokenizer, 
                                                 overwrite_cache=False, 
                                                 eval_batch_size=batch_size)

In [7]:
model = AutoModelForSequenceClassification.from_pretrained(model_path)
predictions = predict(device, model, tokenizer, eval_dataloader, label_list)
_ = evaluate(predictions, hans_data_dir)

Evaluating: 100%|██████████| 118/118 [01:12<00:00,  1.63it/s]


Heuristic entailed results:
lexical_overlap: 0.9102
subsequence: 0.9256
constituent: 0.9508

Entailed accuracy: 0.9288666666666666

Heuristic non-entailed results:
lexical_overlap: 0.1948
subsequence: 0.1156
constituent: 0.081

Non-Entailed accuracy: 0.13046666666666668

Overall accuracy: 0.5296666666666666

Subcase results:
ln_subject/object_swap: 0.097
ln_preposition: 0.339
ln_relative_clause: 0.218
ln_passive: 0.049
ln_conjunction: 0.271
le_relative_clause: 0.892
le_around_prepositional_phrase: 0.933
le_around_relative_clause: 0.963
le_conjunction: 0.871
le_passive: 0.892
sn_NP/S: 0.105
sn_PP_on_subject: 0.086
sn_relative_clause_on_subject: 0.029
sn_past_participle: 0.217
sn_NP/Z: 0.141
se_conjunction: 0.962
se_adjective: 1.0
se_understood_object: 0.894
se_relative_clause_on_obj: 0.928
se_PP_on_obj: 0.844
cn_embedded_under_if: 0.294
cn_after_if_clause: 0.035
cn_embedded_under_verb: 0.038
cn_disjunction: 0.025
cn_adverb: 0.013
ce_embedded_under_since: 0.813
ce_after_since_clause: 0.9

In [8]:
# Remove Layers

# Remove layers

In [19]:
from itertools import chain, combinations
def all_subsets(ss):
    return chain(*map(lambda x: combinations(ss, x), reversed(range(len(ss)+1))))

In [20]:


combinations = list(subset for subset in all_subsets(range(12)) if len(subset) == 9)
for subset_layers in tqdm(combinations):
    print(f"Subset: {subset_layers}")
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    model.bert.encoder.layer = torch.nn.ModuleList([l for i, l in enumerate(model.bert.encoder.layer) if i in subset_layers])
    predictions = predict(device, model, tokenizer, eval_dataloader, label_list)
    results = evaluate(predictions, hans_data_dir)
    layer_results[subset_layers] = results["accuracy"]
    if max_accuracy < results["accuracy"]:
        max_accuracy = results["accuracy"]
        print("MAX_ACCURACY", max_accuracy)





  0%|          | 0/220 [00:00<?, ?it/s][A[A[A[A

Subset: (0, 1, 2, 3, 4, 5, 6, 7, 8)







Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:54,  2.15it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:54,  2.14it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:53,  2.16it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:52,  2.17it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:51,  2.18it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:51,  2.18it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:50,  2.19it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:50,  2.19it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:49,  2.19it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:49,  2.19it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:48,  2.19it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:48,  2.

Heuristic entailed results:
lexical_overlap: 0.6134
subsequence: 0.866
constituent: 0.6572

Entailed accuracy: 0.7122

Heuristic non-entailed results:
lexical_overlap: 0.5502
subsequence: 0.4674
constituent: 0.4204

Non-Entailed accuracy: 0.47933333333333333

Overall accuracy: 0.5957666666666667

Subcase results:
ln_subject/object_swap: 0.509
ln_preposition: 0.715
ln_relative_clause: 0.59
ln_passive: 0.367
ln_conjunction: 0.57
le_relative_clause: 0.518
le_around_prepositional_phrase: 0.696
le_around_relative_clause: 0.81
le_conjunction: 0.738
le_passive: 0.305
sn_NP/S: 0.082
sn_PP_on_subject: 0.464
sn_relative_clause_on_subject: 0.48
sn_past_participle: 0.463
sn_NP/Z: 0.848
se_conjunction: 0.76
se_adjective: 0.962
se_understood_object: 0.892
se_relative_clause_on_obj: 0.948
se_PP_on_obj: 0.768
cn_embedded_under_if: 0.901
cn_after_if_clause: 0.139
cn_embedded_under_verb: 0.562
cn_disjunction: 0.364
cn_adverb: 0.136
ce_embedded_under_since: 0.178
ce_after_since_clause: 0.844
ce_embedded_






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:53,  2.12it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.12it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:52,  2.13it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.13it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:51,  2.13it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.13it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:50,  2.13it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.13it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:49,  2.

Heuristic entailed results:
lexical_overlap: 0.0092
subsequence: 0.0714
constituent: 0.1026

Entailed accuracy: 0.061066666666666665

Heuristic non-entailed results:
lexical_overlap: 0.9972
subsequence: 0.965
constituent: 0.8944

Non-Entailed accuracy: 0.9522

Overall accuracy: 0.5066333333333334

Subcase results:
ln_subject/object_swap: 1.0
ln_preposition: 0.996
ln_relative_clause: 0.991
ln_passive: 1.0
ln_conjunction: 0.999
le_relative_clause: 0.035
le_around_prepositional_phrase: 0.0
le_around_relative_clause: 0.003
le_conjunction: 0.0
le_passive: 0.008
sn_NP/S: 0.999
sn_PP_on_subject: 0.931
sn_relative_clause_on_subject: 0.934
sn_past_participle: 0.963
sn_NP/Z: 0.998
se_conjunction: 0.026
se_adjective: 0.314
se_understood_object: 0.014
se_relative_clause_on_obj: 0.003
se_PP_on_obj: 0.0
cn_embedded_under_if: 0.999
cn_after_if_clause: 0.726
cn_embedded_under_verb: 0.974
cn_disjunction: 0.975
cn_adverb: 0.798
ce_embedded_under_since: 0.001
ce_after_since_clause: 0.231
ce_embedded_unde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.08it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:50,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:49,  2.

Heuristic entailed results:
lexical_overlap: 0.841
subsequence: 0.9042
constituent: 0.83

Entailed accuracy: 0.8584

Heuristic non-entailed results:
lexical_overlap: 0.2736
subsequence: 0.2336
constituent: 0.192

Non-Entailed accuracy: 0.23306666666666667

Overall accuracy: 0.5457333333333333

Subcase results:
ln_subject/object_swap: 0.174
ln_preposition: 0.424
ln_relative_clause: 0.341
ln_passive: 0.088
ln_conjunction: 0.341
le_relative_clause: 0.763
le_around_prepositional_phrase: 0.886
le_around_relative_clause: 0.923
le_conjunction: 0.835
le_passive: 0.798
sn_NP/S: 0.28
sn_PP_on_subject: 0.061
sn_relative_clause_on_subject: 0.056
sn_past_participle: 0.197
sn_NP/Z: 0.574
se_conjunction: 0.925
se_adjective: 0.992
se_understood_object: 0.968
se_relative_clause_on_obj: 0.839
se_PP_on_obj: 0.797
cn_embedded_under_if: 0.62
cn_after_if_clause: 0.002
cn_embedded_under_verb: 0.151
cn_disjunction: 0.181
cn_adverb: 0.006
ce_embedded_under_since: 0.416
ce_after_since_clause: 0.995
ce_embedded_






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.0
subsequence: 0.0306
constituent: 0.0098

Entailed accuracy: 0.013466666666666667

Heuristic non-entailed results:
lexical_overlap: 0.9998
subsequence: 0.9982
constituent: 0.9888

Non-Entailed accuracy: 0.9956

Overall accuracy: 0.5045333333333333

Subcase results:
ln_subject/object_swap: 1.0
ln_preposition: 1.0
ln_relative_clause: 1.0
ln_passive: 0.999
ln_conjunction: 1.0
le_relative_clause: 0.0
le_around_prepositional_phrase: 0.0
le_around_relative_clause: 0.0
le_conjunction: 0.0
le_passive: 0.0
sn_NP/S: 1.0
sn_PP_on_subject: 1.0
sn_relative_clause_on_subject: 0.997
sn_past_participle: 0.994
sn_NP/Z: 1.0
se_conjunction: 0.0
se_adjective: 0.147
se_understood_object: 0.004
se_relative_clause_on_obj: 0.002
se_PP_on_obj: 0.0
cn_embedded_under_if: 0.999
cn_after_if_clause: 0.977
cn_embedded_under_verb: 0.994
cn_disjunction: 1.0
cn_adverb: 0.974
ce_embedded_under_since: 0.0
ce_after_since_clause: 0.011
ce_embedded_under_verb: 0.007
ce_conjunc






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.735
subsequence: 0.8024
constituent: 0.6906

Entailed accuracy: 0.7426666666666667

Heuristic non-entailed results:
lexical_overlap: 0.3102
subsequence: 0.3018
constituent: 0.3926

Non-Entailed accuracy: 0.33486666666666665

Overall accuracy: 0.5387666666666666

Subcase results:
ln_subject/object_swap: 0.296
ln_preposition: 0.366
ln_relative_clause: 0.314
ln_passive: 0.197
ln_conjunction: 0.378
le_relative_clause: 0.814
le_around_prepositional_phrase: 0.682
le_around_relative_clause: 0.81
le_conjunction: 0.62
le_passive: 0.749
sn_NP/S: 0.056
sn_PP_on_subject: 0.245
sn_relative_clause_on_subject: 0.257
sn_past_participle: 0.244
sn_NP/Z: 0.707
se_conjunction: 0.72
se_adjective: 0.97
se_understood_object: 0.759
se_relative_clause_on_obj: 0.814
se_PP_on_obj: 0.749
cn_embedded_under_if: 0.861
cn_after_if_clause: 0.107
cn_embedded_under_verb: 0.513
cn_disjunction: 0.307
cn_adverb: 0.175
ce_embedded_under_since: 0.271
ce_after_since_clause: 0.873






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8536
subsequence: 0.8746
constituent: 0.7194

Entailed accuracy: 0.8158666666666666

Heuristic non-entailed results:
lexical_overlap: 0.2368
subsequence: 0.2624
constituent: 0.2442

Non-Entailed accuracy: 0.2478

Overall accuracy: 0.5318333333333334

Subcase results:
ln_subject/object_swap: 0.084
ln_preposition: 0.339
ln_relative_clause: 0.322
ln_passive: 0.015
ln_conjunction: 0.424
le_relative_clause: 0.714
le_around_prepositional_phrase: 0.941
le_around_relative_clause: 0.93
le_conjunction: 0.778
le_passive: 0.905
sn_NP/S: 0.266
sn_PP_on_subject: 0.056
sn_relative_clause_on_subject: 0.1
sn_past_participle: 0.246
sn_NP/Z: 0.644
se_conjunction: 0.812
se_adjective: 0.985
se_understood_object: 0.931
se_relative_clause_on_obj: 0.817
se_PP_on_obj: 0.828
cn_embedded_under_if: 0.87
cn_after_if_clause: 0.015
cn_embedded_under_verb: 0.113
cn_disjunction: 0.218
cn_adverb: 0.005
ce_embedded_under_since: 0.181
ce_after_since_clause: 0.964
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.0046
subsequence: 0.0934
constituent: 0.0382

Entailed accuracy: 0.0454

Heuristic non-entailed results:
lexical_overlap: 0.9936
subsequence: 0.9776
constituent: 0.9622

Non-Entailed accuracy: 0.9778

Overall accuracy: 0.5116

Subcase results:
ln_subject/object_swap: 0.998
ln_preposition: 1.0
ln_relative_clause: 0.999
ln_passive: 0.974
ln_conjunction: 0.997
le_relative_clause: 0.008
le_around_prepositional_phrase: 0.0
le_around_relative_clause: 0.006
le_conjunction: 0.0
le_passive: 0.009
sn_NP/S: 0.979
sn_PP_on_subject: 0.979
sn_relative_clause_on_subject: 0.973
sn_past_participle: 0.957
sn_NP/Z: 1.0
se_conjunction: 0.005
se_adjective: 0.363
se_understood_object: 0.074
se_relative_clause_on_obj: 0.023
se_PP_on_obj: 0.002
cn_embedded_under_if: 0.998
cn_after_if_clause: 0.95
cn_embedded_under_verb: 0.985
cn_disjunction: 0.998
cn_adverb: 0.88
ce_embedded_under_since: 0.001
ce_after_since_clause: 0.046
ce_embedded_under_verb: 0.008
ce_conjunct






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.5366
subsequence: 0.6416
constituent: 0.5396

Entailed accuracy: 0.5726

Heuristic non-entailed results:
lexical_overlap: 0.5324
subsequence: 0.522
constituent: 0.5094

Non-Entailed accuracy: 0.5212666666666667

Overall accuracy: 0.5469333333333334

Subcase results:
ln_subject/object_swap: 0.31
ln_preposition: 0.762
ln_relative_clause: 0.717
ln_passive: 0.142
ln_conjunction: 0.731
le_relative_clause: 0.506
le_around_prepositional_phrase: 0.462
le_around_relative_clause: 0.619
le_conjunction: 0.375
le_passive: 0.721
sn_NP/S: 0.577
sn_PP_on_subject: 0.372
sn_relative_clause_on_subject: 0.417
sn_past_participle: 0.44
sn_NP/Z: 0.804
se_conjunction: 0.568
se_adjective: 0.94
se_understood_object: 0.622
se_relative_clause_on_obj: 0.586
se_PP_on_obj: 0.492
cn_embedded_under_if: 0.983
cn_after_if_clause: 0.164
cn_embedded_under_verb: 0.63
cn_disjunction: 0.707
cn_adverb: 0.063
ce_embedded_under_since: 0.036
ce_after_since_clause: 0.784
ce_embedded_






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:58,  1.99it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:57,  2.02it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:56,  2.05it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.0016
subsequence: 0.0258
constituent: 0.0088

Entailed accuracy: 0.012066666666666667

Heuristic non-entailed results:
lexical_overlap: 0.9984
subsequence: 0.9976
constituent: 0.992

Non-Entailed accuracy: 0.996

Overall accuracy: 0.5040333333333333

Subcase results:
ln_subject/object_swap: 0.999
ln_preposition: 1.0
ln_relative_clause: 1.0
ln_passive: 0.995
ln_conjunction: 0.998
le_relative_clause: 0.001
le_around_prepositional_phrase: 0.0
le_around_relative_clause: 0.001
le_conjunction: 0.0
le_passive: 0.006
sn_NP/S: 0.999
sn_PP_on_subject: 0.998
sn_relative_clause_on_subject: 0.999
sn_past_participle: 0.992
sn_NP/Z: 1.0
se_conjunction: 0.004
se_adjective: 0.103
se_understood_object: 0.011
se_relative_clause_on_obj: 0.008
se_PP_on_obj: 0.003
cn_embedded_under_if: 1.0
cn_after_if_clause: 0.995
cn_embedded_under_verb: 1.0
cn_disjunction: 1.0
cn_adverb: 0.965
ce_embedded_under_since: 0.0
ce_after_since_clause: 0.005
ce_embedded_under_verb: 0






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.08it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.317
subsequence: 0.51
constituent: 0.3118

Entailed accuracy: 0.3796

Heuristic non-entailed results:
lexical_overlap: 0.6074
subsequence: 0.6696
constituent: 0.6564

Non-Entailed accuracy: 0.6444666666666666

Overall accuracy: 0.5120333333333333

Subcase results:
ln_subject/object_swap: 0.435
ln_preposition: 0.832
ln_relative_clause: 0.776
ln_passive: 0.312
ln_conjunction: 0.682
le_relative_clause: 0.317
le_around_prepositional_phrase: 0.168
le_around_relative_clause: 0.225
le_conjunction: 0.247
le_passive: 0.628
sn_NP/S: 0.711
sn_PP_on_subject: 0.667
sn_relative_clause_on_subject: 0.675
sn_past_participle: 0.442
sn_NP/Z: 0.853
se_conjunction: 0.355
se_adjective: 0.921
se_understood_object: 0.704
se_relative_clause_on_obj: 0.36
se_PP_on_obj: 0.21
cn_embedded_under_if: 0.944
cn_after_if_clause: 0.611
cn_embedded_under_verb: 0.658
cn_disjunction: 0.857
cn_adverb: 0.212
ce_embedded_under_since: 0.048
ce_after_since_clause: 0.322
ce_embedded_






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.05it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:58,  1.97it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:02<00:56,  2.01it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:55,  2.04it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:54,  2.06it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:53,  2.08it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.09it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.10it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8518
subsequence: 0.867
constituent: 0.8314

Entailed accuracy: 0.8500666666666666

Heuristic non-entailed results:
lexical_overlap: 0.185
subsequence: 0.21
constituent: 0.2344

Non-Entailed accuracy: 0.2098

Overall accuracy: 0.5299333333333334

Subcase results:
ln_subject/object_swap: 0.158
ln_preposition: 0.235
ln_relative_clause: 0.217
ln_passive: 0.105
ln_conjunction: 0.21
le_relative_clause: 0.846
le_around_prepositional_phrase: 0.826
le_around_relative_clause: 0.882
le_conjunction: 0.818
le_passive: 0.887
sn_NP/S: 0.041
sn_PP_on_subject: 0.152
sn_relative_clause_on_subject: 0.17
sn_past_participle: 0.183
sn_NP/Z: 0.504
se_conjunction: 0.868
se_adjective: 0.952
se_understood_object: 0.824
se_relative_clause_on_obj: 0.869
se_PP_on_obj: 0.822
cn_embedded_under_if: 0.56
cn_after_if_clause: 0.144
cn_embedded_under_verb: 0.274
cn_disjunction: 0.116
cn_adverb: 0.078
ce_embedded_under_since: 0.593
ce_after_since_clause: 0.892
ce_embedded_un






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.9262
subsequence: 0.9118
constituent: 0.8458

Entailed accuracy: 0.8946

Heuristic non-entailed results:
lexical_overlap: 0.181
subsequence: 0.1768
constituent: 0.1534

Non-Entailed accuracy: 0.1704

Overall accuracy: 0.5325

Subcase results:
ln_subject/object_swap: 0.061
ln_preposition: 0.277
ln_relative_clause: 0.253
ln_passive: 0.005
ln_conjunction: 0.309
le_relative_clause: 0.826
le_around_prepositional_phrase: 0.966
le_around_relative_clause: 0.956
le_conjunction: 0.922
le_passive: 0.961
sn_NP/S: 0.143
sn_PP_on_subject: 0.029
sn_relative_clause_on_subject: 0.075
sn_past_participle: 0.222
sn_NP/Z: 0.415
se_conjunction: 0.942
se_adjective: 0.985
se_understood_object: 0.981
se_relative_clause_on_obj: 0.898
se_PP_on_obj: 0.753
cn_embedded_under_if: 0.621
cn_after_if_clause: 0.017
cn_embedded_under_verb: 0.031
cn_disjunction: 0.095
cn_adverb: 0.003
ce_embedded_under_since: 0.451
ce_after_since_clause: 0.983
ce_embedded_under_verb: 0.966
ce






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.0564
subsequence: 0.1912
constituent: 0.1554

Entailed accuracy: 0.13433333333333333

Heuristic non-entailed results:
lexical_overlap: 0.9548
subsequence: 0.8892
constituent: 0.836

Non-Entailed accuracy: 0.8933333333333333

Overall accuracy: 0.5138333333333334

Subcase results:
ln_subject/object_swap: 0.959
ln_preposition: 0.992
ln_relative_clause: 0.962
ln_passive: 0.869
ln_conjunction: 0.992
le_relative_clause: 0.085
le_around_prepositional_phrase: 0.035
le_around_relative_clause: 0.094
le_conjunction: 0.021
le_passive: 0.047
sn_NP/S: 0.863
sn_PP_on_subject: 0.891
sn_relative_clause_on_subject: 0.847
sn_past_participle: 0.848
sn_NP/Z: 0.997
se_conjunction: 0.064
se_adjective: 0.528
se_understood_object: 0.213
se_relative_clause_on_obj: 0.116
se_PP_on_obj: 0.035
cn_embedded_under_if: 0.989
cn_after_if_clause: 0.85
cn_embedded_under_verb: 0.857
cn_disjunction: 0.939
cn_adverb: 0.545
ce_embedded_under_since: 0.005
ce_after_since_clause: 0.






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:58,  1.99it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:57,  2.03it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.07it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.6754
subsequence: 0.7298
constituent: 0.6556

Entailed accuracy: 0.6869333333333333

Heuristic non-entailed results:
lexical_overlap: 0.4486
subsequence: 0.3426
constituent: 0.3896

Non-Entailed accuracy: 0.3936

Overall accuracy: 0.5402666666666667

Subcase results:
ln_subject/object_swap: 0.237
ln_preposition: 0.671
ln_relative_clause: 0.593
ln_passive: 0.158
ln_conjunction: 0.584
le_relative_clause: 0.611
le_around_prepositional_phrase: 0.664
le_around_relative_clause: 0.791
le_conjunction: 0.554
le_passive: 0.757
sn_NP/S: 0.429
sn_PP_on_subject: 0.188
sn_relative_clause_on_subject: 0.243
sn_past_participle: 0.375
sn_NP/Z: 0.478
se_conjunction: 0.697
se_adjective: 0.987
se_understood_object: 0.903
se_relative_clause_on_obj: 0.73
se_PP_on_obj: 0.332
cn_embedded_under_if: 0.875
cn_after_if_clause: 0.214
cn_embedded_under_verb: 0.286
cn_disjunction: 0.536
cn_adverb: 0.037
ce_embedded_under_since: 0.196
ce_after_since_clause: 0.845
ce_embed






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.017
subsequence: 0.0654
constituent: 0.0326

Entailed accuracy: 0.03833333333333333

Heuristic non-entailed results:
lexical_overlap: 0.9882
subsequence: 0.979
constituent: 0.9518

Non-Entailed accuracy: 0.973

Overall accuracy: 0.5056666666666667

Subcase results:
ln_subject/object_swap: 0.988
ln_preposition: 0.999
ln_relative_clause: 0.993
ln_passive: 0.97
ln_conjunction: 0.991
le_relative_clause: 0.022
le_around_prepositional_phrase: 0.005
le_around_relative_clause: 0.027
le_conjunction: 0.02
le_passive: 0.011
sn_NP/S: 0.978
sn_PP_on_subject: 0.975
sn_relative_clause_on_subject: 0.983
sn_past_participle: 0.964
sn_NP/Z: 0.995
se_conjunction: 0.021
se_adjective: 0.203
se_understood_object: 0.05
se_relative_clause_on_obj: 0.047
se_PP_on_obj: 0.006
cn_embedded_under_if: 0.992
cn_after_if_clause: 0.982
cn_embedded_under_verb: 0.984
cn_disjunction: 0.98
cn_adverb: 0.821
ce_embedded_under_since: 0.003
ce_after_since_clause: 0.012
ce_embedded_u






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.08it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.5184
subsequence: 0.643
constituent: 0.4888

Entailed accuracy: 0.5500666666666667

Heuristic non-entailed results:
lexical_overlap: 0.4372
subsequence: 0.4842
constituent: 0.4554

Non-Entailed accuracy: 0.45893333333333336

Overall accuracy: 0.5045

Subcase results:
ln_subject/object_swap: 0.289
ln_preposition: 0.643
ln_relative_clause: 0.511
ln_passive: 0.266
ln_conjunction: 0.477
le_relative_clause: 0.527
le_around_prepositional_phrase: 0.399
le_around_relative_clause: 0.452
le_conjunction: 0.495
le_passive: 0.719
sn_NP/S: 0.547
sn_PP_on_subject: 0.459
sn_relative_clause_on_subject: 0.452
sn_past_participle: 0.306
sn_NP/Z: 0.657
se_conjunction: 0.528
se_adjective: 0.842
se_understood_object: 0.869
se_relative_clause_on_obj: 0.552
se_PP_on_obj: 0.424
cn_embedded_under_if: 0.76
cn_after_if_clause: 0.387
cn_embedded_under_verb: 0.409
cn_disjunction: 0.599
cn_adverb: 0.122
ce_embedded_under_since: 0.232
ce_after_since_clause: 0.552
ce_embed






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.7454
subsequence: 0.776
constituent: 0.669

Entailed accuracy: 0.7301333333333333

Heuristic non-entailed results:
lexical_overlap: 0.2784
subsequence: 0.335
constituent: 0.396

Non-Entailed accuracy: 0.3364666666666667

Overall accuracy: 0.5333

Subcase results:
ln_subject/object_swap: 0.109
ln_preposition: 0.404
ln_relative_clause: 0.409
ln_passive: 0.028
ln_conjunction: 0.442
le_relative_clause: 0.617
le_around_prepositional_phrase: 0.76
le_around_relative_clause: 0.816
le_conjunction: 0.591
le_passive: 0.943
sn_NP/S: 0.408
sn_PP_on_subject: 0.108
sn_relative_clause_on_subject: 0.183
sn_past_participle: 0.446
sn_NP/Z: 0.53
se_conjunction: 0.712
se_adjective: 0.994
se_understood_object: 0.827
se_relative_clause_on_obj: 0.73
se_PP_on_obj: 0.617
cn_embedded_under_if: 0.898
cn_after_if_clause: 0.199
cn_embedded_under_verb: 0.383
cn_disjunction: 0.366
cn_adverb: 0.134
ce_embedded_under_since: 0.145
ce_after_since_clause: 0.846
ce_embedded_un






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:53,  2.01it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:52,  2.04it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:51,  2.

Heuristic entailed results:
lexical_overlap: 0.0426
subsequence: 0.15
constituent: 0.1168

Entailed accuracy: 0.10313333333333333

Heuristic non-entailed results:
lexical_overlap: 0.9556
subsequence: 0.9386
constituent: 0.9522

Non-Entailed accuracy: 0.9488

Overall accuracy: 0.5259666666666667

Subcase results:
ln_subject/object_swap: 0.931
ln_preposition: 0.986
ln_relative_clause: 0.986
ln_passive: 0.895
ln_conjunction: 0.98
le_relative_clause: 0.025
le_around_prepositional_phrase: 0.034
le_around_relative_clause: 0.073
le_conjunction: 0.022
le_passive: 0.059
sn_NP/S: 0.896
sn_PP_on_subject: 0.923
sn_relative_clause_on_subject: 0.957
sn_past_participle: 0.933
sn_NP/Z: 0.984
se_conjunction: 0.085
se_adjective: 0.446
se_understood_object: 0.116
se_relative_clause_on_obj: 0.083
se_PP_on_obj: 0.02
cn_embedded_under_if: 0.996
cn_after_if_clause: 0.958
cn_embedded_under_verb: 0.997
cn_disjunction: 0.974
cn_adverb: 0.836
ce_embedded_under_since: 0.009
ce_after_since_clause: 0.06
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:58,  2.00it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:57,  2.03it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.2286
subsequence: 0.3498
constituent: 0.2328

Entailed accuracy: 0.2704

Heuristic non-entailed results:
lexical_overlap: 0.6896
subsequence: 0.781
constituent: 0.7492

Non-Entailed accuracy: 0.7399333333333333

Overall accuracy: 0.5051666666666667

Subcase results:
ln_subject/object_swap: 0.416
ln_preposition: 0.895
ln_relative_clause: 0.869
ln_passive: 0.422
ln_conjunction: 0.846
le_relative_clause: 0.132
le_around_prepositional_phrase: 0.13
le_around_relative_clause: 0.222
le_conjunction: 0.121
le_passive: 0.538
sn_NP/S: 0.821
sn_PP_on_subject: 0.718
sn_relative_clause_on_subject: 0.734
sn_past_participle: 0.698
sn_NP/Z: 0.934
se_conjunction: 0.242
se_adjective: 0.776
se_understood_object: 0.501
se_relative_clause_on_obj: 0.166
se_PP_on_obj: 0.064
cn_embedded_under_if: 0.99
cn_after_if_clause: 0.839
cn_embedded_under_verb: 0.822
cn_disjunction: 0.872
cn_adverb: 0.223
ce_embedded_under_since: 0.01
ce_after_since_clause: 0.174
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.1596
subsequence: 0.3226
constituent: 0.1692

Entailed accuracy: 0.21713333333333334

Heuristic non-entailed results:
lexical_overlap: 0.7724
subsequence: 0.8458
constituent: 0.8392

Non-Entailed accuracy: 0.8191333333333334

Overall accuracy: 0.5181333333333333

Subcase results:
ln_subject/object_swap: 0.594
ln_preposition: 0.95
ln_relative_clause: 0.933
ln_passive: 0.476
ln_conjunction: 0.909
le_relative_clause: 0.108
le_around_prepositional_phrase: 0.088
le_around_relative_clause: 0.136
le_conjunction: 0.081
le_passive: 0.385
sn_NP/S: 0.849
sn_PP_on_subject: 0.842
sn_relative_clause_on_subject: 0.893
sn_past_participle: 0.768
sn_NP/Z: 0.877
se_conjunction: 0.145
se_adjective: 0.607
se_understood_object: 0.498
se_relative_clause_on_obj: 0.214
se_PP_on_obj: 0.149
cn_embedded_under_if: 0.989
cn_after_if_clause: 0.926
cn_embedded_under_verb: 0.913
cn_disjunction: 0.96
cn_adverb: 0.408
ce_embedded_under_since: 0.019
ce_after_since_clause: 0.






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.9244
subsequence: 0.9226
constituent: 0.905

Entailed accuracy: 0.9173333333333333

Heuristic non-entailed results:
lexical_overlap: 0.0946
subsequence: 0.1488
constituent: 0.164

Non-Entailed accuracy: 0.1358

Overall accuracy: 0.5265666666666666

Subcase results:
ln_subject/object_swap: 0.093
ln_preposition: 0.11
ln_relative_clause: 0.132
ln_passive: 0.033
ln_conjunction: 0.105
le_relative_clause: 0.879
le_around_prepositional_phrase: 0.912
le_around_relative_clause: 0.946
le_conjunction: 0.906
le_passive: 0.979
sn_NP/S: 0.018
sn_PP_on_subject: 0.083
sn_relative_clause_on_subject: 0.103
sn_past_participle: 0.108
sn_NP/Z: 0.432
se_conjunction: 0.892
se_adjective: 0.994
se_understood_object: 0.9
se_relative_clause_on_obj: 0.92
se_PP_on_obj: 0.907
cn_embedded_under_if: 0.425
cn_after_if_clause: 0.117
cn_embedded_under_verb: 0.133
cn_disjunction: 0.115
cn_adverb: 0.03
ce_embedded_under_since: 0.782
ce_after_since_clause: 0.922
ce_embedded_un






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.08it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.9356
subsequence: 0.912
constituent: 0.8686

Entailed accuracy: 0.9054

Heuristic non-entailed results:
lexical_overlap: 0.0738
subsequence: 0.164
constituent: 0.125

Non-Entailed accuracy: 0.12093333333333334

Overall accuracy: 0.5131666666666667

Subcase results:
ln_subject/object_swap: 0.024
ln_preposition: 0.07
ln_relative_clause: 0.137
ln_passive: 0.025
ln_conjunction: 0.113
le_relative_clause: 0.823
le_around_prepositional_phrase: 0.976
le_around_relative_clause: 0.963
le_conjunction: 0.931
le_passive: 0.985
sn_NP/S: 0.172
sn_PP_on_subject: 0.015
sn_relative_clause_on_subject: 0.055
sn_past_participle: 0.211
sn_NP/Z: 0.367
se_conjunction: 0.924
se_adjective: 0.984
se_understood_object: 0.975
se_relative_clause_on_obj: 0.883
se_PP_on_obj: 0.794
cn_embedded_under_if: 0.356
cn_after_if_clause: 0.038
cn_embedded_under_verb: 0.052
cn_disjunction: 0.175
cn_adverb: 0.004
ce_embedded_under_since: 0.606
ce_after_since_clause: 0.93
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.1732
subsequence: 0.2502
constituent: 0.2144

Entailed accuracy: 0.2126

Heuristic non-entailed results:
lexical_overlap: 0.8554
subsequence: 0.8128
constituent: 0.7886

Non-Entailed accuracy: 0.8189333333333333

Overall accuracy: 0.5157666666666667

Subcase results:
ln_subject/object_swap: 0.841
ln_preposition: 0.929
ln_relative_clause: 0.851
ln_passive: 0.716
ln_conjunction: 0.94
le_relative_clause: 0.203
le_around_prepositional_phrase: 0.132
le_around_relative_clause: 0.21
le_conjunction: 0.079
le_passive: 0.242
sn_NP/S: 0.798
sn_PP_on_subject: 0.825
sn_relative_clause_on_subject: 0.777
sn_past_participle: 0.685
sn_NP/Z: 0.979
se_conjunction: 0.131
se_adjective: 0.573
se_understood_object: 0.287
se_relative_clause_on_obj: 0.192
se_PP_on_obj: 0.068
cn_embedded_under_if: 0.943
cn_after_if_clause: 0.757
cn_embedded_under_verb: 0.788
cn_disjunction: 0.903
cn_adverb: 0.552
ce_embedded_under_since: 0.084
ce_after_since_clause: 0.201
ce_embedd






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8756
subsequence: 0.8272
constituent: 0.8054

Entailed accuracy: 0.8360666666666666

Heuristic non-entailed results:
lexical_overlap: 0.1508
subsequence: 0.255
constituent: 0.2158

Non-Entailed accuracy: 0.2072

Overall accuracy: 0.5216333333333333

Subcase results:
ln_subject/object_swap: 0.046
ln_preposition: 0.219
ln_relative_clause: 0.213
ln_passive: 0.091
ln_conjunction: 0.185
le_relative_clause: 0.743
le_around_prepositional_phrase: 0.907
le_around_relative_clause: 0.91
le_conjunction: 0.873
le_passive: 0.945
sn_NP/S: 0.362
sn_PP_on_subject: 0.069
sn_relative_clause_on_subject: 0.096
sn_past_participle: 0.344
sn_NP/Z: 0.404
se_conjunction: 0.905
se_adjective: 0.998
se_understood_object: 0.862
se_relative_clause_on_obj: 0.806
se_PP_on_obj: 0.565
cn_embedded_under_if: 0.561
cn_after_if_clause: 0.09
cn_embedded_under_verb: 0.118
cn_disjunction: 0.305
cn_adverb: 0.005
ce_embedded_under_since: 0.476
ce_after_since_clause: 0.9
ce_embedded_






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.11it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.094
subsequence: 0.201
constituent: 0.1092

Entailed accuracy: 0.13473333333333334

Heuristic non-entailed results:
lexical_overlap: 0.9098
subsequence: 0.92
constituent: 0.9024

Non-Entailed accuracy: 0.9107333333333333

Overall accuracy: 0.5227333333333334

Subcase results:
ln_subject/object_swap: 0.887
ln_preposition: 0.961
ln_relative_clause: 0.902
ln_passive: 0.888
ln_conjunction: 0.911
le_relative_clause: 0.088
le_around_prepositional_phrase: 0.072
le_around_relative_clause: 0.099
le_conjunction: 0.108
le_passive: 0.103
sn_NP/S: 0.882
sn_PP_on_subject: 0.927
sn_relative_clause_on_subject: 0.948
sn_past_participle: 0.881
sn_NP/Z: 0.962
se_conjunction: 0.15
se_adjective: 0.391
se_understood_object: 0.24
se_relative_clause_on_obj: 0.181
se_PP_on_obj: 0.043
cn_embedded_under_if: 0.974
cn_after_if_clause: 0.948
cn_embedded_under_verb: 0.96
cn_disjunction: 0.928
cn_adverb: 0.702
ce_embedded_under_since: 0.093
ce_after_since_clause: 0.042
c






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<01:06,  1.75it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:01<01:02,  1.85it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:59,  1.92it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:57,  1.97it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:56,  2.01it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:54,  2.04it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:53,  2.06it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.08it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:52,  2.09it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.10it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.10it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.4442
subsequence: 0.5952
constituent: 0.4184

Entailed accuracy: 0.48593333333333333

Heuristic non-entailed results:
lexical_overlap: 0.5178
subsequence: 0.5832
constituent: 0.5382

Non-Entailed accuracy: 0.5464

Overall accuracy: 0.5161666666666667

Subcase results:
ln_subject/object_swap: 0.398
ln_preposition: 0.644
ln_relative_clause: 0.559
ln_passive: 0.435
ln_conjunction: 0.553
le_relative_clause: 0.449
le_around_prepositional_phrase: 0.38
le_around_relative_clause: 0.415
le_conjunction: 0.458
le_passive: 0.519
sn_NP/S: 0.63
sn_PP_on_subject: 0.546
sn_relative_clause_on_subject: 0.589
sn_past_participle: 0.392
sn_NP/Z: 0.759
se_conjunction: 0.527
se_adjective: 0.76
se_understood_object: 0.731
se_relative_clause_on_obj: 0.539
se_PP_on_obj: 0.419
cn_embedded_under_if: 0.724
cn_after_if_clause: 0.526
cn_embedded_under_verb: 0.562
cn_disjunction: 0.66
cn_adverb: 0.219
ce_embedded_under_since: 0.287
ce_after_since_clause: 0.378
ce_embedde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.9066
subsequence: 0.8838
constituent: 0.765

Entailed accuracy: 0.8518

Heuristic non-entailed results:
lexical_overlap: 0.0774
subsequence: 0.2408
constituent: 0.2928

Non-Entailed accuracy: 0.20366666666666666

Overall accuracy: 0.5277333333333334

Subcase results:
ln_subject/object_swap: 0.006
ln_preposition: 0.111
ln_relative_clause: 0.157
ln_passive: 0.009
ln_conjunction: 0.104
le_relative_clause: 0.736
le_around_prepositional_phrase: 0.948
le_around_relative_clause: 0.931
le_conjunction: 0.921
le_passive: 0.997
sn_NP/S: 0.326
sn_PP_on_subject: 0.026
sn_relative_clause_on_subject: 0.06
sn_past_participle: 0.407
sn_NP/Z: 0.385
se_conjunction: 0.923
se_adjective: 1.0
se_understood_object: 0.895
se_relative_clause_on_obj: 0.82
se_PP_on_obj: 0.781
cn_embedded_under_if: 0.825
cn_after_if_clause: 0.168
cn_embedded_under_verb: 0.088
cn_disjunction: 0.294
cn_adverb: 0.089
ce_embedded_under_since: 0.316
ce_after_since_clause: 0.886
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.05it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.1684
subsequence: 0.2652
constituent: 0.1642

Entailed accuracy: 0.19926666666666668

Heuristic non-entailed results:
lexical_overlap: 0.8276
subsequence: 0.885
constituent: 0.8778

Non-Entailed accuracy: 0.8634666666666667

Overall accuracy: 0.5313666666666667

Subcase results:
ln_subject/object_swap: 0.716
ln_preposition: 0.923
ln_relative_clause: 0.896
ln_passive: 0.744
ln_conjunction: 0.859
le_relative_clause: 0.1
le_around_prepositional_phrase: 0.14
le_around_relative_clause: 0.212
le_conjunction: 0.16
le_passive: 0.23
sn_NP/S: 0.861
sn_PP_on_subject: 0.844
sn_relative_clause_on_subject: 0.883
sn_past_participle: 0.872
sn_NP/Z: 0.965
se_conjunction: 0.212
se_adjective: 0.649
se_understood_object: 0.209
se_relative_clause_on_obj: 0.172
se_PP_on_obj: 0.084
cn_embedded_under_if: 0.983
cn_after_if_clause: 0.906
cn_embedded_under_verb: 0.961
cn_disjunction: 0.941
cn_adverb: 0.598
ce_embedded_under_since: 0.031
ce_after_since_clause: 0.087







Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.4054
subsequence: 0.4446
constituent: 0.2876

Entailed accuracy: 0.3792

Heuristic non-entailed results:
lexical_overlap: 0.5446
subsequence: 0.6562
constituent: 0.6872

Non-Entailed accuracy: 0.6293333333333333

Overall accuracy: 0.5042666666666666

Subcase results:
ln_subject/object_swap: 0.338
ln_preposition: 0.742
ln_relative_clause: 0.678
ln_passive: 0.354
ln_conjunction: 0.611
le_relative_clause: 0.306
le_around_prepositional_phrase: 0.37
le_around_relative_clause: 0.445
le_conjunction: 0.34
le_passive: 0.566
sn_NP/S: 0.725
sn_PP_on_subject: 0.571
sn_relative_clause_on_subject: 0.575
sn_past_participle: 0.575
sn_NP/Z: 0.835
se_conjunction: 0.437
se_adjective: 0.807
se_understood_object: 0.492
se_relative_clause_on_obj: 0.29
se_PP_on_obj: 0.197
cn_embedded_under_if: 0.951
cn_after_if_clause: 0.77
cn_embedded_under_verb: 0.625
cn_disjunction: 0.831
cn_adverb: 0.259
ce_embedded_under_since: 0.048
ce_after_since_clause: 0.265
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.11it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.2864
subsequence: 0.4552
constituent: 0.2294

Entailed accuracy: 0.32366666666666666

Heuristic non-entailed results:
lexical_overlap: 0.644
subsequence: 0.7432
constituent: 0.7526

Non-Entailed accuracy: 0.7132666666666667

Overall accuracy: 0.5184666666666666

Subcase results:
ln_subject/object_swap: 0.455
ln_preposition: 0.8
ln_relative_clause: 0.773
ln_passive: 0.476
ln_conjunction: 0.716
le_relative_clause: 0.225
le_around_prepositional_phrase: 0.234
le_around_relative_clause: 0.281
le_conjunction: 0.277
le_passive: 0.415
sn_NP/S: 0.777
sn_PP_on_subject: 0.751
sn_relative_clause_on_subject: 0.794
sn_past_participle: 0.582
sn_NP/Z: 0.812
se_conjunction: 0.354
se_adjective: 0.79
se_understood_object: 0.535
se_relative_clause_on_obj: 0.309
se_PP_on_obj: 0.288
cn_embedded_under_if: 0.944
cn_after_if_clause: 0.769
cn_embedded_under_verb: 0.823
cn_disjunction: 0.853
cn_adverb: 0.374
ce_embedded_under_since: 0.067
ce_after_since_clause: 0.19






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.9416
subsequence: 0.8934
constituent: 0.8924

Entailed accuracy: 0.9091333333333333

Heuristic non-entailed results:
lexical_overlap: 0.069
subsequence: 0.1474
constituent: 0.1566

Non-Entailed accuracy: 0.12433333333333334

Overall accuracy: 0.5167333333333334

Subcase results:
ln_subject/object_swap: 0.003
ln_preposition: 0.114
ln_relative_clause: 0.106
ln_passive: 0.001
ln_conjunction: 0.121
le_relative_clause: 0.825
le_around_prepositional_phrase: 0.965
le_around_relative_clause: 0.972
le_conjunction: 0.946
le_passive: 1.0
sn_NP/S: 0.236
sn_PP_on_subject: 0.012
sn_relative_clause_on_subject: 0.03
sn_past_participle: 0.297
sn_NP/Z: 0.162
se_conjunction: 0.949
se_adjective: 0.997
se_understood_object: 0.909
se_relative_clause_on_obj: 0.909
se_PP_on_obj: 0.703
cn_embedded_under_if: 0.556
cn_after_if_clause: 0.058
cn_embedded_under_verb: 0.021
cn_disjunction: 0.146
cn_adverb: 0.002
ce_embedded_under_since: 0.625
ce_after_since_clause: 0.97






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.3352
subsequence: 0.3612
constituent: 0.2834

Entailed accuracy: 0.3266

Heuristic non-entailed results:
lexical_overlap: 0.6904
subsequence: 0.745
constituent: 0.7044

Non-Entailed accuracy: 0.7132666666666667

Overall accuracy: 0.5199333333333334

Subcase results:
ln_subject/object_swap: 0.507
ln_preposition: 0.864
ln_relative_clause: 0.754
ln_passive: 0.557
ln_conjunction: 0.77
le_relative_clause: 0.227
le_around_prepositional_phrase: 0.276
le_around_relative_clause: 0.423
le_conjunction: 0.328
le_passive: 0.422
sn_NP/S: 0.682
sn_PP_on_subject: 0.645
sn_relative_clause_on_subject: 0.729
sn_past_participle: 0.786
sn_NP/Z: 0.883
se_conjunction: 0.409
se_adjective: 0.734
se_understood_object: 0.255
se_relative_clause_on_obj: 0.344
se_PP_on_obj: 0.064
cn_embedded_under_if: 0.954
cn_after_if_clause: 0.793
cn_embedded_under_verb: 0.762
cn_disjunction: 0.822
cn_adverb: 0.191
ce_embedded_under_since: 0.063
ce_after_since_clause: 0.172
ce_embedd






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<01:00,  1.95it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:58,  2.00it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:56,  2.03it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.5276
subsequence: 0.5142
constituent: 0.3618

Entailed accuracy: 0.46786666666666665

Heuristic non-entailed results:
lexical_overlap: 0.4282
subsequence: 0.5896
constituent: 0.5566

Non-Entailed accuracy: 0.5248

Overall accuracy: 0.49633333333333335

Subcase results:
ln_subject/object_swap: 0.227
ln_preposition: 0.584
ln_relative_clause: 0.542
ln_passive: 0.256
ln_conjunction: 0.532
le_relative_clause: 0.422
le_around_prepositional_phrase: 0.448
le_around_relative_clause: 0.515
le_conjunction: 0.482
le_passive: 0.771
sn_NP/S: 0.657
sn_PP_on_subject: 0.437
sn_relative_clause_on_subject: 0.496
sn_past_participle: 0.588
sn_NP/Z: 0.77
se_conjunction: 0.509
se_adjective: 0.819
se_understood_object: 0.583
se_relative_clause_on_obj: 0.441
se_PP_on_obj: 0.219
cn_embedded_under_if: 0.852
cn_after_if_clause: 0.635
cn_embedded_under_verb: 0.5
cn_disjunction: 0.71
cn_adverb: 0.086
ce_embedded_under_since: 0.148
ce_after_since_clause: 0.311
ce_embedd






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.5418
subsequence: 0.6904
constituent: 0.4338

Entailed accuracy: 0.5553333333333333

Heuristic non-entailed results:
lexical_overlap: 0.4582
subsequence: 0.5028
constituent: 0.5542

Non-Entailed accuracy: 0.5050666666666667

Overall accuracy: 0.5302

Subcase results:
ln_subject/object_swap: 0.274
ln_preposition: 0.639
ln_relative_clause: 0.553
ln_passive: 0.328
ln_conjunction: 0.497
le_relative_clause: 0.46
le_around_prepositional_phrase: 0.483
le_around_relative_clause: 0.601
le_conjunction: 0.545
le_passive: 0.62
sn_NP/S: 0.467
sn_PP_on_subject: 0.491
sn_relative_clause_on_subject: 0.568
sn_past_participle: 0.55
sn_NP/Z: 0.438
se_conjunction: 0.64
se_adjective: 0.898
se_understood_object: 0.859
se_relative_clause_on_obj: 0.701
se_PP_on_obj: 0.354
cn_embedded_under_if: 0.715
cn_after_if_clause: 0.687
cn_embedded_under_verb: 0.558
cn_disjunction: 0.678
cn_adverb: 0.133
ce_embedded_under_since: 0.365
ce_after_since_clause: 0.258
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.356
subsequence: 0.4904
constituent: 0.2796

Entailed accuracy: 0.37533333333333335

Heuristic non-entailed results:
lexical_overlap: 0.6112
subsequence: 0.7114
constituent: 0.7794

Non-Entailed accuracy: 0.7006666666666667

Overall accuracy: 0.538

Subcase results:
ln_subject/object_swap: 0.382
ln_preposition: 0.776
ln_relative_clause: 0.798
ln_passive: 0.406
ln_conjunction: 0.694
le_relative_clause: 0.203
le_around_prepositional_phrase: 0.27
le_around_relative_clause: 0.312
le_conjunction: 0.346
le_passive: 0.649
sn_NP/S: 0.704
sn_PP_on_subject: 0.619
sn_relative_clause_on_subject: 0.697
sn_past_participle: 0.759
sn_NP/Z: 0.778
se_conjunction: 0.458
se_adjective: 0.76
se_understood_object: 0.633
se_relative_clause_on_obj: 0.359
se_PP_on_obj: 0.242
cn_embedded_under_if: 0.953
cn_after_if_clause: 0.783
cn_embedded_under_verb: 0.76
cn_disjunction: 0.848
cn_adverb: 0.553
ce_embedded_under_since: 0.045
ce_after_since_clause: 0.211
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.11it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.12it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:52,  2.12it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.12it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8736
subsequence: 0.9314
constituent: 0.883

Entailed accuracy: 0.896

Heuristic non-entailed results:
lexical_overlap: 0.1492
subsequence: 0.2066
constituent: 0.1764

Non-Entailed accuracy: 0.1774

Overall accuracy: 0.5367

Subcase results:
ln_subject/object_swap: 0.116
ln_preposition: 0.181
ln_relative_clause: 0.211
ln_passive: 0.102
ln_conjunction: 0.136
le_relative_clause: 0.833
le_around_prepositional_phrase: 0.838
le_around_relative_clause: 0.89
le_conjunction: 0.891
le_passive: 0.916
sn_NP/S: 0.02
sn_PP_on_subject: 0.098
sn_relative_clause_on_subject: 0.125
sn_past_participle: 0.203
sn_NP/Z: 0.587
se_conjunction: 0.931
se_adjective: 0.969
se_understood_object: 0.962
se_relative_clause_on_obj: 0.92
se_PP_on_obj: 0.875
cn_embedded_under_if: 0.384
cn_after_if_clause: 0.233
cn_embedded_under_verb: 0.134
cn_disjunction: 0.101
cn_adverb: 0.03
ce_embedded_under_since: 0.763
ce_after_since_clause: 0.885
ce_embedded_under_verb: 0.886
ce_conj






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.9408
subsequence: 0.976
constituent: 0.931

Entailed accuracy: 0.9492666666666667

Heuristic non-entailed results:
lexical_overlap: 0.0706
subsequence: 0.104
constituent: 0.0774

Non-Entailed accuracy: 0.084

Overall accuracy: 0.5166333333333334

Subcase results:
ln_subject/object_swap: 0.034
ln_preposition: 0.103
ln_relative_clause: 0.106
ln_passive: 0.031
ln_conjunction: 0.079
le_relative_clause: 0.865
le_around_prepositional_phrase: 0.969
le_around_relative_clause: 0.984
le_conjunction: 0.98
le_passive: 0.906
sn_NP/S: 0.058
sn_PP_on_subject: 0.024
sn_relative_clause_on_subject: 0.029
sn_past_participle: 0.103
sn_NP/Z: 0.306
se_conjunction: 0.99
se_adjective: 0.995
se_understood_object: 0.976
se_relative_clause_on_obj: 0.963
se_PP_on_obj: 0.956
cn_embedded_under_if: 0.224
cn_after_if_clause: 0.052
cn_embedded_under_verb: 0.02
cn_disjunction: 0.09
cn_adverb: 0.001
ce_embedded_under_since: 0.781
ce_after_since_clause: 0.968
ce_embedded_und






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:59,  1.96it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:57,  2.00it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:56,  2.04it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.2448
subsequence: 0.3224
constituent: 0.2656

Entailed accuracy: 0.2776

Heuristic non-entailed results:
lexical_overlap: 0.7714
subsequence: 0.7706
constituent: 0.7246

Non-Entailed accuracy: 0.7555333333333333

Overall accuracy: 0.5165666666666666

Subcase results:
ln_subject/object_swap: 0.697
ln_preposition: 0.908
ln_relative_clause: 0.838
ln_passive: 0.593
ln_conjunction: 0.821
le_relative_clause: 0.229
le_around_prepositional_phrase: 0.217
le_around_relative_clause: 0.286
le_conjunction: 0.22
le_passive: 0.272
sn_NP/S: 0.63
sn_PP_on_subject: 0.759
sn_relative_clause_on_subject: 0.741
sn_past_participle: 0.731
sn_NP/Z: 0.992
se_conjunction: 0.264
se_adjective: 0.61
se_understood_object: 0.322
se_relative_clause_on_obj: 0.264
se_PP_on_obj: 0.152
cn_embedded_under_if: 0.92
cn_after_if_clause: 0.804
cn_embedded_under_verb: 0.678
cn_disjunction: 0.815
cn_adverb: 0.406
ce_embedded_under_since: 0.084
ce_after_since_clause: 0.227
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.07it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.846
subsequence: 0.9278
constituent: 0.8386

Entailed accuracy: 0.8708

Heuristic non-entailed results:
lexical_overlap: 0.1974
subsequence: 0.1984
constituent: 0.1788

Non-Entailed accuracy: 0.19153333333333333

Overall accuracy: 0.5311666666666667

Subcase results:
ln_subject/object_swap: 0.089
ln_preposition: 0.311
ln_relative_clause: 0.318
ln_passive: 0.075
ln_conjunction: 0.194
le_relative_clause: 0.739
le_around_prepositional_phrase: 0.862
le_around_relative_clause: 0.867
le_conjunction: 0.892
le_passive: 0.87
sn_NP/S: 0.178
sn_PP_on_subject: 0.1
sn_relative_clause_on_subject: 0.131
sn_past_participle: 0.252
sn_NP/Z: 0.331
se_conjunction: 0.966
se_adjective: 0.986
se_understood_object: 0.97
se_relative_clause_on_obj: 0.907
se_PP_on_obj: 0.81
cn_embedded_under_if: 0.438
cn_after_if_clause: 0.133
cn_embedded_under_verb: 0.105
cn_disjunction: 0.204
cn_adverb: 0.014
ce_embedded_under_since: 0.61
ce_after_since_clause: 0.898
ce_embedded_u






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.03it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:57,  2.03it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.07it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.1198
subsequence: 0.2638
constituent: 0.1622

Entailed accuracy: 0.18193333333333334

Heuristic non-entailed results:
lexical_overlap: 0.8698
subsequence: 0.9018
constituent: 0.845

Non-Entailed accuracy: 0.8722

Overall accuracy: 0.5270666666666667

Subcase results:
ln_subject/object_swap: 0.777
ln_preposition: 0.966
ln_relative_clause: 0.957
ln_passive: 0.786
ln_conjunction: 0.863
le_relative_clause: 0.071
le_around_prepositional_phrase: 0.095
le_around_relative_clause: 0.111
le_conjunction: 0.193
le_passive: 0.129
sn_NP/S: 0.801
sn_PP_on_subject: 0.908
sn_relative_clause_on_subject: 0.935
sn_past_participle: 0.888
sn_NP/Z: 0.977
se_conjunction: 0.222
se_adjective: 0.448
se_understood_object: 0.346
se_relative_clause_on_obj: 0.188
se_PP_on_obj: 0.115
cn_embedded_under_if: 0.972
cn_after_if_clause: 0.913
cn_embedded_under_verb: 0.894
cn_disjunction: 0.873
cn_adverb: 0.573
ce_embedded_under_since: 0.075
ce_after_since_clause: 0.084
ce_embe






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.7908
subsequence: 0.8552
constituent: 0.7386

Entailed accuracy: 0.7948666666666667

Heuristic non-entailed results:
lexical_overlap: 0.1898
subsequence: 0.2832
constituent: 0.2192

Non-Entailed accuracy: 0.23073333333333335

Overall accuracy: 0.5128

Subcase results:
ln_subject/object_swap: 0.106
ln_preposition: 0.328
ln_relative_clause: 0.265
ln_passive: 0.08
ln_conjunction: 0.17
le_relative_clause: 0.728
le_around_prepositional_phrase: 0.74
le_around_relative_clause: 0.786
le_conjunction: 0.848
le_passive: 0.852
sn_NP/S: 0.224
sn_PP_on_subject: 0.311
sn_relative_clause_on_subject: 0.299
sn_past_participle: 0.138
sn_NP/Z: 0.444
se_conjunction: 0.835
se_adjective: 0.951
se_understood_object: 0.935
se_relative_clause_on_obj: 0.806
se_PP_on_obj: 0.749
cn_embedded_under_if: 0.379
cn_after_if_clause: 0.259
cn_embedded_under_verb: 0.15
cn_disjunction: 0.276
cn_adverb: 0.032
ce_embedded_under_since: 0.598
ce_after_since_clause: 0.695
ce_embedde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8896
subsequence: 0.9298
constituent: 0.7906

Entailed accuracy: 0.87

Heuristic non-entailed results:
lexical_overlap: 0.1236
subsequence: 0.1898
constituent: 0.2368

Non-Entailed accuracy: 0.1834

Overall accuracy: 0.5267

Subcase results:
ln_subject/object_swap: 0.048
ln_preposition: 0.189
ln_relative_clause: 0.219
ln_passive: 0.027
ln_conjunction: 0.135
le_relative_clause: 0.774
le_around_prepositional_phrase: 0.886
le_around_relative_clause: 0.906
le_conjunction: 0.946
le_passive: 0.936
sn_NP/S: 0.212
sn_PP_on_subject: 0.081
sn_relative_clause_on_subject: 0.096
sn_past_participle: 0.272
sn_NP/Z: 0.288
se_conjunction: 0.955
se_adjective: 0.996
se_understood_object: 0.945
se_relative_clause_on_obj: 0.874
se_PP_on_obj: 0.879
cn_embedded_under_if: 0.663
cn_after_if_clause: 0.225
cn_embedded_under_verb: 0.099
cn_disjunction: 0.159
cn_adverb: 0.038
ce_embedded_under_since: 0.442
ce_after_since_clause: 0.832
ce_embedded_under_verb: 0.892
ce_






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.2608
subsequence: 0.4226
constituent: 0.234

Entailed accuracy: 0.3058

Heuristic non-entailed results:
lexical_overlap: 0.7172
subsequence: 0.8158
constituent: 0.7944

Non-Entailed accuracy: 0.7758

Overall accuracy: 0.5408

Subcase results:
ln_subject/object_swap: 0.544
ln_preposition: 0.876
ln_relative_clause: 0.874
ln_passive: 0.569
ln_conjunction: 0.723
le_relative_clause: 0.143
le_around_prepositional_phrase: 0.222
le_around_relative_clause: 0.308
le_conjunction: 0.326
le_passive: 0.305
sn_NP/S: 0.668
sn_PP_on_subject: 0.767
sn_relative_clause_on_subject: 0.841
sn_past_participle: 0.841
sn_NP/Z: 0.962
se_conjunction: 0.36
se_adjective: 0.769
se_understood_object: 0.413
se_relative_clause_on_obj: 0.327
se_PP_on_obj: 0.244
cn_embedded_under_if: 0.949
cn_after_if_clause: 0.904
cn_embedded_under_verb: 0.88
cn_disjunction: 0.797
cn_adverb: 0.442
ce_embedded_under_since: 0.059
ce_after_since_clause: 0.116
ce_embedded_under_verb: 0.152
ce_c






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.6112
subsequence: 0.6414
constituent: 0.4584

Entailed accuracy: 0.5703333333333334

Heuristic non-entailed results:
lexical_overlap: 0.377
subsequence: 0.5146
constituent: 0.4662

Non-Entailed accuracy: 0.4526

Overall accuracy: 0.5114666666666666

Subcase results:
ln_subject/object_swap: 0.166
ln_preposition: 0.566
ln_relative_clause: 0.567
ln_passive: 0.191
ln_conjunction: 0.395
le_relative_clause: 0.418
le_around_prepositional_phrase: 0.615
le_around_relative_clause: 0.636
le_conjunction: 0.681
le_passive: 0.706
sn_NP/S: 0.544
sn_PP_on_subject: 0.352
sn_relative_clause_on_subject: 0.451
sn_past_participle: 0.477
sn_NP/Z: 0.749
se_conjunction: 0.676
se_adjective: 0.956
se_understood_object: 0.682
se_relative_clause_on_obj: 0.469
se_PP_on_obj: 0.424
cn_embedded_under_if: 0.856
cn_after_if_clause: 0.562
cn_embedded_under_verb: 0.337
cn_disjunction: 0.488
cn_adverb: 0.088
ce_embedded_under_since: 0.131
ce_after_since_clause: 0.455
ce_embed






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<01:00,  1.95it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:58,  1.99it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:56,  2.03it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.07it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.5582
subsequence: 0.7562
constituent: 0.4398

Entailed accuracy: 0.5847333333333333

Heuristic non-entailed results:
lexical_overlap: 0.4174
subsequence: 0.5826
constituent: 0.53

Non-Entailed accuracy: 0.51

Overall accuracy: 0.5473666666666667

Subcase results:
ln_subject/object_swap: 0.158
ln_preposition: 0.65
ln_relative_clause: 0.64
ln_passive: 0.181
ln_conjunction: 0.458
le_relative_clause: 0.427
le_around_prepositional_phrase: 0.507
le_around_relative_clause: 0.569
le_conjunction: 0.629
le_passive: 0.659
sn_NP/S: 0.538
sn_PP_on_subject: 0.499
sn_relative_clause_on_subject: 0.588
sn_past_participle: 0.575
sn_NP/Z: 0.713
se_conjunction: 0.716
se_adjective: 0.934
se_understood_object: 0.883
se_relative_clause_on_obj: 0.642
se_PP_on_obj: 0.606
cn_embedded_under_if: 0.842
cn_after_if_clause: 0.612
cn_embedded_under_verb: 0.472
cn_disjunction: 0.598
cn_adverb: 0.126
ce_embedded_under_since: 0.199
ce_after_since_clause: 0.369
ce_embedded_u






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8972
subsequence: 0.9342
constituent: 0.8746

Entailed accuracy: 0.902

Heuristic non-entailed results:
lexical_overlap: 0.1166
subsequence: 0.1182
constituent: 0.1586

Non-Entailed accuracy: 0.13113333333333332

Overall accuracy: 0.5165666666666666

Subcase results:
ln_subject/object_swap: 0.054
ln_preposition: 0.172
ln_relative_clause: 0.169
ln_passive: 0.068
ln_conjunction: 0.12
le_relative_clause: 0.807
le_around_prepositional_phrase: 0.903
le_around_relative_clause: 0.929
le_conjunction: 0.95
le_passive: 0.897
sn_NP/S: 0.137
sn_PP_on_subject: 0.055
sn_relative_clause_on_subject: 0.066
sn_past_participle: 0.214
sn_NP/Z: 0.119
se_conjunction: 0.968
se_adjective: 0.991
se_understood_object: 0.981
se_relative_clause_on_obj: 0.922
se_PP_on_obj: 0.809
cn_embedded_under_if: 0.413
cn_after_if_clause: 0.154
cn_embedded_under_verb: 0.059
cn_disjunction: 0.154
cn_adverb: 0.013
ce_embedded_under_since: 0.673
ce_after_since_clause: 0.889
ce_embedd






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.05it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.4664
subsequence: 0.6456
constituent: 0.4112

Entailed accuracy: 0.5077333333333334

Heuristic non-entailed results:
lexical_overlap: 0.532
subsequence: 0.591
constituent: 0.6114

Non-Entailed accuracy: 0.5781333333333334

Overall accuracy: 0.5429333333333334

Subcase results:
ln_subject/object_swap: 0.297
ln_preposition: 0.734
ln_relative_clause: 0.693
ln_passive: 0.427
ln_conjunction: 0.509
le_relative_clause: 0.331
le_around_prepositional_phrase: 0.425
le_around_relative_clause: 0.561
le_conjunction: 0.585
le_passive: 0.43
sn_NP/S: 0.484
sn_PP_on_subject: 0.487
sn_relative_clause_on_subject: 0.594
sn_past_participle: 0.667
sn_NP/Z: 0.723
se_conjunction: 0.671
se_adjective: 0.925
se_understood_object: 0.723
se_relative_clause_on_obj: 0.592
se_PP_on_obj: 0.317
cn_embedded_under_if: 0.875
cn_after_if_clause: 0.807
cn_embedded_under_verb: 0.604
cn_disjunction: 0.651
cn_adverb: 0.12
ce_embedded_under_since: 0.185
ce_after_since_clause: 0.255






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.7428
subsequence: 0.7622
constituent: 0.6154

Entailed accuracy: 0.7068

Heuristic non-entailed results:
lexical_overlap: 0.267
subsequence: 0.3566
constituent: 0.3338

Non-Entailed accuracy: 0.3191333333333333

Overall accuracy: 0.5129666666666667

Subcase results:
ln_subject/object_swap: 0.108
ln_preposition: 0.396
ln_relative_clause: 0.364
ln_passive: 0.192
ln_conjunction: 0.275
le_relative_clause: 0.61
le_around_prepositional_phrase: 0.738
le_around_relative_clause: 0.808
le_conjunction: 0.783
le_passive: 0.775
sn_NP/S: 0.431
sn_PP_on_subject: 0.199
sn_relative_clause_on_subject: 0.237
sn_past_participle: 0.385
sn_NP/Z: 0.531
se_conjunction: 0.812
se_adjective: 0.952
se_understood_object: 0.845
se_relative_clause_on_obj: 0.686
se_PP_on_obj: 0.516
cn_embedded_under_if: 0.681
cn_after_if_clause: 0.435
cn_embedded_under_verb: 0.139
cn_disjunction: 0.377
cn_adverb: 0.037
ce_embedded_under_since: 0.314
ce_after_since_clause: 0.585
ce_embedd






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.7848
subsequence: 0.852
constituent: 0.6984

Entailed accuracy: 0.7784

Heuristic non-entailed results:
lexical_overlap: 0.2346
subsequence: 0.2406
constituent: 0.2968

Non-Entailed accuracy: 0.25733333333333336

Overall accuracy: 0.5178666666666667

Subcase results:
ln_subject/object_swap: 0.07
ln_preposition: 0.41
ln_relative_clause: 0.384
ln_passive: 0.053
ln_conjunction: 0.256
le_relative_clause: 0.676
le_around_prepositional_phrase: 0.742
le_around_relative_clause: 0.821
le_conjunction: 0.784
le_passive: 0.901
sn_NP/S: 0.258
sn_PP_on_subject: 0.179
sn_relative_clause_on_subject: 0.249
sn_past_participle: 0.295
sn_NP/Z: 0.222
se_conjunction: 0.871
se_adjective: 0.974
se_understood_object: 0.969
se_relative_clause_on_obj: 0.837
se_PP_on_obj: 0.609
cn_embedded_under_if: 0.493
cn_after_if_clause: 0.413
cn_embedded_under_verb: 0.129
cn_disjunction: 0.43
cn_adverb: 0.019
ce_embedded_under_since: 0.549
ce_after_since_clause: 0.606
ce_embedde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<01:00,  1.95it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:58,  2.00it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:56,  2.03it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.07it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.5858
subsequence: 0.6928
constituent: 0.4788

Entailed accuracy: 0.5858

Heuristic non-entailed results:
lexical_overlap: 0.3796
subsequence: 0.4868
constituent: 0.529

Non-Entailed accuracy: 0.46513333333333334

Overall accuracy: 0.5254666666666666

Subcase results:
ln_subject/object_swap: 0.18
ln_preposition: 0.584
ln_relative_clause: 0.555
ln_passive: 0.161
ln_conjunction: 0.418
le_relative_clause: 0.439
le_around_prepositional_phrase: 0.524
le_around_relative_clause: 0.583
le_conjunction: 0.561
le_passive: 0.822
sn_NP/S: 0.553
sn_PP_on_subject: 0.314
sn_relative_clause_on_subject: 0.406
sn_past_participle: 0.568
sn_NP/Z: 0.593
se_conjunction: 0.709
se_adjective: 0.924
se_understood_object: 0.784
se_relative_clause_on_obj: 0.59
se_PP_on_obj: 0.457
cn_embedded_under_if: 0.869
cn_after_if_clause: 0.635
cn_embedded_under_verb: 0.437
cn_disjunction: 0.549
cn_adverb: 0.155
ce_embedded_under_since: 0.16
ce_after_since_clause: 0.4
ce_embedded_






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.11it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.9178
subsequence: 0.9566
constituent: 0.9204

Entailed accuracy: 0.9316

Heuristic non-entailed results:
lexical_overlap: 0.0698
subsequence: 0.1278
constituent: 0.1482

Non-Entailed accuracy: 0.11526666666666667

Overall accuracy: 0.5234333333333333

Subcase results:
ln_subject/object_swap: 0.015
ln_preposition: 0.101
ln_relative_clause: 0.106
ln_passive: 0.086
ln_conjunction: 0.041
le_relative_clause: 0.839
le_around_prepositional_phrase: 0.927
le_around_relative_clause: 0.93
le_conjunction: 0.981
le_passive: 0.912
sn_NP/S: 0.117
sn_PP_on_subject: 0.057
sn_relative_clause_on_subject: 0.065
sn_past_participle: 0.275
sn_NP/Z: 0.125
se_conjunction: 0.988
se_adjective: 0.996
se_understood_object: 0.982
se_relative_clause_on_obj: 0.954
se_PP_on_obj: 0.863
cn_embedded_under_if: 0.323
cn_after_if_clause: 0.135
cn_embedded_under_verb: 0.067
cn_disjunction: 0.198
cn_adverb: 0.018
ce_embedded_under_since: 0.838
ce_after_since_clause: 0.928
ce_embe






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.4022
subsequence: 0.5314
constituent: 0.3694

Entailed accuracy: 0.43433333333333335

Heuristic non-entailed results:
lexical_overlap: 0.5348
subsequence: 0.687
constituent: 0.6948

Non-Entailed accuracy: 0.6388666666666667

Overall accuracy: 0.5366

Subcase results:
ln_subject/object_swap: 0.299
ln_preposition: 0.726
ln_relative_clause: 0.622
ln_passive: 0.589
ln_conjunction: 0.438
le_relative_clause: 0.312
le_around_prepositional_phrase: 0.308
le_around_relative_clause: 0.435
le_conjunction: 0.592
le_passive: 0.364
sn_NP/S: 0.487
sn_PP_on_subject: 0.666
sn_relative_clause_on_subject: 0.722
sn_past_participle: 0.745
sn_NP/Z: 0.815
se_conjunction: 0.604
se_adjective: 0.776
se_understood_object: 0.507
se_relative_clause_on_obj: 0.517
se_PP_on_obj: 0.253
cn_embedded_under_if: 0.84
cn_after_if_clause: 0.758
cn_embedded_under_verb: 0.751
cn_disjunction: 0.799
cn_adverb: 0.326
ce_embedded_under_since: 0.287
ce_after_since_clause: 0.231
ce_embed






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.6228
subsequence: 0.6678
constituent: 0.5192

Entailed accuracy: 0.6032666666666666

Heuristic non-entailed results:
lexical_overlap: 0.3174
subsequence: 0.483
constituent: 0.4124

Non-Entailed accuracy: 0.40426666666666666

Overall accuracy: 0.5037666666666667

Subcase results:
ln_subject/object_swap: 0.164
ln_preposition: 0.434
ln_relative_clause: 0.404
ln_passive: 0.312
ln_conjunction: 0.273
le_relative_clause: 0.569
le_around_prepositional_phrase: 0.568
le_around_relative_clause: 0.65
le_conjunction: 0.663
le_passive: 0.664
sn_NP/S: 0.449
sn_PP_on_subject: 0.458
sn_relative_clause_on_subject: 0.489
sn_past_participle: 0.42
sn_NP/Z: 0.599
se_conjunction: 0.687
se_adjective: 0.86
se_understood_object: 0.727
se_relative_clause_on_obj: 0.64
se_PP_on_obj: 0.425
cn_embedded_under_if: 0.568
cn_after_if_clause: 0.488
cn_embedded_under_verb: 0.396
cn_disjunction: 0.492
cn_adverb: 0.118
ce_embedded_under_since: 0.414
ce_after_since_clause: 0.431






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.7052
subsequence: 0.8272
constituent: 0.679

Entailed accuracy: 0.7371333333333333

Heuristic non-entailed results:
lexical_overlap: 0.2636
subsequence: 0.3328
constituent: 0.3154

Non-Entailed accuracy: 0.30393333333333333

Overall accuracy: 0.5205333333333333

Subcase results:
ln_subject/object_swap: 0.108
ln_preposition: 0.416
ln_relative_clause: 0.335
ln_passive: 0.226
ln_conjunction: 0.233
le_relative_clause: 0.693
le_around_prepositional_phrase: 0.628
le_around_relative_clause: 0.713
le_conjunction: 0.78
le_passive: 0.712
sn_NP/S: 0.302
sn_PP_on_subject: 0.35
sn_relative_clause_on_subject: 0.397
sn_past_participle: 0.367
sn_NP/Z: 0.248
se_conjunction: 0.85
se_adjective: 0.933
se_understood_object: 0.92
se_relative_clause_on_obj: 0.846
se_PP_on_obj: 0.587
cn_embedded_under_if: 0.383
cn_after_if_clause: 0.423
cn_embedded_under_verb: 0.218
cn_disjunction: 0.495
cn_adverb: 0.058
ce_embedded_under_since: 0.693
ce_after_since_clause: 0.533






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.08it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.5602
subsequence: 0.681
constituent: 0.4832

Entailed accuracy: 0.5748

Heuristic non-entailed results:
lexical_overlap: 0.366
subsequence: 0.5404
constituent: 0.5712

Non-Entailed accuracy: 0.4925333333333333

Overall accuracy: 0.5336666666666666

Subcase results:
ln_subject/object_swap: 0.138
ln_preposition: 0.53
ln_relative_clause: 0.545
ln_passive: 0.334
ln_conjunction: 0.283
le_relative_clause: 0.46
le_around_prepositional_phrase: 0.474
le_around_relative_clause: 0.556
le_conjunction: 0.669
le_passive: 0.642
sn_NP/S: 0.547
sn_PP_on_subject: 0.441
sn_relative_clause_on_subject: 0.507
sn_past_participle: 0.669
sn_NP/Z: 0.538
se_conjunction: 0.769
se_adjective: 0.915
se_understood_object: 0.693
se_relative_clause_on_obj: 0.55
se_PP_on_obj: 0.478
cn_embedded_under_if: 0.817
cn_after_if_clause: 0.679
cn_embedded_under_verb: 0.472
cn_disjunction: 0.611
cn_adverb: 0.277
ce_embedded_under_since: 0.27
ce_after_since_clause: 0.396
ce_embedded_u






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.703
subsequence: 0.8038
constituent: 0.6564

Entailed accuracy: 0.7210666666666666

Heuristic non-entailed results:
lexical_overlap: 0.2776
subsequence: 0.364
constituent: 0.3698

Non-Entailed accuracy: 0.33713333333333334

Overall accuracy: 0.5291

Subcase results:
ln_subject/object_swap: 0.068
ln_preposition: 0.424
ln_relative_clause: 0.375
ln_passive: 0.324
ln_conjunction: 0.197
le_relative_clause: 0.606
le_around_prepositional_phrase: 0.627
le_around_relative_clause: 0.726
le_conjunction: 0.838
le_passive: 0.718
sn_NP/S: 0.357
sn_PP_on_subject: 0.285
sn_relative_clause_on_subject: 0.318
sn_past_participle: 0.577
sn_NP/Z: 0.283
se_conjunction: 0.874
se_adjective: 0.929
se_understood_object: 0.91
se_relative_clause_on_obj: 0.767
se_PP_on_obj: 0.539
cn_embedded_under_if: 0.58
cn_after_if_clause: 0.469
cn_embedded_under_verb: 0.282
cn_disjunction: 0.462
cn_adverb: 0.056
ce_embedded_under_since: 0.573
ce_after_since_clause: 0.499
ce_embedde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.04it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.621
subsequence: 0.6606
constituent: 0.5884

Entailed accuracy: 0.6233333333333333

Heuristic non-entailed results:
lexical_overlap: 0.409
subsequence: 0.4946
constituent: 0.472

Non-Entailed accuracy: 0.45853333333333335

Overall accuracy: 0.5409333333333334

Subcase results:
ln_subject/object_swap: 0.375
ln_preposition: 0.471
ln_relative_clause: 0.503
ln_passive: 0.295
ln_conjunction: 0.401
le_relative_clause: 0.592
le_around_prepositional_phrase: 0.587
le_around_relative_clause: 0.616
le_conjunction: 0.585
le_passive: 0.725
sn_NP/S: 0.229
sn_PP_on_subject: 0.395
sn_relative_clause_on_subject: 0.427
sn_past_participle: 0.61
sn_NP/Z: 0.812
se_conjunction: 0.632
se_adjective: 0.794
se_understood_object: 0.674
se_relative_clause_on_obj: 0.639
se_PP_on_obj: 0.564
cn_embedded_under_if: 0.709
cn_after_if_clause: 0.548
cn_embedded_under_verb: 0.541
cn_disjunction: 0.375
cn_adverb: 0.187
ce_embedded_under_since: 0.418
ce_after_since_clause: 0.58






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:52,  2.

Heuristic entailed results:
lexical_overlap: 0.7692
subsequence: 0.8656
constituent: 0.7008

Entailed accuracy: 0.7785333333333333

Heuristic non-entailed results:
lexical_overlap: 0.3216
subsequence: 0.2914
constituent: 0.3306

Non-Entailed accuracy: 0.31453333333333333

Overall accuracy: 0.5465333333333333

Subcase results:
ln_subject/object_swap: 0.25
ln_preposition: 0.409
ln_relative_clause: 0.408
ln_passive: 0.158
ln_conjunction: 0.383
le_relative_clause: 0.677
le_around_prepositional_phrase: 0.808
le_around_relative_clause: 0.857
le_conjunction: 0.789
le_passive: 0.715
sn_NP/S: 0.168
sn_PP_on_subject: 0.214
sn_relative_clause_on_subject: 0.254
sn_past_participle: 0.237
sn_NP/Z: 0.584
se_conjunction: 0.779
se_adjective: 0.923
se_understood_object: 0.954
se_relative_clause_on_obj: 0.827
se_PP_on_obj: 0.845
cn_embedded_under_if: 0.59
cn_after_if_clause: 0.398
cn_embedded_under_verb: 0.267
cn_disjunction: 0.327
cn_adverb: 0.071
ce_embedded_under_since: 0.488
ce_after_since_clause: 0.






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.11it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:54,  2.12it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.0442
subsequence: 0.1062
constituent: 0.0826

Entailed accuracy: 0.07766666666666666

Heuristic non-entailed results:
lexical_overlap: 0.9564
subsequence: 0.9474
constituent: 0.9172

Non-Entailed accuracy: 0.9403333333333334

Overall accuracy: 0.509

Subcase results:
ln_subject/object_swap: 0.97
ln_preposition: 0.989
ln_relative_clause: 0.99
ln_passive: 0.849
ln_conjunction: 0.984
le_relative_clause: 0.029
le_around_prepositional_phrase: 0.036
le_around_relative_clause: 0.051
le_conjunction: 0.028
le_passive: 0.077
sn_NP/S: 0.906
sn_PP_on_subject: 0.945
sn_relative_clause_on_subject: 0.951
sn_past_participle: 0.935
sn_NP/Z: 1.0
se_conjunction: 0.04
se_adjective: 0.345
se_understood_object: 0.071
se_relative_clause_on_obj: 0.053
se_PP_on_obj: 0.022
cn_embedded_under_if: 0.995
cn_after_if_clause: 0.969
cn_embedded_under_verb: 0.928
cn_disjunction: 0.98
cn_adverb: 0.714
ce_embedded_under_since: 0.001
ce_after_since_clause: 0.041
ce_embedded_u






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.5644
subsequence: 0.7032
constituent: 0.5134

Entailed accuracy: 0.5936666666666667

Heuristic non-entailed results:
lexical_overlap: 0.4758
subsequence: 0.5048
constituent: 0.5118

Non-Entailed accuracy: 0.49746666666666667

Overall accuracy: 0.5455666666666666

Subcase results:
ln_subject/object_swap: 0.392
ln_preposition: 0.625
ln_relative_clause: 0.62
ln_passive: 0.238
ln_conjunction: 0.504
le_relative_clause: 0.485
le_around_prepositional_phrase: 0.529
le_around_relative_clause: 0.579
le_conjunction: 0.583
le_passive: 0.646
sn_NP/S: 0.469
sn_PP_on_subject: 0.345
sn_relative_clause_on_subject: 0.473
sn_past_participle: 0.555
sn_NP/Z: 0.682
se_conjunction: 0.701
se_adjective: 0.877
se_understood_object: 0.792
se_relative_clause_on_obj: 0.619
se_PP_on_obj: 0.527
cn_embedded_under_if: 0.8
cn_after_if_clause: 0.543
cn_embedded_under_verb: 0.543
cn_disjunction: 0.567
cn_adverb: 0.106
ce_embedded_under_since: 0.266
ce_after_since_clause: 0.5






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.0348
subsequence: 0.0996
constituent: 0.0658

Entailed accuracy: 0.06673333333333334

Heuristic non-entailed results:
lexical_overlap: 0.9622
subsequence: 0.968
constituent: 0.9406

Non-Entailed accuracy: 0.9569333333333333

Overall accuracy: 0.5118333333333334

Subcase results:
ln_subject/object_swap: 0.953
ln_preposition: 0.988
ln_relative_clause: 0.994
ln_passive: 0.897
ln_conjunction: 0.979
le_relative_clause: 0.024
le_around_prepositional_phrase: 0.017
le_around_relative_clause: 0.026
le_conjunction: 0.039
le_passive: 0.068
sn_NP/S: 0.949
sn_PP_on_subject: 0.955
sn_relative_clause_on_subject: 0.977
sn_past_participle: 0.961
sn_NP/Z: 0.998
se_conjunction: 0.052
se_adjective: 0.291
se_understood_object: 0.093
se_relative_clause_on_obj: 0.035
se_PP_on_obj: 0.027
cn_embedded_under_if: 0.991
cn_after_if_clause: 0.977
cn_embedded_under_verb: 0.985
cn_disjunction: 0.983
cn_adverb: 0.767
ce_embedded_under_since: 0.019
ce_after_since_clause: 0






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:59,  1.98it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:57,  2.02it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:56,  2.05it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.5104
subsequence: 0.5924
constituent: 0.427

Entailed accuracy: 0.5099333333333333

Heuristic non-entailed results:
lexical_overlap: 0.4506
subsequence: 0.5236
constituent: 0.5012

Non-Entailed accuracy: 0.4918

Overall accuracy: 0.5008666666666667

Subcase results:
ln_subject/object_swap: 0.35
ln_preposition: 0.572
ln_relative_clause: 0.555
ln_passive: 0.253
ln_conjunction: 0.523
le_relative_clause: 0.482
le_around_prepositional_phrase: 0.436
le_around_relative_clause: 0.44
le_conjunction: 0.479
le_passive: 0.715
sn_NP/S: 0.584
sn_PP_on_subject: 0.48
sn_relative_clause_on_subject: 0.517
sn_past_participle: 0.334
sn_NP/Z: 0.703
se_conjunction: 0.504
se_adjective: 0.786
se_understood_object: 0.729
se_relative_clause_on_obj: 0.49
se_PP_on_obj: 0.453
cn_embedded_under_if: 0.669
cn_after_if_clause: 0.548
cn_embedded_under_verb: 0.482
cn_disjunction: 0.604
cn_adverb: 0.203
ce_embedded_under_since: 0.3
ce_after_since_clause: 0.375
ce_embedded_un






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.12it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:54,  2.12it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.12it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:53,  2.12it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.12it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:52,  2.12it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.12it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.7512
subsequence: 0.8358
constituent: 0.593

Entailed accuracy: 0.7266666666666667

Heuristic non-entailed results:
lexical_overlap: 0.2956
subsequence: 0.3414
constituent: 0.454

Non-Entailed accuracy: 0.3636666666666667

Overall accuracy: 0.5451666666666667

Subcase results:
ln_subject/object_swap: 0.222
ln_preposition: 0.372
ln_relative_clause: 0.435
ln_passive: 0.09
ln_conjunction: 0.359
le_relative_clause: 0.629
le_around_prepositional_phrase: 0.768
le_around_relative_clause: 0.785
le_conjunction: 0.728
le_passive: 0.846
sn_NP/S: 0.241
sn_PP_on_subject: 0.208
sn_relative_clause_on_subject: 0.296
sn_past_participle: 0.485
sn_NP/Z: 0.477
se_conjunction: 0.772
se_adjective: 0.944
se_understood_object: 0.877
se_relative_clause_on_obj: 0.754
se_PP_on_obj: 0.832
cn_embedded_under_if: 0.777
cn_after_if_clause: 0.497
cn_embedded_under_verb: 0.456
cn_disjunction: 0.37
cn_adverb: 0.17
ce_embedded_under_since: 0.315
ce_after_since_clause: 0.564







Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.0804
subsequence: 0.1796
constituent: 0.1086

Entailed accuracy: 0.12286666666666667

Heuristic non-entailed results:
lexical_overlap: 0.9084
subsequence: 0.9262
constituent: 0.9338

Non-Entailed accuracy: 0.9228

Overall accuracy: 0.5228333333333334

Subcase results:
ln_subject/object_swap: 0.868
ln_preposition: 0.963
ln_relative_clause: 0.976
ln_passive: 0.78
ln_conjunction: 0.955
le_relative_clause: 0.039
le_around_prepositional_phrase: 0.079
le_around_relative_clause: 0.096
le_conjunction: 0.071
le_passive: 0.117
sn_NP/S: 0.865
sn_PP_on_subject: 0.899
sn_relative_clause_on_subject: 0.95
sn_past_participle: 0.928
sn_NP/Z: 0.989
se_conjunction: 0.088
se_adjective: 0.422
se_understood_object: 0.183
se_relative_clause_on_obj: 0.091
se_PP_on_obj: 0.114
cn_embedded_under_if: 0.985
cn_after_if_clause: 0.974
cn_embedded_under_verb: 0.996
cn_disjunction: 0.943
cn_adverb: 0.771
ce_embedded_under_since: 0.011
ce_after_since_clause: 0.03
ce_embedd






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<01:06,  1.75it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:01<01:02,  1.85it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:59,  1.92it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:57,  1.98it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:56,  2.02it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:54,  2.05it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:53,  2.07it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.08it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:52,  2.09it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.10it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.10it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.3386
subsequence: 0.4174
constituent: 0.2436

Entailed accuracy: 0.3332

Heuristic non-entailed results:
lexical_overlap: 0.6134
subsequence: 0.7278
constituent: 0.7268

Non-Entailed accuracy: 0.6893333333333334

Overall accuracy: 0.5112666666666666

Subcase results:
ln_subject/object_swap: 0.461
ln_preposition: 0.74
ln_relative_clause: 0.796
ln_passive: 0.345
ln_conjunction: 0.725
le_relative_clause: 0.205
le_around_prepositional_phrase: 0.35
le_around_relative_clause: 0.331
le_conjunction: 0.274
le_passive: 0.533
sn_NP/S: 0.755
sn_PP_on_subject: 0.592
sn_relative_clause_on_subject: 0.726
sn_past_participle: 0.655
sn_NP/Z: 0.911
se_conjunction: 0.32
se_adjective: 0.703
se_understood_object: 0.507
se_relative_clause_on_obj: 0.248
se_PP_on_obj: 0.309
cn_embedded_under_if: 0.941
cn_after_if_clause: 0.822
cn_embedded_under_verb: 0.786
cn_disjunction: 0.736
cn_adverb: 0.349
ce_embedded_under_since: 0.05
ce_after_since_clause: 0.192
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.3564
subsequence: 0.5168
constituent: 0.2736

Entailed accuracy: 0.38226666666666664

Heuristic non-entailed results:
lexical_overlap: 0.6026
subsequence: 0.716
constituent: 0.734

Non-Entailed accuracy: 0.6842

Overall accuracy: 0.5332333333333333

Subcase results:
ln_subject/object_swap: 0.455
ln_preposition: 0.752
ln_relative_clause: 0.766
ln_passive: 0.299
ln_conjunction: 0.741
le_relative_clause: 0.287
le_around_prepositional_phrase: 0.316
le_around_relative_clause: 0.334
le_conjunction: 0.269
le_passive: 0.576
sn_NP/S: 0.754
sn_PP_on_subject: 0.634
sn_relative_clause_on_subject: 0.728
sn_past_participle: 0.64
sn_NP/Z: 0.824
se_conjunction: 0.347
se_adjective: 0.771
se_understood_object: 0.644
se_relative_clause_on_obj: 0.379
se_PP_on_obj: 0.443
cn_embedded_under_if: 0.92
cn_after_if_clause: 0.779
cn_embedded_under_verb: 0.811
cn_disjunction: 0.778
cn_adverb: 0.382
ce_embedded_under_since: 0.108
ce_after_since_clause: 0.218
ce_embedde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8322
subsequence: 0.895
constituent: 0.7634

Entailed accuracy: 0.8302

Heuristic non-entailed results:
lexical_overlap: 0.2212
subsequence: 0.2232
constituent: 0.243

Non-Entailed accuracy: 0.22913333333333333

Overall accuracy: 0.5296666666666666

Subcase results:
ln_subject/object_swap: 0.148
ln_preposition: 0.305
ln_relative_clause: 0.352
ln_passive: 0.073
ln_conjunction: 0.228
le_relative_clause: 0.725
le_around_prepositional_phrase: 0.832
le_around_relative_clause: 0.877
le_conjunction: 0.852
le_passive: 0.875
sn_NP/S: 0.096
sn_PP_on_subject: 0.103
sn_relative_clause_on_subject: 0.217
sn_past_participle: 0.458
sn_NP/Z: 0.242
se_conjunction: 0.904
se_adjective: 0.974
se_understood_object: 0.892
se_relative_clause_on_obj: 0.884
se_PP_on_obj: 0.821
cn_embedded_under_if: 0.507
cn_after_if_clause: 0.303
cn_embedded_under_verb: 0.193
cn_disjunction: 0.186
cn_adverb: 0.026
ce_embedded_under_since: 0.59
ce_after_since_clause: 0.712
ce_embedd






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.2592
subsequence: 0.3774
constituent: 0.2454

Entailed accuracy: 0.294

Heuristic non-entailed results:
lexical_overlap: 0.755
subsequence: 0.7806
constituent: 0.7592

Non-Entailed accuracy: 0.7649333333333334

Overall accuracy: 0.5294666666666666

Subcase results:
ln_subject/object_swap: 0.645
ln_preposition: 0.884
ln_relative_clause: 0.879
ln_passive: 0.585
ln_conjunction: 0.782
le_relative_clause: 0.194
le_around_prepositional_phrase: 0.219
le_around_relative_clause: 0.322
le_conjunction: 0.268
le_passive: 0.293
sn_NP/S: 0.589
sn_PP_on_subject: 0.701
sn_relative_clause_on_subject: 0.811
sn_past_participle: 0.874
sn_NP/Z: 0.928
se_conjunction: 0.381
se_adjective: 0.705
se_understood_object: 0.302
se_relative_clause_on_obj: 0.303
se_PP_on_obj: 0.196
cn_embedded_under_if: 0.927
cn_after_if_clause: 0.909
cn_embedded_under_verb: 0.869
cn_disjunction: 0.744
cn_adverb: 0.347
ce_embedded_under_since: 0.052
ce_after_since_clause: 0.091
ce_embedd






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:57,  1.99it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:55,  2.03it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:54,  2.05it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:53,  2.07it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.08it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:52,  2.09it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.10it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.10it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.5256
subsequence: 0.5842
constituent: 0.3868

Entailed accuracy: 0.4988666666666667

Heuristic non-entailed results:
lexical_overlap: 0.4782
subsequence: 0.5682
constituent: 0.5356

Non-Entailed accuracy: 0.5273333333333333

Overall accuracy: 0.5131

Subcase results:
ln_subject/object_swap: 0.285
ln_preposition: 0.615
ln_relative_clause: 0.628
ln_passive: 0.307
ln_conjunction: 0.556
le_relative_clause: 0.404
le_around_prepositional_phrase: 0.504
le_around_relative_clause: 0.588
le_conjunction: 0.518
le_passive: 0.614
sn_NP/S: 0.49
sn_PP_on_subject: 0.43
sn_relative_clause_on_subject: 0.489
sn_past_participle: 0.625
sn_NP/Z: 0.807
se_conjunction: 0.517
se_adjective: 0.82
se_understood_object: 0.617
se_relative_clause_on_obj: 0.527
se_PP_on_obj: 0.44
cn_embedded_under_if: 0.816
cn_after_if_clause: 0.718
cn_embedded_under_verb: 0.491
cn_disjunction: 0.529
cn_adverb: 0.124
ce_embedded_under_since: 0.153
ce_after_since_clause: 0.308
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.6716
subsequence: 0.7974
constituent: 0.5768

Entailed accuracy: 0.6819333333333333

Heuristic non-entailed results:
lexical_overlap: 0.3602
subsequence: 0.3402
constituent: 0.3842

Non-Entailed accuracy: 0.3615333333333333

Overall accuracy: 0.5217333333333334

Subcase results:
ln_subject/object_swap: 0.258
ln_preposition: 0.518
ln_relative_clause: 0.477
ln_passive: 0.11
ln_conjunction: 0.438
le_relative_clause: 0.622
le_around_prepositional_phrase: 0.635
le_around_relative_clause: 0.701
le_conjunction: 0.624
le_passive: 0.776
sn_NP/S: 0.265
sn_PP_on_subject: 0.298
sn_relative_clause_on_subject: 0.388
sn_past_participle: 0.443
sn_NP/Z: 0.307
se_conjunction: 0.712
se_adjective: 0.915
se_understood_object: 0.926
se_relative_clause_on_obj: 0.788
se_PP_on_obj: 0.646
cn_embedded_under_if: 0.518
cn_after_if_clause: 0.54
cn_embedded_under_verb: 0.31
cn_disjunction: 0.478
cn_adverb: 0.075
ce_embedded_under_since: 0.532
ce_after_since_clause: 0.42






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.08it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.418
subsequence: 0.5992
constituent: 0.3256

Entailed accuracy: 0.4476

Heuristic non-entailed results:
lexical_overlap: 0.5392
subsequence: 0.6264
constituent: 0.6802

Non-Entailed accuracy: 0.6152666666666666

Overall accuracy: 0.5314333333333333

Subcase results:
ln_subject/object_swap: 0.416
ln_preposition: 0.663
ln_relative_clause: 0.7
ln_passive: 0.273
ln_conjunction: 0.644
le_relative_clause: 0.319
le_around_prepositional_phrase: 0.387
le_around_relative_clause: 0.395
le_conjunction: 0.352
le_passive: 0.637
sn_NP/S: 0.567
sn_PP_on_subject: 0.499
sn_relative_clause_on_subject: 0.627
sn_past_participle: 0.721
sn_NP/Z: 0.718
se_conjunction: 0.451
se_adjective: 0.817
se_understood_object: 0.771
se_relative_clause_on_obj: 0.451
se_PP_on_obj: 0.506
cn_embedded_under_if: 0.881
cn_after_if_clause: 0.809
cn_embedded_under_verb: 0.772
cn_disjunction: 0.633
cn_adverb: 0.306
ce_embedded_under_since: 0.127
ce_after_since_clause: 0.191
ce_embedde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.932
subsequence: 0.9464
constituent: 0.908

Entailed accuracy: 0.9288

Heuristic non-entailed results:
lexical_overlap: 0.0464
subsequence: 0.1406
constituent: 0.126

Non-Entailed accuracy: 0.10433333333333333

Overall accuracy: 0.5165666666666666

Subcase results:
ln_subject/object_swap: 0.032
ln_preposition: 0.053
ln_relative_clause: 0.079
ln_passive: 0.026
ln_conjunction: 0.042
le_relative_clause: 0.847
le_around_prepositional_phrase: 0.952
le_around_relative_clause: 0.957
le_conjunction: 0.959
le_passive: 0.945
sn_NP/S: 0.093
sn_PP_on_subject: 0.034
sn_relative_clause_on_subject: 0.065
sn_past_participle: 0.386
sn_NP/Z: 0.125
se_conjunction: 0.96
se_adjective: 0.98
se_understood_object: 0.908
se_relative_clause_on_obj: 0.963
se_PP_on_obj: 0.921
cn_embedded_under_if: 0.24
cn_after_if_clause: 0.139
cn_embedded_under_verb: 0.095
cn_disjunction: 0.128
cn_adverb: 0.028
ce_embedded_under_since: 0.88
ce_after_since_clause: 0.889
ce_embedded_u






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.3724
subsequence: 0.458
constituent: 0.3268

Entailed accuracy: 0.3857333333333333

Heuristic non-entailed results:
lexical_overlap: 0.5916
subsequence: 0.6862
constituent: 0.719

Non-Entailed accuracy: 0.6656

Overall accuracy: 0.5256666666666666

Subcase results:
ln_subject/object_swap: 0.443
ln_preposition: 0.672
ln_relative_clause: 0.651
ln_passive: 0.586
ln_conjunction: 0.606
le_relative_clause: 0.311
le_around_prepositional_phrase: 0.348
le_around_relative_clause: 0.402
le_conjunction: 0.413
le_passive: 0.388
sn_NP/S: 0.519
sn_PP_on_subject: 0.582
sn_relative_clause_on_subject: 0.701
sn_past_participle: 0.804
sn_NP/Z: 0.825
se_conjunction: 0.473
se_adjective: 0.671
se_understood_object: 0.344
se_relative_clause_on_obj: 0.457
se_PP_on_obj: 0.345
cn_embedded_under_if: 0.784
cn_after_if_clause: 0.766
cn_embedded_under_verb: 0.846
cn_disjunction: 0.771
cn_adverb: 0.428
ce_embedded_under_since: 0.303
ce_after_since_clause: 0.185
ce_embedd






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.11it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:53,  2.12it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.12it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:52,  2.12it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.12it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.573
subsequence: 0.6002
constituent: 0.4426

Entailed accuracy: 0.5386

Heuristic non-entailed results:
lexical_overlap: 0.3632
subsequence: 0.5278
constituent: 0.471

Non-Entailed accuracy: 0.454

Overall accuracy: 0.4963

Subcase results:
ln_subject/object_swap: 0.22
ln_preposition: 0.405
ln_relative_clause: 0.462
ln_passive: 0.308
ln_conjunction: 0.421
le_relative_clause: 0.492
le_around_prepositional_phrase: 0.569
le_around_relative_clause: 0.583
le_conjunction: 0.529
le_passive: 0.692
sn_NP/S: 0.503
sn_PP_on_subject: 0.397
sn_relative_clause_on_subject: 0.496
sn_past_participle: 0.516
sn_NP/Z: 0.727
se_conjunction: 0.542
se_adjective: 0.817
se_understood_object: 0.572
se_relative_clause_on_obj: 0.566
se_PP_on_obj: 0.504
cn_embedded_under_if: 0.542
cn_after_if_clause: 0.532
cn_embedded_under_verb: 0.523
cn_disjunction: 0.562
cn_adverb: 0.196
ce_embedded_under_since: 0.39
ce_after_since_clause: 0.374
ce_embedded_under_verb: 0.379
ce_con






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<01:00,  1.93it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:58,  1.98it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:56,  2.02it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.05it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.07it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.08it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.7624
subsequence: 0.838
constituent: 0.668

Entailed accuracy: 0.7561333333333333

Heuristic non-entailed results:
lexical_overlap: 0.1956
subsequence: 0.2896
constituent: 0.2872

Non-Entailed accuracy: 0.2574666666666667

Overall accuracy: 0.5068

Subcase results:
ln_subject/object_swap: 0.101
ln_preposition: 0.26
ln_relative_clause: 0.243
ln_passive: 0.155
ln_conjunction: 0.219
le_relative_clause: 0.737
le_around_prepositional_phrase: 0.766
le_around_relative_clause: 0.778
le_conjunction: 0.746
le_passive: 0.785
sn_NP/S: 0.318
sn_PP_on_subject: 0.231
sn_relative_clause_on_subject: 0.301
sn_past_participle: 0.346
sn_NP/Z: 0.252
se_conjunction: 0.796
se_adjective: 0.928
se_understood_object: 0.867
se_relative_clause_on_obj: 0.862
se_PP_on_obj: 0.737
cn_embedded_under_if: 0.279
cn_after_if_clause: 0.383
cn_embedded_under_verb: 0.282
cn_disjunction: 0.451
cn_adverb: 0.041
ce_embedded_under_since: 0.728
ce_after_since_clause: 0.551
ce_embedde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:56,  2.00it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:55,  2.03it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:54,  2.06it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:53,  2.07it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:52,  2.09it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.10it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.10it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.631
subsequence: 0.7012
constituent: 0.5076

Entailed accuracy: 0.6132666666666666

Heuristic non-entailed results:
lexical_overlap: 0.3016
subsequence: 0.473
constituent: 0.541

Non-Entailed accuracy: 0.43853333333333333

Overall accuracy: 0.5259

Subcase results:
ln_subject/object_swap: 0.202
ln_preposition: 0.337
ln_relative_clause: 0.456
ln_passive: 0.192
ln_conjunction: 0.321
le_relative_clause: 0.488
le_around_prepositional_phrase: 0.67
le_around_relative_clause: 0.578
le_conjunction: 0.658
le_passive: 0.761
sn_NP/S: 0.47
sn_PP_on_subject: 0.325
sn_relative_clause_on_subject: 0.456
sn_past_participle: 0.613
sn_NP/Z: 0.501
se_conjunction: 0.686
se_adjective: 0.858
se_understood_object: 0.681
se_relative_clause_on_obj: 0.559
se_PP_on_obj: 0.722
cn_embedded_under_if: 0.731
cn_after_if_clause: 0.591
cn_embedded_under_verb: 0.519
cn_disjunction: 0.485
cn_adverb: 0.379
ce_embedded_under_since: 0.36
ce_after_since_clause: 0.46
ce_embedded_u






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.08it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.6844
subsequence: 0.746
constituent: 0.601

Entailed accuracy: 0.6771333333333334

Heuristic non-entailed results:
lexical_overlap: 0.2976
subsequence: 0.373
constituent: 0.3492

Non-Entailed accuracy: 0.3399333333333333

Overall accuracy: 0.5085333333333333

Subcase results:
ln_subject/object_swap: 0.176
ln_preposition: 0.398
ln_relative_clause: 0.372
ln_passive: 0.237
ln_conjunction: 0.305
le_relative_clause: 0.599
le_around_prepositional_phrase: 0.65
le_around_relative_clause: 0.765
le_conjunction: 0.715
le_passive: 0.693
sn_NP/S: 0.29
sn_PP_on_subject: 0.293
sn_relative_clause_on_subject: 0.296
sn_past_participle: 0.664
sn_NP/Z: 0.322
se_conjunction: 0.798
se_adjective: 0.886
se_understood_object: 0.699
se_relative_clause_on_obj: 0.764
se_PP_on_obj: 0.583
cn_embedded_under_if: 0.434
cn_after_if_clause: 0.443
cn_embedded_under_verb: 0.411
cn_disjunction: 0.385
cn_adverb: 0.073
ce_embedded_under_since: 0.607
ce_after_since_clause: 0.459







Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8878
subsequence: 0.9486
constituent: 0.8648

Entailed accuracy: 0.9004

Heuristic non-entailed results:
lexical_overlap: 0.1348
subsequence: 0.1558
constituent: 0.1844

Non-Entailed accuracy: 0.15833333333333333

Overall accuracy: 0.5293666666666667

Subcase results:
ln_subject/object_swap: 0.156
ln_preposition: 0.157
ln_relative_clause: 0.214
ln_passive: 0.048
ln_conjunction: 0.099
le_relative_clause: 0.844
le_around_prepositional_phrase: 0.891
le_around_relative_clause: 0.893
le_conjunction: 0.945
le_passive: 0.866
sn_NP/S: 0.027
sn_PP_on_subject: 0.131
sn_relative_clause_on_subject: 0.176
sn_past_participle: 0.301
sn_NP/Z: 0.144
se_conjunction: 0.944
se_adjective: 0.955
se_understood_object: 0.952
se_relative_clause_on_obj: 0.948
se_PP_on_obj: 0.944
cn_embedded_under_if: 0.214
cn_after_if_clause: 0.274
cn_embedded_under_verb: 0.204
cn_disjunction: 0.18
cn_adverb: 0.05
ce_embedded_under_since: 0.899
ce_after_since_clause: 0.771
ce_embed






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.08it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.5518
subsequence: 0.6616
constituent: 0.4648

Entailed accuracy: 0.5594

Heuristic non-entailed results:
lexical_overlap: 0.4486
subsequence: 0.5858
constituent: 0.617

Non-Entailed accuracy: 0.5504666666666667

Overall accuracy: 0.5549333333333333

Subcase results:
ln_subject/object_swap: 0.425
ln_preposition: 0.563
ln_relative_clause: 0.572
ln_passive: 0.369
ln_conjunction: 0.314
le_relative_clause: 0.501
le_around_prepositional_phrase: 0.537
le_around_relative_clause: 0.575
le_conjunction: 0.717
le_passive: 0.429
sn_NP/S: 0.208
sn_PP_on_subject: 0.575
sn_relative_clause_on_subject: 0.61
sn_past_participle: 0.742
sn_NP/Z: 0.794
se_conjunction: 0.634
se_adjective: 0.755
se_understood_object: 0.582
se_relative_clause_on_obj: 0.691
se_PP_on_obj: 0.646
cn_embedded_under_if: 0.666
cn_after_if_clause: 0.787
cn_embedded_under_verb: 0.719
cn_disjunction: 0.571
cn_adverb: 0.342
ce_embedded_under_since: 0.454
ce_after_since_clause: 0.257
ce_embedd






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:01<00:58,  1.97it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:57,  2.01it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.04it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.06it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.08it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.7016
subsequence: 0.769
constituent: 0.5808

Entailed accuracy: 0.6838

Heuristic non-entailed results:
lexical_overlap: 0.307
subsequence: 0.394
constituent: 0.3826

Non-Entailed accuracy: 0.3612

Overall accuracy: 0.5225

Subcase results:
ln_subject/object_swap: 0.258
ln_preposition: 0.364
ln_relative_clause: 0.398
ln_passive: 0.244
ln_conjunction: 0.271
le_relative_clause: 0.636
le_around_prepositional_phrase: 0.707
le_around_relative_clause: 0.762
le_conjunction: 0.769
le_passive: 0.634
sn_NP/S: 0.201
sn_PP_on_subject: 0.392
sn_relative_clause_on_subject: 0.428
sn_past_participle: 0.363
sn_NP/Z: 0.586
se_conjunction: 0.718
se_adjective: 0.874
se_understood_object: 0.7
se_relative_clause_on_obj: 0.759
se_PP_on_obj: 0.794
cn_embedded_under_if: 0.436
cn_after_if_clause: 0.548
cn_embedded_under_verb: 0.443
cn_disjunction: 0.357
cn_adverb: 0.129
ce_embedded_under_since: 0.591
ce_after_since_clause: 0.434
ce_embedded_under_verb: 0.537
ce_con






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.08it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8576
subsequence: 0.9294
constituent: 0.8126

Entailed accuracy: 0.8665333333333334

Heuristic non-entailed results:
lexical_overlap: 0.1618
subsequence: 0.1824
constituent: 0.1986

Non-Entailed accuracy: 0.18093333333333333

Overall accuracy: 0.5237333333333334

Subcase results:
ln_subject/object_swap: 0.129
ln_preposition: 0.211
ln_relative_clause: 0.266
ln_passive: 0.065
ln_conjunction: 0.138
le_relative_clause: 0.826
le_around_prepositional_phrase: 0.834
le_around_relative_clause: 0.869
le_conjunction: 0.919
le_passive: 0.84
sn_NP/S: 0.067
sn_PP_on_subject: 0.196
sn_relative_clause_on_subject: 0.261
sn_past_participle: 0.249
sn_NP/Z: 0.139
se_conjunction: 0.905
se_adjective: 0.952
se_understood_object: 0.906
se_relative_clause_on_obj: 0.936
se_PP_on_obj: 0.948
cn_embedded_under_if: 0.161
cn_after_if_clause: 0.369
cn_embedded_under_verb: 0.175
cn_disjunction: 0.256
cn_adverb: 0.032
ce_embedded_under_since: 0.894
ce_after_since_clause: 0






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8098
subsequence: 0.8868
constituent: 0.6836

Entailed accuracy: 0.7934

Heuristic non-entailed results:
lexical_overlap: 0.1726
subsequence: 0.3174
constituent: 0.354

Non-Entailed accuracy: 0.2813333333333333

Overall accuracy: 0.5373666666666667

Subcase results:
ln_subject/object_swap: 0.141
ln_preposition: 0.218
ln_relative_clause: 0.289
ln_passive: 0.098
ln_conjunction: 0.117
le_relative_clause: 0.697
le_around_prepositional_phrase: 0.823
le_around_relative_clause: 0.813
le_conjunction: 0.908
le_passive: 0.808
sn_NP/S: 0.238
sn_PP_on_subject: 0.253
sn_relative_clause_on_subject: 0.348
sn_past_participle: 0.473
sn_NP/Z: 0.275
se_conjunction: 0.883
se_adjective: 0.941
se_understood_object: 0.861
se_relative_clause_on_obj: 0.828
se_PP_on_obj: 0.921
cn_embedded_under_if: 0.418
cn_after_if_clause: 0.515
cn_embedded_under_verb: 0.427
cn_disjunction: 0.265
cn_adverb: 0.145
ce_embedded_under_since: 0.676
ce_after_since_clause: 0.549
ce_embed






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:59,  1.96it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:57,  2.01it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:56,  2.04it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8022
subsequence: 0.896
constituent: 0.73

Entailed accuracy: 0.8094

Heuristic non-entailed results:
lexical_overlap: 0.223
subsequence: 0.1962
constituent: 0.291

Non-Entailed accuracy: 0.23673333333333332

Overall accuracy: 0.5230666666666667

Subcase results:
ln_subject/object_swap: 0.143
ln_preposition: 0.271
ln_relative_clause: 0.294
ln_passive: 0.236
ln_conjunction: 0.171
le_relative_clause: 0.758
le_around_prepositional_phrase: 0.819
le_around_relative_clause: 0.855
le_conjunction: 0.866
le_passive: 0.713
sn_NP/S: 0.119
sn_PP_on_subject: 0.165
sn_relative_clause_on_subject: 0.201
sn_past_participle: 0.347
sn_NP/Z: 0.149
se_conjunction: 0.886
se_adjective: 0.937
se_understood_object: 0.906
se_relative_clause_on_obj: 0.887
se_PP_on_obj: 0.864
cn_embedded_under_if: 0.278
cn_after_if_clause: 0.487
cn_embedded_under_verb: 0.34
cn_disjunction: 0.282
cn_adverb: 0.068
ce_embedded_under_since: 0.76
ce_after_since_clause: 0.546
ce_embedded_u






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.08it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:56,  2.00it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:55,  2.03it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:53,  2.06it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:53,  2.07it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:52,  2.09it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.10it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.10it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8448
subsequence: 0.9246
constituent: 0.8266

Entailed accuracy: 0.8653333333333333

Heuristic non-entailed results:
lexical_overlap: 0.1324
subsequence: 0.2186
constituent: 0.2084

Non-Entailed accuracy: 0.18646666666666667

Overall accuracy: 0.5259

Subcase results:
ln_subject/object_swap: 0.077
ln_preposition: 0.139
ln_relative_clause: 0.137
ln_passive: 0.242
ln_conjunction: 0.067
le_relative_clause: 0.843
le_around_prepositional_phrase: 0.841
le_around_relative_clause: 0.867
le_conjunction: 0.92
le_passive: 0.753
sn_NP/S: 0.158
sn_PP_on_subject: 0.166
sn_relative_clause_on_subject: 0.212
sn_past_participle: 0.417
sn_NP/Z: 0.14
se_conjunction: 0.938
se_adjective: 0.926
se_understood_object: 0.867
se_relative_clause_on_obj: 0.947
se_PP_on_obj: 0.945
cn_embedded_under_if: 0.115
cn_after_if_clause: 0.22
cn_embedded_under_verb: 0.396
cn_disjunction: 0.196
cn_adverb: 0.115
ce_embedded_under_since: 0.927
ce_after_since_clause: 0.77
ce_embedde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.05it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8502
subsequence: 0.862
constituent: 0.8708

Entailed accuracy: 0.861

Heuristic non-entailed results:
lexical_overlap: 0.192
subsequence: 0.2414
constituent: 0.1618

Non-Entailed accuracy: 0.1984

Overall accuracy: 0.5297

Subcase results:
ln_subject/object_swap: 0.179
ln_preposition: 0.239
ln_relative_clause: 0.203
ln_passive: 0.109
ln_conjunction: 0.23
le_relative_clause: 0.849
le_around_prepositional_phrase: 0.805
le_around_relative_clause: 0.881
le_conjunction: 0.764
le_passive: 0.952
sn_NP/S: 0.048
sn_PP_on_subject: 0.19
sn_relative_clause_on_subject: 0.161
sn_past_participle: 0.212
sn_NP/Z: 0.596
se_conjunction: 0.801
se_adjective: 0.989
se_understood_object: 0.946
se_relative_clause_on_obj: 0.822
se_PP_on_obj: 0.752
cn_embedded_under_if: 0.417
cn_after_if_clause: 0.039
cn_embedded_under_verb: 0.203
cn_disjunction: 0.126
cn_adverb: 0.024
ce_embedded_under_since: 0.698
ce_after_since_clause: 0.937
ce_embedded_under_verb: 0.877
ce_con






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.9682
subsequence: 0.9698
constituent: 0.952

Entailed accuracy: 0.9633333333333334

Heuristic non-entailed results:
lexical_overlap: 0.0602
subsequence: 0.113
constituent: 0.045

Non-Entailed accuracy: 0.07273333333333333

Overall accuracy: 0.5180333333333333

Subcase results:
ln_subject/object_swap: 0.012
ln_preposition: 0.075
ln_relative_clause: 0.109
ln_passive: 0.017
ln_conjunction: 0.088
le_relative_clause: 0.908
le_around_prepositional_phrase: 0.983
le_around_relative_clause: 0.988
le_conjunction: 0.973
le_passive: 0.989
sn_NP/S: 0.056
sn_PP_on_subject: 0.029
sn_relative_clause_on_subject: 0.022
sn_past_participle: 0.134
sn_NP/Z: 0.324
se_conjunction: 0.975
se_adjective: 0.999
se_understood_object: 0.993
se_relative_clause_on_obj: 0.931
se_PP_on_obj: 0.951
cn_embedded_under_if: 0.173
cn_after_if_clause: 0.002
cn_embedded_under_verb: 0.006
cn_disjunction: 0.042
cn_adverb: 0.002
ce_embedded_under_since: 0.828
ce_after_since_clause: 0.9






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.1278
subsequence: 0.2234
constituent: 0.242

Entailed accuracy: 0.19773333333333334

Heuristic non-entailed results:
lexical_overlap: 0.8968
subsequence: 0.8542
constituent: 0.7564

Non-Entailed accuracy: 0.8358

Overall accuracy: 0.5167666666666667

Subcase results:
ln_subject/object_swap: 0.898
ln_preposition: 0.942
ln_relative_clause: 0.891
ln_passive: 0.783
ln_conjunction: 0.97
le_relative_clause: 0.166
le_around_prepositional_phrase: 0.085
le_around_relative_clause: 0.131
le_conjunction: 0.04
le_passive: 0.217
sn_NP/S: 0.822
sn_PP_on_subject: 0.855
sn_relative_clause_on_subject: 0.802
sn_past_participle: 0.802
sn_NP/Z: 0.99
se_conjunction: 0.099
se_adjective: 0.579
se_understood_object: 0.222
se_relative_clause_on_obj: 0.131
se_PP_on_obj: 0.086
cn_embedded_under_if: 0.928
cn_after_if_clause: 0.725
cn_embedded_under_verb: 0.729
cn_disjunction: 0.858
cn_adverb: 0.542
ce_embedded_under_since: 0.094
ce_after_since_clause: 0.261
ce_embedde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:59,  1.96it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:57,  2.01it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:56,  2.04it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.921
subsequence: 0.9466
constituent: 0.905

Entailed accuracy: 0.9242

Heuristic non-entailed results:
lexical_overlap: 0.1278
subsequence: 0.1638
constituent: 0.1142

Non-Entailed accuracy: 0.13526666666666667

Overall accuracy: 0.5297333333333333

Subcase results:
ln_subject/object_swap: 0.024
ln_preposition: 0.197
ln_relative_clause: 0.196
ln_passive: 0.062
ln_conjunction: 0.16
le_relative_clause: 0.85
le_around_prepositional_phrase: 0.927
le_around_relative_clause: 0.956
le_conjunction: 0.923
le_passive: 0.949
sn_NP/S: 0.202
sn_PP_on_subject: 0.039
sn_relative_clause_on_subject: 0.044
sn_past_participle: 0.242
sn_NP/Z: 0.292
se_conjunction: 0.965
se_adjective: 0.999
se_understood_object: 0.97
se_relative_clause_on_obj: 0.933
se_PP_on_obj: 0.866
cn_embedded_under_if: 0.399
cn_after_if_clause: 0.007
cn_embedded_under_verb: 0.047
cn_disjunction: 0.116
cn_adverb: 0.002
ce_embedded_under_since: 0.667
ce_after_since_clause: 0.992
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.0936
subsequence: 0.2152
constituent: 0.2186

Entailed accuracy: 0.1758

Heuristic non-entailed results:
lexical_overlap: 0.917
subsequence: 0.8922
constituent: 0.7986

Non-Entailed accuracy: 0.8692666666666666

Overall accuracy: 0.5225333333333333

Subcase results:
ln_subject/object_swap: 0.883
ln_preposition: 0.949
ln_relative_clause: 0.92
ln_passive: 0.878
ln_conjunction: 0.955
le_relative_clause: 0.106
le_around_prepositional_phrase: 0.079
le_around_relative_clause: 0.092
le_conjunction: 0.083
le_passive: 0.108
sn_NP/S: 0.848
sn_PP_on_subject: 0.885
sn_relative_clause_on_subject: 0.886
sn_past_participle: 0.888
sn_NP/Z: 0.954
se_conjunction: 0.156
se_adjective: 0.439
se_understood_object: 0.248
se_relative_clause_on_obj: 0.139
se_PP_on_obj: 0.094
cn_embedded_under_if: 0.921
cn_after_if_clause: 0.746
cn_embedded_under_verb: 0.86
cn_disjunction: 0.865
cn_adverb: 0.601
ce_embedded_under_since: 0.162
ce_after_since_clause: 0.224
ce_embedde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.03it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:57,  2.03it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.07it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.5832
subsequence: 0.7176
constituent: 0.698

Entailed accuracy: 0.6662666666666667

Heuristic non-entailed results:
lexical_overlap: 0.341
subsequence: 0.3962
constituent: 0.278

Non-Entailed accuracy: 0.3384

Overall accuracy: 0.5023333333333333

Subcase results:
ln_subject/object_swap: 0.256
ln_preposition: 0.399
ln_relative_clause: 0.38
ln_passive: 0.208
ln_conjunction: 0.462
le_relative_clause: 0.642
le_around_prepositional_phrase: 0.473
le_around_relative_clause: 0.483
le_conjunction: 0.487
le_passive: 0.831
sn_NP/S: 0.449
sn_PP_on_subject: 0.394
sn_relative_clause_on_subject: 0.41
sn_past_participle: 0.218
sn_NP/Z: 0.51
se_conjunction: 0.582
se_adjective: 0.906
se_understood_object: 0.885
se_relative_clause_on_obj: 0.615
se_PP_on_obj: 0.6
cn_embedded_under_if: 0.339
cn_after_if_clause: 0.22
cn_embedded_under_verb: 0.293
cn_disjunction: 0.354
cn_adverb: 0.184
ce_embedded_under_since: 0.654
ce_after_since_clause: 0.695
ce_embedded_unde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.12it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.949
subsequence: 0.9502
constituent: 0.888

Entailed accuracy: 0.9290666666666667

Heuristic non-entailed results:
lexical_overlap: 0.0718
subsequence: 0.1352
constituent: 0.119

Non-Entailed accuracy: 0.10866666666666666

Overall accuracy: 0.5188666666666667

Subcase results:
ln_subject/object_swap: 0.019
ln_preposition: 0.094
ln_relative_clause: 0.118
ln_passive: 0.025
ln_conjunction: 0.103
le_relative_clause: 0.88
le_around_prepositional_phrase: 0.969
le_around_relative_clause: 0.972
le_conjunction: 0.937
le_passive: 0.987
sn_NP/S: 0.161
sn_PP_on_subject: 0.023
sn_relative_clause_on_subject: 0.021
sn_past_participle: 0.22
sn_NP/Z: 0.251
se_conjunction: 0.932
se_adjective: 1.0
se_understood_object: 0.95
se_relative_clause_on_obj: 0.912
se_PP_on_obj: 0.957
cn_embedded_under_if: 0.496
cn_after_if_clause: 0.014
cn_embedded_under_verb: 0.027
cn_disjunction: 0.051
cn_adverb: 0.007
ce_embedded_under_since: 0.602
ce_after_since_clause: 0.977
ce






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:57,  1.98it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:55,  2.02it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:54,  2.05it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:53,  2.07it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.08it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:52,  2.09it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.10it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.3282
subsequence: 0.4448
constituent: 0.3866

Entailed accuracy: 0.38653333333333334

Heuristic non-entailed results:
lexical_overlap: 0.7204
subsequence: 0.7148
constituent: 0.635

Non-Entailed accuracy: 0.6900666666666667

Overall accuracy: 0.5383

Subcase results:
ln_subject/object_swap: 0.691
ln_preposition: 0.778
ln_relative_clause: 0.719
ln_passive: 0.606
ln_conjunction: 0.808
le_relative_clause: 0.316
le_around_prepositional_phrase: 0.311
le_around_relative_clause: 0.362
le_conjunction: 0.252
le_passive: 0.4
sn_NP/S: 0.659
sn_PP_on_subject: 0.645
sn_relative_clause_on_subject: 0.636
sn_past_participle: 0.724
sn_NP/Z: 0.91
se_conjunction: 0.336
se_adjective: 0.798
se_understood_object: 0.458
se_relative_clause_on_obj: 0.313
se_PP_on_obj: 0.319
cn_embedded_under_if: 0.904
cn_after_if_clause: 0.591
cn_embedded_under_verb: 0.701
cn_disjunction: 0.656
cn_adverb: 0.323
ce_embedded_under_since: 0.166
ce_after_since_clause: 0.393
ce_embedde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:58,  2.00it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:57,  2.03it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.07it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.6436
subsequence: 0.6916
constituent: 0.565

Entailed accuracy: 0.6334

Heuristic non-entailed results:
lexical_overlap: 0.3432
subsequence: 0.4444
constituent: 0.3642

Non-Entailed accuracy: 0.38393333333333335

Overall accuracy: 0.5086666666666667

Subcase results:
ln_subject/object_swap: 0.231
ln_preposition: 0.434
ln_relative_clause: 0.438
ln_passive: 0.216
ln_conjunction: 0.397
le_relative_clause: 0.566
le_around_prepositional_phrase: 0.648
le_around_relative_clause: 0.62
le_conjunction: 0.613
le_passive: 0.771
sn_NP/S: 0.501
sn_PP_on_subject: 0.325
sn_relative_clause_on_subject: 0.35
sn_past_participle: 0.347
sn_NP/Z: 0.699
se_conjunction: 0.59
se_adjective: 0.949
se_understood_object: 0.819
se_relative_clause_on_obj: 0.529
se_PP_on_obj: 0.571
cn_embedded_under_if: 0.721
cn_after_if_clause: 0.355
cn_embedded_under_verb: 0.25
cn_disjunction: 0.376
cn_adverb: 0.119
ce_embedded_under_since: 0.281
ce_after_since_clause: 0.623
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.5516
subsequence: 0.7444
constituent: 0.5514

Entailed accuracy: 0.6158

Heuristic non-entailed results:
lexical_overlap: 0.426
subsequence: 0.5302
constituent: 0.459

Non-Entailed accuracy: 0.47173333333333334

Overall accuracy: 0.5437666666666666

Subcase results:
ln_subject/object_swap: 0.243
ln_preposition: 0.552
ln_relative_clause: 0.55
ln_passive: 0.222
ln_conjunction: 0.563
le_relative_clause: 0.568
le_around_prepositional_phrase: 0.487
le_around_relative_clause: 0.481
le_conjunction: 0.474
le_passive: 0.748
sn_NP/S: 0.535
sn_PP_on_subject: 0.496
sn_relative_clause_on_subject: 0.554
sn_past_participle: 0.457
sn_NP/Z: 0.609
se_conjunction: 0.602
se_adjective: 0.931
se_understood_object: 0.888
se_relative_clause_on_obj: 0.64
se_PP_on_obj: 0.661
cn_embedded_under_if: 0.708
cn_after_if_clause: 0.343
cn_embedded_under_verb: 0.493
cn_disjunction: 0.545
cn_adverb: 0.206
ce_embedded_under_since: 0.369
ce_after_since_clause: 0.668
ce_embedde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.11it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.9662
subsequence: 0.9748
constituent: 0.9524

Entailed accuracy: 0.9644666666666667

Heuristic non-entailed results:
lexical_overlap: 0.0436
subsequence: 0.0674
constituent: 0.0494

Non-Entailed accuracy: 0.05346666666666667

Overall accuracy: 0.5089666666666667

Subcase results:
ln_subject/object_swap: 0.011
ln_preposition: 0.052
ln_relative_clause: 0.07
ln_passive: 0.023
ln_conjunction: 0.062
le_relative_clause: 0.9
le_around_prepositional_phrase: 0.98
le_around_relative_clause: 0.987
le_conjunction: 0.972
le_passive: 0.992
sn_NP/S: 0.07
sn_PP_on_subject: 0.011
sn_relative_clause_on_subject: 0.015
sn_past_participle: 0.17
sn_NP/Z: 0.071
se_conjunction: 0.987
se_adjective: 1.0
se_understood_object: 0.994
se_relative_clause_on_obj: 0.971
se_PP_on_obj: 0.922
cn_embedded_under_if: 0.202
cn_after_if_clause: 0.009
cn_embedded_under_verb: 0.005
cn_disjunction: 0.029
cn_adverb: 0.002
ce_embedded_under_since: 0.844
ce_after_since_clause: 0.985
ce






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<01:07,  1.74it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:01<01:03,  1.83it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<01:00,  1.91it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:57,  1.97it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:56,  2.01it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:54,  2.04it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:53,  2.06it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.08it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:52,  2.09it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.10it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.10it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.6458
subsequence: 0.7466
constituent: 0.6982

Entailed accuracy: 0.6968666666666666

Heuristic non-entailed results:
lexical_overlap: 0.4422
subsequence: 0.414
constituent: 0.3302

Non-Entailed accuracy: 0.3954666666666667

Overall accuracy: 0.5461666666666667

Subcase results:
ln_subject/object_swap: 0.394
ln_preposition: 0.562
ln_relative_clause: 0.466
ln_passive: 0.321
ln_conjunction: 0.468
le_relative_clause: 0.602
le_around_prepositional_phrase: 0.628
le_around_relative_clause: 0.708
le_conjunction: 0.609
le_passive: 0.682
sn_NP/S: 0.313
sn_PP_on_subject: 0.372
sn_relative_clause_on_subject: 0.331
sn_past_participle: 0.486
sn_NP/Z: 0.568
se_conjunction: 0.732
se_adjective: 0.943
se_understood_object: 0.835
se_relative_clause_on_obj: 0.724
se_PP_on_obj: 0.499
cn_embedded_under_if: 0.618
cn_after_if_clause: 0.396
cn_embedded_under_verb: 0.284
cn_disjunction: 0.293
cn_adverb: 0.06
ce_embedded_under_since: 0.459
ce_after_since_clause: 0.6






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:54,  2.13it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:54,  2.13it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.12it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:53,  2.12it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.12it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:52,  2.12it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.12it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.835
subsequence: 0.8292
constituent: 0.7998

Entailed accuracy: 0.8213333333333334

Heuristic non-entailed results:
lexical_overlap: 0.176
subsequence: 0.2474
constituent: 0.156

Non-Entailed accuracy: 0.19313333333333332

Overall accuracy: 0.5072333333333333

Subcase results:
ln_subject/object_swap: 0.11
ln_preposition: 0.207
ln_relative_clause: 0.227
ln_passive: 0.118
ln_conjunction: 0.218
le_relative_clause: 0.74
le_around_prepositional_phrase: 0.857
le_around_relative_clause: 0.847
le_conjunction: 0.831
le_passive: 0.9
sn_NP/S: 0.269
sn_PP_on_subject: 0.142
sn_relative_clause_on_subject: 0.143
sn_past_participle: 0.261
sn_NP/Z: 0.422
se_conjunction: 0.82
se_adjective: 0.954
se_understood_object: 0.887
se_relative_clause_on_obj: 0.769
se_PP_on_obj: 0.716
cn_embedded_under_if: 0.407
cn_after_if_clause: 0.138
cn_embedded_under_verb: 0.056
cn_disjunction: 0.151
cn_adverb: 0.028
ce_embedded_under_since: 0.591
ce_after_since_clause: 0.853
ce






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.7922
subsequence: 0.8976
constituent: 0.8354

Entailed accuracy: 0.8417333333333333

Heuristic non-entailed results:
lexical_overlap: 0.2342
subsequence: 0.1886
constituent: 0.1672

Non-Entailed accuracy: 0.19666666666666666

Overall accuracy: 0.5192

Subcase results:
ln_subject/object_swap: 0.109
ln_preposition: 0.308
ln_relative_clause: 0.292
ln_passive: 0.127
ln_conjunction: 0.335
le_relative_clause: 0.784
le_around_prepositional_phrase: 0.763
le_around_relative_clause: 0.785
le_conjunction: 0.72
le_passive: 0.909
sn_NP/S: 0.18
sn_PP_on_subject: 0.18
sn_relative_clause_on_subject: 0.224
sn_past_participle: 0.221
sn_NP/Z: 0.138
se_conjunction: 0.843
se_adjective: 0.959
se_understood_object: 0.986
se_relative_clause_on_obj: 0.914
se_PP_on_obj: 0.786
cn_embedded_under_if: 0.259
cn_after_if_clause: 0.189
cn_embedded_under_verb: 0.099
cn_disjunction: 0.24
cn_adverb: 0.049
ce_embedded_under_since: 0.804
ce_after_since_clause: 0.838
ce_embedde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.721
subsequence: 0.7786
constituent: 0.695

Entailed accuracy: 0.7315333333333334

Heuristic non-entailed results:
lexical_overlap: 0.2722
subsequence: 0.3492
constituent: 0.2968

Non-Entailed accuracy: 0.30606666666666665

Overall accuracy: 0.5188

Subcase results:
ln_subject/object_swap: 0.158
ln_preposition: 0.336
ln_relative_clause: 0.341
ln_passive: 0.144
ln_conjunction: 0.382
le_relative_clause: 0.651
le_around_prepositional_phrase: 0.711
le_around_relative_clause: 0.701
le_conjunction: 0.624
le_passive: 0.918
sn_NP/S: 0.422
sn_PP_on_subject: 0.211
sn_relative_clause_on_subject: 0.239
sn_past_participle: 0.426
sn_NP/Z: 0.448
se_conjunction: 0.694
se_adjective: 0.96
se_understood_object: 0.889
se_relative_clause_on_obj: 0.675
se_PP_on_obj: 0.675
cn_embedded_under_if: 0.678
cn_after_if_clause: 0.269
cn_embedded_under_verb: 0.21
cn_disjunction: 0.277
cn_adverb: 0.05
ce_embedded_under_since: 0.392
ce_after_since_clause: 0.754
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.08it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:57,  2.00it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:55,  2.03it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:54,  2.06it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:53,  2.08it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.09it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.10it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.9842
subsequence: 0.9924
constituent: 0.9824

Entailed accuracy: 0.9863333333333333

Heuristic non-entailed results:
lexical_overlap: 0.011
subsequence: 0.0498
constituent: 0.0262

Non-Entailed accuracy: 0.029

Overall accuracy: 0.5076666666666667

Subcase results:
ln_subject/object_swap: 0.002
ln_preposition: 0.013
ln_relative_clause: 0.015
ln_passive: 0.012
ln_conjunction: 0.013
le_relative_clause: 0.944
le_around_prepositional_phrase: 0.99
le_around_relative_clause: 0.993
le_conjunction: 0.998
le_passive: 0.996
sn_NP/S: 0.037
sn_PP_on_subject: 0.004
sn_relative_clause_on_subject: 0.004
sn_past_participle: 0.147
sn_NP/Z: 0.057
se_conjunction: 0.995
se_adjective: 1.0
se_understood_object: 0.999
se_relative_clause_on_obj: 0.987
se_PP_on_obj: 0.981
cn_embedded_under_if: 0.097
cn_after_if_clause: 0.001
cn_embedded_under_verb: 0.003
cn_disjunction: 0.03
cn_adverb: 0.0
ce_embedded_under_since: 0.943
ce_after_since_clause: 0.994
ce_embedded_und






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<01:00,  1.94it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:58,  1.99it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:56,  2.03it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.07it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.773
subsequence: 0.8372
constituent: 0.7432

Entailed accuracy: 0.7844666666666666

Heuristic non-entailed results:
lexical_overlap: 0.2494
subsequence: 0.3406
constituent: 0.2748

Non-Entailed accuracy: 0.28826666666666667

Overall accuracy: 0.5363666666666667

Subcase results:
ln_subject/object_swap: 0.22
ln_preposition: 0.278
ln_relative_clause: 0.243
ln_passive: 0.235
ln_conjunction: 0.271
le_relative_clause: 0.704
le_around_prepositional_phrase: 0.77
le_around_relative_clause: 0.812
le_conjunction: 0.772
le_passive: 0.807
sn_NP/S: 0.226
sn_PP_on_subject: 0.295
sn_relative_clause_on_subject: 0.253
sn_past_participle: 0.428
sn_NP/Z: 0.501
se_conjunction: 0.794
se_adjective: 0.922
se_understood_object: 0.913
se_relative_clause_on_obj: 0.82
se_PP_on_obj: 0.737
cn_embedded_under_if: 0.395
cn_after_if_clause: 0.279
cn_embedded_under_verb: 0.243
cn_disjunction: 0.4
cn_adverb: 0.057
ce_embedded_under_since: 0.64
ce_after_since_clause: 0.674
c






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.08it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.817
subsequence: 0.8542
constituent: 0.7788

Entailed accuracy: 0.8166666666666667

Heuristic non-entailed results:
lexical_overlap: 0.1664
subsequence: 0.25
constituent: 0.1844

Non-Entailed accuracy: 0.20026666666666668

Overall accuracy: 0.5084666666666666

Subcase results:
ln_subject/object_swap: 0.138
ln_preposition: 0.152
ln_relative_clause: 0.216
ln_passive: 0.161
ln_conjunction: 0.165
le_relative_clause: 0.746
le_around_prepositional_phrase: 0.816
le_around_relative_clause: 0.803
le_conjunction: 0.827
le_passive: 0.893
sn_NP/S: 0.259
sn_PP_on_subject: 0.195
sn_relative_clause_on_subject: 0.199
sn_past_participle: 0.22
sn_NP/Z: 0.377
se_conjunction: 0.819
se_adjective: 0.936
se_understood_object: 0.928
se_relative_clause_on_obj: 0.777
se_PP_on_obj: 0.811
cn_embedded_under_if: 0.248
cn_after_if_clause: 0.167
cn_embedded_under_verb: 0.153
cn_disjunction: 0.268
cn_adverb: 0.086
ce_embedded_under_since: 0.691
ce_after_since_clause: 0.77






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.9048
subsequence: 0.967
constituent: 0.8998

Entailed accuracy: 0.9238666666666666

Heuristic non-entailed results:
lexical_overlap: 0.0854
subsequence: 0.1
constituent: 0.092

Non-Entailed accuracy: 0.09246666666666667

Overall accuracy: 0.5081666666666667

Subcase results:
ln_subject/object_swap: 0.04
ln_preposition: 0.096
ln_relative_clause: 0.106
ln_passive: 0.101
ln_conjunction: 0.084
le_relative_clause: 0.897
le_around_prepositional_phrase: 0.886
le_around_relative_clause: 0.893
le_conjunction: 0.898
le_passive: 0.95
sn_NP/S: 0.106
sn_PP_on_subject: 0.109
sn_relative_clause_on_subject: 0.125
sn_past_participle: 0.102
sn_NP/Z: 0.058
se_conjunction: 0.949
se_adjective: 0.984
se_understood_object: 0.993
se_relative_clause_on_obj: 0.975
se_PP_on_obj: 0.934
cn_embedded_under_if: 0.087
cn_after_if_clause: 0.091
cn_embedded_under_verb: 0.087
cn_disjunction: 0.186
cn_adverb: 0.009
ce_embedded_under_since: 0.925
ce_after_since_clause: 0.871
c






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:56,  2.00it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:54,  2.03it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:53,  2.06it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:52,  2.07it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.08it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:51,  2.09it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.7948
subsequence: 0.8578
constituent: 0.7446

Entailed accuracy: 0.7990666666666667

Heuristic non-entailed results:
lexical_overlap: 0.1704
subsequence: 0.2786
constituent: 0.262

Non-Entailed accuracy: 0.237

Overall accuracy: 0.5180333333333333

Subcase results:
ln_subject/object_swap: 0.085
ln_preposition: 0.186
ln_relative_clause: 0.245
ln_passive: 0.147
ln_conjunction: 0.189
le_relative_clause: 0.709
le_around_prepositional_phrase: 0.797
le_around_relative_clause: 0.77
le_conjunction: 0.777
le_passive: 0.921
sn_NP/S: 0.349
sn_PP_on_subject: 0.174
sn_relative_clause_on_subject: 0.195
sn_past_participle: 0.35
sn_NP/Z: 0.325
se_conjunction: 0.84
se_adjective: 0.984
se_understood_object: 0.925
se_relative_clause_on_obj: 0.724
se_PP_on_obj: 0.816
cn_embedded_under_if: 0.486
cn_after_if_clause: 0.27
cn_embedded_under_verb: 0.202
cn_disjunction: 0.274
cn_adverb: 0.078
ce_embedded_under_since: 0.567
ce_after_since_clause: 0.754
ce_embedded_u






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8916
subsequence: 0.9024
constituent: 0.87

Entailed accuracy: 0.888

Heuristic non-entailed results:
lexical_overlap: 0.1174
subsequence: 0.192
constituent: 0.1384

Non-Entailed accuracy: 0.14926666666666666

Overall accuracy: 0.5186333333333333

Subcase results:
ln_subject/object_swap: 0.065
ln_preposition: 0.177
ln_relative_clause: 0.135
ln_passive: 0.067
ln_conjunction: 0.143
le_relative_clause: 0.83
le_around_prepositional_phrase: 0.871
le_around_relative_clause: 0.901
le_conjunction: 0.888
le_passive: 0.968
sn_NP/S: 0.229
sn_PP_on_subject: 0.124
sn_relative_clause_on_subject: 0.103
sn_past_participle: 0.369
sn_NP/Z: 0.135
se_conjunction: 0.908
se_adjective: 0.98
se_understood_object: 0.94
se_relative_clause_on_obj: 0.912
se_PP_on_obj: 0.772
cn_embedded_under_if: 0.297
cn_after_if_clause: 0.132
cn_embedded_under_verb: 0.077
cn_disjunction: 0.164
cn_adverb: 0.022
ce_embedded_under_since: 0.803
ce_after_since_clause: 0.841
ce_embedded_u






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:58,  2.01it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.04it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.9528
subsequence: 0.9854
constituent: 0.9784

Entailed accuracy: 0.9722

Heuristic non-entailed results:
lexical_overlap: 0.0518
subsequence: 0.0678
constituent: 0.0302

Non-Entailed accuracy: 0.049933333333333337

Overall accuracy: 0.5110666666666667

Subcase results:
ln_subject/object_swap: 0.026
ln_preposition: 0.061
ln_relative_clause: 0.065
ln_passive: 0.074
ln_conjunction: 0.033
le_relative_clause: 0.926
le_around_prepositional_phrase: 0.948
le_around_relative_clause: 0.966
le_conjunction: 0.964
le_passive: 0.96
sn_NP/S: 0.017
sn_PP_on_subject: 0.04
sn_relative_clause_on_subject: 0.036
sn_past_participle: 0.177
sn_NP/Z: 0.069
se_conjunction: 0.986
se_adjective: 0.992
se_understood_object: 0.994
se_relative_clause_on_obj: 0.978
se_PP_on_obj: 0.977
cn_embedded_under_if: 0.061
cn_after_if_clause: 0.028
cn_embedded_under_verb: 0.02
cn_disjunction: 0.027
cn_adverb: 0.015
ce_embedded_under_since: 0.963
ce_after_since_clause: 0.968
ce_embed






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.6178
subsequence: 0.7908
constituent: 0.6792

Entailed accuracy: 0.6959333333333333

Heuristic non-entailed results:
lexical_overlap: 0.37
subsequence: 0.4384
constituent: 0.3688

Non-Entailed accuracy: 0.3924

Overall accuracy: 0.5441666666666667

Subcase results:
ln_subject/object_swap: 0.266
ln_preposition: 0.444
ln_relative_clause: 0.448
ln_passive: 0.397
ln_conjunction: 0.295
le_relative_clause: 0.586
le_around_prepositional_phrase: 0.576
le_around_relative_clause: 0.626
le_conjunction: 0.703
le_passive: 0.598
sn_NP/S: 0.176
sn_PP_on_subject: 0.408
sn_relative_clause_on_subject: 0.406
sn_past_participle: 0.549
sn_NP/Z: 0.653
se_conjunction: 0.745
se_adjective: 0.895
se_understood_object: 0.892
se_relative_clause_on_obj: 0.752
se_PP_on_obj: 0.67
cn_embedded_under_if: 0.492
cn_after_if_clause: 0.457
cn_embedded_under_verb: 0.377
cn_disjunction: 0.376
cn_adverb: 0.142
ce_embedded_under_since: 0.652
ce_after_since_clause: 0.569
ce_embedde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.05it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.07it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:53,  2.02it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:52,  2.05it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:51,  2.07it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8228
subsequence: 0.8992
constituent: 0.86

Entailed accuracy: 0.8606666666666667

Heuristic non-entailed results:
lexical_overlap: 0.1628
subsequence: 0.181
constituent: 0.1048

Non-Entailed accuracy: 0.14953333333333332

Overall accuracy: 0.5051

Subcase results:
ln_subject/object_swap: 0.098
ln_preposition: 0.17
ln_relative_clause: 0.207
ln_passive: 0.181
ln_conjunction: 0.158
le_relative_clause: 0.773
le_around_prepositional_phrase: 0.797
le_around_relative_clause: 0.814
le_conjunction: 0.849
le_passive: 0.881
sn_NP/S: 0.092
sn_PP_on_subject: 0.171
sn_relative_clause_on_subject: 0.175
sn_past_participle: 0.161
sn_NP/Z: 0.306
se_conjunction: 0.873
se_adjective: 0.96
se_understood_object: 0.955
se_relative_clause_on_obj: 0.85
se_PP_on_obj: 0.858
cn_embedded_under_if: 0.156
cn_after_if_clause: 0.104
cn_embedded_under_verb: 0.086
cn_disjunction: 0.121
cn_adverb: 0.057
ce_embedded_under_since: 0.832
ce_after_since_clause: 0.854
ce_embedded_






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.08it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8856
subsequence: 0.971
constituent: 0.9466

Entailed accuracy: 0.9344

Heuristic non-entailed results:
lexical_overlap: 0.1038
subsequence: 0.1026
constituent: 0.0566

Non-Entailed accuracy: 0.08766666666666667

Overall accuracy: 0.5110333333333333

Subcase results:
ln_subject/object_swap: 0.028
ln_preposition: 0.128
ln_relative_clause: 0.154
ln_passive: 0.093
ln_conjunction: 0.116
le_relative_clause: 0.885
le_around_prepositional_phrase: 0.854
le_around_relative_clause: 0.859
le_conjunction: 0.884
le_passive: 0.946
sn_NP/S: 0.05
sn_PP_on_subject: 0.099
sn_relative_clause_on_subject: 0.129
sn_past_participle: 0.145
sn_NP/Z: 0.09
se_conjunction: 0.955
se_adjective: 0.99
se_understood_object: 0.99
se_relative_clause_on_obj: 0.967
se_PP_on_obj: 0.953
cn_embedded_under_if: 0.047
cn_after_if_clause: 0.061
cn_embedded_under_verb: 0.06
cn_disjunction: 0.101
cn_adverb: 0.014
ce_embedded_under_since: 0.955
ce_after_since_clause: 0.921
ce_embedded_






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.11it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:54,  2.12it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.12it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:53,  2.12it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.12it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:52,  2.12it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.12it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8236
subsequence: 0.9068
constituent: 0.8412

Entailed accuracy: 0.8572

Heuristic non-entailed results:
lexical_overlap: 0.1554
subsequence: 0.2456
constituent: 0.1572

Non-Entailed accuracy: 0.18606666666666666

Overall accuracy: 0.5216333333333333

Subcase results:
ln_subject/object_swap: 0.068
ln_preposition: 0.19
ln_relative_clause: 0.235
ln_passive: 0.124
ln_conjunction: 0.16
le_relative_clause: 0.782
le_around_prepositional_phrase: 0.793
le_around_relative_clause: 0.792
le_conjunction: 0.845
le_passive: 0.906
sn_NP/S: 0.242
sn_PP_on_subject: 0.174
sn_relative_clause_on_subject: 0.202
sn_past_participle: 0.341
sn_NP/Z: 0.269
se_conjunction: 0.89
se_adjective: 0.992
se_understood_object: 0.943
se_relative_clause_on_obj: 0.847
se_PP_on_obj: 0.862
cn_embedded_under_if: 0.275
cn_after_if_clause: 0.186
cn_embedded_under_verb: 0.129
cn_disjunction: 0.14
cn_adverb: 0.056
ce_embedded_under_since: 0.76
ce_after_since_clause: 0.823
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.04it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.865
subsequence: 0.9176
constituent: 0.8848

Entailed accuracy: 0.8891333333333333

Heuristic non-entailed results:
lexical_overlap: 0.1334
subsequence: 0.1658
constituent: 0.1286

Non-Entailed accuracy: 0.1426

Overall accuracy: 0.5158666666666667

Subcase results:
ln_subject/object_swap: 0.054
ln_preposition: 0.146
ln_relative_clause: 0.136
ln_passive: 0.207
ln_conjunction: 0.124
le_relative_clause: 0.824
le_around_prepositional_phrase: 0.862
le_around_relative_clause: 0.888
le_conjunction: 0.891
le_passive: 0.86
sn_NP/S: 0.157
sn_PP_on_subject: 0.099
sn_relative_clause_on_subject: 0.086
sn_past_participle: 0.356
sn_NP/Z: 0.131
se_conjunction: 0.925
se_adjective: 0.983
se_understood_object: 0.926
se_relative_clause_on_obj: 0.906
se_PP_on_obj: 0.848
cn_embedded_under_if: 0.242
cn_after_if_clause: 0.137
cn_embedded_under_verb: 0.082
cn_disjunction: 0.146
cn_adverb: 0.036
ce_embedded_under_since: 0.818
ce_after_since_clause: 0.869
ce_embedd






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:55,  2.01it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:54,  2.04it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:53,  2.06it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:52,  2.08it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.09it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:51,  2.10it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.86
subsequence: 0.9512
constituent: 0.9152

Entailed accuracy: 0.9088

Heuristic non-entailed results:
lexical_overlap: 0.122
subsequence: 0.1536
constituent: 0.1094

Non-Entailed accuracy: 0.12833333333333333

Overall accuracy: 0.5185666666666666

Subcase results:
ln_subject/object_swap: 0.043
ln_preposition: 0.122
ln_relative_clause: 0.106
ln_passive: 0.289
ln_conjunction: 0.05
le_relative_clause: 0.865
le_around_prepositional_phrase: 0.817
le_around_relative_clause: 0.866
le_conjunction: 0.936
le_passive: 0.816
sn_NP/S: 0.119
sn_PP_on_subject: 0.124
sn_relative_clause_on_subject: 0.107
sn_past_participle: 0.331
sn_NP/Z: 0.087
se_conjunction: 0.962
se_adjective: 0.97
se_understood_object: 0.966
se_relative_clause_on_obj: 0.951
se_PP_on_obj: 0.907
cn_embedded_under_if: 0.119
cn_after_if_clause: 0.093
cn_embedded_under_verb: 0.101
cn_disjunction: 0.189
cn_adverb: 0.045
ce_embedded_under_since: 0.936
ce_after_since_clause: 0.888
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.03it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:56,  2.05it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.9768
subsequence: 0.9882
constituent: 0.9756

Entailed accuracy: 0.9802

Heuristic non-entailed results:
lexical_overlap: 0.0232
subsequence: 0.0668
constituent: 0.0256

Non-Entailed accuracy: 0.038533333333333336

Overall accuracy: 0.5093666666666666

Subcase results:
ln_subject/object_swap: 0.01
ln_preposition: 0.038
ln_relative_clause: 0.036
ln_passive: 0.012
ln_conjunction: 0.02
le_relative_clause: 0.953
le_around_prepositional_phrase: 0.972
le_around_relative_clause: 0.989
le_conjunction: 0.981
le_passive: 0.989
sn_NP/S: 0.012
sn_PP_on_subject: 0.037
sn_relative_clause_on_subject: 0.032
sn_past_participle: 0.177
sn_NP/Z: 0.076
se_conjunction: 0.976
se_adjective: 0.998
se_understood_object: 0.992
se_relative_clause_on_obj: 0.983
se_PP_on_obj: 0.992
cn_embedded_under_if: 0.062
cn_after_if_clause: 0.026
cn_embedded_under_verb: 0.023
cn_disjunction: 0.014
cn_adverb: 0.003
ce_embedded_under_since: 0.946
ce_after_since_clause: 0.97
ce_embed






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.11it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.12it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:52,  2.12it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.12it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.7142
subsequence: 0.7662
constituent: 0.6984

Entailed accuracy: 0.7262666666666666

Heuristic non-entailed results:
lexical_overlap: 0.326
subsequence: 0.4072
constituent: 0.3182

Non-Entailed accuracy: 0.35046666666666665

Overall accuracy: 0.5383666666666667

Subcase results:
ln_subject/object_swap: 0.267
ln_preposition: 0.45
ln_relative_clause: 0.389
ln_passive: 0.221
ln_conjunction: 0.303
le_relative_clause: 0.672
le_around_prepositional_phrase: 0.665
le_around_relative_clause: 0.729
le_conjunction: 0.711
le_passive: 0.794
sn_NP/S: 0.109
sn_PP_on_subject: 0.38
sn_relative_clause_on_subject: 0.359
sn_past_participle: 0.525
sn_NP/Z: 0.663
se_conjunction: 0.732
se_adjective: 0.924
se_understood_object: 0.76
se_relative_clause_on_obj: 0.747
se_PP_on_obj: 0.668
cn_embedded_under_if: 0.515
cn_after_if_clause: 0.398
cn_embedded_under_verb: 0.32
cn_disjunction: 0.256
cn_adverb: 0.102
ce_embedded_under_since: 0.564
ce_after_since_clause: 0.604






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8664
subsequence: 0.87
constituent: 0.8198

Entailed accuracy: 0.8520666666666666

Heuristic non-entailed results:
lexical_overlap: 0.1326
subsequence: 0.197
constituent: 0.132

Non-Entailed accuracy: 0.15386666666666668

Overall accuracy: 0.5029666666666667

Subcase results:
ln_subject/object_swap: 0.119
ln_preposition: 0.136
ln_relative_clause: 0.174
ln_passive: 0.093
ln_conjunction: 0.141
le_relative_clause: 0.803
le_around_prepositional_phrase: 0.875
le_around_relative_clause: 0.872
le_conjunction: 0.858
le_passive: 0.924
sn_NP/S: 0.098
sn_PP_on_subject: 0.174
sn_relative_clause_on_subject: 0.198
sn_past_participle: 0.173
sn_NP/Z: 0.342
se_conjunction: 0.821
se_adjective: 0.937
se_understood_object: 0.865
se_relative_clause_on_obj: 0.829
se_PP_on_obj: 0.898
cn_embedded_under_if: 0.189
cn_after_if_clause: 0.165
cn_embedded_under_verb: 0.129
cn_disjunction: 0.118
cn_adverb: 0.059
ce_embedded_under_since: 0.774
ce_after_since_clause: 0.79






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.05it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.9484
subsequence: 0.9824
constituent: 0.9592

Entailed accuracy: 0.9633333333333334

Heuristic non-entailed results:
lexical_overlap: 0.0448
subsequence: 0.0674
constituent: 0.0328

Non-Entailed accuracy: 0.04833333333333333

Overall accuracy: 0.5058333333333334

Subcase results:
ln_subject/object_swap: 0.016
ln_preposition: 0.059
ln_relative_clause: 0.064
ln_passive: 0.024
ln_conjunction: 0.061
le_relative_clause: 0.948
le_around_prepositional_phrase: 0.939
le_around_relative_clause: 0.946
le_conjunction: 0.919
le_passive: 0.99
sn_NP/S: 0.023
sn_PP_on_subject: 0.05
sn_relative_clause_on_subject: 0.079
sn_past_participle: 0.119
sn_NP/Z: 0.066
se_conjunction: 0.963
se_adjective: 0.994
se_understood_object: 0.972
se_relative_clause_on_obj: 0.997
se_PP_on_obj: 0.986
cn_embedded_under_if: 0.055
cn_after_if_clause: 0.043
cn_embedded_under_verb: 0.022
cn_disjunction: 0.041
cn_adverb: 0.003
ce_embedded_under_since: 0.966
ce_after_since_clause: 0.






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.903
subsequence: 0.9216
constituent: 0.8538

Entailed accuracy: 0.8928

Heuristic non-entailed results:
lexical_overlap: 0.0904
subsequence: 0.1816
constituent: 0.1498

Non-Entailed accuracy: 0.1406

Overall accuracy: 0.5167

Subcase results:
ln_subject/object_swap: 0.054
ln_preposition: 0.107
ln_relative_clause: 0.129
ln_passive: 0.041
ln_conjunction: 0.121
le_relative_clause: 0.871
le_around_prepositional_phrase: 0.902
le_around_relative_clause: 0.88
le_conjunction: 0.88
le_passive: 0.982
sn_NP/S: 0.138
sn_PP_on_subject: 0.142
sn_relative_clause_on_subject: 0.175
sn_past_participle: 0.252
sn_NP/Z: 0.201
se_conjunction: 0.858
se_adjective: 0.977
se_understood_object: 0.954
se_relative_clause_on_obj: 0.873
se_PP_on_obj: 0.946
cn_embedded_under_if: 0.274
cn_after_if_clause: 0.189
cn_embedded_under_verb: 0.135
cn_disjunction: 0.106
cn_adverb: 0.045
ce_embedded_under_since: 0.785
ce_after_since_clause: 0.832
ce_embedded_under_verb: 0.881
ce_c






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.9614
subsequence: 0.9604
constituent: 0.946

Entailed accuracy: 0.9559333333333333

Heuristic non-entailed results:
lexical_overlap: 0.0474
subsequence: 0.0802
constituent: 0.0668

Non-Entailed accuracy: 0.0648

Overall accuracy: 0.5103666666666666

Subcase results:
ln_subject/object_swap: 0.022
ln_preposition: 0.072
ln_relative_clause: 0.064
ln_passive: 0.022
ln_conjunction: 0.057
le_relative_clause: 0.935
le_around_prepositional_phrase: 0.955
le_around_relative_clause: 0.972
le_conjunction: 0.952
le_passive: 0.993
sn_NP/S: 0.046
sn_PP_on_subject: 0.047
sn_relative_clause_on_subject: 0.05
sn_past_participle: 0.204
sn_NP/Z: 0.054
se_conjunction: 0.947
se_adjective: 0.98
se_understood_object: 0.952
se_relative_clause_on_obj: 0.956
se_PP_on_obj: 0.967
cn_embedded_under_if: 0.178
cn_after_if_clause: 0.074
cn_embedded_under_verb: 0.029
cn_disjunction: 0.046
cn_adverb: 0.007
ce_embedded_under_since: 0.928
ce_after_since_clause: 0.909
ce_embedde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.9726
subsequence: 0.9782
constituent: 0.9716

Entailed accuracy: 0.9741333333333333

Heuristic non-entailed results:
lexical_overlap: 0.022
subsequence: 0.059
constituent: 0.0446

Non-Entailed accuracy: 0.04186666666666667

Overall accuracy: 0.508

Subcase results:
ln_subject/object_swap: 0.004
ln_preposition: 0.017
ln_relative_clause: 0.015
ln_passive: 0.048
ln_conjunction: 0.026
le_relative_clause: 0.965
le_around_prepositional_phrase: 0.971
le_around_relative_clause: 0.978
le_conjunction: 0.971
le_passive: 0.978
sn_NP/S: 0.037
sn_PP_on_subject: 0.038
sn_relative_clause_on_subject: 0.032
sn_past_participle: 0.154
sn_NP/Z: 0.034
se_conjunction: 0.959
se_adjective: 0.98
se_understood_object: 0.986
se_relative_clause_on_obj: 0.987
se_PP_on_obj: 0.979
cn_embedded_under_if: 0.098
cn_after_if_clause: 0.04
cn_embedded_under_verb: 0.03
cn_disjunction: 0.049
cn_adverb: 0.006
ce_embedded_under_since: 0.985
ce_after_since_clause: 0.95
ce_embedded_u






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:56,  2.01it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:54,  2.04it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:53,  2.06it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.08it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:52,  2.09it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.10it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.9876
subsequence: 0.9954
constituent: 0.9906

Entailed accuracy: 0.9912

Heuristic non-entailed results:
lexical_overlap: 0.012
subsequence: 0.0224
constituent: 0.0208

Non-Entailed accuracy: 0.0184

Overall accuracy: 0.5048

Subcase results:
ln_subject/object_swap: 0.005
ln_preposition: 0.015
ln_relative_clause: 0.014
ln_passive: 0.018
ln_conjunction: 0.008
le_relative_clause: 0.989
le_around_prepositional_phrase: 0.988
le_around_relative_clause: 0.988
le_conjunction: 0.983
le_passive: 0.99
sn_NP/S: 0.004
sn_PP_on_subject: 0.02
sn_relative_clause_on_subject: 0.009
sn_past_participle: 0.063
sn_NP/Z: 0.016
se_conjunction: 0.996
se_adjective: 0.992
se_understood_object: 0.99
se_relative_clause_on_obj: 0.999
se_PP_on_obj: 1.0
cn_embedded_under_if: 0.041
cn_after_if_clause: 0.008
cn_embedded_under_verb: 0.024
cn_disjunction: 0.016
cn_adverb: 0.015
ce_embedded_under_since: 0.992
ce_after_since_clause: 0.976
ce_embedded_under_verb: 0.995
ce_conj






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.5808
subsequence: 0.6078
constituent: 0.5606

Entailed accuracy: 0.5830666666666666

Heuristic non-entailed results:
lexical_overlap: 0.5032
subsequence: 0.5206
constituent: 0.4854

Non-Entailed accuracy: 0.5030666666666667

Overall accuracy: 0.5430666666666667

Subcase results:
ln_subject/object_swap: 0.542
ln_preposition: 0.517
ln_relative_clause: 0.521
ln_passive: 0.423
ln_conjunction: 0.513
le_relative_clause: 0.579
le_around_prepositional_phrase: 0.611
le_around_relative_clause: 0.639
le_conjunction: 0.535
le_passive: 0.54
sn_NP/S: 0.283
sn_PP_on_subject: 0.43
sn_relative_clause_on_subject: 0.426
sn_past_participle: 0.585
sn_NP/Z: 0.879
se_conjunction: 0.56
se_adjective: 0.738
se_understood_object: 0.524
se_relative_clause_on_obj: 0.629
se_PP_on_obj: 0.588
cn_embedded_under_if: 0.696
cn_after_if_clause: 0.447
cn_embedded_under_verb: 0.592
cn_disjunction: 0.343
cn_adverb: 0.349
ce_embedded_under_since: 0.452
ce_after_since_clause: 0.57






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.7342
subsequence: 0.8198
constituent: 0.727

Entailed accuracy: 0.7603333333333333

Heuristic non-entailed results:
lexical_overlap: 0.4108
subsequence: 0.321
constituent: 0.3032

Non-Entailed accuracy: 0.345

Overall accuracy: 0.5526666666666666

Subcase results:
ln_subject/object_swap: 0.446
ln_preposition: 0.461
ln_relative_clause: 0.446
ln_passive: 0.257
ln_conjunction: 0.444
le_relative_clause: 0.628
le_around_prepositional_phrase: 0.825
le_around_relative_clause: 0.849
le_conjunction: 0.782
le_passive: 0.587
sn_NP/S: 0.199
sn_PP_on_subject: 0.227
sn_relative_clause_on_subject: 0.251
sn_past_participle: 0.286
sn_NP/Z: 0.642
se_conjunction: 0.783
se_adjective: 0.839
se_understood_object: 0.889
se_relative_clause_on_obj: 0.771
se_PP_on_obj: 0.817
cn_embedded_under_if: 0.485
cn_after_if_clause: 0.257
cn_embedded_under_verb: 0.312
cn_disjunction: 0.264
cn_adverb: 0.198
ce_embedded_under_since: 0.586
ce_after_since_clause: 0.78
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.07it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.0294
subsequence: 0.0724
constituent: 0.0544

Entailed accuracy: 0.052066666666666664

Heuristic non-entailed results:
lexical_overlap: 0.977
subsequence: 0.966
constituent: 0.9538

Non-Entailed accuracy: 0.9656

Overall accuracy: 0.5088333333333334

Subcase results:
ln_subject/object_swap: 0.985
ln_preposition: 0.994
ln_relative_clause: 0.99
ln_passive: 0.921
ln_conjunction: 0.995
le_relative_clause: 0.025
le_around_prepositional_phrase: 0.03
le_around_relative_clause: 0.025
le_conjunction: 0.024
le_passive: 0.043
sn_NP/S: 0.956
sn_PP_on_subject: 0.977
sn_relative_clause_on_subject: 0.961
sn_past_participle: 0.936
sn_NP/Z: 1.0
se_conjunction: 0.025
se_adjective: 0.207
se_understood_object: 0.053
se_relative_clause_on_obj: 0.048
se_PP_on_obj: 0.029
cn_embedded_under_if: 0.992
cn_after_if_clause: 0.966
cn_embedded_under_verb: 0.953
cn_disjunction: 0.961
cn_adverb: 0.897
ce_embedded_under_since: 0.008
ce_after_since_clause: 0.039
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<01:00,  1.95it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:58,  2.00it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:56,  2.03it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.07it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.5832
subsequence: 0.74
constituent: 0.5984

Entailed accuracy: 0.6405333333333333

Heuristic non-entailed results:
lexical_overlap: 0.5538
subsequence: 0.4994
constituent: 0.428

Non-Entailed accuracy: 0.49373333333333336

Overall accuracy: 0.5671333333333334

Subcase results:
ln_subject/object_swap: 0.562
ln_preposition: 0.639
ln_relative_clause: 0.662
ln_passive: 0.358
ln_conjunction: 0.548
le_relative_clause: 0.46
le_around_prepositional_phrase: 0.65
le_around_relative_clause: 0.673
le_conjunction: 0.608
le_passive: 0.525
sn_NP/S: 0.493
sn_PP_on_subject: 0.363
sn_relative_clause_on_subject: 0.42
sn_past_participle: 0.51
sn_NP/Z: 0.711
se_conjunction: 0.723
se_adjective: 0.845
se_understood_object: 0.776
se_relative_clause_on_obj: 0.688
se_PP_on_obj: 0.668
cn_embedded_under_if: 0.712
cn_after_if_clause: 0.348
cn_embedded_under_verb: 0.505
cn_disjunction: 0.367
cn_adverb: 0.208
ce_embedded_under_since: 0.392
ce_after_since_clause: 0.662
c






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:56,  2.00it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:55,  2.03it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:54,  2.06it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:53,  2.07it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:52,  2.09it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.09it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.10it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.0312
subsequence: 0.0676
constituent: 0.0622

Entailed accuracy: 0.05366666666666667

Heuristic non-entailed results:
lexical_overlap: 0.9782
subsequence: 0.9672
constituent: 0.9446

Non-Entailed accuracy: 0.9633333333333334

Overall accuracy: 0.5085

Subcase results:
ln_subject/object_swap: 0.981
ln_preposition: 0.988
ln_relative_clause: 0.989
ln_passive: 0.943
ln_conjunction: 0.99
le_relative_clause: 0.024
le_around_prepositional_phrase: 0.032
le_around_relative_clause: 0.033
le_conjunction: 0.045
le_passive: 0.022
sn_NP/S: 0.945
sn_PP_on_subject: 0.965
sn_relative_clause_on_subject: 0.98
sn_past_participle: 0.95
sn_NP/Z: 0.996
se_conjunction: 0.045
se_adjective: 0.137
se_understood_object: 0.064
se_relative_clause_on_obj: 0.042
se_PP_on_obj: 0.05
cn_embedded_under_if: 0.98
cn_after_if_clause: 0.951
cn_embedded_under_verb: 0.958
cn_disjunction: 0.942
cn_adverb: 0.892
ce_embedded_under_since: 0.048
ce_after_since_clause: 0.051
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.03it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:57,  2.03it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.07it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.2896
subsequence: 0.388
constituent: 0.2622

Entailed accuracy: 0.3132666666666667

Heuristic non-entailed results:
lexical_overlap: 0.7022
subsequence: 0.7444
constituent: 0.6846

Non-Entailed accuracy: 0.7104

Overall accuracy: 0.5118333333333334

Subcase results:
ln_subject/object_swap: 0.663
ln_preposition: 0.824
ln_relative_clause: 0.786
ln_passive: 0.425
ln_conjunction: 0.813
le_relative_clause: 0.278
le_around_prepositional_phrase: 0.216
le_around_relative_clause: 0.223
le_conjunction: 0.237
le_passive: 0.494
sn_NP/S: 0.771
sn_PP_on_subject: 0.74
sn_relative_clause_on_subject: 0.757
sn_past_participle: 0.574
sn_NP/Z: 0.88
se_conjunction: 0.268
se_adjective: 0.578
se_understood_object: 0.515
se_relative_clause_on_obj: 0.285
se_PP_on_obj: 0.294
cn_embedded_under_if: 0.782
cn_after_if_clause: 0.714
cn_embedded_under_verb: 0.709
cn_disjunction: 0.738
cn_adverb: 0.48
ce_embedded_under_since: 0.203
ce_after_since_clause: 0.224
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.6496
subsequence: 0.7458
constituent: 0.5662

Entailed accuracy: 0.6538666666666667

Heuristic non-entailed results:
lexical_overlap: 0.4796
subsequence: 0.4398
constituent: 0.4622

Non-Entailed accuracy: 0.46053333333333335

Overall accuracy: 0.5572

Subcase results:
ln_subject/object_swap: 0.503
ln_preposition: 0.524
ln_relative_clause: 0.566
ln_passive: 0.272
ln_conjunction: 0.533
le_relative_clause: 0.524
le_around_prepositional_phrase: 0.709
le_around_relative_clause: 0.74
le_conjunction: 0.665
le_passive: 0.61
sn_NP/S: 0.342
sn_PP_on_subject: 0.28
sn_relative_clause_on_subject: 0.349
sn_past_participle: 0.548
sn_NP/Z: 0.68
se_conjunction: 0.692
se_adjective: 0.849
se_understood_object: 0.731
se_relative_clause_on_obj: 0.695
se_PP_on_obj: 0.762
cn_embedded_under_if: 0.814
cn_after_if_clause: 0.372
cn_embedded_under_verb: 0.518
cn_disjunction: 0.361
cn_adverb: 0.246
ce_embedded_under_since: 0.305
ce_after_since_clause: 0.619
ce_embedde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:56,  2.05it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.0434
subsequence: 0.1154
constituent: 0.0834

Entailed accuracy: 0.08073333333333334

Heuristic non-entailed results:
lexical_overlap: 0.9704
subsequence: 0.9448
constituent: 0.93

Non-Entailed accuracy: 0.9484

Overall accuracy: 0.5145666666666666

Subcase results:
ln_subject/object_swap: 0.981
ln_preposition: 0.98
ln_relative_clause: 0.982
ln_passive: 0.929
ln_conjunction: 0.98
le_relative_clause: 0.039
le_around_prepositional_phrase: 0.042
le_around_relative_clause: 0.06
le_conjunction: 0.042
le_passive: 0.034
sn_NP/S: 0.917
sn_PP_on_subject: 0.936
sn_relative_clause_on_subject: 0.95
sn_past_participle: 0.928
sn_NP/Z: 0.993
se_conjunction: 0.059
se_adjective: 0.229
se_understood_object: 0.124
se_relative_clause_on_obj: 0.077
se_PP_on_obj: 0.088
cn_embedded_under_if: 0.975
cn_after_if_clause: 0.934
cn_embedded_under_verb: 0.981
cn_disjunction: 0.878
cn_adverb: 0.882
ce_embedded_under_since: 0.038
ce_after_since_clause: 0.076
ce_embedded_






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<01:00,  1.95it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:01<01:00,  1.92it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:58,  1.98it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:56,  2.02it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:55,  2.05it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:54,  2.07it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:53,  2.08it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.09it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.10it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.1878
subsequence: 0.2932
constituent: 0.1694

Entailed accuracy: 0.2168

Heuristic non-entailed results:
lexical_overlap: 0.8156
subsequence: 0.826
constituent: 0.8034

Non-Entailed accuracy: 0.815

Overall accuracy: 0.5159

Subcase results:
ln_subject/object_swap: 0.817
ln_preposition: 0.904
ln_relative_clause: 0.899
ln_passive: 0.581
ln_conjunction: 0.877
le_relative_clause: 0.133
le_around_prepositional_phrase: 0.197
le_around_relative_clause: 0.196
le_conjunction: 0.172
le_passive: 0.241
sn_NP/S: 0.839
sn_PP_on_subject: 0.759
sn_relative_clause_on_subject: 0.81
sn_past_participle: 0.754
sn_NP/Z: 0.968
se_conjunction: 0.209
se_adjective: 0.526
se_understood_object: 0.367
se_relative_clause_on_obj: 0.191
se_PP_on_obj: 0.173
cn_embedded_under_if: 0.939
cn_after_if_clause: 0.836
cn_embedded_under_verb: 0.831
cn_disjunction: 0.791
cn_adverb: 0.62
ce_embedded_under_since: 0.065
ce_after_since_clause: 0.165
ce_embedded_under_verb: 0.123
ce_co






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.11it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.12it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.12it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.155
subsequence: 0.3446
constituent: 0.1662

Entailed accuracy: 0.22193333333333334

Heuristic non-entailed results:
lexical_overlap: 0.8422
subsequence: 0.8712
constituent: 0.8346

Non-Entailed accuracy: 0.8493333333333334

Overall accuracy: 0.5356333333333333

Subcase results:
ln_subject/object_swap: 0.824
ln_preposition: 0.918
ln_relative_clause: 0.922
ln_passive: 0.626
ln_conjunction: 0.921
le_relative_clause: 0.132
le_around_prepositional_phrase: 0.147
le_around_relative_clause: 0.146
le_conjunction: 0.122
le_passive: 0.228
sn_NP/S: 0.864
sn_PP_on_subject: 0.865
sn_relative_clause_on_subject: 0.908
sn_past_participle: 0.78
sn_NP/Z: 0.939
se_conjunction: 0.189
se_adjective: 0.53
se_understood_object: 0.456
se_relative_clause_on_obj: 0.237
se_PP_on_obj: 0.311
cn_embedded_under_if: 0.907
cn_after_if_clause: 0.838
cn_embedded_under_verb: 0.906
cn_disjunction: 0.835
cn_adverb: 0.687
ce_embedded_under_since: 0.111
ce_after_since_clause: 0.1






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8004
subsequence: 0.889
constituent: 0.817

Entailed accuracy: 0.8354666666666667

Heuristic non-entailed results:
lexical_overlap: 0.2954
subsequence: 0.2006
constituent: 0.2148

Non-Entailed accuracy: 0.23693333333333333

Overall accuracy: 0.5362

Subcase results:
ln_subject/object_swap: 0.305
ln_preposition: 0.347
ln_relative_clause: 0.373
ln_passive: 0.181
ln_conjunction: 0.271
le_relative_clause: 0.714
le_around_prepositional_phrase: 0.883
le_around_relative_clause: 0.885
le_conjunction: 0.853
le_passive: 0.667
sn_NP/S: 0.086
sn_PP_on_subject: 0.11
sn_relative_clause_on_subject: 0.158
sn_past_participle: 0.388
sn_NP/Z: 0.261
se_conjunction: 0.893
se_adjective: 0.914
se_understood_object: 0.88
se_relative_clause_on_obj: 0.89
se_PP_on_obj: 0.868
cn_embedded_under_if: 0.421
cn_after_if_clause: 0.176
cn_embedded_under_verb: 0.237
cn_disjunction: 0.158
cn_adverb: 0.082
ce_embedded_under_since: 0.673
ce_after_since_clause: 0.829
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.08it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.2002
subsequence: 0.3346
constituent: 0.288

Entailed accuracy: 0.27426666666666666

Heuristic non-entailed results:
lexical_overlap: 0.8696
subsequence: 0.7844
constituent: 0.7304

Non-Entailed accuracy: 0.7948

Overall accuracy: 0.5345333333333333

Subcase results:
ln_subject/object_swap: 0.862
ln_preposition: 0.915
ln_relative_clause: 0.882
ln_passive: 0.829
ln_conjunction: 0.86
le_relative_clause: 0.187
le_around_prepositional_phrase: 0.232
le_around_relative_clause: 0.291
le_conjunction: 0.211
le_passive: 0.08
sn_NP/S: 0.629
sn_PP_on_subject: 0.74
sn_relative_clause_on_subject: 0.78
sn_past_participle: 0.824
sn_NP/Z: 0.949
se_conjunction: 0.315
se_adjective: 0.5
se_understood_object: 0.345
se_relative_clause_on_obj: 0.27
se_PP_on_obj: 0.243
cn_embedded_under_if: 0.852
cn_after_if_clause: 0.837
cn_embedded_under_verb: 0.802
cn_disjunction: 0.642
cn_adverb: 0.519
ce_embedded_under_since: 0.2
ce_after_since_clause: 0.207
ce_embedded_unde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:02<00:58,  1.96it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:56,  2.00it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:55,  2.03it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:53,  2.06it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:53,  2.07it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:52,  2.09it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.10it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.10it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.4308
subsequence: 0.5504
constituent: 0.3896

Entailed accuracy: 0.45693333333333336

Heuristic non-entailed results:
lexical_overlap: 0.6518
subsequence: 0.5752
constituent: 0.5566

Non-Entailed accuracy: 0.5945333333333334

Overall accuracy: 0.5257333333333334

Subcase results:
ln_subject/object_swap: 0.631
ln_preposition: 0.724
ln_relative_clause: 0.694
ln_passive: 0.503
ln_conjunction: 0.707
le_relative_clause: 0.383
le_around_prepositional_phrase: 0.499
le_around_relative_clause: 0.523
le_conjunction: 0.437
le_passive: 0.312
sn_NP/S: 0.446
sn_PP_on_subject: 0.543
sn_relative_clause_on_subject: 0.539
sn_past_participle: 0.517
sn_NP/Z: 0.831
se_conjunction: 0.438
se_adjective: 0.663
se_understood_object: 0.688
se_relative_clause_on_obj: 0.481
se_PP_on_obj: 0.482
cn_embedded_under_if: 0.791
cn_after_if_clause: 0.639
cn_embedded_under_verb: 0.499
cn_disjunction: 0.541
cn_adverb: 0.313
ce_embedded_under_since: 0.224
ce_after_since_clause: 






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:59,  1.96it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:57,  2.01it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:56,  2.04it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.5076
subsequence: 0.72
constituent: 0.5708

Entailed accuracy: 0.5994666666666667

Heuristic non-entailed results:
lexical_overlap: 0.5924
subsequence: 0.4766
constituent: 0.4408

Non-Entailed accuracy: 0.5032666666666666

Overall accuracy: 0.5513666666666667

Subcase results:
ln_subject/object_swap: 0.538
ln_preposition: 0.72
ln_relative_clause: 0.677
ln_passive: 0.317
ln_conjunction: 0.71
le_relative_clause: 0.482
le_around_prepositional_phrase: 0.509
le_around_relative_clause: 0.572
le_conjunction: 0.442
le_passive: 0.533
sn_NP/S: 0.458
sn_PP_on_subject: 0.426
sn_relative_clause_on_subject: 0.51
sn_past_participle: 0.501
sn_NP/Z: 0.488
se_conjunction: 0.556
se_adjective: 0.818
se_understood_object: 0.887
se_relative_clause_on_obj: 0.7
se_PP_on_obj: 0.639
cn_embedded_under_if: 0.539
cn_after_if_clause: 0.585
cn_embedded_under_verb: 0.39
cn_disjunction: 0.48
cn_adverb: 0.21
ce_embedded_under_since: 0.619
ce_after_since_clause: 0.413
ce_em






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.3086
subsequence: 0.4906
constituent: 0.309

Entailed accuracy: 0.3694

Heuristic non-entailed results:
lexical_overlap: 0.756
subsequence: 0.699
constituent: 0.6986

Non-Entailed accuracy: 0.7178666666666667

Overall accuracy: 0.5436333333333333

Subcase results:
ln_subject/object_swap: 0.765
ln_preposition: 0.82
ln_relative_clause: 0.81
ln_passive: 0.575
ln_conjunction: 0.81
le_relative_clause: 0.246
le_around_prepositional_phrase: 0.324
le_around_relative_clause: 0.353
le_conjunction: 0.255
le_passive: 0.365
sn_NP/S: 0.672
sn_PP_on_subject: 0.6
sn_relative_clause_on_subject: 0.667
sn_past_participle: 0.716
sn_NP/Z: 0.84
se_conjunction: 0.334
se_adjective: 0.657
se_understood_object: 0.691
se_relative_clause_on_obj: 0.382
se_PP_on_obj: 0.389
cn_embedded_under_if: 0.848
cn_after_if_clause: 0.774
cn_embedded_under_verb: 0.748
cn_disjunction: 0.649
cn_adverb: 0.474
ce_embedded_under_since: 0.196
ce_after_since_clause: 0.258
ce_embedded_unde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.08it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.9084
subsequence: 0.9466
constituent: 0.9452

Entailed accuracy: 0.9334

Heuristic non-entailed results:
lexical_overlap: 0.0958
subsequence: 0.1316
constituent: 0.0822

Non-Entailed accuracy: 0.1032

Overall accuracy: 0.5183

Subcase results:
ln_subject/object_swap: 0.084
ln_preposition: 0.122
ln_relative_clause: 0.131
ln_passive: 0.057
ln_conjunction: 0.085
le_relative_clause: 0.826
le_around_prepositional_phrase: 0.922
le_around_relative_clause: 0.94
le_conjunction: 0.959
le_passive: 0.895
sn_NP/S: 0.032
sn_PP_on_subject: 0.064
sn_relative_clause_on_subject: 0.074
sn_past_participle: 0.279
sn_NP/Z: 0.209
se_conjunction: 0.968
se_adjective: 0.962
se_understood_object: 0.905
se_relative_clause_on_obj: 0.954
se_PP_on_obj: 0.944
cn_embedded_under_if: 0.134
cn_after_if_clause: 0.094
cn_embedded_under_verb: 0.076
cn_disjunction: 0.077
cn_adverb: 0.03
ce_embedded_under_since: 0.934
ce_after_since_clause: 0.932
ce_embedded_under_verb: 0.951
ce_






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:54,  2.13it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:54,  2.13it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.12it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:53,  2.12it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.12it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:52,  2.12it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.12it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.371
subsequence: 0.4926
constituent: 0.421

Entailed accuracy: 0.4282

Heuristic non-entailed results:
lexical_overlap: 0.6438
subsequence: 0.658
constituent: 0.6054

Non-Entailed accuracy: 0.6357333333333334

Overall accuracy: 0.5319666666666667

Subcase results:
ln_subject/object_swap: 0.67
ln_preposition: 0.711
ln_relative_clause: 0.644
ln_passive: 0.613
ln_conjunction: 0.581
le_relative_clause: 0.399
le_around_prepositional_phrase: 0.35
le_around_relative_clause: 0.409
le_conjunction: 0.467
le_passive: 0.23
sn_NP/S: 0.354
sn_PP_on_subject: 0.689
sn_relative_clause_on_subject: 0.679
sn_past_participle: 0.711
sn_NP/Z: 0.857
se_conjunction: 0.501
se_adjective: 0.546
se_understood_object: 0.439
se_relative_clause_on_obj: 0.545
se_PP_on_obj: 0.432
cn_embedded_under_if: 0.582
cn_after_if_clause: 0.707
cn_embedded_under_verb: 0.703
cn_disjunction: 0.568
cn_adverb: 0.467
ce_embedded_under_since: 0.52
ce_after_since_clause: 0.29
ce_embedded_und






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.549
subsequence: 0.6088
constituent: 0.5062

Entailed accuracy: 0.5546666666666666

Heuristic non-entailed results:
lexical_overlap: 0.4354
subsequence: 0.5116
constituent: 0.4328

Non-Entailed accuracy: 0.45993333333333336

Overall accuracy: 0.5073

Subcase results:
ln_subject/object_swap: 0.411
ln_preposition: 0.467
ln_relative_clause: 0.492
ln_passive: 0.334
ln_conjunction: 0.473
le_relative_clause: 0.514
le_around_prepositional_phrase: 0.522
le_around_relative_clause: 0.557
le_conjunction: 0.583
le_passive: 0.569
sn_NP/S: 0.363
sn_PP_on_subject: 0.474
sn_relative_clause_on_subject: 0.531
sn_past_participle: 0.429
sn_NP/Z: 0.761
se_conjunction: 0.582
se_adjective: 0.727
se_understood_object: 0.522
se_relative_clause_on_obj: 0.591
se_PP_on_obj: 0.622
cn_embedded_under_if: 0.414
cn_after_if_clause: 0.493
cn_embedded_under_verb: 0.504
cn_disjunction: 0.454
cn_adverb: 0.299
ce_embedded_under_since: 0.567
ce_after_since_clause: 0.409
ce_embe






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.6962
subsequence: 0.8524
constituent: 0.7432

Entailed accuracy: 0.7639333333333334

Heuristic non-entailed results:
lexical_overlap: 0.3102
subsequence: 0.3258
constituent: 0.2556

Non-Entailed accuracy: 0.2972

Overall accuracy: 0.5305666666666666

Subcase results:
ln_subject/object_swap: 0.259
ln_preposition: 0.386
ln_relative_clause: 0.37
ln_passive: 0.219
ln_conjunction: 0.317
le_relative_clause: 0.676
le_around_prepositional_phrase: 0.663
le_around_relative_clause: 0.698
le_conjunction: 0.734
le_passive: 0.71
sn_NP/S: 0.227
sn_PP_on_subject: 0.324
sn_relative_clause_on_subject: 0.421
sn_past_participle: 0.342
sn_NP/Z: 0.315
se_conjunction: 0.776
se_adjective: 0.887
se_understood_object: 0.861
se_relative_clause_on_obj: 0.885
se_PP_on_obj: 0.853
cn_embedded_under_if: 0.156
cn_after_if_clause: 0.36
cn_embedded_under_verb: 0.297
cn_disjunction: 0.337
cn_adverb: 0.128
ce_embedded_under_since: 0.898
ce_after_since_clause: 0.606
ce_embedde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.6224
subsequence: 0.7398
constituent: 0.5908

Entailed accuracy: 0.651

Heuristic non-entailed results:
lexical_overlap: 0.3766
subsequence: 0.4588
constituent: 0.4406

Non-Entailed accuracy: 0.42533333333333334

Overall accuracy: 0.5381666666666667

Subcase results:
ln_subject/object_swap: 0.32
ln_preposition: 0.45
ln_relative_clause: 0.468
ln_passive: 0.239
ln_conjunction: 0.406
le_relative_clause: 0.533
le_around_prepositional_phrase: 0.616
le_around_relative_clause: 0.616
le_conjunction: 0.657
le_passive: 0.69
sn_NP/S: 0.416
sn_PP_on_subject: 0.368
sn_relative_clause_on_subject: 0.433
sn_past_participle: 0.549
sn_NP/Z: 0.528
se_conjunction: 0.676
se_adjective: 0.867
se_understood_object: 0.755
se_relative_clause_on_obj: 0.664
se_PP_on_obj: 0.737
cn_embedded_under_if: 0.54
cn_after_if_clause: 0.529
cn_embedded_under_verb: 0.433
cn_disjunction: 0.436
cn_adverb: 0.265
ce_embedded_under_since: 0.571
ce_after_since_clause: 0.511
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.7166
subsequence: 0.796
constituent: 0.713

Entailed accuracy: 0.7418666666666667

Heuristic non-entailed results:
lexical_overlap: 0.3294
subsequence: 0.3014
constituent: 0.2786

Non-Entailed accuracy: 0.3031333333333333

Overall accuracy: 0.5225

Subcase results:
ln_subject/object_swap: 0.31
ln_preposition: 0.402
ln_relative_clause: 0.369
ln_passive: 0.204
ln_conjunction: 0.362
le_relative_clause: 0.704
le_around_prepositional_phrase: 0.682
le_around_relative_clause: 0.774
le_conjunction: 0.69
le_passive: 0.733
sn_NP/S: 0.224
sn_PP_on_subject: 0.284
sn_relative_clause_on_subject: 0.28
sn_past_participle: 0.443
sn_NP/Z: 0.276
se_conjunction: 0.758
se_adjective: 0.858
se_understood_object: 0.856
se_relative_clause_on_obj: 0.815
se_PP_on_obj: 0.693
cn_embedded_under_if: 0.257
cn_after_if_clause: 0.408
cn_embedded_under_verb: 0.292
cn_disjunction: 0.323
cn_adverb: 0.113
ce_embedded_under_since: 0.847
ce_after_since_clause: 0.545
ce_embedded_






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.849
subsequence: 0.931
constituent: 0.918

Entailed accuracy: 0.8993333333333333

Heuristic non-entailed results:
lexical_overlap: 0.149
subsequence: 0.1634
constituent: 0.1266

Non-Entailed accuracy: 0.14633333333333334

Overall accuracy: 0.5228333333333334

Subcase results:
ln_subject/object_swap: 0.136
ln_preposition: 0.181
ln_relative_clause: 0.244
ln_passive: 0.092
ln_conjunction: 0.092
le_relative_clause: 0.746
le_around_prepositional_phrase: 0.886
le_around_relative_clause: 0.886
le_conjunction: 0.945
le_passive: 0.782
sn_NP/S: 0.061
sn_PP_on_subject: 0.078
sn_relative_clause_on_subject: 0.11
sn_past_participle: 0.35
sn_NP/Z: 0.218
se_conjunction: 0.956
se_adjective: 0.95
se_understood_object: 0.933
se_relative_clause_on_obj: 0.911
se_PP_on_obj: 0.905
cn_embedded_under_if: 0.175
cn_after_if_clause: 0.163
cn_embedded_under_verb: 0.137
cn_disjunction: 0.102
cn_adverb: 0.056
ce_embedded_under_since: 0.917
ce_after_since_clause: 0.888
c






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.3546
subsequence: 0.4982
constituent: 0.3976

Entailed accuracy: 0.4168

Heuristic non-entailed results:
lexical_overlap: 0.6536
subsequence: 0.6818
constituent: 0.6552

Non-Entailed accuracy: 0.6635333333333333

Overall accuracy: 0.5401666666666667

Subcase results:
ln_subject/object_swap: 0.614
ln_preposition: 0.752
ln_relative_clause: 0.755
ln_passive: 0.599
ln_conjunction: 0.548
le_relative_clause: 0.338
le_around_prepositional_phrase: 0.338
le_around_relative_clause: 0.401
le_conjunction: 0.516
le_passive: 0.18
sn_NP/S: 0.413
sn_PP_on_subject: 0.667
sn_relative_clause_on_subject: 0.679
sn_past_participle: 0.741
sn_NP/Z: 0.909
se_conjunction: 0.519
se_adjective: 0.596
se_understood_object: 0.487
se_relative_clause_on_obj: 0.485
se_PP_on_obj: 0.404
cn_embedded_under_if: 0.751
cn_after_if_clause: 0.78
cn_embedded_under_verb: 0.716
cn_disjunction: 0.582
cn_adverb: 0.447
ce_embedded_under_since: 0.371
ce_after_since_clause: 0.288
ce_embedd






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.5678
subsequence: 0.6624
constituent: 0.526

Entailed accuracy: 0.5854

Heuristic non-entailed results:
lexical_overlap: 0.4334
subsequence: 0.5178
constituent: 0.3978

Non-Entailed accuracy: 0.44966666666666666

Overall accuracy: 0.5175333333333333

Subcase results:
ln_subject/object_swap: 0.35
ln_preposition: 0.531
ln_relative_clause: 0.559
ln_passive: 0.337
ln_conjunction: 0.39
le_relative_clause: 0.475
le_around_prepositional_phrase: 0.573
le_around_relative_clause: 0.623
le_conjunction: 0.682
le_passive: 0.486
sn_NP/S: 0.326
sn_PP_on_subject: 0.459
sn_relative_clause_on_subject: 0.527
sn_past_participle: 0.515
sn_NP/Z: 0.762
se_conjunction: 0.656
se_adjective: 0.766
se_understood_object: 0.699
se_relative_clause_on_obj: 0.569
se_PP_on_obj: 0.622
cn_embedded_under_if: 0.493
cn_after_if_clause: 0.5
cn_embedded_under_verb: 0.402
cn_disjunction: 0.36
cn_adverb: 0.234
ce_embedded_under_since: 0.487
ce_after_since_clause: 0.469
ce_embedded_






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.7236
subsequence: 0.8662
constituent: 0.791

Entailed accuracy: 0.7936

Heuristic non-entailed results:
lexical_overlap: 0.3098
subsequence: 0.3244
constituent: 0.2256

Non-Entailed accuracy: 0.2866

Overall accuracy: 0.5401

Subcase results:
ln_subject/object_swap: 0.2
ln_preposition: 0.42
ln_relative_clause: 0.457
ln_passive: 0.159
ln_conjunction: 0.313
le_relative_clause: 0.608
le_around_prepositional_phrase: 0.726
le_around_relative_clause: 0.72
le_conjunction: 0.798
le_passive: 0.766
sn_NP/S: 0.223
sn_PP_on_subject: 0.239
sn_relative_clause_on_subject: 0.347
sn_past_participle: 0.417
sn_NP/Z: 0.396
se_conjunction: 0.867
se_adjective: 0.932
se_understood_object: 0.879
se_relative_clause_on_obj: 0.84
se_PP_on_obj: 0.813
cn_embedded_under_if: 0.261
cn_after_if_clause: 0.295
cn_embedded_under_verb: 0.217
cn_disjunction: 0.249
cn_adverb: 0.106
ce_embedded_under_since: 0.811
ce_after_since_clause: 0.716
ce_embedded_under_verb: 0.763
ce_conj






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.6062
subsequence: 0.7584
constituent: 0.592

Entailed accuracy: 0.6522

Heuristic non-entailed results:
lexical_overlap: 0.4156
subsequence: 0.5002
constituent: 0.4142

Non-Entailed accuracy: 0.44333333333333336

Overall accuracy: 0.5477666666666666

Subcase results:
ln_subject/object_swap: 0.294
ln_preposition: 0.533
ln_relative_clause: 0.591
ln_passive: 0.231
ln_conjunction: 0.429
le_relative_clause: 0.461
le_around_prepositional_phrase: 0.596
le_around_relative_clause: 0.628
le_conjunction: 0.692
le_passive: 0.654
sn_NP/S: 0.42
sn_PP_on_subject: 0.378
sn_relative_clause_on_subject: 0.461
sn_past_participle: 0.614
sn_NP/Z: 0.628
se_conjunction: 0.736
se_adjective: 0.914
se_understood_object: 0.801
se_relative_clause_on_obj: 0.662
se_PP_on_obj: 0.679
cn_embedded_under_if: 0.535
cn_after_if_clause: 0.537
cn_embedded_under_verb: 0.405
cn_disjunction: 0.369
cn_adverb: 0.225
ce_embedded_under_since: 0.506
ce_after_since_clause: 0.503
ce_embed






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.7094
subsequence: 0.8368
constituent: 0.7304

Entailed accuracy: 0.7588666666666667

Heuristic non-entailed results:
lexical_overlap: 0.3156
subsequence: 0.267
constituent: 0.2944

Non-Entailed accuracy: 0.29233333333333333

Overall accuracy: 0.5256

Subcase results:
ln_subject/object_swap: 0.221
ln_preposition: 0.387
ln_relative_clause: 0.409
ln_passive: 0.288
ln_conjunction: 0.273
le_relative_clause: 0.627
le_around_prepositional_phrase: 0.745
le_around_relative_clause: 0.797
le_conjunction: 0.797
le_passive: 0.581
sn_NP/S: 0.208
sn_PP_on_subject: 0.2
sn_relative_clause_on_subject: 0.249
sn_past_participle: 0.438
sn_NP/Z: 0.24
se_conjunction: 0.83
se_adjective: 0.898
se_understood_object: 0.914
se_relative_clause_on_obj: 0.783
se_PP_on_obj: 0.759
cn_embedded_under_if: 0.315
cn_after_if_clause: 0.428
cn_embedded_under_verb: 0.28
cn_disjunction: 0.325
cn_adverb: 0.124
ce_embedded_under_since: 0.741
ce_after_since_clause: 0.583
ce_embedded_






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:57,  2.03it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.07it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8078
subsequence: 0.9022
constituent: 0.8616

Entailed accuracy: 0.8572

Heuristic non-entailed results:
lexical_overlap: 0.173
subsequence: 0.2022
constituent: 0.155

Non-Entailed accuracy: 0.17673333333333333

Overall accuracy: 0.5169666666666667

Subcase results:
ln_subject/object_swap: 0.137
ln_preposition: 0.219
ln_relative_clause: 0.203
ln_passive: 0.209
ln_conjunction: 0.097
le_relative_clause: 0.805
le_around_prepositional_phrase: 0.764
le_around_relative_clause: 0.828
le_conjunction: 0.909
le_passive: 0.733
sn_NP/S: 0.092
sn_PP_on_subject: 0.214
sn_relative_clause_on_subject: 0.223
sn_past_participle: 0.341
sn_NP/Z: 0.141
se_conjunction: 0.908
se_adjective: 0.91
se_understood_object: 0.877
se_relative_clause_on_obj: 0.917
se_PP_on_obj: 0.899
cn_embedded_under_if: 0.13
cn_after_if_clause: 0.185
cn_embedded_under_verb: 0.191
cn_disjunction: 0.163
cn_adverb: 0.106
ce_embedded_under_since: 0.966
ce_after_since_clause: 0.792
ce_embedde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:52,  2.

Heuristic entailed results:
lexical_overlap: 0.7126
subsequence: 0.7758
constituent: 0.716

Entailed accuracy: 0.7348

Heuristic non-entailed results:
lexical_overlap: 0.2828
subsequence: 0.3632
constituent: 0.3718

Non-Entailed accuracy: 0.33926666666666666

Overall accuracy: 0.5370333333333334

Subcase results:
ln_subject/object_swap: 0.299
ln_preposition: 0.317
ln_relative_clause: 0.414
ln_passive: 0.189
ln_conjunction: 0.195
le_relative_clause: 0.61
le_around_prepositional_phrase: 0.774
le_around_relative_clause: 0.718
le_conjunction: 0.845
le_passive: 0.616
sn_NP/S: 0.14
sn_PP_on_subject: 0.241
sn_relative_clause_on_subject: 0.371
sn_past_participle: 0.523
sn_NP/Z: 0.541
se_conjunction: 0.82
se_adjective: 0.745
se_understood_object: 0.698
se_relative_clause_on_obj: 0.789
se_PP_on_obj: 0.827
cn_embedded_under_if: 0.432
cn_after_if_clause: 0.509
cn_embedded_under_verb: 0.46
cn_disjunction: 0.252
cn_adverb: 0.206
ce_embedded_under_since: 0.737
ce_after_since_clause: 0.627
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.05it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.1626
subsequence: 0.2462
constituent: 0.184

Entailed accuracy: 0.1976

Heuristic non-entailed results:
lexical_overlap: 0.8328
subsequence: 0.8426
constituent: 0.8618

Non-Entailed accuracy: 0.8457333333333333

Overall accuracy: 0.5216666666666666

Subcase results:
ln_subject/object_swap: 0.839
ln_preposition: 0.873
ln_relative_clause: 0.893
ln_passive: 0.79
ln_conjunction: 0.769
le_relative_clause: 0.131
le_around_prepositional_phrase: 0.163
le_around_relative_clause: 0.156
le_conjunction: 0.253
le_passive: 0.11
sn_NP/S: 0.633
sn_PP_on_subject: 0.82
sn_relative_clause_on_subject: 0.889
sn_past_participle: 0.884
sn_NP/Z: 0.987
se_conjunction: 0.265
se_adjective: 0.259
se_understood_object: 0.254
se_relative_clause_on_obj: 0.197
se_PP_on_obj: 0.256
cn_embedded_under_if: 0.903
cn_after_if_clause: 0.93
cn_embedded_under_verb: 0.937
cn_disjunction: 0.773
cn_adverb: 0.766
ce_embedded_under_since: 0.16
ce_after_since_clause: 0.107
ce_embedded_u






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.2798
subsequence: 0.3414
constituent: 0.2202

Entailed accuracy: 0.28046666666666664

Heuristic non-entailed results:
lexical_overlap: 0.712
subsequence: 0.7768
constituent: 0.73

Non-Entailed accuracy: 0.7396

Overall accuracy: 0.5100333333333333

Subcase results:
ln_subject/object_swap: 0.668
ln_preposition: 0.779
ln_relative_clause: 0.837
ln_passive: 0.571
ln_conjunction: 0.705
le_relative_clause: 0.196
le_around_prepositional_phrase: 0.291
le_around_relative_clause: 0.259
le_conjunction: 0.368
le_passive: 0.285
sn_NP/S: 0.632
sn_PP_on_subject: 0.706
sn_relative_clause_on_subject: 0.809
sn_past_participle: 0.777
sn_NP/Z: 0.96
se_conjunction: 0.329
se_adjective: 0.35
se_understood_object: 0.315
se_relative_clause_on_obj: 0.294
se_PP_on_obj: 0.419
cn_embedded_under_if: 0.768
cn_after_if_clause: 0.82
cn_embedded_under_verb: 0.832
cn_disjunction: 0.636
cn_adverb: 0.594
ce_embedded_under_since: 0.194
ce_after_since_clause: 0.179
ce_embedded_






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.4882
subsequence: 0.5964
constituent: 0.4784

Entailed accuracy: 0.521

Heuristic non-entailed results:
lexical_overlap: 0.5216
subsequence: 0.6026
constituent: 0.5358

Non-Entailed accuracy: 0.5533333333333333

Overall accuracy: 0.5371666666666667

Subcase results:
ln_subject/object_swap: 0.499
ln_preposition: 0.589
ln_relative_clause: 0.69
ln_passive: 0.322
ln_conjunction: 0.508
le_relative_clause: 0.397
le_around_prepositional_phrase: 0.502
le_around_relative_clause: 0.451
le_conjunction: 0.556
le_passive: 0.535
sn_NP/S: 0.466
sn_PP_on_subject: 0.524
sn_relative_clause_on_subject: 0.661
sn_past_participle: 0.65
sn_NP/Z: 0.712
se_conjunction: 0.559
se_adjective: 0.564
se_understood_object: 0.606
se_relative_clause_on_obj: 0.573
se_PP_on_obj: 0.68
cn_embedded_under_if: 0.507
cn_after_if_clause: 0.696
cn_embedded_under_verb: 0.636
cn_disjunction: 0.467
cn_adverb: 0.373
ce_embedded_under_since: 0.59
ce_after_since_clause: 0.336
ce_embedded_






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:58,  2.01it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:01<00:59,  1.96it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:57,  2.01it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.04it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.06it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.08it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.2728
subsequence: 0.4128
constituent: 0.2506

Entailed accuracy: 0.31206666666666666

Heuristic non-entailed results:
lexical_overlap: 0.7344
subsequence: 0.7754
constituent: 0.7842

Non-Entailed accuracy: 0.7646666666666667

Overall accuracy: 0.5383666666666667

Subcase results:
ln_subject/object_swap: 0.713
ln_preposition: 0.743
ln_relative_clause: 0.853
ln_passive: 0.593
ln_conjunction: 0.77
le_relative_clause: 0.161
le_around_prepositional_phrase: 0.334
le_around_relative_clause: 0.243
le_conjunction: 0.361
le_passive: 0.265
sn_NP/S: 0.657
sn_PP_on_subject: 0.654
sn_relative_clause_on_subject: 0.785
sn_past_participle: 0.852
sn_NP/Z: 0.929
se_conjunction: 0.361
se_adjective: 0.406
se_understood_object: 0.495
se_relative_clause_on_obj: 0.29
se_PP_on_obj: 0.512
cn_embedded_under_if: 0.826
cn_after_if_clause: 0.88
cn_embedded_under_verb: 0.892
cn_disjunction: 0.657
cn_adverb: 0.666
ce_embedded_under_since: 0.224
ce_after_since_clause: 0.1






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.5524
subsequence: 0.658
constituent: 0.544

Entailed accuracy: 0.5848

Heuristic non-entailed results:
lexical_overlap: 0.4692
subsequence: 0.4476
constituent: 0.4678

Non-Entailed accuracy: 0.46153333333333335

Overall accuracy: 0.5231666666666667

Subcase results:
ln_subject/object_swap: 0.428
ln_preposition: 0.523
ln_relative_clause: 0.587
ln_passive: 0.423
ln_conjunction: 0.385
le_relative_clause: 0.47
le_around_prepositional_phrase: 0.622
le_around_relative_clause: 0.577
le_conjunction: 0.688
le_passive: 0.405
sn_NP/S: 0.294
sn_PP_on_subject: 0.345
sn_relative_clause_on_subject: 0.45
sn_past_participle: 0.609
sn_NP/Z: 0.54
se_conjunction: 0.683
se_adjective: 0.644
se_understood_object: 0.632
se_relative_clause_on_obj: 0.639
se_PP_on_obj: 0.692
cn_embedded_under_if: 0.483
cn_after_if_clause: 0.614
cn_embedded_under_verb: 0.547
cn_disjunction: 0.417
cn_adverb: 0.278
ce_embedded_under_since: 0.598
ce_after_since_clause: 0.354
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.7954
subsequence: 0.8256
constituent: 0.7746

Entailed accuracy: 0.7985333333333333

Heuristic non-entailed results:
lexical_overlap: 0.191
subsequence: 0.2574
constituent: 0.2218

Non-Entailed accuracy: 0.2234

Overall accuracy: 0.5109666666666667

Subcase results:
ln_subject/object_swap: 0.198
ln_preposition: 0.197
ln_relative_clause: 0.226
ln_passive: 0.198
ln_conjunction: 0.136
le_relative_clause: 0.766
le_around_prepositional_phrase: 0.784
le_around_relative_clause: 0.817
le_conjunction: 0.887
le_passive: 0.723
sn_NP/S: 0.107
sn_PP_on_subject: 0.227
sn_relative_clause_on_subject: 0.277
sn_past_participle: 0.41
sn_NP/Z: 0.266
se_conjunction: 0.865
se_adjective: 0.754
se_understood_object: 0.703
se_relative_clause_on_obj: 0.882
se_PP_on_obj: 0.924
cn_embedded_under_if: 0.129
cn_after_if_clause: 0.295
cn_embedded_under_verb: 0.335
cn_disjunction: 0.178
cn_adverb: 0.172
ce_embedded_under_since: 0.94
ce_after_since_clause: 0.643
ce_embedde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8746
subsequence: 0.8986
constituent: 0.8852

Entailed accuracy: 0.8861333333333333

Heuristic non-entailed results:
lexical_overlap: 0.1338
subsequence: 0.1866
constituent: 0.1512

Non-Entailed accuracy: 0.1572

Overall accuracy: 0.5216666666666666

Subcase results:
ln_subject/object_swap: 0.146
ln_preposition: 0.182
ln_relative_clause: 0.211
ln_passive: 0.068
ln_conjunction: 0.062
le_relative_clause: 0.836
le_around_prepositional_phrase: 0.856
le_around_relative_clause: 0.863
le_conjunction: 0.942
le_passive: 0.876
sn_NP/S: 0.044
sn_PP_on_subject: 0.164
sn_relative_clause_on_subject: 0.225
sn_past_participle: 0.241
sn_NP/Z: 0.259
se_conjunction: 0.931
se_adjective: 0.89
se_understood_object: 0.834
se_relative_clause_on_obj: 0.915
se_PP_on_obj: 0.923
cn_embedded_under_if: 0.112
cn_after_if_clause: 0.192
cn_embedded_under_verb: 0.238
cn_disjunction: 0.142
cn_adverb: 0.072
ce_embedded_under_since: 0.935
ce_after_since_clause: 0.793
ce_embed






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:57,  1.97it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:55,  2.01it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:54,  2.04it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:53,  2.06it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:52,  2.08it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.09it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:51,  2.09it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.7922
subsequence: 0.8514
constituent: 0.8334

Entailed accuracy: 0.8256666666666667

Heuristic non-entailed results:
lexical_overlap: 0.2324
subsequence: 0.2442
constituent: 0.2302

Non-Entailed accuracy: 0.2356

Overall accuracy: 0.5306333333333333

Subcase results:
ln_subject/object_swap: 0.259
ln_preposition: 0.248
ln_relative_clause: 0.281
ln_passive: 0.207
ln_conjunction: 0.167
le_relative_clause: 0.741
le_around_prepositional_phrase: 0.83
le_around_relative_clause: 0.843
le_conjunction: 0.882
le_passive: 0.665
sn_NP/S: 0.135
sn_PP_on_subject: 0.196
sn_relative_clause_on_subject: 0.222
sn_past_participle: 0.316
sn_NP/Z: 0.352
se_conjunction: 0.853
se_adjective: 0.829
se_understood_object: 0.808
se_relative_clause_on_obj: 0.877
se_PP_on_obj: 0.89
cn_embedded_under_if: 0.208
cn_after_if_clause: 0.237
cn_embedded_under_verb: 0.264
cn_disjunction: 0.176
cn_adverb: 0.266
ce_embedded_under_since: 0.887
ce_after_since_clause: 0.838
ce_embedd






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.05it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.2162
subsequence: 0.3396
constituent: 0.283

Entailed accuracy: 0.2796

Heuristic non-entailed results:
lexical_overlap: 0.8146
subsequence: 0.757
constituent: 0.7654

Non-Entailed accuracy: 0.779

Overall accuracy: 0.5293

Subcase results:
ln_subject/object_swap: 0.842
ln_preposition: 0.826
ln_relative_clause: 0.831
ln_passive: 0.793
ln_conjunction: 0.781
le_relative_clause: 0.203
le_around_prepositional_phrase: 0.219
le_around_relative_clause: 0.241
le_conjunction: 0.307
le_passive: 0.111
sn_NP/S: 0.534
sn_PP_on_subject: 0.775
sn_relative_clause_on_subject: 0.801
sn_past_participle: 0.744
sn_NP/Z: 0.931
se_conjunction: 0.311
se_adjective: 0.305
se_understood_object: 0.437
se_relative_clause_on_obj: 0.334
se_PP_on_obj: 0.311
cn_embedded_under_if: 0.74
cn_after_if_clause: 0.823
cn_embedded_under_verb: 0.843
cn_disjunction: 0.651
cn_adverb: 0.77
ce_embedded_under_since: 0.357
ce_after_since_clause: 0.213
ce_embedded_under_verb: 0.179
ce_con






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.462
subsequence: 0.509
constituent: 0.407

Entailed accuracy: 0.4593333333333333

Heuristic non-entailed results:
lexical_overlap: 0.5596
subsequence: 0.5868
constituent: 0.5474

Non-Entailed accuracy: 0.5646

Overall accuracy: 0.5119666666666667

Subcase results:
ln_subject/object_swap: 0.611
ln_preposition: 0.563
ln_relative_clause: 0.596
ln_passive: 0.496
ln_conjunction: 0.532
le_relative_clause: 0.458
le_around_prepositional_phrase: 0.487
le_around_relative_clause: 0.475
le_conjunction: 0.496
le_passive: 0.394
sn_NP/S: 0.42
sn_PP_on_subject: 0.605
sn_relative_clause_on_subject: 0.659
sn_past_participle: 0.436
sn_NP/Z: 0.814
se_conjunction: 0.464
se_adjective: 0.543
se_understood_object: 0.464
se_relative_clause_on_obj: 0.53
se_PP_on_obj: 0.544
cn_embedded_under_if: 0.495
cn_after_if_clause: 0.626
cn_embedded_under_verb: 0.625
cn_disjunction: 0.452
cn_adverb: 0.539
ce_embedded_under_since: 0.486
ce_after_since_clause: 0.371
ce_embedded_






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:56,  2.05it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.5396
subsequence: 0.6966
constituent: 0.5994

Entailed accuracy: 0.6118666666666667

Heuristic non-entailed results:
lexical_overlap: 0.4742
subsequence: 0.4796
constituent: 0.4258

Non-Entailed accuracy: 0.45986666666666665

Overall accuracy: 0.5358666666666667

Subcase results:
ln_subject/object_swap: 0.478
ln_preposition: 0.493
ln_relative_clause: 0.549
ln_passive: 0.391
ln_conjunction: 0.46
le_relative_clause: 0.5
le_around_prepositional_phrase: 0.551
le_around_relative_clause: 0.567
le_conjunction: 0.589
le_passive: 0.491
sn_NP/S: 0.343
sn_PP_on_subject: 0.465
sn_relative_clause_on_subject: 0.544
sn_past_participle: 0.425
sn_NP/Z: 0.621
se_conjunction: 0.623
se_adjective: 0.617
se_understood_object: 0.814
se_relative_clause_on_obj: 0.71
se_PP_on_obj: 0.719
cn_embedded_under_if: 0.323
cn_after_if_clause: 0.464
cn_embedded_under_verb: 0.507
cn_disjunction: 0.399
cn_adverb: 0.436
ce_embedded_under_since: 0.758
ce_after_since_clause: 0.53






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:59,  1.95it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:01<01:00,  1.92it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:58,  1.98it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:56,  2.02it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:55,  2.05it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:54,  2.07it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:53,  2.08it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.09it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.10it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.4078
subsequence: 0.5878
constituent: 0.4562

Entailed accuracy: 0.4839333333333333

Heuristic non-entailed results:
lexical_overlap: 0.6112
subsequence: 0.602
constituent: 0.6014

Non-Entailed accuracy: 0.6048666666666667

Overall accuracy: 0.5444

Subcase results:
ln_subject/object_swap: 0.64
ln_preposition: 0.598
ln_relative_clause: 0.646
ln_passive: 0.537
ln_conjunction: 0.635
le_relative_clause: 0.364
le_around_prepositional_phrase: 0.456
le_around_relative_clause: 0.405
le_conjunction: 0.459
le_passive: 0.355
sn_NP/S: 0.496
sn_PP_on_subject: 0.538
sn_relative_clause_on_subject: 0.618
sn_past_participle: 0.586
sn_NP/Z: 0.772
se_conjunction: 0.492
se_adjective: 0.531
se_understood_object: 0.706
se_relative_clause_on_obj: 0.568
se_PP_on_obj: 0.642
cn_embedded_under_if: 0.502
cn_after_if_clause: 0.631
cn_embedded_under_verb: 0.694
cn_disjunction: 0.469
cn_adverb: 0.711
ce_embedded_under_since: 0.54
ce_after_since_clause: 0.397
ce_embedde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.6648
subsequence: 0.7976
constituent: 0.696

Entailed accuracy: 0.7194666666666667

Heuristic non-entailed results:
lexical_overlap: 0.3794
subsequence: 0.2906
constituent: 0.3208

Non-Entailed accuracy: 0.33026666666666665

Overall accuracy: 0.5248666666666667

Subcase results:
ln_subject/object_swap: 0.383
ln_preposition: 0.383
ln_relative_clause: 0.378
ln_passive: 0.377
ln_conjunction: 0.376
le_relative_clause: 0.671
le_around_prepositional_phrase: 0.711
le_around_relative_clause: 0.726
le_conjunction: 0.674
le_passive: 0.542
sn_NP/S: 0.203
sn_PP_on_subject: 0.313
sn_relative_clause_on_subject: 0.29
sn_past_participle: 0.304
sn_NP/Z: 0.343
se_conjunction: 0.711
se_adjective: 0.774
se_understood_object: 0.892
se_relative_clause_on_obj: 0.812
se_PP_on_obj: 0.799
cn_embedded_under_if: 0.265
cn_after_if_clause: 0.413
cn_embedded_under_verb: 0.359
cn_disjunction: 0.314
cn_adverb: 0.253
ce_embedded_under_since: 0.828
ce_after_since_clause: 0.






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8386
subsequence: 0.9046
constituent: 0.8448

Entailed accuracy: 0.8626666666666667

Heuristic non-entailed results:
lexical_overlap: 0.1504
subsequence: 0.1756
constituent: 0.1614

Non-Entailed accuracy: 0.16246666666666668

Overall accuracy: 0.5125666666666666

Subcase results:
ln_subject/object_swap: 0.175
ln_preposition: 0.163
ln_relative_clause: 0.143
ln_passive: 0.172
ln_conjunction: 0.099
le_relative_clause: 0.846
le_around_prepositional_phrase: 0.809
le_around_relative_clause: 0.849
le_conjunction: 0.897
le_passive: 0.792
sn_NP/S: 0.059
sn_PP_on_subject: 0.215
sn_relative_clause_on_subject: 0.187
sn_past_participle: 0.216
sn_NP/Z: 0.201
se_conjunction: 0.882
se_adjective: 0.87
se_understood_object: 0.855
se_relative_clause_on_obj: 0.967
se_PP_on_obj: 0.949
cn_embedded_under_if: 0.092
cn_after_if_clause: 0.191
cn_embedded_under_verb: 0.205
cn_disjunction: 0.18
cn_adverb: 0.139
ce_embedded_under_since: 0.977
ce_after_since_clause: 0.






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.7492
subsequence: 0.8566
constituent: 0.8244

Entailed accuracy: 0.8100666666666667

Heuristic non-entailed results:
lexical_overlap: 0.254
subsequence: 0.234
constituent: 0.196

Non-Entailed accuracy: 0.228

Overall accuracy: 0.5190333333333333

Subcase results:
ln_subject/object_swap: 0.242
ln_preposition: 0.268
ln_relative_clause: 0.288
ln_passive: 0.31
ln_conjunction: 0.162
le_relative_clause: 0.757
le_around_prepositional_phrase: 0.743
le_around_relative_clause: 0.761
le_conjunction: 0.847
le_passive: 0.638
sn_NP/S: 0.093
sn_PP_on_subject: 0.254
sn_relative_clause_on_subject: 0.279
sn_past_participle: 0.231
sn_NP/Z: 0.313
se_conjunction: 0.839
se_adjective: 0.851
se_understood_object: 0.84
se_relative_clause_on_obj: 0.898
se_PP_on_obj: 0.855
cn_embedded_under_if: 0.133
cn_after_if_clause: 0.202
cn_embedded_under_verb: 0.277
cn_disjunction: 0.157
cn_adverb: 0.211
ce_embedded_under_since: 0.927
ce_after_since_clause: 0.773
ce_embedded_u






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.07it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:56,  2.00it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:55,  2.03it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:53,  2.06it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:53,  2.07it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:52,  2.08it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.09it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:51,  2.10it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.911
subsequence: 0.886
constituent: 0.8786

Entailed accuracy: 0.8918666666666667

Heuristic non-entailed results:
lexical_overlap: 0.0762
subsequence: 0.1352
constituent: 0.121

Non-Entailed accuracy: 0.1108

Overall accuracy: 0.5013333333333333

Subcase results:
ln_subject/object_swap: 0.079
ln_preposition: 0.073
ln_relative_clause: 0.107
ln_passive: 0.083
ln_conjunction: 0.039
le_relative_clause: 0.893
le_around_prepositional_phrase: 0.907
le_around_relative_clause: 0.905
le_conjunction: 0.945
le_passive: 0.905
sn_NP/S: 0.032
sn_PP_on_subject: 0.128
sn_relative_clause_on_subject: 0.161
sn_past_participle: 0.153
sn_NP/Z: 0.202
se_conjunction: 0.922
se_adjective: 0.743
se_understood_object: 0.838
se_relative_clause_on_obj: 0.946
se_PP_on_obj: 0.981
cn_embedded_under_if: 0.046
cn_after_if_clause: 0.126
cn_embedded_under_verb: 0.212
cn_disjunction: 0.07
cn_adverb: 0.151
ce_embedded_under_since: 0.959
ce_after_since_clause: 0.835
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.7282
subsequence: 0.7754
constituent: 0.6954

Entailed accuracy: 0.733

Heuristic non-entailed results:
lexical_overlap: 0.3084
subsequence: 0.3426
constituent: 0.3732

Non-Entailed accuracy: 0.3414

Overall accuracy: 0.5372

Subcase results:
ln_subject/object_swap: 0.263
ln_preposition: 0.367
ln_relative_clause: 0.347
ln_passive: 0.231
ln_conjunction: 0.334
le_relative_clause: 0.761
le_around_prepositional_phrase: 0.66
le_around_relative_clause: 0.779
le_conjunction: 0.62
le_passive: 0.821
sn_NP/S: 0.049
sn_PP_on_subject: 0.293
sn_relative_clause_on_subject: 0.256
sn_past_participle: 0.375
sn_NP/Z: 0.74
se_conjunction: 0.717
se_adjective: 0.888
se_understood_object: 0.875
se_relative_clause_on_obj: 0.734
se_PP_on_obj: 0.663
cn_embedded_under_if: 0.723
cn_after_if_clause: 0.301
cn_embedded_under_verb: 0.396
cn_disjunction: 0.293
cn_adverb: 0.153
ce_embedded_under_since: 0.461
ce_after_since_clause: 0.729
ce_embedded_under_verb: 0.709
ce_co






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8646
subsequence: 0.9308
constituent: 0.833

Entailed accuracy: 0.8761333333333333

Heuristic non-entailed results:
lexical_overlap: 0.1964
subsequence: 0.1312
constituent: 0.183

Non-Entailed accuracy: 0.1702

Overall accuracy: 0.5231666666666667

Subcase results:
ln_subject/object_swap: 0.11
ln_preposition: 0.27
ln_relative_clause: 0.274
ln_passive: 0.111
ln_conjunction: 0.217
le_relative_clause: 0.815
le_around_prepositional_phrase: 0.874
le_around_relative_clause: 0.944
le_conjunction: 0.861
le_passive: 0.829
sn_NP/S: 0.037
sn_PP_on_subject: 0.093
sn_relative_clause_on_subject: 0.1
sn_past_participle: 0.081
sn_NP/Z: 0.345
se_conjunction: 0.917
se_adjective: 0.979
se_understood_object: 0.998
se_relative_clause_on_obj: 0.906
se_PP_on_obj: 0.854
cn_embedded_under_if: 0.498
cn_after_if_clause: 0.081
cn_embedded_under_verb: 0.105
cn_disjunction: 0.198
cn_adverb: 0.033
ce_embedded_under_since: 0.533
ce_after_since_clause: 0.921
ce_embedded_u






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.07it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.0798
subsequence: 0.153
constituent: 0.1086

Entailed accuracy: 0.1138

Heuristic non-entailed results:
lexical_overlap: 0.9292
subsequence: 0.9224
constituent: 0.8902

Non-Entailed accuracy: 0.9139333333333334

Overall accuracy: 0.5138666666666667

Subcase results:
ln_subject/object_swap: 0.937
ln_preposition: 0.979
ln_relative_clause: 0.972
ln_passive: 0.776
ln_conjunction: 0.982
le_relative_clause: 0.075
le_around_prepositional_phrase: 0.042
le_around_relative_clause: 0.076
le_conjunction: 0.041
le_passive: 0.165
sn_NP/S: 0.824
sn_PP_on_subject: 0.925
sn_relative_clause_on_subject: 0.912
sn_past_participle: 0.952
sn_NP/Z: 0.999
se_conjunction: 0.05
se_adjective: 0.461
se_understood_object: 0.141
se_relative_clause_on_obj: 0.078
se_PP_on_obj: 0.035
cn_embedded_under_if: 0.995
cn_after_if_clause: 0.952
cn_embedded_under_verb: 0.907
cn_disjunction: 0.979
cn_adverb: 0.618
ce_embedded_under_since: 0.003
ce_after_since_clause: 0.054
ce_embedd






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.6584
subsequence: 0.844
constituent: 0.6612

Entailed accuracy: 0.7212

Heuristic non-entailed results:
lexical_overlap: 0.4062
subsequence: 0.3008
constituent: 0.3724

Non-Entailed accuracy: 0.3598

Overall accuracy: 0.5405

Subcase results:
ln_subject/object_swap: 0.235
ln_preposition: 0.585
ln_relative_clause: 0.552
ln_passive: 0.163
ln_conjunction: 0.496
le_relative_clause: 0.657
le_around_prepositional_phrase: 0.564
le_around_relative_clause: 0.766
le_conjunction: 0.562
le_passive: 0.743
sn_NP/S: 0.242
sn_PP_on_subject: 0.227
sn_relative_clause_on_subject: 0.25
sn_past_participle: 0.28
sn_NP/Z: 0.505
se_conjunction: 0.861
se_adjective: 0.981
se_understood_object: 0.992
se_relative_clause_on_obj: 0.772
se_PP_on_obj: 0.614
cn_embedded_under_if: 0.84
cn_after_if_clause: 0.172
cn_embedded_under_verb: 0.35
cn_disjunction: 0.465
cn_adverb: 0.035
ce_embedded_under_since: 0.208
ce_after_since_clause: 0.824
ce_embedded_under_verb: 0.774
ce_con






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.0502
subsequence: 0.1388
constituent: 0.0982

Entailed accuracy: 0.09573333333333334

Heuristic non-entailed results:
lexical_overlap: 0.9476
subsequence: 0.9498
constituent: 0.8924

Non-Entailed accuracy: 0.9299333333333333

Overall accuracy: 0.5128333333333334

Subcase results:
ln_subject/object_swap: 0.939
ln_preposition: 0.987
ln_relative_clause: 0.98
ln_passive: 0.846
ln_conjunction: 0.986
le_relative_clause: 0.046
le_around_prepositional_phrase: 0.023
le_around_relative_clause: 0.046
le_conjunction: 0.036
le_passive: 0.1
sn_NP/S: 0.903
sn_PP_on_subject: 0.935
sn_relative_clause_on_subject: 0.957
sn_past_participle: 0.957
sn_NP/Z: 0.997
se_conjunction: 0.046
se_adjective: 0.361
se_understood_object: 0.203
se_relative_clause_on_obj: 0.064
se_PP_on_obj: 0.02
cn_embedded_under_if: 0.996
cn_after_if_clause: 0.92
cn_embedded_under_verb: 0.952
cn_disjunction: 0.987
cn_adverb: 0.607
ce_embedded_under_since: 0.018
ce_after_since_clause: 0.069






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<01:00,  1.95it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:58,  2.00it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:56,  2.03it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.05it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.07it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.08it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.10it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.408
subsequence: 0.583
constituent: 0.3808

Entailed accuracy: 0.45726666666666665

Heuristic non-entailed results:
lexical_overlap: 0.5326
subsequence: 0.5528
constituent: 0.5546

Non-Entailed accuracy: 0.5466666666666666

Overall accuracy: 0.5019666666666667

Subcase results:
ln_subject/object_swap: 0.332
ln_preposition: 0.723
ln_relative_clause: 0.705
ln_passive: 0.227
ln_conjunction: 0.676
le_relative_clause: 0.394
le_around_prepositional_phrase: 0.253
le_around_relative_clause: 0.382
le_conjunction: 0.303
le_passive: 0.708
sn_NP/S: 0.546
sn_PP_on_subject: 0.563
sn_relative_clause_on_subject: 0.548
sn_past_participle: 0.376
sn_NP/Z: 0.731
se_conjunction: 0.42
se_adjective: 0.868
se_understood_object: 0.888
se_relative_clause_on_obj: 0.411
se_PP_on_obj: 0.328
cn_embedded_under_if: 0.828
cn_after_if_clause: 0.528
cn_embedded_under_verb: 0.5
cn_disjunction: 0.751
cn_adverb: 0.166
ce_embedded_under_since: 0.113
ce_after_since_clause: 0.341






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8108
subsequence: 0.9254
constituent: 0.719

Entailed accuracy: 0.8184

Heuristic non-entailed results:
lexical_overlap: 0.2016
subsequence: 0.1618
constituent: 0.3

Non-Entailed accuracy: 0.22113333333333332

Overall accuracy: 0.5197666666666667

Subcase results:
ln_subject/object_swap: 0.095
ln_preposition: 0.278
ln_relative_clause: 0.314
ln_passive: 0.048
ln_conjunction: 0.273
le_relative_clause: 0.809
le_around_prepositional_phrase: 0.772
le_around_relative_clause: 0.88
le_conjunction: 0.672
le_passive: 0.921
sn_NP/S: 0.059
sn_PP_on_subject: 0.112
sn_relative_clause_on_subject: 0.126
sn_past_participle: 0.187
sn_NP/Z: 0.325
se_conjunction: 0.886
se_adjective: 0.985
se_understood_object: 0.979
se_relative_clause_on_obj: 0.897
se_PP_on_obj: 0.88
cn_embedded_under_if: 0.802
cn_after_if_clause: 0.164
cn_embedded_under_verb: 0.226
cn_disjunction: 0.286
cn_adverb: 0.022
ce_embedded_under_since: 0.264
ce_after_since_clause: 0.84
ce_embedded_u






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.04it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.1746
subsequence: 0.3084
constituent: 0.1912

Entailed accuracy: 0.22473333333333334

Heuristic non-entailed results:
lexical_overlap: 0.83
subsequence: 0.8134
constituent: 0.819

Non-Entailed accuracy: 0.8208

Overall accuracy: 0.5227666666666667

Subcase results:
ln_subject/object_swap: 0.783
ln_preposition: 0.901
ln_relative_clause: 0.919
ln_passive: 0.632
ln_conjunction: 0.915
le_relative_clause: 0.153
le_around_prepositional_phrase: 0.123
le_around_relative_clause: 0.224
le_conjunction: 0.103
le_passive: 0.27
sn_NP/S: 0.676
sn_PP_on_subject: 0.757
sn_relative_clause_on_subject: 0.829
sn_past_participle: 0.835
sn_NP/Z: 0.97
se_conjunction: 0.195
se_adjective: 0.632
se_understood_object: 0.353
se_relative_clause_on_obj: 0.212
se_PP_on_obj: 0.15
cn_embedded_under_if: 0.981
cn_after_if_clause: 0.876
cn_embedded_under_verb: 0.915
cn_disjunction: 0.924
cn_adverb: 0.399
ce_embedded_under_since: 0.022
ce_after_since_clause: 0.147
ce_embedded_






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.3848
subsequence: 0.5542
constituent: 0.2952

Entailed accuracy: 0.4114

Heuristic non-entailed results:
lexical_overlap: 0.559
subsequence: 0.5786
constituent: 0.6574

Non-Entailed accuracy: 0.5983333333333334

Overall accuracy: 0.5048666666666667

Subcase results:
ln_subject/object_swap: 0.348
ln_preposition: 0.735
ln_relative_clause: 0.76
ln_passive: 0.277
ln_conjunction: 0.675
le_relative_clause: 0.324
le_around_prepositional_phrase: 0.313
le_around_relative_clause: 0.398
le_conjunction: 0.231
le_passive: 0.658
sn_NP/S: 0.533
sn_PP_on_subject: 0.513
sn_relative_clause_on_subject: 0.555
sn_past_participle: 0.451
sn_NP/Z: 0.841
se_conjunction: 0.435
se_adjective: 0.872
se_understood_object: 0.798
se_relative_clause_on_obj: 0.352
se_PP_on_obj: 0.314
cn_embedded_under_if: 0.95
cn_after_if_clause: 0.711
cn_embedded_under_verb: 0.609
cn_disjunction: 0.827
cn_adverb: 0.19
ce_embedded_under_since: 0.02
ce_after_since_clause: 0.24
ce_embedded_u






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.3268
subsequence: 0.6042
constituent: 0.2698

Entailed accuracy: 0.40026666666666666

Heuristic non-entailed results:
lexical_overlap: 0.6506
subsequence: 0.669
constituent: 0.7198

Non-Entailed accuracy: 0.6798

Overall accuracy: 0.5400333333333334

Subcase results:
ln_subject/object_swap: 0.432
ln_preposition: 0.837
ln_relative_clause: 0.876
ln_passive: 0.29
ln_conjunction: 0.818
le_relative_clause: 0.287
le_around_prepositional_phrase: 0.211
le_around_relative_clause: 0.307
le_conjunction: 0.209
le_passive: 0.62
sn_NP/S: 0.66
sn_PP_on_subject: 0.636
sn_relative_clause_on_subject: 0.762
sn_past_participle: 0.529
sn_NP/Z: 0.758
se_conjunction: 0.398
se_adjective: 0.918
se_understood_object: 0.857
se_relative_clause_on_obj: 0.455
se_PP_on_obj: 0.393
cn_embedded_under_if: 0.96
cn_after_if_clause: 0.729
cn_embedded_under_verb: 0.789
cn_disjunction: 0.891
cn_adverb: 0.23
ce_embedded_under_since: 0.03
ce_after_since_clause: 0.219
ce_embedded_u






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:58,  1.99it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:57,  2.03it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:56,  2.05it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:55,  1.99it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:53,  2.02it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:52,  2.05it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:51,  2.07it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:51,  2.

Heuristic entailed results:
lexical_overlap: 0.8966
subsequence: 0.9478
constituent: 0.8762

Entailed accuracy: 0.9068666666666667

Heuristic non-entailed results:
lexical_overlap: 0.1332
subsequence: 0.0774
constituent: 0.1442

Non-Entailed accuracy: 0.11826666666666667

Overall accuracy: 0.5125666666666666

Subcase results:
ln_subject/object_swap: 0.073
ln_preposition: 0.163
ln_relative_clause: 0.223
ln_passive: 0.036
ln_conjunction: 0.171
le_relative_clause: 0.846
le_around_prepositional_phrase: 0.877
le_around_relative_clause: 0.937
le_conjunction: 0.858
le_passive: 0.965
sn_NP/S: 0.03
sn_PP_on_subject: 0.046
sn_relative_clause_on_subject: 0.065
sn_past_participle: 0.121
sn_NP/Z: 0.125
se_conjunction: 0.942
se_adjective: 0.99
se_understood_object: 0.998
se_relative_clause_on_obj: 0.931
se_PP_on_obj: 0.878
cn_embedded_under_if: 0.406
cn_after_if_clause: 0.095
cn_embedded_under_verb: 0.079
cn_disjunction: 0.122
cn_adverb: 0.019
ce_embedded_under_since: 0.653
ce_after_since_clause: 0.






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.12it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.4218
subsequence: 0.5672
constituent: 0.4314

Entailed accuracy: 0.47346666666666665

Heuristic non-entailed results:
lexical_overlap: 0.6366
subsequence: 0.57
constituent: 0.5958

Non-Entailed accuracy: 0.6008

Overall accuracy: 0.5371333333333334

Subcase results:
ln_subject/object_swap: 0.524
ln_preposition: 0.79
ln_relative_clause: 0.752
ln_passive: 0.408
ln_conjunction: 0.709
le_relative_clause: 0.404
le_around_prepositional_phrase: 0.308
le_around_relative_clause: 0.508
le_conjunction: 0.354
le_passive: 0.535
sn_NP/S: 0.319
sn_PP_on_subject: 0.484
sn_relative_clause_on_subject: 0.564
sn_past_participle: 0.659
sn_NP/Z: 0.824
se_conjunction: 0.528
se_adjective: 0.848
se_understood_object: 0.643
se_relative_clause_on_obj: 0.51
se_PP_on_obj: 0.307
cn_embedded_under_if: 0.858
cn_after_if_clause: 0.684
cn_embedded_under_verb: 0.615
cn_disjunction: 0.674
cn_adverb: 0.148
ce_embedded_under_since: 0.171
ce_after_since_clause: 0.342
ce_embedde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.05it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.6154
subsequence: 0.732
constituent: 0.5438

Entailed accuracy: 0.6304

Heuristic non-entailed results:
lexical_overlap: 0.3802
subsequence: 0.2952
constituent: 0.3916

Non-Entailed accuracy: 0.3556666666666667

Overall accuracy: 0.4930333333333333

Subcase results:
ln_subject/object_swap: 0.201
ln_preposition: 0.536
ln_relative_clause: 0.501
ln_passive: 0.169
ln_conjunction: 0.494
le_relative_clause: 0.611
le_around_prepositional_phrase: 0.483
le_around_relative_clause: 0.709
le_conjunction: 0.498
le_passive: 0.776
sn_NP/S: 0.145
sn_PP_on_subject: 0.294
sn_relative_clause_on_subject: 0.273
sn_past_participle: 0.214
sn_NP/Z: 0.55
se_conjunction: 0.674
se_adjective: 0.937
se_understood_object: 0.908
se_relative_clause_on_obj: 0.662
se_PP_on_obj: 0.479
cn_embedded_under_if: 0.775
cn_after_if_clause: 0.431
cn_embedded_under_verb: 0.197
cn_disjunction: 0.524
cn_adverb: 0.031
ce_embedded_under_since: 0.164
ce_after_since_clause: 0.574
ce_embedd






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.08it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.12it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:50,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.624
subsequence: 0.7956
constituent: 0.5774

Entailed accuracy: 0.6656666666666666

Heuristic non-entailed results:
lexical_overlap: 0.455
subsequence: 0.302
constituent: 0.406

Non-Entailed accuracy: 0.38766666666666666

Overall accuracy: 0.5266666666666666

Subcase results:
ln_subject/object_swap: 0.297
ln_preposition: 0.633
ln_relative_clause: 0.63
ln_passive: 0.117
ln_conjunction: 0.598
le_relative_clause: 0.594
le_around_prepositional_phrase: 0.539
le_around_relative_clause: 0.676
le_conjunction: 0.491
le_passive: 0.82
sn_NP/S: 0.232
sn_PP_on_subject: 0.313
sn_relative_clause_on_subject: 0.38
sn_past_participle: 0.278
sn_NP/Z: 0.307
se_conjunction: 0.675
se_adjective: 0.964
se_understood_object: 0.994
se_relative_clause_on_obj: 0.757
se_PP_on_obj: 0.588
cn_embedded_under_if: 0.633
cn_after_if_clause: 0.461
cn_embedded_under_verb: 0.271
cn_disjunction: 0.627
cn_adverb: 0.038
ce_embedded_under_since: 0.409
ce_after_since_clause: 0.506
c






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.4722
subsequence: 0.6736
constituent: 0.4244

Entailed accuracy: 0.5234

Heuristic non-entailed results:
lexical_overlap: 0.5146
subsequence: 0.4578
constituent: 0.5798

Non-Entailed accuracy: 0.5174

Overall accuracy: 0.5204

Subcase results:
ln_subject/object_swap: 0.37
ln_preposition: 0.666
ln_relative_clause: 0.669
ln_passive: 0.206
ln_conjunction: 0.662
le_relative_clause: 0.46
le_around_prepositional_phrase: 0.333
le_around_relative_clause: 0.488
le_conjunction: 0.292
le_passive: 0.788
sn_NP/S: 0.365
sn_PP_on_subject: 0.4
sn_relative_clause_on_subject: 0.476
sn_past_participle: 0.432
sn_NP/Z: 0.616
se_conjunction: 0.504
se_adjective: 0.93
se_understood_object: 0.871
se_relative_clause_on_obj: 0.577
se_PP_on_obj: 0.486
cn_embedded_under_if: 0.905
cn_after_if_clause: 0.639
cn_embedded_under_verb: 0.531
cn_disjunction: 0.73
cn_adverb: 0.094
ce_embedded_under_since: 0.098
ce_after_since_clause: 0.38
ce_embedded_under_verb: 0.537
ce_conju






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:58,  2.00it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:57,  2.03it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.9698
subsequence: 0.973
constituent: 0.9474

Entailed accuracy: 0.9634

Heuristic non-entailed results:
lexical_overlap: 0.0246
subsequence: 0.0378
constituent: 0.0782

Non-Entailed accuracy: 0.04686666666666667

Overall accuracy: 0.5051333333333333

Subcase results:
ln_subject/object_swap: 0.003
ln_preposition: 0.037
ln_relative_clause: 0.057
ln_passive: 0.008
ln_conjunction: 0.018
le_relative_clause: 0.915
le_around_prepositional_phrase: 0.967
le_around_relative_clause: 0.984
le_conjunction: 0.99
le_passive: 0.993
sn_NP/S: 0.008
sn_PP_on_subject: 0.008
sn_relative_clause_on_subject: 0.019
sn_past_participle: 0.074
sn_NP/Z: 0.08
se_conjunction: 0.99
se_adjective: 1.0
se_understood_object: 1.0
se_relative_clause_on_obj: 0.969
se_PP_on_obj: 0.906
cn_embedded_under_if: 0.201
cn_after_if_clause: 0.039
cn_embedded_under_verb: 0.019
cn_disjunction: 0.131
cn_adverb: 0.001
ce_embedded_under_since: 0.837
ce_after_since_clause: 0.968
ce_embedded_un






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.6926
subsequence: 0.7452
constituent: 0.5874

Entailed accuracy: 0.6750666666666667

Heuristic non-entailed results:
lexical_overlap: 0.3376
subsequence: 0.3974
constituent: 0.4578

Non-Entailed accuracy: 0.3976

Overall accuracy: 0.5363333333333333

Subcase results:
ln_subject/object_swap: 0.256
ln_preposition: 0.479
ln_relative_clause: 0.386
ln_passive: 0.238
ln_conjunction: 0.329
le_relative_clause: 0.629
le_around_prepositional_phrase: 0.562
le_around_relative_clause: 0.759
le_conjunction: 0.76
le_passive: 0.753
sn_NP/S: 0.146
sn_PP_on_subject: 0.393
sn_relative_clause_on_subject: 0.351
sn_past_participle: 0.457
sn_NP/Z: 0.64
se_conjunction: 0.736
se_adjective: 0.9
se_understood_object: 0.899
se_relative_clause_on_obj: 0.72
se_PP_on_obj: 0.471
cn_embedded_under_if: 0.638
cn_after_if_clause: 0.481
cn_embedded_under_verb: 0.393
cn_disjunction: 0.689
cn_adverb: 0.088
ce_embedded_under_since: 0.449
ce_after_since_clause: 0.468
ce_embedded_






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:57,  2.00it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:56,  2.03it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.06it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.07it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.09it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.7274
subsequence: 0.7702
constituent: 0.5852

Entailed accuracy: 0.6942666666666667

Heuristic non-entailed results:
lexical_overlap: 0.2402
subsequence: 0.273
constituent: 0.3572

Non-Entailed accuracy: 0.29013333333333335

Overall accuracy: 0.4922

Subcase results:
ln_subject/object_swap: 0.134
ln_preposition: 0.331
ln_relative_clause: 0.336
ln_passive: 0.182
ln_conjunction: 0.218
le_relative_clause: 0.692
le_around_prepositional_phrase: 0.632
le_around_relative_clause: 0.767
le_conjunction: 0.735
le_passive: 0.811
sn_NP/S: 0.134
sn_PP_on_subject: 0.28
sn_relative_clause_on_subject: 0.275
sn_past_participle: 0.244
sn_NP/Z: 0.432
se_conjunction: 0.776
se_adjective: 0.934
se_understood_object: 0.893
se_relative_clause_on_obj: 0.694
se_PP_on_obj: 0.554
cn_embedded_under_if: 0.511
cn_after_if_clause: 0.375
cn_embedded_under_verb: 0.259
cn_disjunction: 0.577
cn_adverb: 0.064
ce_embedded_under_since: 0.403
ce_after_since_clause: 0.51
ce_embedd






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8316
subsequence: 0.904
constituent: 0.7522

Entailed accuracy: 0.8292666666666667

Heuristic non-entailed results:
lexical_overlap: 0.1942
subsequence: 0.1848
constituent: 0.256

Non-Entailed accuracy: 0.21166666666666667

Overall accuracy: 0.5204666666666666

Subcase results:
ln_subject/object_swap: 0.054
ln_preposition: 0.307
ln_relative_clause: 0.273
ln_passive: 0.097
ln_conjunction: 0.24
le_relative_clause: 0.773
le_around_prepositional_phrase: 0.763
le_around_relative_clause: 0.87
le_conjunction: 0.866
le_passive: 0.886
sn_NP/S: 0.098
sn_PP_on_subject: 0.21
sn_relative_clause_on_subject: 0.262
sn_past_participle: 0.167
sn_NP/Z: 0.187
se_conjunction: 0.865
se_adjective: 0.993
se_understood_object: 0.999
se_relative_clause_on_obj: 0.894
se_PP_on_obj: 0.769
cn_embedded_under_if: 0.282
cn_after_if_clause: 0.295
cn_embedded_under_verb: 0.171
cn_disjunction: 0.528
cn_adverb: 0.004
ce_embedded_under_since: 0.735
ce_after_since_clause: 0.613






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.04it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.6922
subsequence: 0.8284
constituent: 0.5928

Entailed accuracy: 0.7044666666666667

Heuristic non-entailed results:
lexical_overlap: 0.2564
subsequence: 0.2996
constituent: 0.404

Non-Entailed accuracy: 0.32

Overall accuracy: 0.5122333333333333

Subcase results:
ln_subject/object_swap: 0.119
ln_preposition: 0.354
ln_relative_clause: 0.4
ln_passive: 0.11
ln_conjunction: 0.299
le_relative_clause: 0.624
le_around_prepositional_phrase: 0.585
le_around_relative_clause: 0.694
le_conjunction: 0.684
le_passive: 0.874
sn_NP/S: 0.255
sn_PP_on_subject: 0.246
sn_relative_clause_on_subject: 0.277
sn_past_participle: 0.347
sn_NP/Z: 0.373
se_conjunction: 0.803
se_adjective: 0.988
se_understood_object: 0.955
se_relative_clause_on_obj: 0.709
se_PP_on_obj: 0.687
cn_embedded_under_if: 0.702
cn_after_if_clause: 0.413
cn_embedded_under_verb: 0.258
cn_disjunction: 0.615
cn_adverb: 0.032
ce_embedded_under_since: 0.311
ce_after_since_clause: 0.56
ce_embedded_un






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8514
subsequence: 0.871
constituent: 0.753

Entailed accuracy: 0.8251333333333334

Heuristic non-entailed results:
lexical_overlap: 0.176
subsequence: 0.1648
constituent: 0.2516

Non-Entailed accuracy: 0.19746666666666668

Overall accuracy: 0.5113

Subcase results:
ln_subject/object_swap: 0.105
ln_preposition: 0.261
ln_relative_clause: 0.266
ln_passive: 0.038
ln_conjunction: 0.21
le_relative_clause: 0.8
le_around_prepositional_phrase: 0.778
le_around_relative_clause: 0.884
le_conjunction: 0.845
le_passive: 0.95
sn_NP/S: 0.126
sn_PP_on_subject: 0.14
sn_relative_clause_on_subject: 0.164
sn_past_participle: 0.194
sn_NP/Z: 0.2
se_conjunction: 0.869
se_adjective: 0.983
se_understood_object: 0.986
se_relative_clause_on_obj: 0.841
se_PP_on_obj: 0.676
cn_embedded_under_if: 0.377
cn_after_if_clause: 0.298
cn_embedded_under_verb: 0.132
cn_disjunction: 0.435
cn_adverb: 0.016
ce_embedded_under_since: 0.632
ce_after_since_clause: 0.678
ce_embedded_unde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.9308
subsequence: 0.9674
constituent: 0.933

Entailed accuracy: 0.9437333333333333

Heuristic non-entailed results:
lexical_overlap: 0.073
subsequence: 0.069
constituent: 0.0902

Non-Entailed accuracy: 0.0774

Overall accuracy: 0.5105666666666666

Subcase results:
ln_subject/object_swap: 0.041
ln_preposition: 0.103
ln_relative_clause: 0.146
ln_passive: 0.03
ln_conjunction: 0.045
le_relative_clause: 0.866
le_around_prepositional_phrase: 0.932
le_around_relative_clause: 0.939
le_conjunction: 0.955
le_passive: 0.962
sn_NP/S: 0.01
sn_PP_on_subject: 0.049
sn_relative_clause_on_subject: 0.062
sn_past_participle: 0.11
sn_NP/Z: 0.114
se_conjunction: 0.974
se_adjective: 0.982
se_understood_object: 1.0
se_relative_clause_on_obj: 0.946
se_PP_on_obj: 0.935
cn_embedded_under_if: 0.181
cn_after_if_clause: 0.09
cn_embedded_under_verb: 0.063
cn_disjunction: 0.106
cn_adverb: 0.011
ce_embedded_under_since: 0.878
ce_after_since_clause: 0.926
ce_embedded_unde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<01:00,  1.95it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:01<01:00,  1.92it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:58,  1.98it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:56,  2.02it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:55,  2.05it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:54,  2.07it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:53,  2.08it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.09it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.10it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.6224
subsequence: 0.7702
constituent: 0.5656

Entailed accuracy: 0.6527333333333334

Heuristic non-entailed results:
lexical_overlap: 0.378
subsequence: 0.4422
constituent: 0.49

Non-Entailed accuracy: 0.4367333333333333

Overall accuracy: 0.5447333333333333

Subcase results:
ln_subject/object_swap: 0.272
ln_preposition: 0.526
ln_relative_clause: 0.526
ln_passive: 0.249
ln_conjunction: 0.317
le_relative_clause: 0.53
le_around_prepositional_phrase: 0.58
le_around_relative_clause: 0.646
le_conjunction: 0.73
le_passive: 0.626
sn_NP/S: 0.096
sn_PP_on_subject: 0.397
sn_relative_clause_on_subject: 0.4
sn_past_participle: 0.572
sn_NP/Z: 0.746
se_conjunction: 0.768
se_adjective: 0.876
se_understood_object: 0.92
se_relative_clause_on_obj: 0.712
se_PP_on_obj: 0.575
cn_embedded_under_if: 0.7
cn_after_if_clause: 0.635
cn_embedded_under_verb: 0.46
cn_disjunction: 0.555
cn_adverb: 0.1
ce_embedded_under_since: 0.436
ce_after_since_clause: 0.415
ce_embedd






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.7786
subsequence: 0.8802
constituent: 0.7096

Entailed accuracy: 0.7894666666666666

Heuristic non-entailed results:
lexical_overlap: 0.2022
subsequence: 0.1878
constituent: 0.2432

Non-Entailed accuracy: 0.21106666666666668

Overall accuracy: 0.5002666666666666

Subcase results:
ln_subject/object_swap: 0.057
ln_preposition: 0.318
ln_relative_clause: 0.339
ln_passive: 0.137
ln_conjunction: 0.16
le_relative_clause: 0.694
le_around_prepositional_phrase: 0.709
le_around_relative_clause: 0.813
le_conjunction: 0.858
le_passive: 0.819
sn_NP/S: 0.037
sn_PP_on_subject: 0.203
sn_relative_clause_on_subject: 0.2
sn_past_participle: 0.14
sn_NP/Z: 0.359
se_conjunction: 0.888
se_adjective: 0.986
se_understood_object: 0.989
se_relative_clause_on_obj: 0.805
se_PP_on_obj: 0.733
cn_embedded_under_if: 0.432
cn_after_if_clause: 0.324
cn_embedded_under_verb: 0.122
cn_disjunction: 0.319
cn_adverb: 0.019
ce_embedded_under_since: 0.537
ce_after_since_clause: 0.63






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8676
subsequence: 0.9434
constituent: 0.8164

Entailed accuracy: 0.8758

Heuristic non-entailed results:
lexical_overlap: 0.1568
subsequence: 0.1364
constituent: 0.182

Non-Entailed accuracy: 0.1584

Overall accuracy: 0.5171

Subcase results:
ln_subject/object_swap: 0.028
ln_preposition: 0.266
ln_relative_clause: 0.307
ln_passive: 0.03
ln_conjunction: 0.153
le_relative_clause: 0.772
le_around_prepositional_phrase: 0.826
le_around_relative_clause: 0.889
le_conjunction: 0.898
le_passive: 0.953
sn_NP/S: 0.046
sn_PP_on_subject: 0.132
sn_relative_clause_on_subject: 0.152
sn_past_participle: 0.131
sn_NP/Z: 0.221
se_conjunction: 0.947
se_adjective: 0.994
se_understood_object: 0.999
se_relative_clause_on_obj: 0.901
se_PP_on_obj: 0.876
cn_embedded_under_if: 0.303
cn_after_if_clause: 0.192
cn_embedded_under_verb: 0.104
cn_disjunction: 0.307
cn_adverb: 0.004
ce_embedded_under_since: 0.742
ce_after_since_clause: 0.766
ce_embedded_under_verb: 0.894
ce_






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.7508
subsequence: 0.8838
constituent: 0.64

Entailed accuracy: 0.7582

Heuristic non-entailed results:
lexical_overlap: 0.2154
subsequence: 0.2538
constituent: 0.344

Non-Entailed accuracy: 0.2710666666666667

Overall accuracy: 0.5146333333333334

Subcase results:
ln_subject/object_swap: 0.071
ln_preposition: 0.331
ln_relative_clause: 0.406
ln_passive: 0.069
ln_conjunction: 0.2
le_relative_clause: 0.644
le_around_prepositional_phrase: 0.667
le_around_relative_clause: 0.751
le_conjunction: 0.782
le_passive: 0.91
sn_NP/S: 0.147
sn_PP_on_subject: 0.192
sn_relative_clause_on_subject: 0.219
sn_past_participle: 0.297
sn_NP/Z: 0.414
se_conjunction: 0.878
se_adjective: 0.997
se_understood_object: 0.982
se_relative_clause_on_obj: 0.777
se_PP_on_obj: 0.785
cn_embedded_under_if: 0.617
cn_after_if_clause: 0.409
cn_embedded_under_verb: 0.188
cn_disjunction: 0.486
cn_adverb: 0.02
ce_embedded_under_since: 0.397
ce_after_since_clause: 0.598
ce_embedded_un






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.08it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.824
subsequence: 0.9078
constituent: 0.765

Entailed accuracy: 0.8322666666666667

Heuristic non-entailed results:
lexical_overlap: 0.177
subsequence: 0.1352
constituent: 0.26

Non-Entailed accuracy: 0.19073333333333334

Overall accuracy: 0.5115

Subcase results:
ln_subject/object_swap: 0.06
ln_preposition: 0.253
ln_relative_clause: 0.321
ln_passive: 0.084
ln_conjunction: 0.167
le_relative_clause: 0.747
le_around_prepositional_phrase: 0.776
le_around_relative_clause: 0.858
le_conjunction: 0.873
le_passive: 0.866
sn_NP/S: 0.072
sn_PP_on_subject: 0.117
sn_relative_clause_on_subject: 0.123
sn_past_participle: 0.188
sn_NP/Z: 0.176
se_conjunction: 0.911
se_adjective: 0.98
se_understood_object: 0.998
se_relative_clause_on_obj: 0.848
se_PP_on_obj: 0.802
cn_embedded_under_if: 0.401
cn_after_if_clause: 0.334
cn_embedded_under_verb: 0.135
cn_disjunction: 0.409
cn_adverb: 0.021
ce_embedded_under_since: 0.648
ce_after_since_clause: 0.675
ce_embedded_u






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:59,  1.97it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:57,  2.01it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:56,  2.04it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8834
subsequence: 0.948
constituent: 0.88

Entailed accuracy: 0.9038

Heuristic non-entailed results:
lexical_overlap: 0.0884
subsequence: 0.1096
constituent: 0.1652

Non-Entailed accuracy: 0.12106666666666667

Overall accuracy: 0.5124333333333333

Subcase results:
ln_subject/object_swap: 0.008
ln_preposition: 0.165
ln_relative_clause: 0.134
ln_passive: 0.091
ln_conjunction: 0.044
le_relative_clause: 0.846
le_around_prepositional_phrase: 0.804
le_around_relative_clause: 0.907
le_conjunction: 0.976
le_passive: 0.884
sn_NP/S: 0.048
sn_PP_on_subject: 0.112
sn_relative_clause_on_subject: 0.075
sn_past_participle: 0.189
sn_NP/Z: 0.124
se_conjunction: 0.979
se_adjective: 0.993
se_understood_object: 1.0
se_relative_clause_on_obj: 0.935
se_PP_on_obj: 0.833
cn_embedded_under_if: 0.186
cn_after_if_clause: 0.188
cn_embedded_under_verb: 0.081
cn_disjunction: 0.369
cn_adverb: 0.002
ce_embedded_under_since: 0.886
ce_after_since_clause: 0.798
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8182
subsequence: 0.885
constituent: 0.7786

Entailed accuracy: 0.8272666666666667

Heuristic non-entailed results:
lexical_overlap: 0.2058
subsequence: 0.2234
constituent: 0.2994

Non-Entailed accuracy: 0.24286666666666668

Overall accuracy: 0.5350666666666667

Subcase results:
ln_subject/object_swap: 0.174
ln_preposition: 0.253
ln_relative_clause: 0.339
ln_passive: 0.09
ln_conjunction: 0.173
le_relative_clause: 0.74
le_around_prepositional_phrase: 0.79
le_around_relative_clause: 0.825
le_conjunction: 0.848
le_passive: 0.888
sn_NP/S: 0.026
sn_PP_on_subject: 0.177
sn_relative_clause_on_subject: 0.246
sn_past_participle: 0.387
sn_NP/Z: 0.281
se_conjunction: 0.876
se_adjective: 0.903
se_understood_object: 0.961
se_relative_clause_on_obj: 0.862
se_PP_on_obj: 0.823
cn_embedded_under_if: 0.447
cn_after_if_clause: 0.422
cn_embedded_under_verb: 0.317
cn_disjunction: 0.236
cn_adverb: 0.075
ce_embedded_under_since: 0.694
ce_after_since_clause: 0.69






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:57,  1.99it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:56,  2.02it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:55,  2.05it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:54,  2.07it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:53,  2.08it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.09it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.3436
subsequence: 0.4502
constituent: 0.288

Entailed accuracy: 0.3606

Heuristic non-entailed results:
lexical_overlap: 0.6698
subsequence: 0.7172
constituent: 0.7496

Non-Entailed accuracy: 0.7122

Overall accuracy: 0.5364

Subcase results:
ln_subject/object_swap: 0.607
ln_preposition: 0.776
ln_relative_clause: 0.792
ln_passive: 0.539
ln_conjunction: 0.635
le_relative_clause: 0.257
le_around_prepositional_phrase: 0.304
le_around_relative_clause: 0.335
le_conjunction: 0.407
le_passive: 0.415
sn_NP/S: 0.284
sn_PP_on_subject: 0.717
sn_relative_clause_on_subject: 0.766
sn_past_participle: 0.873
sn_NP/Z: 0.946
se_conjunction: 0.405
se_adjective: 0.606
se_understood_object: 0.511
se_relative_clause_on_obj: 0.392
se_PP_on_obj: 0.337
cn_embedded_under_if: 0.871
cn_after_if_clause: 0.909
cn_embedded_under_verb: 0.843
cn_disjunction: 0.756
cn_adverb: 0.369
ce_embedded_under_since: 0.195
ce_after_since_clause: 0.154
ce_embedded_under_verb: 0.261
ce






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.5032
subsequence: 0.609
constituent: 0.3544

Entailed accuracy: 0.48886666666666667

Heuristic non-entailed results:
lexical_overlap: 0.4956
subsequence: 0.544
constituent: 0.5816

Non-Entailed accuracy: 0.5404

Overall accuracy: 0.5146333333333334

Subcase results:
ln_subject/object_swap: 0.35
ln_preposition: 0.635
ln_relative_clause: 0.675
ln_passive: 0.3
ln_conjunction: 0.518
le_relative_clause: 0.366
le_around_prepositional_phrase: 0.444
le_around_relative_clause: 0.506
le_conjunction: 0.528
le_passive: 0.672
sn_NP/S: 0.223
sn_PP_on_subject: 0.54
sn_relative_clause_on_subject: 0.612
sn_past_participle: 0.571
sn_NP/Z: 0.774
se_conjunction: 0.524
se_adjective: 0.771
se_understood_object: 0.743
se_relative_clause_on_obj: 0.503
se_PP_on_obj: 0.504
cn_embedded_under_if: 0.737
cn_after_if_clause: 0.761
cn_embedded_under_verb: 0.607
cn_disjunction: 0.587
cn_adverb: 0.216
ce_embedded_under_since: 0.197
ce_after_since_clause: 0.227
ce_embedded_






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.08it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.6232
subsequence: 0.7706
constituent: 0.553

Entailed accuracy: 0.6489333333333334

Heuristic non-entailed results:
lexical_overlap: 0.41
subsequence: 0.3998
constituent: 0.4434

Non-Entailed accuracy: 0.41773333333333335

Overall accuracy: 0.5333333333333333

Subcase results:
ln_subject/object_swap: 0.275
ln_preposition: 0.546
ln_relative_clause: 0.609
ln_passive: 0.151
ln_conjunction: 0.469
le_relative_clause: 0.481
le_around_prepositional_phrase: 0.563
le_around_relative_clause: 0.613
le_conjunction: 0.633
le_passive: 0.826
sn_NP/S: 0.192
sn_PP_on_subject: 0.38
sn_relative_clause_on_subject: 0.495
sn_past_participle: 0.462
sn_NP/Z: 0.47
se_conjunction: 0.685
se_adjective: 0.91
se_understood_object: 0.911
se_relative_clause_on_obj: 0.693
se_PP_on_obj: 0.654
cn_embedded_under_if: 0.553
cn_after_if_clause: 0.642
cn_embedded_under_verb: 0.416
cn_disjunction: 0.529
cn_adverb: 0.077
ce_embedded_under_since: 0.503
ce_after_since_clause: 0.371







Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:58,  2.01it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.04it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.501
subsequence: 0.692
constituent: 0.378

Entailed accuracy: 0.5236666666666666

Heuristic non-entailed results:
lexical_overlap: 0.4694
subsequence: 0.5204
constituent: 0.6498

Non-Entailed accuracy: 0.5465333333333333

Overall accuracy: 0.5351

Subcase results:
ln_subject/object_swap: 0.343
ln_preposition: 0.582
ln_relative_clause: 0.675
ln_passive: 0.21
ln_conjunction: 0.537
le_relative_clause: 0.333
le_around_prepositional_phrase: 0.463
le_around_relative_clause: 0.495
le_conjunction: 0.477
le_passive: 0.737
sn_NP/S: 0.283
sn_PP_on_subject: 0.473
sn_relative_clause_on_subject: 0.574
sn_past_participle: 0.611
sn_NP/Z: 0.661
se_conjunction: 0.548
se_adjective: 0.836
se_understood_object: 0.851
se_relative_clause_on_obj: 0.57
se_PP_on_obj: 0.655
cn_embedded_under_if: 0.839
cn_after_if_clause: 0.82
cn_embedded_under_verb: 0.68
cn_disjunction: 0.694
cn_adverb: 0.216
ce_embedded_under_since: 0.203
ce_after_since_clause: 0.221
ce_embedded_un






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.6836
subsequence: 0.8078
constituent: 0.6064

Entailed accuracy: 0.6992666666666667

Heuristic non-entailed results:
lexical_overlap: 0.3568
subsequence: 0.2996
constituent: 0.4054

Non-Entailed accuracy: 0.3539333333333333

Overall accuracy: 0.5266

Subcase results:
ln_subject/object_swap: 0.281
ln_preposition: 0.461
ln_relative_clause: 0.495
ln_passive: 0.186
ln_conjunction: 0.361
le_relative_clause: 0.59
le_around_prepositional_phrase: 0.623
le_around_relative_clause: 0.716
le_conjunction: 0.721
le_passive: 0.768
sn_NP/S: 0.09
sn_PP_on_subject: 0.289
sn_relative_clause_on_subject: 0.326
sn_past_participle: 0.468
sn_NP/Z: 0.325
se_conjunction: 0.761
se_adjective: 0.859
se_understood_object: 0.911
se_relative_clause_on_obj: 0.756
se_PP_on_obj: 0.752
cn_embedded_under_if: 0.538
cn_after_if_clause: 0.571
cn_embedded_under_verb: 0.355
cn_disjunction: 0.449
cn_adverb: 0.114
ce_embedded_under_since: 0.516
ce_after_since_clause: 0.414
ce_embedd






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.05it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.902
subsequence: 0.9534
constituent: 0.842

Entailed accuracy: 0.8991333333333333

Heuristic non-entailed results:
lexical_overlap: 0.0874
subsequence: 0.1334
constituent: 0.1758

Non-Entailed accuracy: 0.1322

Overall accuracy: 0.5156666666666667

Subcase results:
ln_subject/object_swap: 0.035
ln_preposition: 0.136
ln_relative_clause: 0.127
ln_passive: 0.062
ln_conjunction: 0.077
le_relative_clause: 0.848
le_around_prepositional_phrase: 0.84
le_around_relative_clause: 0.931
le_conjunction: 0.961
le_passive: 0.93
sn_NP/S: 0.049
sn_PP_on_subject: 0.13
sn_relative_clause_on_subject: 0.118
sn_past_participle: 0.227
sn_NP/Z: 0.143
se_conjunction: 0.94
se_adjective: 0.978
se_understood_object: 0.986
se_relative_clause_on_obj: 0.945
se_PP_on_obj: 0.918
cn_embedded_under_if: 0.17
cn_after_if_clause: 0.273
cn_embedded_under_verb: 0.13
cn_disjunction: 0.303
cn_adverb: 0.003
ce_embedded_under_since: 0.871
ce_after_since_clause: 0.706
ce_embedded_und






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.05it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8462
subsequence: 0.9176
constituent: 0.824

Entailed accuracy: 0.8626

Heuristic non-entailed results:
lexical_overlap: 0.1794
subsequence: 0.1802
constituent: 0.2102

Non-Entailed accuracy: 0.18993333333333334

Overall accuracy: 0.5262666666666667

Subcase results:
ln_subject/object_swap: 0.148
ln_preposition: 0.269
ln_relative_clause: 0.312
ln_passive: 0.057
ln_conjunction: 0.111
le_relative_clause: 0.759
le_around_prepositional_phrase: 0.821
le_around_relative_clause: 0.863
le_conjunction: 0.935
le_passive: 0.853
sn_NP/S: 0.028
sn_PP_on_subject: 0.223
sn_relative_clause_on_subject: 0.225
sn_past_participle: 0.225
sn_NP/Z: 0.2
se_conjunction: 0.898
se_adjective: 0.946
se_understood_object: 0.96
se_relative_clause_on_obj: 0.896
se_PP_on_obj: 0.888
cn_embedded_under_if: 0.211
cn_after_if_clause: 0.321
cn_embedded_under_verb: 0.224
cn_disjunction: 0.261
cn_adverb: 0.034
ce_embedded_under_since: 0.858
ce_after_since_clause: 0.662
ce_embedde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8698
subsequence: 0.929
constituent: 0.87

Entailed accuracy: 0.8896

Heuristic non-entailed results:
lexical_overlap: 0.1528
subsequence: 0.1524
constituent: 0.1446

Non-Entailed accuracy: 0.14993333333333334

Overall accuracy: 0.5197666666666667

Subcase results:
ln_subject/object_swap: 0.121
ln_preposition: 0.188
ln_relative_clause: 0.192
ln_passive: 0.097
ln_conjunction: 0.166
le_relative_clause: 0.807
le_around_prepositional_phrase: 0.874
le_around_relative_clause: 0.907
le_conjunction: 0.853
le_passive: 0.908
sn_NP/S: 0.07
sn_PP_on_subject: 0.113
sn_relative_clause_on_subject: 0.116
sn_past_participle: 0.22
sn_NP/Z: 0.243
se_conjunction: 0.922
se_adjective: 0.965
se_understood_object: 0.988
se_relative_clause_on_obj: 0.885
se_PP_on_obj: 0.885
cn_embedded_under_if: 0.283
cn_after_if_clause: 0.097
cn_embedded_under_verb: 0.149
cn_disjunction: 0.16
cn_adverb: 0.034
ce_embedded_under_since: 0.689
ce_after_since_clause: 0.908
ce_embedded_






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.4428
subsequence: 0.5752
constituent: 0.516

Entailed accuracy: 0.5113333333333333

Heuristic non-entailed results:
lexical_overlap: 0.6056
subsequence: 0.581
constituent: 0.5252

Non-Entailed accuracy: 0.5706

Overall accuracy: 0.5409666666666667

Subcase results:
ln_subject/object_swap: 0.599
ln_preposition: 0.706
ln_relative_clause: 0.618
ln_passive: 0.489
ln_conjunction: 0.616
le_relative_clause: 0.429
le_around_prepositional_phrase: 0.382
le_around_relative_clause: 0.488
le_conjunction: 0.442
le_passive: 0.473
sn_NP/S: 0.287
sn_PP_on_subject: 0.551
sn_relative_clause_on_subject: 0.529
sn_past_participle: 0.678
sn_NP/Z: 0.86
se_conjunction: 0.525
se_adjective: 0.774
se_understood_object: 0.701
se_relative_clause_on_obj: 0.463
se_PP_on_obj: 0.413
cn_embedded_under_if: 0.733
cn_after_if_clause: 0.55
cn_embedded_under_verb: 0.533
cn_disjunction: 0.608
cn_adverb: 0.202
ce_embedded_under_since: 0.296
ce_after_since_clause: 0.494
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.12it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.12it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:50,  2.12it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.6274
subsequence: 0.7192
constituent: 0.5854

Entailed accuracy: 0.644

Heuristic non-entailed results:
lexical_overlap: 0.3754
subsequence: 0.3892
constituent: 0.3634

Non-Entailed accuracy: 0.376

Overall accuracy: 0.51

Subcase results:
ln_subject/object_swap: 0.279
ln_preposition: 0.438
ln_relative_clause: 0.437
ln_passive: 0.259
ln_conjunction: 0.464
le_relative_clause: 0.597
le_around_prepositional_phrase: 0.58
le_around_relative_clause: 0.644
le_conjunction: 0.567
le_passive: 0.749
sn_NP/S: 0.212
sn_PP_on_subject: 0.371
sn_relative_clause_on_subject: 0.367
sn_past_participle: 0.328
sn_NP/Z: 0.668
se_conjunction: 0.637
se_adjective: 0.892
se_understood_object: 0.867
se_relative_clause_on_obj: 0.592
se_PP_on_obj: 0.608
cn_embedded_under_if: 0.535
cn_after_if_clause: 0.392
cn_embedded_under_verb: 0.342
cn_disjunction: 0.423
cn_adverb: 0.125
ce_embedded_under_since: 0.357
ce_after_since_clause: 0.597
ce_embedded_under_verb: 0.673
ce_con






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.12it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.6704
subsequence: 0.8284
constituent: 0.713

Entailed accuracy: 0.7372666666666666

Heuristic non-entailed results:
lexical_overlap: 0.355
subsequence: 0.3042
constituent: 0.2996

Non-Entailed accuracy: 0.3196

Overall accuracy: 0.5284333333333333

Subcase results:
ln_subject/object_swap: 0.23
ln_preposition: 0.454
ln_relative_clause: 0.457
ln_passive: 0.159
ln_conjunction: 0.475
le_relative_clause: 0.619
le_around_prepositional_phrase: 0.587
le_around_relative_clause: 0.693
le_conjunction: 0.633
le_passive: 0.82
sn_NP/S: 0.207
sn_PP_on_subject: 0.299
sn_relative_clause_on_subject: 0.316
sn_past_participle: 0.256
sn_NP/Z: 0.443
se_conjunction: 0.753
se_adjective: 0.962
se_understood_object: 0.977
se_relative_clause_on_obj: 0.753
se_PP_on_obj: 0.697
cn_embedded_under_if: 0.455
cn_after_if_clause: 0.258
cn_embedded_under_verb: 0.283
cn_disjunction: 0.444
cn_adverb: 0.058
ce_embedded_under_since: 0.547
ce_after_since_clause: 0.74
ce_embedded_






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:59,  1.98it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:57,  2.02it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:56,  2.05it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.642
subsequence: 0.7962
constituent: 0.636

Entailed accuracy: 0.6914

Heuristic non-entailed results:
lexical_overlap: 0.359
subsequence: 0.3558
constituent: 0.369

Non-Entailed accuracy: 0.3612666666666667

Overall accuracy: 0.5263333333333333

Subcase results:
ln_subject/object_swap: 0.271
ln_preposition: 0.432
ln_relative_clause: 0.438
ln_passive: 0.188
ln_conjunction: 0.466
le_relative_clause: 0.605
le_around_prepositional_phrase: 0.586
le_around_relative_clause: 0.679
le_conjunction: 0.523
le_passive: 0.817
sn_NP/S: 0.267
sn_PP_on_subject: 0.302
sn_relative_clause_on_subject: 0.306
sn_past_participle: 0.334
sn_NP/Z: 0.57
se_conjunction: 0.7
se_adjective: 0.965
se_understood_object: 0.913
se_relative_clause_on_obj: 0.696
se_PP_on_obj: 0.707
cn_embedded_under_if: 0.654
cn_after_if_clause: 0.32
cn_embedded_under_verb: 0.33
cn_disjunction: 0.462
cn_adverb: 0.079
ce_embedded_under_since: 0.342
ce_after_since_clause: 0.686
ce_embedded_unde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:56,  1.98it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:55,  2.02it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:54,  2.05it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:53,  2.07it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:52,  2.08it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.09it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:51,  2.10it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8308
subsequence: 0.9046
constituent: 0.8128

Entailed accuracy: 0.8494

Heuristic non-entailed results:
lexical_overlap: 0.2028
subsequence: 0.1404
constituent: 0.1916

Non-Entailed accuracy: 0.17826666666666666

Overall accuracy: 0.5138333333333334

Subcase results:
ln_subject/object_swap: 0.172
ln_preposition: 0.294
ln_relative_clause: 0.241
ln_passive: 0.07
ln_conjunction: 0.237
le_relative_clause: 0.796
le_around_prepositional_phrase: 0.787
le_around_relative_clause: 0.879
le_conjunction: 0.781
le_passive: 0.911
sn_NP/S: 0.082
sn_PP_on_subject: 0.161
sn_relative_clause_on_subject: 0.121
sn_past_participle: 0.137
sn_NP/Z: 0.201
se_conjunction: 0.865
se_adjective: 0.975
se_understood_object: 0.983
se_relative_clause_on_obj: 0.869
se_PP_on_obj: 0.831
cn_embedded_under_if: 0.365
cn_after_if_clause: 0.148
cn_embedded_under_verb: 0.131
cn_disjunction: 0.283
cn_adverb: 0.031
ce_embedded_under_since: 0.628
ce_after_since_clause: 0.841
ce_embe






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.05it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.914
subsequence: 0.9552
constituent: 0.8888

Entailed accuracy: 0.9193333333333333

Heuristic non-entailed results:
lexical_overlap: 0.0742
subsequence: 0.1046
constituent: 0.1304

Non-Entailed accuracy: 0.10306666666666667

Overall accuracy: 0.5112

Subcase results:
ln_subject/object_swap: 0.025
ln_preposition: 0.134
ln_relative_clause: 0.094
ln_passive: 0.041
ln_conjunction: 0.077
le_relative_clause: 0.876
le_around_prepositional_phrase: 0.876
le_around_relative_clause: 0.94
le_conjunction: 0.938
le_passive: 0.94
sn_NP/S: 0.11
sn_PP_on_subject: 0.061
sn_relative_clause_on_subject: 0.075
sn_past_participle: 0.13
sn_NP/Z: 0.147
se_conjunction: 0.958
se_adjective: 0.992
se_understood_object: 0.989
se_relative_clause_on_obj: 0.943
se_PP_on_obj: 0.894
cn_embedded_under_if: 0.178
cn_after_if_clause: 0.082
cn_embedded_under_verb: 0.069
cn_disjunction: 0.317
cn_adverb: 0.006
ce_embedded_under_since: 0.788
ce_after_since_clause: 0.895
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8812
subsequence: 0.9472
constituent: 0.8998

Entailed accuracy: 0.9094

Heuristic non-entailed results:
lexical_overlap: 0.1154
subsequence: 0.1246
constituent: 0.1134

Non-Entailed accuracy: 0.1178

Overall accuracy: 0.5136

Subcase results:
ln_subject/object_swap: 0.046
ln_preposition: 0.165
ln_relative_clause: 0.206
ln_passive: 0.053
ln_conjunction: 0.107
le_relative_clause: 0.809
le_around_prepositional_phrase: 0.863
le_around_relative_clause: 0.877
le_conjunction: 0.927
le_passive: 0.93
sn_NP/S: 0.064
sn_PP_on_subject: 0.086
sn_relative_clause_on_subject: 0.104
sn_past_participle: 0.176
sn_NP/Z: 0.193
se_conjunction: 0.948
se_adjective: 0.99
se_understood_object: 0.992
se_relative_clause_on_obj: 0.897
se_PP_on_obj: 0.909
cn_embedded_under_if: 0.197
cn_after_if_clause: 0.073
cn_embedded_under_verb: 0.099
cn_disjunction: 0.188
cn_adverb: 0.01
ce_embedded_under_since: 0.816
ce_after_since_clause: 0.908
ce_embedded_under_verb: 0.942
ce_c






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.9368
subsequence: 0.9656
constituent: 0.9472

Entailed accuracy: 0.9498666666666666

Heuristic non-entailed results:
lexical_overlap: 0.069
subsequence: 0.0802
constituent: 0.0434

Non-Entailed accuracy: 0.0642

Overall accuracy: 0.5070333333333333

Subcase results:
ln_subject/object_swap: 0.056
ln_preposition: 0.116
ln_relative_clause: 0.073
ln_passive: 0.017
ln_conjunction: 0.083
le_relative_clause: 0.925
le_around_prepositional_phrase: 0.904
le_around_relative_clause: 0.954
le_conjunction: 0.927
le_passive: 0.974
sn_NP/S: 0.023
sn_PP_on_subject: 0.089
sn_relative_clause_on_subject: 0.055
sn_past_participle: 0.096
sn_NP/Z: 0.138
se_conjunction: 0.933
se_adjective: 0.992
se_understood_object: 0.994
se_relative_clause_on_obj: 0.969
se_PP_on_obj: 0.94
cn_embedded_under_if: 0.069
cn_after_if_clause: 0.051
cn_embedded_under_verb: 0.051
cn_disjunction: 0.035
cn_adverb: 0.011
ce_embedded_under_since: 0.919
ce_after_since_clause: 0.908
ce_embedd






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.09it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.7704
subsequence: 0.8492
constituent: 0.8312

Entailed accuracy: 0.8169333333333333

Heuristic non-entailed results:
lexical_overlap: 0.2366
subsequence: 0.1792
constituent: 0.2576

Non-Entailed accuracy: 0.22446666666666668

Overall accuracy: 0.5207

Subcase results:
ln_subject/object_swap: 0.26
ln_preposition: 0.254
ln_relative_clause: 0.317
ln_passive: 0.19
ln_conjunction: 0.162
le_relative_clause: 0.729
le_around_prepositional_phrase: 0.794
le_around_relative_clause: 0.818
le_conjunction: 0.835
le_passive: 0.676
sn_NP/S: 0.036
sn_PP_on_subject: 0.155
sn_relative_clause_on_subject: 0.213
sn_past_participle: 0.221
sn_NP/Z: 0.271
se_conjunction: 0.883
se_adjective: 0.751
se_understood_object: 0.927
se_relative_clause_on_obj: 0.827
se_PP_on_obj: 0.858
cn_embedded_under_if: 0.339
cn_after_if_clause: 0.366
cn_embedded_under_verb: 0.256
cn_disjunction: 0.178
cn_adverb: 0.149
ce_embedded_under_since: 0.8
ce_after_since_clause: 0.79
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.07it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.2358
subsequence: 0.3948
constituent: 0.3234

Entailed accuracy: 0.318

Heuristic non-entailed results:
lexical_overlap: 0.7544
subsequence: 0.7124
constituent: 0.738

Non-Entailed accuracy: 0.7349333333333333

Overall accuracy: 0.5264666666666666

Subcase results:
ln_subject/object_swap: 0.756
ln_preposition: 0.777
ln_relative_clause: 0.819
ln_passive: 0.751
ln_conjunction: 0.669
le_relative_clause: 0.237
le_around_prepositional_phrase: 0.215
le_around_relative_clause: 0.261
le_conjunction: 0.327
le_passive: 0.139
sn_NP/S: 0.371
sn_PP_on_subject: 0.71
sn_relative_clause_on_subject: 0.738
sn_past_participle: 0.8
sn_NP/Z: 0.943
se_conjunction: 0.406
se_adjective: 0.414
se_understood_object: 0.497
se_relative_clause_on_obj: 0.328
se_PP_on_obj: 0.329
cn_embedded_under_if: 0.844
cn_after_if_clause: 0.866
cn_embedded_under_verb: 0.756
cn_disjunction: 0.674
cn_adverb: 0.55
ce_embedded_under_since: 0.274
ce_after_since_clause: 0.218
ce_embedded_u






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:58,  1.98it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:57,  2.02it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:56,  2.05it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:55,  2.07it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.3336
subsequence: 0.5028
constituent: 0.3546

Entailed accuracy: 0.397

Heuristic non-entailed results:
lexical_overlap: 0.6818
subsequence: 0.5814
constituent: 0.6146

Non-Entailed accuracy: 0.6259333333333333

Overall accuracy: 0.5114666666666666

Subcase results:
ln_subject/object_swap: 0.676
ln_preposition: 0.694
ln_relative_clause: 0.743
ln_passive: 0.632
ln_conjunction: 0.664
le_relative_clause: 0.35
le_around_prepositional_phrase: 0.316
le_around_relative_clause: 0.36
le_conjunction: 0.366
le_passive: 0.276
sn_NP/S: 0.399
sn_PP_on_subject: 0.599
sn_relative_clause_on_subject: 0.653
sn_past_participle: 0.451
sn_NP/Z: 0.805
se_conjunction: 0.456
se_adjective: 0.473
se_understood_object: 0.705
se_relative_clause_on_obj: 0.4
se_PP_on_obj: 0.48
cn_embedded_under_if: 0.669
cn_after_if_clause: 0.737
cn_embedded_under_verb: 0.606
cn_disjunction: 0.596
cn_adverb: 0.465
ce_embedded_under_since: 0.286
ce_after_since_clause: 0.269
ce_embedded_u






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.08it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.10it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.5558
subsequence: 0.744
constituent: 0.6332

Entailed accuracy: 0.6443333333333333

Heuristic non-entailed results:
lexical_overlap: 0.4602
subsequence: 0.4036
constituent: 0.4038

Non-Entailed accuracy: 0.4225333333333333

Overall accuracy: 0.5334333333333333

Subcase results:
ln_subject/object_swap: 0.434
ln_preposition: 0.522
ln_relative_clause: 0.569
ln_passive: 0.312
ln_conjunction: 0.464
le_relative_clause: 0.527
le_around_prepositional_phrase: 0.508
le_around_relative_clause: 0.528
le_conjunction: 0.593
le_passive: 0.623
sn_NP/S: 0.278
sn_PP_on_subject: 0.357
sn_relative_clause_on_subject: 0.448
sn_past_participle: 0.395
sn_NP/Z: 0.54
se_conjunction: 0.719
se_adjective: 0.737
se_understood_object: 0.85
se_relative_clause_on_obj: 0.673
se_PP_on_obj: 0.741
cn_embedded_under_if: 0.469
cn_after_if_clause: 0.576
cn_embedded_under_verb: 0.337
cn_disjunction: 0.437
cn_adverb: 0.2
ce_embedded_under_since: 0.595
ce_after_since_clause: 0.491







Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.04it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.346
subsequence: 0.581
constituent: 0.404

Entailed accuracy: 0.44366666666666665

Heuristic non-entailed results:
lexical_overlap: 0.6788
subsequence: 0.5912
constituent: 0.6424

Non-Entailed accuracy: 0.6374666666666666

Overall accuracy: 0.5405666666666666

Subcase results:
ln_subject/object_swap: 0.659
ln_preposition: 0.699
ln_relative_clause: 0.75
ln_passive: 0.583
ln_conjunction: 0.703
le_relative_clause: 0.309
le_around_prepositional_phrase: 0.328
le_around_relative_clause: 0.391
le_conjunction: 0.367
le_passive: 0.335
sn_NP/S: 0.442
sn_PP_on_subject: 0.538
sn_relative_clause_on_subject: 0.568
sn_past_participle: 0.613
sn_NP/Z: 0.795
se_conjunction: 0.507
se_adjective: 0.608
se_understood_object: 0.779
se_relative_clause_on_obj: 0.463
se_PP_on_obj: 0.548
cn_embedded_under_if: 0.757
cn_after_if_clause: 0.729
cn_embedded_under_verb: 0.602
cn_disjunction: 0.666
cn_adverb: 0.458
ce_embedded_under_since: 0.252
ce_after_since_clause: 0.33






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.629
subsequence: 0.7994
constituent: 0.6976

Entailed accuracy: 0.7086666666666667

Heuristic non-entailed results:
lexical_overlap: 0.4158
subsequence: 0.246
constituent: 0.328

Non-Entailed accuracy: 0.32993333333333336

Overall accuracy: 0.5193

Subcase results:
ln_subject/object_swap: 0.41
ln_preposition: 0.426
ln_relative_clause: 0.436
ln_passive: 0.407
ln_conjunction: 0.4
le_relative_clause: 0.655
le_around_prepositional_phrase: 0.654
le_around_relative_clause: 0.696
le_conjunction: 0.637
le_passive: 0.503
sn_NP/S: 0.105
sn_PP_on_subject: 0.259
sn_relative_clause_on_subject: 0.255
sn_past_participle: 0.261
sn_NP/Z: 0.35
se_conjunction: 0.752
se_adjective: 0.758
se_understood_object: 0.96
se_relative_clause_on_obj: 0.749
se_PP_on_obj: 0.778
cn_embedded_under_if: 0.37
cn_after_if_clause: 0.426
cn_embedded_under_verb: 0.314
cn_disjunction: 0.337
cn_adverb: 0.193
ce_embedded_under_since: 0.685
ce_after_since_clause: 0.564
ce_embedded_und






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.07it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.09it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8486
subsequence: 0.906
constituent: 0.8696

Entailed accuracy: 0.8747333333333334

Heuristic non-entailed results:
lexical_overlap: 0.1424
subsequence: 0.1208
constituent: 0.1412

Non-Entailed accuracy: 0.1348

Overall accuracy: 0.5047666666666667

Subcase results:
ln_subject/object_swap: 0.154
ln_preposition: 0.168
ln_relative_clause: 0.155
ln_passive: 0.12
ln_conjunction: 0.115
le_relative_clause: 0.86
le_around_prepositional_phrase: 0.791
le_around_relative_clause: 0.858
le_conjunction: 0.892
le_passive: 0.842
sn_NP/S: 0.022
sn_PP_on_subject: 0.164
sn_relative_clause_on_subject: 0.147
sn_past_participle: 0.113
sn_NP/Z: 0.158
se_conjunction: 0.901
se_adjective: 0.884
se_understood_object: 0.951
se_relative_clause_on_obj: 0.903
se_PP_on_obj: 0.891
cn_embedded_under_if: 0.121
cn_after_if_clause: 0.227
cn_embedded_under_verb: 0.119
cn_disjunction: 0.163
cn_adverb: 0.076
ce_embedded_under_since: 0.904
ce_after_since_clause: 0.779
ce_embedde






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:57,  2.04it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:56,  2.04it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.06it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.08it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.10it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.7806
subsequence: 0.8774
constituent: 0.8544

Entailed accuracy: 0.8374666666666667

Heuristic non-entailed results:
lexical_overlap: 0.2224
subsequence: 0.1732
constituent: 0.1744

Non-Entailed accuracy: 0.19

Overall accuracy: 0.5137333333333334

Subcase results:
ln_subject/object_swap: 0.208
ln_preposition: 0.276
ln_relative_clause: 0.284
ln_passive: 0.194
ln_conjunction: 0.15
le_relative_clause: 0.764
le_around_prepositional_phrase: 0.721
le_around_relative_clause: 0.796
le_conjunction: 0.863
le_passive: 0.759
sn_NP/S: 0.033
sn_PP_on_subject: 0.203
sn_relative_clause_on_subject: 0.201
sn_past_participle: 0.177
sn_NP/Z: 0.252
se_conjunction: 0.901
se_adjective: 0.861
se_understood_object: 0.927
se_relative_clause_on_obj: 0.851
se_PP_on_obj: 0.847
cn_embedded_under_if: 0.195
cn_after_if_clause: 0.267
cn_embedded_under_verb: 0.138
cn_disjunction: 0.182
cn_adverb: 0.09
ce_embedded_under_since: 0.875
ce_after_since_clause: 0.758
ce_embedded






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.10it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.11it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8472
subsequence: 0.8564
constituent: 0.84

Entailed accuracy: 0.8478666666666667

Heuristic non-entailed results:
lexical_overlap: 0.1626
subsequence: 0.197
constituent: 0.169

Non-Entailed accuracy: 0.1762

Overall accuracy: 0.5120333333333333

Subcase results:
ln_subject/object_swap: 0.159
ln_preposition: 0.19
ln_relative_clause: 0.182
ln_passive: 0.13
ln_conjunction: 0.152
le_relative_clause: 0.855
le_around_prepositional_phrase: 0.803
le_around_relative_clause: 0.844
le_conjunction: 0.856
le_passive: 0.878
sn_NP/S: 0.022
sn_PP_on_subject: 0.238
sn_relative_clause_on_subject: 0.206
sn_past_participle: 0.14
sn_NP/Z: 0.379
se_conjunction: 0.85
se_adjective: 0.833
se_understood_object: 0.788
se_relative_clause_on_obj: 0.905
se_PP_on_obj: 0.906
cn_embedded_under_if: 0.176
cn_after_if_clause: 0.244
cn_embedded_under_verb: 0.161
cn_disjunction: 0.141
cn_adverb: 0.123
ce_embedded_under_since: 0.847
ce_after_since_clause: 0.748
ce_embedded_und






Evaluating:   0%|          | 0/118 [00:00<?, ?it/s][A[A[A[A[A




Evaluating:   1%|          | 1/118 [00:00<00:56,  2.06it/s][A[A[A[A[A




Evaluating:   2%|▏         | 2/118 [00:00<00:55,  2.08it/s][A[A[A[A[A




Evaluating:   3%|▎         | 3/118 [00:01<00:55,  2.09it/s][A[A[A[A[A




Evaluating:   3%|▎         | 4/118 [00:01<00:54,  2.09it/s][A[A[A[A[A




Evaluating:   4%|▍         | 5/118 [00:02<00:53,  2.10it/s][A[A[A[A[A




Evaluating:   5%|▌         | 6/118 [00:02<00:53,  2.11it/s][A[A[A[A[A




Evaluating:   6%|▌         | 7/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   7%|▋         | 8/118 [00:03<00:52,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 9/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   8%|▊         | 10/118 [00:04<00:51,  2.11it/s][A[A[A[A[A




Evaluating:   9%|▉         | 11/118 [00:05<00:50,  2.11it/s][A[A[A[A[A




Evaluating:  10%|█         | 12/118 [00:05<00:50,  2.

Heuristic entailed results:
lexical_overlap: 0.8944
subsequence: 0.91
constituent: 0.919

Entailed accuracy: 0.9078

Heuristic non-entailed results:
lexical_overlap: 0.1066
subsequence: 0.111
constituent: 0.115

Non-Entailed accuracy: 0.11086666666666667

Overall accuracy: 0.5093333333333333

Subcase results:
ln_subject/object_swap: 0.103
ln_preposition: 0.15
ln_relative_clause: 0.16
ln_passive: 0.021
ln_conjunction: 0.099
le_relative_clause: 0.873
le_around_prepositional_phrase: 0.848
le_around_relative_clause: 0.884
le_conjunction: 0.895
le_passive: 0.972
sn_NP/S: 0.003
sn_PP_on_subject: 0.158
sn_relative_clause_on_subject: 0.142
sn_past_participle: 0.078
sn_NP/Z: 0.174
se_conjunction: 0.908
se_adjective: 0.841
se_understood_object: 0.939
se_relative_clause_on_obj: 0.948
se_PP_on_obj: 0.914
cn_embedded_under_if: 0.118
cn_after_if_clause: 0.166
cn_embedded_under_verb: 0.113
cn_disjunction: 0.092
cn_adverb: 0.086
ce_embedded_under_since: 0.952
ce_after_since_clause: 0.873
ce_embedded_u




In [23]:
layer_results

{(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11): 0.5296666666666666,
 (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10): 0.5225333333333333,
 (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11): 0.5524333333333333,
 (0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11): 0.5049,
 (0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11): 0.5255666666666666,
 (0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11): 0.5303333333333333,
 (0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11): 0.5251333333333333,
 (0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11): 0.5063333333333333,
 (0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11): 0.5156,
 (0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11): 0.5245666666666666,
 (0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11): 0.5183333333333333,
 (0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11): 0.5384,
 (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11): 0.5166333333333334,
 (0, 1, 2, 3, 4, 5, 6, 7, 8, 9): 0.5382333333333333,
 (0, 1, 2, 3, 4, 5, 6, 7, 8, 10): 0.5308666666666667,
 (0, 1, 2, 3, 4, 5, 6, 7, 8, 11): 0.5098333333333334,
 (0, 1, 2, 3, 4, 5, 6, 7, 9, 10): 0.5507,
 (0, 1, 2, 3, 4, 5, 6, 7, 9, 11): 0.5158666666666667,
 (0, 1, 2, 3, 4, 5, 6, 7

In [26]:
with open( "layer_removal_results.pkl", "wb") as f:
    pickle.dump(layer_results, f)