# Predefined

In [1]:
from __future__ import absolute_import, division, print_function

import argparse
import glob
import logging
import os
import pickle
import random
import re
import shutil

import numpy as np
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, SequentialSampler, RandomSampler
from torch.utils.data.distributed import DistributedSampler

try:
    from torch.utils.tensorboard import SummaryWriter
except:
    from tensorboardX import SummaryWriter

from tqdm import trange
from tqdm.autonotebook import tqdm

from data_loader.hybrid_data_loaders import *
from data_loader.header_data_loaders import *
from data_loader.CT_Wiki_data_loaders import *
from data_loader.RE_data_loaders import *
from data_loader.EL_data_loaders import *
from model.configuration import TableConfig
from model.model import HybridTableMaskedLM, HybridTableCER, TableHeaderRanking, HybridTableCT,HybridTableEL,HybridTableRE,BertRE
from model.transformers import BertConfig,BertTokenizer, WEIGHTS_NAME, AdamW, get_linear_schedule_with_warmup
from utils.util import *
from baselines.row_population.metric import average_precision,ndcg_at_k
from baselines.cell_filling.cell_filling import *
from model import metric

logger = logging.getLogger(__name__)

MODEL_CLASSES = {
    'CER': (TableConfig, HybridTableCER, BertTokenizer),
    'CF' : (TableConfig, HybridTableMaskedLM, BertTokenizer),
    'HR': (TableConfig, TableHeaderRanking, BertTokenizer),
    'CT': (TableConfig, HybridTableCT, BertTokenizer),
    'EL': (TableConfig, HybridTableEL, BertTokenizer),
    'RE': (TableConfig, HybridTableRE, BertTokenizer),
    'REBERT': (BertConfig, BertRE, BertTokenizer)
}

# set data directory, this will be used to load test data
data_dir = r"G:\CPSC448\TURL\data\wikitables_v2"

config_name = "configs/table-base-config_v2.json"
device = torch.device('cuda')
# load entity vocab from entity_vocab.txt
entity_vocab = load_entity_vocab(data_dir, ignore_bad_title=True, min_ent_count=2)
entity_wikid2id = {entity_vocab[x]['wiki_id']:x for x in entity_vocab}

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

type_vocab = load_type_vocab(data_dir)
entity_vocab = load_entity_vocab(data_dir, ignore_bad_title=True, min_ent_count=2)

id2type = {idx:t for t, idx in type_vocab.items()}
t2d_invalid = set()

def average_precision(output, relevance_labels):
    with torch.no_grad():
        sorted_output = torch.argsort(output, dim=-1, descending=True)
        sorted_labels = torch.gather(relevance_labels, -1, sorted_output).float()
        cum_correct = torch.cumsum(sorted_labels, dim=-1)
        cum_precision = cum_correct / torch.arange(start=1,end=cum_correct.shape[-1]+1, device=cum_correct.device)[None, :]
        cum_precision = cum_precision * sorted_labels
        total_valid = torch.sum(sorted_labels, dim=-1)
        total_valid[total_valid==0] = 1
        average_precision = torch.sum(cum_precision, dim=-1)/total_valid

    return average_precision

  from tqdm.autonotebook import tqdm


total number of entity: 926135
remove because of empty title: 14206
remove because count<2: 847401
total number of entity: 926135
remove because of empty title: 14206
remove because count<2: 847401


In [2]:
DATASET_PATH = r"G:\CPSC448\TURL\data\wikitables_v2\procressed_WikiCT\test.pickle"
CHECKPOINTS = [
    r"G:\CPSC448\TURL\data\pre-trained_models\checkpoints/0/pytorch_model.bin",
    r"G:\CPSC448\TURL\data\pre-trained_models\checkpoints/1/pytorch_model.bin",
    r"G:\CPSC448\TURL\data\pre-trained_models\checkpoints/2/pytorch_model.bin",
    r"G:\CPSC448\TURL\data\pre-trained_models\checkpoints/3/pytorch_model.bin",
    r"G:\CPSC448\TURL\data\pre-trained_models\checkpoints/4/pytorch_model.bin",
    r"G:\CPSC448\TURL\data\pre-trained_models\checkpoints/5/pytorch_model.bin"
]
TEST_JSON_ALL = r"G:\CPSC448\TURL\data\wikitables_v2\test.all_table_col_type.json"
TEST_JSON = r"G:\CPSC448\TURL\data\wikitables_v2\test.table_col_type.json"
with open(TEST_JSON_ALL, 'r') as f:
    ALL_TABLES = json.load(f)
with open(os.path.join(data_dir, 'test.table_col_type.json'), 'r') as f:
    TEST_TABLES = json.load(f)

In [3]:
# Modify json
def readTable(json_path, table_num):
    with open(json_path, 'r') as f:
        return json.load(f)[table_num]

def writeJson(json_path, tables):
    with open(json_path, 'w') as f:
        json.dump(tables, f)

# Get the Logits

### Predefined

In [115]:
# Get the logits from prediction using a checkpoint on the test_dataset. 
# Please also set the mode.
def predict(test_dataset, checkpoint, mode):
    # Define the evaluation sets
    per_type_accuracy = {}
    per_type_precision = {}
    per_type_recall = {}
    per_type_f1 = {}
    map = {}
    precision = {}
    recall = {}
    f1 = {}
    per_table_result = {}
    
    # Start prediction
    print(f"Mode: {mode}")
    config_class, model_class, _ = MODEL_CLASSES['CT']
    config = config_class.from_pretrained(config_name)
    config.class_num = len(type_vocab)
    config.mode = mode
    model = model_class(config, is_simple=True)
    checkpoint = torch.load(checkpoint)
    model.load_state_dict(checkpoint)
    model.to(device)
    model.eval()
    eval_batch_size = 20
    eval_sampler = SequentialSampler(test_dataset)
    eval_dataloader = CTLoader(test_dataset, sampler=eval_sampler, batch_size=eval_batch_size, is_train=False)
    eval_loss = 0.0
    eval_map = 0.0
    nb_eval_steps = 0
    eval_targets = []
    eval_prediction_scores = []
    eval_pred = []
    eval_mask = []
    per_table_result[mode] = {}
    
    logits = []
    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        table_ids, input_tok, input_tok_type, input_tok_pos, input_tok_mask, \
            input_ent_text, input_ent_text_length, input_ent, input_ent_type, input_ent_mask, \
            column_entity_mask, column_header_mask, labels_mask, labels = batch
        input_tok = input_tok.to(device)
        input_tok_type = input_tok_type.to(device)
        input_tok_pos = input_tok_pos.to(device)
        input_tok_mask = input_tok_mask.to(device)
        input_ent_text = input_ent_text.to(device)
        input_ent_text_length = input_ent_text_length.to(device)
        input_ent = input_ent.to(device)
        input_ent_type = input_ent_type.to(device)
        input_ent_mask = input_ent_mask.to(device)
        column_entity_mask = column_entity_mask.to(device)
        column_header_mask = column_header_mask.to(device)
        labels_mask = labels_mask.to(device)
        labels = labels.to(device)
        if mode == 1:
            input_ent_mask = input_ent_mask[:,:,input_tok_mask.shape[1]:]
            input_tok = None
            input_tok_type = None
            input_tok_pos = None
            input_tok_mask = None
        elif mode == 2:
            input_tok_mask = input_tok_mask[:,:,:input_tok_mask.shape[1]]
            input_ent_text = None
            input_ent_text_length = None
            input_ent = None
            input_ent_type = None
            input_ent_mask = None
        elif mode == 3:
            input_ent = None
        elif mode == 4:
            input_ent_mask = input_ent_mask[:,:,input_tok_mask.shape[1]:]
            input_tok = None
            input_tok_type = None
            input_tok_pos = None
            input_tok_mask = None
            input_ent = None
        elif mode == 5:
            input_ent_mask = input_ent_mask[:,:,input_tok_mask.shape[1]:]
            input_tok = None
            input_tok_type = None
            input_tok_pos = None
            input_tok_mask = None
            input_ent_text = None
            input_ent_text_length = None
        with torch.no_grad():
            outputs = model(input_tok, input_tok_type, input_tok_pos, input_tok_mask,\
                input_ent_text, input_ent_text_length, input_ent, input_ent_type, input_ent_mask, column_entity_mask, column_header_mask, labels_mask, labels)
            loss = outputs[0]
            prediction_scores = outputs[1]
            for l_i in t2d_invalid:
                prediction_scores[:,:,l_i] = -1000
            for idx, table_id in enumerate(table_ids):
                valid = labels_mask[idx].nonzero().max().item()+1
                if table_id not in per_table_result[mode]:
                    per_table_result[mode][table_id] = [[],labels_mask[idx,:valid],labels[idx,:valid]]
                per_table_result[mode][table_id][0].append(prediction_scores[idx,:valid])
            ap = metric.average_precision(prediction_scores.view(-1, config.class_num), labels.view((-1, config.class_num)))
            map = (ap*labels_mask.view(-1)).sum()/labels_mask.sum()
            eval_loss += loss.mean().item()
            eval_map += map.item()
            eval_targets.extend(labels.view(-1, config.class_num).tolist())
            eval_prediction_scores.extend(prediction_scores.view(-1, config.class_num).tolist())
            eval_pred.extend((torch.sigmoid(prediction_scores.view(-1, config.class_num))>0.5).tolist())
            eval_mask.extend(labels_mask.view(-1).tolist())
        nb_eval_steps += 1
        # print(loss.shape)
        logits.append(prediction_scores)
        
    eval_targets = np.array(eval_targets)
    eval_prediction_scores = np.array(eval_prediction_scores)
    eval_mask = np.array(eval_mask)
    eval_prediction_ranks = np.argsort(np.argsort(-eval_prediction_scores))
    eval_pred = np.array(eval_pred)
    eval_tp = eval_mask[:,np.newaxis]*eval_pred*eval_targets
    eval_precision = np.sum(eval_tp,axis=0)/np.sum(eval_mask[:,np.newaxis]*eval_pred,axis=0)
    eval_precision = np.nan_to_num(eval_precision, 1)
    eval_recall = np.sum(eval_tp,axis=0)/np.sum(eval_mask[:,np.newaxis]*eval_targets,axis=0)
    eval_recall = np.nan_to_num(eval_recall, 1)
    eval_f1 = 2*eval_precision*eval_recall/(eval_precision+eval_recall)
    eval_f1 = np.nan_to_num(eval_f1, 0)
    per_type_instance_num = np.sum(eval_mask[:,np.newaxis]*eval_targets,axis=0)
    per_type_instance_num[per_type_instance_num==0] = 1
    per_type_correct_instance_num = np.sum(eval_mask[:,np.newaxis]*(eval_prediction_ranks<eval_targets.sum(axis=1)[:,np.newaxis])*eval_targets,axis=0)
    per_type_accuracy[mode] = per_type_correct_instance_num/per_type_instance_num
    per_type_precision[mode] = eval_precision
    per_type_recall[mode] = eval_recall
    per_type_f1[mode] = eval_f1
    precision[mode] = np.sum(eval_tp)/np.sum(eval_mask[:,np.newaxis]*eval_pred)
    recall[mode] = np.sum(eval_tp)/np.sum(eval_mask[:,np.newaxis]*eval_targets)
    f1[mode] = 2*precision[mode]*recall[mode]/(precision[mode]+recall[mode])
    
    return logits, per_table_result, precision, recall, f1

# This is the max row_number of each table
LENGTH_ALL_TABLES = []

for table in ALL_TABLES:
    length = 0
    for col in table[6]:
        if len(col) > length:
            length = len(col)
    LENGTH_ALL_TABLES.append(length)

In [119]:
def wrappedPredict(tables):
    writeJson(TEST_JSON, tables)

    if os.path.exists(DATASET_PATH):
        os.remove(DATASET_PATH)
    test_dataset = WikiCTDataset(data_dir, entity_vocab, type_vocab, max_input_tok=500, src="test", max_length = [50, 10, 10], force_new=False, tokenizer = None)


    # Get the logits and the predicted results
    logits, per_table_result, precision, recall, f1 = predict(test_dataset, CHECKPOINTS[4], 4)
    
    return logits, per_table_result, precision, recall, f1

LOGITS, _, _, _, _ = wrappedPredict(ALL_TABLES)

try creating preprocessed data in G:\CPSC448\TURL\data\wikitables_v2\procressed_WikiCT\test.pickle
4764 test tables


100%|██████████| 4764/4764 [00:08<00:00, 593.26it/s]


Mode: 4


Evaluating: 100%|██████████| 239/239 [00:02<00:00, 104.40it/s]
  eval_precision = np.sum(eval_tp,axis=0)/np.sum(eval_mask[:,np.newaxis]*eval_pred,axis=0)
  eval_recall = np.sum(eval_tp,axis=0)/np.sum(eval_mask[:,np.newaxis]*eval_targets,axis=0)
  eval_f1 = 2*eval_precision*eval_recall/(eval_precision+eval_recall)


#### Evaluation

In [6]:
total_corr = 0
total_valid = 0
errors = []
for table_id, result in per_table_result[4].items():
    prediction_scores, label_mask, label = result
    prediction_scores = torch.stack(prediction_scores, 0).mean(0)
    current_corr = 0
    for col_idx, pred in enumerate(prediction_scores.argmax(-1).tolist()):
        current_corr += label[col_idx, pred].item()
    total_valid += label_mask.sum().item()
    total_corr += current_corr
    if current_corr!=label_mask.sum().item():
        errors.append(table_id)
print(total_corr/total_valid, total_valid)

NameError: name 'per_table_result' is not defined

## Adversarial attacks

In [22]:
import copy

# Get a column from a txt file
def extractColumn(file_path, column_index):
    column_values = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            columns = line.strip().split('\t')
            if len(columns) > column_index:
                column_values.append(columns[column_index])
    return column_values

# # Randomly switch entities in tables with randomly choosing entities
# def switchEntities(tables, percentage):
#     entity_list = extractColumn(os.path.join(data_dir, "entity_vocab.txt"), 2)
#     entity_id_list = extractColumn(os.path.join(data_dir, "entity_vocab.txt"), 1)
#     tables_copy = copy.deepcopy(tables)
    
#     for table_index in range (len(tables)):
#         col_num = len(tables[table_index][6])
#         entity_position = []
#         # Get the number of entities
#         for col_index in range(col_num):
#             row_num = len(tables[table_index][6][col_index])
#             for row_index in range(row_num):
#                 entity_position.append([row_index, col_index])
        
#         # Randomly choose entities * percentage entities. random_entity_positions is a list
#         random_entity_positions = random.sample(entity_position, int(len(entity_position) * percentage))
        
        
#         for random_entity_position in random_entity_positions:
#             # Random index in the list containing all the entities
#             rand_num = random.randint(0, len(entity_list))
            
#             [random_entity_row, random_entity_col] = random_entity_position
#             tables_copy[table_index][6][random_entity_col][random_entity_row][1] = [int(entity_id_list[rand_num]), entity_list[rand_num]]
                
#     return tables_copy

def maskEntities(tables, percentage):
    entity_list = extractColumn(os.path.join(data_dir, "entity_vocab.txt"), 2)
    entity_id_list = extractColumn(os.path.join(data_dir, "entity_vocab.txt"), 1)
    tables_copy = copy.deepcopy(tables)
    
    for table_index in range (len(tables)):
        col_num = len(tables[table_index][6])
        entity_position = []
        # Get the number of entities
        for col_index in range(col_num):
            row_num = len(tables[table_index][6][col_index])
            for row_index in range(row_num):
                entity_position.append([row_index, col_index])
        
        # Randomly choose entities * percentage entities. random_entity_positions is a list
        random_entity_positions = random.sample(entity_position, int(len(entity_position) * percentage))
        
        
        for random_entity_position in random_entity_positions:            
            [random_entity_row, random_entity_col] = random_entity_position
            tables_copy[table_index][6][random_entity_col][random_entity_row][1][1] = 'ENT_MASK'
                
    return tables_copy

In [None]:
# switched_tables = switchEntities(ALL_TABLES, 0.8)
# logits, per_table_result = wrappedPredict(switched_tables)

try creating preprocessed data in G:\CPSC448\TURL\data\wikitables_v2\procressed_WikiCT\test.pickle
4764 test tables


100%|██████████| 4764/4764 [00:07<00:00, 595.84it/s]


Mode: 4


Evaluating: 100%|██████████| 239/239 [00:02<00:00, 108.65it/s]


In [122]:
def evaluateAcc(per_table_result):
    total_corr = 0
    total_valid = 0
    errors = []
    for table_id, result in per_table_result[4].items():
        prediction_scores, label_mask, label = result
        prediction_scores = torch.stack(prediction_scores, 0).mean(0)
        current_corr = 0
        for col_idx, pred in enumerate(prediction_scores.argmax(-1).tolist()):
            current_corr += label[col_idx, pred].item()
        total_valid += label_mask.sum().item()
        total_corr += current_corr
        if current_corr!=label_mask.sum().item():
            errors.append(table_id)
    return total_corr/total_valid, total_valid

# Mask

In [7]:
# len(ALL_TABLES[0][6][])


logits, _ = wrappedPredict([ALL_TABLES[859]] * 30)
len(logits)


try creating preprocessed data in G:\CPSC448\TURL\data\wikitables_v2\procressed_WikiCT\test.pickle
30 test tables


100%|██████████| 30/30 [00:00<00:00, 158.37it/s]


Mode: 4


Evaluating: 100%|██████████| 2/2 [00:00<00:00, 62.50it/s]


2

In [8]:
import csv

def deleteOutOfRowBound(tables, length_tables, indices_tables, row_idx):
    filtered_indices = [i for i in range(len(length_tables)) if length_tables[i] > row_idx]
    
    length_tables = [length_tables[i] for i in filtered_indices]
    tables = [tables[i] for i in filtered_indices]
    indices_tables = [indices_tables[i] for i in filtered_indices]
    
    return tables, length_tables, indices_tables

        
# Mask the given row of all the tables in tables
def maskRowOfTables(tables, row_idx):
    for table_idx in range(len(tables)):
        for col_idx in range(len(tables[table_idx][6])):
            try:
                tables[table_idx][6][col_idx][row_idx][1][1] = 'ENT_MASK'
            except IndexError:
                # We don't care about the index error for incomplete tables
                continue
        
    return tables

def writeCSV(table_num, logits_difference_row):
    file_path = rf'G:\CPSC448\TURL\data\logits_difference\table_{table_num}.csv'

    if os.path.exists(file_path):
        with open(file_path, 'a', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(logits_difference_row)
                
    else:
        with open(file_path, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(logits_difference_row)

def readCSV(table_num, dir):
    # Construct the file name
    file_name = f'table_{table_num}.csv'
    # Construct the full file path
    file_path = os.path.join(dir, file_name)
        
    # Read the CSV file into a DataFrame
    df = pd.read_csv(file_path, header=None)
    return df

In [10]:
all_tables_copy_first_10 = copy.deepcopy(ALL_TABLES)
for table_idx in range(len(all_tables_copy_first_10)):
    print(table_idx)
    if LENGTH_ALL_TABLES[table_idx] > 10:
        for col_idx in range(len(all_tables_copy_first_10[table_idx][6])):
            all_tables_copy_first_10[table_idx][6][col_idx] = ALL_TABLES[table_idx][6][col_idx][:10]

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [11]:
# Copy all tables, this will be shorten
copy_all_tables = copy.deepcopy(ALL_TABLES)
copy_all_lengths = copy.deepcopy(LENGTH_ALL_TABLES)
copy_all_table_indices = list(range(len(ALL_TABLES)))

row_idx = 0
while len(copy_all_tables) > 0 and row_idx < 10:
    copy_all_tables, copy_all_lengths, copy_all_table_indices = deleteOutOfRowBound(copy_all_tables, copy_all_lengths, copy_all_table_indices, row_idx)
    
    if len(copy_all_tables) == 0:
        break
    
    # Remember the tables before masked. For recovery.
    temp_tables = copy.deepcopy(copy_all_tables)
    copy_all_tables = maskRowOfTables(copy_all_tables, row_idx)
    logits_masked, _ = wrappedPredict(copy_all_tables)
    
    for batch_idx in range(len(logits_masked)):
        for table_index_in_batch in range(len(logits_masked[batch_idx])):
            # Create a logits difference table
            logits_difference_row = []
            
            # Get the table index among the remaining tables
            table_index_in_remaining = table_index_in_batch + 20 * batch_idx
            # Get the index among all the tables
            table_index_in_all = copy_all_table_indices[table_index_in_remaining]
            # Go to LOGITS and get the logits for that table
            correct_table_logits = LOGITS[table_index_in_all//20][table_index_in_all%20]
            print(correct_table_logits[0][196])
            
            table = ALL_TABLES[table_index_in_all]
            correct_labels = table[7]
            col_num = len(correct_labels)
            
            for col_idx in range(len(correct_labels)):
                label_index = type_vocab[correct_labels[col_idx][0]]
                
                logits_difference_row.append(math.fabs(correct_table_logits[col_idx][label_index].item() - logits_masked[batch_idx][table_index_in_batch][col_idx][label_index].item()))
                print(table_index_in_all, " ", math.fabs(logits_masked[batch_idx][table_index_in_batch][col_idx][label_index].item()))
                
                
            writeCSV(table_index_in_all, logits_difference_row)
    # Deal with the logits
    ####
    # Recover
    copy_all_tables = temp_tables
    row_idx += 1


try creating preprocessed data in G:\CPSC448\TURL\data\wikitables_v2\procressed_WikiCT\test.pickle
4764 test tables


100%|██████████| 4764/4764 [00:08<00:00, 593.52it/s]


Mode: 4


Evaluating: 100%|██████████| 239/239 [00:02<00:00, 105.41it/s]


tensor(5.8106, device='cuda:0')
0   3.9039015769958496
0   4.9104323387146
0   5.271502494812012
tensor(5.1820, device='cuda:0')
1   3.5028645992279053
tensor(-10.1736, device='cuda:0')
2   7.481492519378662
tensor(-9.0531, device='cuda:0')
3   3.5706112384796143
3   5.537189960479736
tensor(8.1981, device='cuda:0')
4   0.7786154747009277
4   10.272275924682617
4   9.043630599975586
4   1.700524091720581
4   0.2976509928703308
tensor(-11.9803, device='cuda:0')
5   3.721766948699951
5   5.2342987060546875
5   4.699069499969482
tensor(-7.1353, device='cuda:0')
6   1.119568109512329
6   0.638356626033783
tensor(-8.6637, device='cuda:0')
7   8.007267951965332
7   7.922821998596191
7   0.4694805145263672
7   5.4163594245910645
tensor(-13.9754, device='cuda:0')
8   0.6420846581459045
8   8.324915885925293
8   2.8551182746887207
tensor(-6.9729, device='cuda:0')
9   0.33328676223754883
tensor(-12.3943, device='cuda:0')
10   0.1038508266210556
10   4.142678737640381
tensor(-11.1881, device='cud

100%|██████████| 4764/4764 [00:07<00:00, 599.92it/s]


Mode: 4


Evaluating: 100%|██████████| 239/239 [00:02<00:00, 106.10it/s]


tensor(5.8106, device='cuda:0')
0   4.147253513336182
0   5.086191654205322
0   5.64082145690918
tensor(5.1820, device='cuda:0')
1   3.154409885406494
tensor(-10.1736, device='cuda:0')
2   7.440959930419922
tensor(-9.0531, device='cuda:0')
3   3.77518892288208
3   5.376843452453613
tensor(8.1981, device='cuda:0')
4   0.6115894317626953
4   10.615586280822754
4   9.297675132751465
4   1.8948262929916382
4   0.8114312291145325
tensor(-11.9803, device='cuda:0')
5   3.3185431957244873
5   5.509428977966309
5   5.106231689453125
tensor(-7.1353, device='cuda:0')
6   0.28826749324798584
6   0.45173895359039307
tensor(-8.6637, device='cuda:0')
7   8.010783195495605
7   7.855672836303711
7   0.04334722459316254
7   5.405117511749268
tensor(-13.9754, device='cuda:0')
8   0.4755110442638397
8   8.986690521240234
8   2.890331745147705
tensor(-6.9729, device='cuda:0')
9   0.3977053761482239
tensor(-12.3943, device='cuda:0')
10   0.06719009578227997
10   4.795506000518799
tensor(-11.1881, device='cu

100%|██████████| 4764/4764 [00:07<00:00, 599.46it/s]


Mode: 4


Evaluating: 100%|██████████| 239/239 [00:02<00:00, 108.53it/s]


tensor(5.8106, device='cuda:0')
0   3.8641300201416016
0   4.095239639282227
0   4.268550872802734
tensor(5.1820, device='cuda:0')
1   3.4355759620666504
tensor(-10.1736, device='cuda:0')
2   7.724266052246094
tensor(-9.0531, device='cuda:0')
3   3.775188446044922
3   5.37684440612793
tensor(8.1981, device='cuda:0')
4   0.8638331294059753
4   10.66644287109375
4   9.13486099243164
4   1.8547059297561646
4   0.733142614364624
tensor(-11.9803, device='cuda:0')
5   3.798931360244751
5   4.637833118438721
5   5.070479869842529
tensor(-7.1353, device='cuda:0')
6   0.23201096057891846
6   0.5293117761611938
tensor(-8.6637, device='cuda:0')
7   7.43203067779541
7   7.754350662231445
7   0.05388729274272919
7   5.240611553192139
tensor(-13.9754, device='cuda:0')
8   0.47550997138023376
8   8.98669147491455
8   2.890331506729126
tensor(-6.9729, device='cuda:0')
9   0.7690766453742981
tensor(-12.3943, device='cuda:0')
10   0.17887066304683685
10   4.52567720413208
tensor(-11.1881, device='cuda:0

100%|██████████| 4747/4747 [00:07<00:00, 595.14it/s]


Mode: 4


Evaluating: 100%|██████████| 238/238 [00:02<00:00, 109.66it/s]


tensor(5.8106, device='cuda:0')
0   2.6975133419036865
0   4.928041458129883
0   5.505032539367676
tensor(5.1820, device='cuda:0')
1   3.409850597381592
tensor(-10.1736, device='cuda:0')
2   7.48149299621582
tensor(-9.0531, device='cuda:0')
3   3.970216751098633
3   5.071773052215576
tensor(8.1981, device='cuda:0')
4   0.832797646522522
4   10.659117698669434
4   9.032876968383789
4   1.7456241846084595
4   0.5538389086723328
tensor(-11.9803, device='cuda:0')
5   3.259441375732422
5   4.843860149383545
5   5.529317855834961
tensor(-7.1353, device='cuda:0')
6   0.29160547256469727
6   0.4425520896911621
tensor(-8.6637, device='cuda:0')
7   7.994659423828125
7   7.91359806060791
7   0.31782615184783936
7   4.749967098236084
tensor(-13.9754, device='cuda:0')
8   0.5750963091850281
8   8.626542091369629
8   2.4914002418518066
tensor(-6.9729, device='cuda:0')
9   0.585054337978363
tensor(-12.3943, device='cuda:0')
10   0.16040216386318207
10   5.048832893371582
tensor(-11.1881, device='cuda

100%|██████████| 4734/4734 [00:07<00:00, 599.51it/s]


Mode: 4


Evaluating: 100%|██████████| 237/237 [00:02<00:00, 106.81it/s]


tensor(5.8106, device='cuda:0')
0   4.2091288566589355
0   7.2440690994262695
0   7.793813705444336
tensor(5.1820, device='cuda:0')
1   3.84205961227417
tensor(-10.1736, device='cuda:0')
2   7.101912498474121
tensor(-9.0531, device='cuda:0')
3   3.417083501815796
3   5.344935417175293
tensor(8.1981, device='cuda:0')
4   0.8361294269561768
4   10.349536895751953
4   9.255976676940918
4   1.8595062494277954
4   0.5429767966270447
tensor(-11.9803, device='cuda:0')
5   3.9076650142669678
5   5.681626796722412
5   5.8563618659973145
tensor(-7.1353, device='cuda:0')
6   0.5863100290298462
6   0.5552995204925537
tensor(-8.6637, device='cuda:0')
7   8.179512977600098
7   8.139331817626953
7   0.14387480914592743
7   5.052979946136475
tensor(-13.9754, device='cuda:0')
8   0.4536726176738739
8   8.750545501708984
8   2.7106637954711914
tensor(-6.9729, device='cuda:0')
9   0.684513509273529
tensor(-12.3943, device='cuda:0')
10   0.052554234862327576
10   5.0068888664245605
tensor(-11.1881, device

100%|██████████| 4332/4332 [00:07<00:00, 579.50it/s]


Mode: 4


Evaluating: 100%|██████████| 217/217 [00:02<00:00, 105.73it/s]


tensor(5.1820, device='cuda:0')
1   4.0415520668029785
tensor(-10.1736, device='cuda:0')
2   7.892250061035156
tensor(-9.0531, device='cuda:0')
3   3.605980157852173
3   5.4244184494018555
tensor(8.1981, device='cuda:0')
4   0.9123921394348145
4   10.332039833068848
4   9.016100883483887
4   1.6805511713027954
4   0.21116812527179718
tensor(-11.9803, device='cuda:0')
5   3.6978471279144287
5   4.960272789001465
5   3.503075361251831
tensor(-7.1353, device='cuda:0')
6   0.45922255516052246
6   0.4569324254989624
tensor(-8.6637, device='cuda:0')
7   7.6395955085754395
7   7.685778617858887
7   0.1005450189113617
7   5.146246433258057
tensor(-13.9754, device='cuda:0')
8   0.6162019371986389
8   8.674148559570312
8   2.7494382858276367
tensor(-6.9729, device='cuda:0')
9   0.2828044891357422
tensor(-12.3943, device='cuda:0')
10   0.08432959020137787
10   3.3407390117645264
tensor(-11.1881, device='cuda:0')
11   1.0668599605560303
11   7.880593299865723
tensor(-10.9250, device='cuda:0')
12  

100%|██████████| 3838/3838 [00:06<00:00, 551.92it/s]


Mode: 4


Evaluating: 100%|██████████| 192/192 [00:01<00:00, 105.28it/s]


tensor(5.1820, device='cuda:0')
1   3.3196113109588623
tensor(-10.1736, device='cuda:0')
2   7.748186111450195
tensor(-9.0531, device='cuda:0')
3   2.6873724460601807
3   5.6399030685424805
tensor(8.1981, device='cuda:0')
4   0.8975465297698975
4   10.059991836547852
4   9.493624687194824
4   1.7406665086746216
4   0.005320325493812561
tensor(-11.9803, device='cuda:0')
5   3.722487211227417
5   4.913059711456299
5   5.657564640045166
tensor(-7.1353, device='cuda:0')
6   0.5336158871650696
6   0.6439893245697021
tensor(-8.6637, device='cuda:0')
7   7.749938488006592
7   7.885863304138184
7   0.5371323823928833
7   4.719115734100342
tensor(-13.9754, device='cuda:0')
8   0.540476381778717
8   8.759703636169434
8   2.750241279602051
tensor(-6.9729, device='cuda:0')
9   0.5238262414932251
tensor(-12.3943, device='cuda:0')
10   0.03769509494304657
10   5.285715579986572
tensor(-11.1881, device='cuda:0')
11   1.328095555305481
11   7.969252586364746
tensor(-10.9250, device='cuda:0')
12   0.10

100%|██████████| 3480/3480 [00:06<00:00, 546.89it/s]


Mode: 4


Evaluating: 100%|██████████| 174/174 [00:01<00:00, 108.59it/s]


tensor(5.1820, device='cuda:0')
1   3.656851291656494
tensor(-10.1736, device='cuda:0')
2   7.748186111450195
tensor(-9.0531, device='cuda:0')
3   3.1999964714050293
3   5.701557636260986
tensor(8.1981, device='cuda:0')
4   0.7604091167449951
4   10.282176971435547
4   9.516803741455078
4   1.694717288017273
4   0.359493613243103
tensor(-7.1353, device='cuda:0')
6   0.47243911027908325
6   0.1585538387298584
tensor(-8.6637, device='cuda:0')
7   6.955726623535156
7   6.711733341217041
7   0.1678531914949417
7   5.207864284515381
tensor(-13.9754, device='cuda:0')
8   0.5389437079429626
8   8.398809432983398
8   2.9392080307006836
tensor(-11.1881, device='cuda:0')
11   1.058170199394226
11   7.933403015136719
tensor(-10.9250, device='cuda:0')
12   0.028518542647361755
12   5.932252407073975
tensor(-10.3454, device='cuda:0')
13   2.9734647274017334
13   1.3276983499526978
13   5.416641712188721
tensor(-12.6097, device='cuda:0')
14   7.463977813720703
14   2.9348325729370117
14   8.37496662

100%|██████████| 3088/3088 [00:05<00:00, 538.54it/s]


Mode: 4


Evaluating: 100%|██████████| 155/155 [00:01<00:00, 104.91it/s]


tensor(5.1820, device='cuda:0')
1   3.773275375366211
tensor(-10.1736, device='cuda:0')
2   6.4212517738342285
tensor(-9.0531, device='cuda:0')
3   3.3175220489501953
3   5.234463691711426
tensor(8.1981, device='cuda:0')
4   0.7197490930557251
4   10.219622611999512
4   9.096622467041016
4   1.6668704748153687
4   0.34003597497940063
tensor(-7.1353, device='cuda:0')
6   0.3520987033843994
6   2.15495228767395
tensor(-8.6637, device='cuda:0')
7   8.04639720916748
7   7.711164951324463
7   0.12477637827396393
7   5.129449367523193
tensor(-13.9754, device='cuda:0')
8   0.3036055266857147
8   8.363317489624023
8   2.8520989418029785
tensor(-11.1881, device='cuda:0')
11   1.2633287906646729
11   8.099272727966309
tensor(-10.9250, device='cuda:0')
12   0.04896734654903412
12   5.70068883895874
tensor(-10.3454, device='cuda:0')
13   3.4922330379486084
13   1.316002368927002
13   5.169089317321777
tensor(-12.6097, device='cuda:0')
14   7.574180603027344
14   2.918126344680786
14   8.5920248031

100%|██████████| 2831/2831 [00:05<00:00, 531.03it/s]


Mode: 4


Evaluating: 100%|██████████| 142/142 [00:01<00:00, 109.35it/s]


tensor(5.1820, device='cuda:0')
1   4.098048686981201
tensor(-10.1736, device='cuda:0')
2   6.42125129699707
tensor(-9.0531, device='cuda:0')
3   3.5823514461517334
3   5.655904769897461
tensor(8.1981, device='cuda:0')
4   1.2063885927200317
4   10.604655265808105
4   9.500044822692871
4   1.6797913312911987
4   0.6833003163337708
tensor(-8.6637, device='cuda:0')
7   7.799715995788574
7   7.819261074066162
7   0.08705611526966095
7   5.134461879730225
tensor(-13.9754, device='cuda:0')
8   0.6680952310562134
8   8.634198188781738
8   2.77717661857605
tensor(-11.1881, device='cuda:0')
11   1.4801363945007324
11   8.263944625854492
tensor(-10.9250, device='cuda:0')
12   0.02495799958705902
12   5.3303728103637695
tensor(-10.3454, device='cuda:0')
13   3.1859304904937744
13   1.2559449672698975
13   5.551663398742676
tensor(-12.6097, device='cuda:0')
14   7.703813076019287
14   2.844677686691284
14   8.715007781982422
tensor(-12.0408, device='cuda:0')
15   3.5295228958129883
15   1.6949349

#### Rank

In [12]:
def load_all_csvs(dir):
    # Initialize an empty list to store the DataFrames
    all_tables = []
    # List all files in the directory
    files = os.listdir(dir)
    # Filter and sort the files in natural order
    files = [file for file in files if file.startswith('table_') and file.endswith('.csv')]
    files.sort(key=lambda f: int(f.split('_')[1].split('.')[0]))
    
    # Loop through the files and read those matching the pattern
    for file in files:
        # Extract the table number from the file name
        table_num = int(file.split('_')[1].split('.')[0])
        # Read the CSV file and store it in the list
        all_tables.append(readCSV(table_num, dir))
    return all_tables

dir = r'G:\CPSC448\TURL\data\logits_difference'
all_tables_logits_difference = load_all_csvs(dir)


Unnamed: 0,0
0,0.236759
1,0.277291
2,0.006015
3,0.236758
4,0.616339
5,0.173999
6,0.029935
7,0.029935
8,1.296999
9,1.297


In [55]:
all_tables_copy_first_10[17]

['33863640-1',
 'list of air india fc managers',
 33863640,
 'statistics',
 '',
 ['name', 'nationality'],
 [[[[0, 0], [39919400, 'Bimal Ghosh']],
   [[2, 0], [31102211, 'Santosh Kashyap']],
   [[3, 0], [36450423, 'Godfrey Pereira']],
   [[4, 0], [38067060, 'Anthony Fernandes']],
   [[5, 0], [38686264, 'Naushad Moosa']]],
  [[[0, 1], [14533, 'India']],
   [[1, 1], [14533, 'India']],
   [[2, 1], [14533, 'India']],
   [[3, 1], [14533, 'India']],
   [[4, 1], [14533, 'India']],
   [[5, 1], [14533, 'India']]]],
 [['sports.pro_athlete', 'people.person'],
  ['location.country', 'location.location']]]

In [56]:
def rank_and_sort_tables(all_tables_logits_difference, all_tables_copy_first_10):
    new_all_tables_copy_first_10 = copy.deepcopy(all_tables_copy_first_10)
    all_sorting_order = []
    for table_num, df in enumerate(all_tables_logits_difference):
        sorting_order = []
        
        # Sort each column and keep track of the sorting order
        for col_num in range(len(df.columns)):
            sorted_indices = df.iloc[:, col_num].sort_values(ascending=False).index
            
            if len(all_tables_copy_first_10[table_num][6][col_num]) < len(sorted_indices):
                sorted_indices = [idx for idx in sorted_indices if idx < len(all_tables_copy_first_10[table_num][6][col_num])]
            
            sorting_order.append(sorted_indices)
            # sorting_order[col_num] = sorting_order[col_num][:len(all_tables_copy_first_10[table_num][6][col_num])]
        all_sorting_order.append(sorting_order)
        # Apply the sorting order to all_tables_copy_first_10
        for col_num, order in enumerate(sorting_order):
            col_data = copy.deepcopy(new_all_tables_copy_first_10[table_num][6][col_num])
            # Create a new list for the sorted column
            print(table_num)
            sorted_col_data = []
            for i in order:     
                sorted_col_data.append(col_data[i]) 
            # Replace the original column data with the sorted data
            new_all_tables_copy_first_10[table_num][6][col_num] = sorted_col_data
        
    return new_all_tables_copy_first_10, all_sorting_order

sorted_tables, sorted_orders = rank_and_sort_tables(all_tables_logits_difference, all_tables_copy_first_10)


0
0
0
1
2
3
3
4
4
4
4
4
5
5
5
6
6
7
7
7
7
8
8
8
9
10
10
11
11
12
12
13
13
13
14
14
14
15
15
16
16
17
17
18
18
18
18
19
19
19
20
20
21
21
21
21
22
23
23
23
24
24
25
25
26
26
27
27
27
28
28
29
29
30
30
30
30
31
31
31
31
32
32
32
33
33
34
34
35
35
35
36
36
36
37
37
37
38
38
38
39
39
39
40
40
40
41
41
42
42
42
43
43
44
45
45
45
46
46
46
46
46
46
46
46
47
47
47
48
49
49
49
49
50
50
51
52
52
53
53
54
55
55
56
56
57
57
58
58
59
59
59
60
60
61
61
62
62
63
63
64
64
65
65
66
66
66
67
67
67
68
68
69
69
69
70
70
71
71
71
71
72
72
72
72
73
73
73
74
74
75
75
75
75
76
76
76
77
77
77
78
79
79
79
80
80
80
80
81
82
82
82
82
83
84
84
84
84
85
85
85
86
86
87
87
87
87
88
88
88
88
89
89
90
90
90
90
91
91
91
92
92
92
92
93
93
94
95
95
95
96
96
96
96
96
97
97
97
98
98
98
99
99
100
100
101
101
102
102
103
103
104
104
104
105
105
105
106
107
108
109
109
109
110
110
110
111
111
111
111
111
112
113
113
114
114
114
115
115
116
116
116
116
117
117
118
119
119
119
119
120
120
120
121
121
122
122
123
123
123
124
124


 49%|████▉     | 2344/4764 [00:57<00:59, 40.55it/s] 

3156
3156
3156
3157
3157
3157
3158
3158
3159
3159
3160
3160
3161
3161
3161
3162
3162
3162
3163
3163
3163
3164
3164
3164
3164
3164
3165
3165
3165
3166
3166
3166
3167
3168
3168
3169
3169
3169
3170
3170
3170
3170
3170
3171
3171
3171
3172
3173
3173
3174
3174
3175
3175
3175
3176
3176
3176
3177
3177
3178
3178
3178
3179
3179
3179
3179
3180
3180
3181
3181
3182
3183
3183
3183
3184
3184
3184
3184
3185
3185
3185
3186
3186
3186
3187
3187
3187
3187
3188
3188
3188
3189
3189
3190
3190
3190
3191
3191
3191
3192
3192
3192
3193
3193
3193
3193
3194
3194
3194
3194
3195
3195
3196
3196
3196
3196
3197
3197
3197
3198
3198
3198
3198
3198
3199
3199
3199
3199
3200
3200
3200
3200
3201
3201
3202
3202
3202
3202
3202
3203
3203
3203
3204
3204
3204
3205
3205
3206
3206
3206
3207
3208
3209
3209
3209
3210
3210
3210
3211
3211
3212
3212
3212
3213
3213
3213
3214
3214
3215
3215
3216
3216
3217
3217
3217
3218
3218
3218
3219
3220
3221
3221
3222
3223
3224
3224
3225
3225
3226
3226
3227
3227
3227
3228
3228
3228
3229
3229
3229
3230





3350
3350
3351
3351
3351
3352
3352
3353
3353
3353
3354
3354
3354
3355
3355
3356
3356
3356
3356
3357
3357
3358
3358
3358
3359
3359
3360
3360
3361
3361
3362
3362
3363
3363
3364
3364
3364
3365
3365
3366
3366
3367
3367
3368
3368
3369
3369
3369
3370
3370
3370
3371
3371
3371
3372
3372
3372
3372
3373
3373
3373
3374
3374
3374
3375
3375
3376
3377
3377
3378
3378
3379
3379
3379
3380
3380
3380
3380
3380
3381
3381
3381
3381
3382
3382
3382
3382
3383
3383
3384
3384
3384
3385
3385
3385
3385
3386
3386
3386
3386
3386
3387
3388
3388
3389
3389
3389
3390
3390
3390
3391
3392
3392
3393
3393
3394
3394
3394
3395
3395
3395
3395
3396
3396
3397
3398
3398
3398
3399
3399
3399
3400
3400
3401
3401
3401
3402
3402
3402
3403
3403
3403
3403
3404
3404
3404
3405
3405
3405
3405
3405
3406
3406
3406
3407
3407
3407
3407
3407
3407
3408
3408
3408
3409
3410
3411
3412
3412
3413
3413
3414
3414
3415
3415
3415
3415
3416
3416
3416
3416
3417
3417
3418
3418
3419
3419
3420
3420
3420
3421
3421
3421
3422
3422
3422
3422
3423
3423
3424
3424


In [44]:
ALL_TABLES[1]

['27298128-1',
 'kagawa at-large district (house of councillors)',
 27298128,
 'elected councillors',
 '',
 ['election year'],
 [[[[0, 0], [31629558, '1947']],
   [[1, 0], [31629891, '1950']],
   [[2, 0], [31629950, '1953']],
   [[3, 0], [9739213, '1956']],
   [[5, 0], [31630662, '1959']],
   [[6, 0], [9739598, '1962']],
   [[7, 0], [31630757, '1965']],
   [[8, 0], [31630776, '1968']],
   [[9, 0], [31630792, '1971']],
   [[11, 0], [31630810, '1974']],
   [[12, 0], [13244464, '1977']],
   [[13, 0], [10325807, '1980']],
   [[14, 0], [9739824, '1983']],
   [[15, 0], [13244443, '1986']],
   [[16, 0], [9021420, '1989']],
   [[17, 0], [13244408, '1992']],
   [[18, 0], [13244347, '1995']],
   [[19, 0], [13244305, '1998']],
   [[20, 0], [10124546, '2001']],
   [[21, 0], [809953, '2004']],
   [[22, 0], [9128597, '2007']],
   [[23, 0], [23702616, '2010']]]],
 [['time.event']]]

#### Switch Top n%

In [64]:
# Switch entities randomly with a given seed
def switchEntities(tables, percentage, seed):
    random.seed(seed)
    entity_list = extractColumn(os.path.join(data_dir, "entity_vocab.txt"), 2)
    entity_id_list = extractColumn(os.path.join(data_dir, "entity_vocab.txt"), 1)
    tables_copy = copy.deepcopy(tables)
    
    for table_index in range(len(tables)):
        col_num = len(tables[table_index][6])
        for col_index in range(col_num):
            row_num = len(tables[table_index][6][col_index])
        
            # Randomly choose entities * percentage entities in a column
            random_row_indices = random.sample(range(row_num), int(row_num * percentage))
            
            for random_row_idx in random_row_indices:
                # Random index in the list containing all the entities
                rand_num = random.randint(0, len(entity_list) - 1)
                
                tables_copy[table_index][6][col_index][random_row_idx][1] = [int(entity_id_list[rand_num]), entity_list[rand_num]]
                
    return tables_copy

# Switch entities with top percentage importance scores with a given seed
def switchImportantEntities(tables, percentage, sorted_orders, seed):
    random.seed(seed)
    entity_list = extractColumn(os.path.join(data_dir, "entity_vocab.txt"), 2)
    entity_id_list = extractColumn(os.path.join(data_dir, "entity_vocab.txt"), 1)
    tables_copy = copy.deepcopy(tables)
    
    for table_index in range(len(tables)):
        col_num = len(tables[table_index][6])
        for col_index in range(col_num):
            row_num = len(tables[table_index][6][col_index])
        
            replaced_entity_num_in_a_column = int(row_num * percentage)
        
            for replaced_row_idx in range(replaced_entity_num_in_a_column):
                # Random index in the list containing all the entities
                rand_num = random.randint(0, len(entity_list) - 1)
                # print(sorted_orders[table_index][col_index][replaced_row_idx])
                tables_copy[table_index][6][col_index][sorted_orders[table_index][col_index][replaced_row_idx]][1] = [int(entity_id_list[rand_num]), entity_list[rand_num]]
                
    return tables_copy

In [123]:
switched_tables_importance = switchImportantEntities(all_tables_copy_first_10, 0.6, sorted_orders, 2)
switched_tables_random = switchEntities(all_tables_copy_first_10, 0.6, 2)
logits_importance, per_table_result_importance, precision_importance, recall_importance, f1_importance = wrappedPredict(switched_tables_importance)
logits_random, per_table_result_random, precision_random, recall_random, f1_random = wrappedPredict(switched_tables_random)

try creating preprocessed data in G:\CPSC448\TURL\data\wikitables_v2\procressed_WikiCT\test.pickle
4764 test tables


100%|██████████| 4764/4764 [00:12<00:00, 370.36it/s]


Mode: 4


Evaluating: 100%|██████████| 239/239 [00:02<00:00, 107.81it/s]
  eval_precision = np.sum(eval_tp,axis=0)/np.sum(eval_mask[:,np.newaxis]*eval_pred,axis=0)
  eval_recall = np.sum(eval_tp,axis=0)/np.sum(eval_mask[:,np.newaxis]*eval_targets,axis=0)
  eval_f1 = 2*eval_precision*eval_recall/(eval_precision+eval_recall)


try creating preprocessed data in G:\CPSC448\TURL\data\wikitables_v2\procressed_WikiCT\test.pickle
4764 test tables


100%|██████████| 4764/4764 [00:08<00:00, 537.25it/s]


Mode: 4


Evaluating: 100%|██████████| 239/239 [00:02<00:00, 103.81it/s]


In [124]:
print("Random strategy: ", evaluateAcc(per_table_result_random), precision_random, recall_random, f1_random)
print("Smart strategy: ", evaluateAcc(per_table_result_importance), precision_importance, recall_importance, f1_importance)

Random strategy:  (0.7064875239923225, 13025.0) {4: 0.8279632721202004} {4: 0.3929873217115689} {4: 0.5329930145083289}
Smart strategy:  (0.6702495201535509, 13025.0) {4: 0.7943786982248521} {4: 0.3616877971473851} {4: 0.4970597843841882}
