In [2]:
import os
import re
import copy
import random
import collections
import torch
import numpy as np
import pandas as pd
import json
import pickle
import nltk

from tqdm import tqdm
from rank_bm25 import BM25Okapi
from tqdm import tqdm
from pathlib import Path
from torch.utils.data import Dataset
from transformers import TrainerCallback, AutoTokenizer
os.chdir('/home/s2310409/workspace/coliee-2024/')
from utils.misc import get_query, get_summary


def load_data(dir):
    """Load a label file into a two-column DataFrame.

    Args:
        dir: Path to a JSON file holding a single object whose keys are the
            source identifiers and whose values are the associated targets.

    Returns:
        A ``pandas.DataFrame`` with one row per key of the JSON object, in file
        order, and the columns ``source`` (the key) and ``target`` (its value).
    """
    with open(dir, 'r') as handle:
        mapping = json.load(handle)

    # One [key, value] record per entry; the frame is built once from the full list.
    records = [[case_id, mapping[case_id]] for case_id in mapping]
    return pd.DataFrame(records, columns=['source', 'target'])

## Query on all documents

In [3]:
# Load the pre-computed candidate list for every test query (keyed by query file name).
with open('dataset/c2023/bm25_candidates_test.json', 'r') as fp:
    candidate_dicts = json.load(fp)

data_df = load_data(f'dataset/test.json')

# Attach the candidate list and the query text to each row.
data_df['candidates'] = data_df['source'].apply(lambda x: candidate_dicts[x])
data_df['query'] = data_df['source'].apply(lambda x: get_query(x))

# Micro-averaged precision / recall / F1 of the raw candidate lists against the labels:
# every candidate counts as a retrieved document, every labelled target as a relevant one.
correct = 0
n_retrived = 0
n_relevant = 0

# Number of candidates per query, for the average printed below.
coverages = []

for index, row in data_df.iterrows():
    source = row['source']
    target = row['target']
    preds = row['candidates']
    coverages.append(len(preds))
    n_retrived += len(preds)
    n_relevant += len(target)
    for prediction in preds:
        if prediction in target:
            correct += 1

# NOTE(review): these divisions (and the F1 below) raise ZeroDivisionError when nothing is
# retrieved, nothing is relevant, or no candidate is correct.
precision = correct / n_retrived
recall = correct / n_relevant

print(f"Average # candidates: {np.mean(coverages)}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1: {2 * precision * recall / (precision + recall)}")

Average # candidates: 50.0
Precision: 0.03630094043887147
Recall: 0.6967509025270758
F1: 0.06900661462368154


In [15]:
# Display the frame built above (source, target, candidates, query).
data_df

Unnamed: 0,source,target,candidates,query
0,070318.txt,[015076.txt],"[032432.txt, 071237.txt, 019716.txt, 027423.tx...",rpo board adamidis guideline applicant hearing...
1,077960.txt,"[009054.txt, 040860.txt]","[071412.txt, 060516.txt, 024547.txt, 087722.tx...",removal custody child children order dunn idah...
2,042319.txt,"[093691.txt, 075956.txt, 084953.txt, 022987.txt]","[027719.txt, 067612.txt, 059275.txt, 026904.tx...",beyer cross affidavit prothonotary examination...
3,041766.txt,[039269.txt],"[071818.txt, 056351.txt, 009599.txt, 046346.tx...",drug clinical nds data 002 health omitted 08 n...
4,077407.txt,[038669.txt],"[038092.txt, 096647.txt, 056351.txt, 060210.tx...",communication 23 privilege litigation counsel ...
...,...,...,...,...
314,085079.txt,"[044669.txt, 003144.txt]","[080328.txt, 056351.txt, 068423.txt, 041404.tx...",cso promotions shephard cst adjudicator jse co...
315,031370.txt,"[096341.txt, 060602.txt, 047107.txt, 084522.tx...","[027678.txt, 086122.txt, 060516.txt, 031040.tx...",removal peru irreparable applicant 3d spouse p...
316,085828.txt,"[004301.txt, 074887.txt, 088994.txt]","[008459.txt, 053850.txt, 003821.txt, 087722.tx...",officer applicants india singh riots prra roop...
317,024957.txt,"[015009.txt, 080348.txt]","[066045.txt, 077315.txt, 075868.txt, 022332.tx...",annuity seizure civil 224 unseizable debtor co...


# Mono T5

In [4]:
def prompt(document, query):
    """Format one query/document pair as the text fed to the model.

    Args:
        document: Text placed after the ``##Document:`` marker.
        query: Text placed after the ``##Query:`` marker.

    Returns:
        The string ``##Query: <query> ##Document: <document> ##Relevant:``.
    """
    template = '##Query: {} ##Document: {} ##Relevant:'
    return template.format(query, document)

# Model inference

In [6]:
from transformers import (
    AutoConfig,
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    TrainingArguments
)
# The model weights are read from the local fine-tuned checkpoint directory, while the
# tokenizer is loaded by name from 'castorini/monot5-large-msmarco-10k'.
ckpt_dir = os.path.join('./train_logs/monot5-large-10k_hns/ckpt/checkpoint-521')
tokenizer = AutoTokenizer.from_pretrained('castorini/monot5-large-msmarco-10k')
model = AutoModelForSeq2SeqLM.from_pretrained(ckpt_dir).to('cuda')
# Switch to evaluation mode for inference.
model.eval()

T5ForConditionalGeneration(
  (shared): Embedding(32128, 1024)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 1024)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=1024, out_features=1024, bias=False)
              (k): Linear(in_features=1024, out_features=1024, bias=False)
              (v): Linear(in_features=1024, out_features=1024, bias=False)
              (o): Linear(in_features=1024, out_features=1024, bias=False)
              (relative_attention_bias): Embedding(32, 16)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=1024, out_features=4096, bias=False)
              (wo): Linear(in_features=4096, out_features=1024, bias=False)
              (d

In [28]:
# Smoke test: build and print the prompt for the first row of data_df.
e_id = 0
source = data_df.iloc[e_id].source
candidates = data_df.iloc[e_id].candidates
query = data_df.iloc[e_id].query
for candidate in candidates:
    # NOTE(review): the summary is always taken from '015076.txt' (the labelled target of
    # row 0 in the table above) rather than from `candidate`, and the `break` below stops
    # after one pass. Presumably a deliberate probe of the gold document — confirm.
    candidate_summary = get_summary('015076.txt')
    # Encode with truncation and decode again so the summary is capped at 450 tokens
    # before it is inserted into the prompt.
    text = prompt(document=tokenizer.decode(tokenizer.encode(candidate_summary, add_special_tokens=False, max_length=450, truncation=True)), query=query)
    print(text)
    break

##Query: rpo board guideline adamidis applicant france hearing bias ldk actions refugee apprehension member protection immigration recuse port kosovo cvv officer negative findings inference members mosley ##Document: Pelletier, J. : This is an application under section 82.1 of the Immigration Act, R.S.C. 1985, c. The CRDD found that the applicant was not a credible witness. The applicant argues that the CRDD's finding of implausibility was unreasonable. Application for judicial review of the decision of the Convention Refugee Determination Division, dated December 16, 1999. The application for judicial review of ten applicants were heard together because of certain common issues, one of which was whether the applicants had become refugee sur place. Each of the applicants made a claim before the Convention Refugee Determination Division ("CRDD") on the basis of well-founded fear of persecution of imputed political opinion The CRDD found that there was insufficient objective grounds to f

In [29]:
# Generate for the single prompt in `text`, asking generate() to also return the
# per-step scores so they can be inspected in the following cells.
inputs = tokenizer(text, return_tensors='pt').to('cuda')
outputs = model.generate(**inputs, output_scores=True, return_dict_in_generate=True, max_new_tokens=10)
outputs

GreedySearchEncoderDecoderOutput(sequences=tensor([[   0, 1176,    1]], device='cuda:0'), scores=(tensor([[-31.0093, -17.6671, -17.9352,  ..., -46.2695, -46.2710, -46.3849]],
       device='cuda:0'), tensor([[-127.4821,  -40.6877,  -88.1471,  ..., -174.7162, -175.4172,
         -175.3853]], device='cuda:0')), encoder_attentions=None, encoder_hidden_states=None, decoder_attentions=None, cross_attentions=None, decoder_hidden_states=None)

In [41]:
# Decode the generated token ids of the first sequence back to text (special tokens included).
tokenizer.decode(outputs.sequences[0])

'<pad> false</s>'

In [40]:
# Raw generated token ids of the first sequence.
outputs.sequences[0]

tensor([   0, 6136,    1], device='cuda:0')

In [37]:
# Tuple of per-step score tensors, available because generate() was called with output_scores=True.
(outputs.scores)

(tensor([[-33.8814, -18.8008, -19.5146,  ..., -48.9525, -48.9488, -49.0627]],
        device='cuda:0'),
 tensor([[-128.0669,  -41.3602,  -88.5412,  ..., -175.1797, -175.8803,
          -175.8492]], device='cuda:0'))

In [38]:
# Classify every (query, candidate) pair with the fine-tuned model.
#
# prediction_dict[source] has two entries:
#   'result': candidates whose decoded generation contains the word 'true'
#   'raw':    {candidate: {'sequences': generated token ids,
#                          'scores': score of the generated token at each step}}
#
# Changes from the previous version of this cell:
#   * summaries come from get_summary() (imported at the top and used by the other cells);
#     `summary_data` was never defined in this notebook, so the cell failed on a fresh kernel;
#   * 'raw' is keyed by candidate. It used to be reassigned on every iteration, so only the
#     last candidate of each query was kept;
#   * every decoding step is recorded instead of indexing scores[0] and scores[1], which
#     breaks when generation stops after a different number of steps;
#   * only the score of each generated token is stored, not the full-vocabulary tensors,
#     so keeping one record per pair stays small.
prediction_dict = {}
for e_id in tqdm(range(len(data_df))):
    row = data_df.iloc[e_id]
    source = row['source']
    candidates = row['candidates']
    query = row['query']
    prediction_dict[source] = {
        'result': [],
        'raw': {}
    }
    for candidate in candidates:
        candidate_summary = get_summary(candidate)
        # Encode with truncation and decode again so the summary is capped at 450 tokens
        # before it is inserted into the prompt.
        text = prompt(document=tokenizer.decode(tokenizer.encode(candidate_summary, add_special_tokens=False, max_length=450, truncation=True)), query=query)
        inputs = tokenizer(text, return_tensors='pt').to('cuda')
        with torch.no_grad():
            outputs = model.generate(**inputs, output_scores=True, return_dict_in_generate=True, max_new_tokens=10)

        generated = outputs.sequences[0]
        # sequences[0] starts with the decoder start token, so generated[1:] lines up with
        # outputs.scores (one score tensor per generated token).
        prediction_dict[source]['raw'][candidate] = {
            'sequences': generated.cpu().numpy().tolist(),
            'scores': [float(step[0, token]) for step, token in zip(outputs.scores, generated[1:])]
        }

        decoded_output = tokenizer.decode(generated)
        if 'true' in decoded_output:
            prediction_dict[source]['result'].append(candidate)

  0%|                                                                                                                        | 0/319 [00:00<?, ?it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (519 > 512). Running this sequence through the model will result in indexing errors
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 319/319 [54:18<00:00, 10.21s/it]


In [39]:
# Attach, for each query, the candidates the model classified as relevant.
data_df['prediction'] = data_df['source'].apply(lambda x: prediction_dict[x]['result'])

# Micro-averaged precision / recall / F1 of the model predictions (the 'prediction' column)
# against the labelled targets.
correct = 0
n_retrived = 0
n_relevant = 0

# Number of predicted documents per query.
coverages = []

for index, row in data_df.iterrows():
    source = row['source']
    target = row['target']
    preds = row['prediction']
    coverages.append(len(preds))
    n_retrived += len(preds)
    n_relevant += len(target)
    for prediction in preds:
        if prediction in target:
            correct += 1

# NOTE(review): these divisions (and the F1 below) raise ZeroDivisionError when nothing is
# predicted, nothing is relevant, or no prediction is correct.
precision = correct / n_retrived
recall = correct / n_relevant

# NOTE(review): `file_list` is not defined at notebook level anywhere in the visible cells
# (only as a local inside functions), so this line depends on leftover kernel state and fails
# on a fresh kernel. Presumably it should be the list of files in the retrieval pool — confirm
# and define it explicitly.
print(f"Coverage: {np.mean(coverages)/len(file_list)}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1: {2 * precision * recall / (precision + recall)}")

Coverage: 0.08880463032844525
Precision: 0.016011138183083886
Recall: 0.6642599277978339
F1: 0.0312685869657575


# Pygaggle reranking

In [7]:
import sys
# Put the local pygaggle directory on the import path so `pygaggle` can be imported below.
# NOTE(review): absolute, user-specific path — adjust when running elsewhere.
sys.path.append('/home/s2310409/workspace/coliee-2024/modules/pygaggle')
from utils.dataset import build_dataset

# Build the test split with the project helper.
test_dataset = build_dataset(mode='test')

2024-01-12 16:43:23 [INFO] env: 
Using override env var JVM_PATH (/home/s2310409/jdk/lib/server/libjvm.so) to load libjvm.
Please report your system information (os version, java
version, etc), and the path that works for you, to the
PyJNIus project, at https://github.com/kivy/pyjnius/issues.
so we can improve the automatic discovery.

2024-01-12 16:43:24 [INFO] loader: Loading faiss with AVX2 support.
2024-01-12 16:43:24 [INFO] loader: Successfully loaded faiss with AVX2 support.


In [49]:
import jsonlines
import subprocess
import shutil


def create_bm25_indexes(segment="test"):
    """Build a Lucene BM25 index (via ``pyserini.index``) for one data segment.

    The documents to index are chosen as follows:

    * ``segment == "test"``: every ``.txt`` file in ``dataset/c2023/test_files``;
    * otherwise: every file named in ``dataset/{segment}.json``, either as a key
      or inside one of the value lists.

    The text of each document is read from ``dataset/processed/<file name>`` and
    written as one JSON line (``id``, ``contents``) to ``tmp/candidate.jsonl``.
    The indexer is then run on ``tmp`` and writes to ``bm25/{segment}``. Both
    directories are deleted and recreated on every call.

    Args:
        segment: Name of the data segment to index.

    Raises:
        FileNotFoundError: If a selected document has no file in ``dataset/processed``.
        subprocess.CalledProcessError: If the indexer exits with a non-zero status
            (previously a failed run went unnoticed and left an unusable index).
    """
    tmp_dir = "tmp"
    shutil.rmtree(tmp_dir, ignore_errors=True)
    os.makedirs(tmp_dir, exist_ok=True)

    indexes_dir = f'bm25/{segment}'
    shutil.rmtree(indexes_dir, ignore_errors=True)
    os.makedirs(indexes_dir, exist_ok=True)

    if segment == "test":
        file_list = [f for f in os.listdir(f'dataset/c2023/{segment}_files') if f.endswith('.txt')]
    else:
        with open(f'dataset/{segment}.json', 'r') as fp:
            data_dict = json.load(fp)
        all_files = set(data_dict.keys())
        for related_files in data_dict.values():
            all_files.update(related_files)
        file_list = list(all_files)
    file_list = sorted(file_list)

    # Open the collection file once and stream the documents into it; only the documents
    # that are actually indexed are read from disk. tmp_dir was just recreated, so writing
    # a fresh file is equivalent to the former per-document append.
    with jsonlines.open(f"{tmp_dir}/candidate.jsonl", mode="w") as writer:
        for case in tqdm(file_list):
            with open(f"dataset/processed/{case}", 'r') as fp:
                processed_document = fp.read()
            writer.write({"id": f"{case}", "contents": processed_document})

    # NOTE(review): the interpreter path is specific to one machine/conda environment.
    subprocess.run(["/home/s2310409/miniconda3/envs/coliee-24/bin/python", "-m", "pyserini.index", "-collection", "JsonCollection",
                    "-generator", "DefaultLuceneDocumentGenerator", "-threads", "1", "-input",
                    f"{tmp_dir}", "-index", f"{indexes_dir}", "-storePositions", "-storeDocvectors",
                    "-storeRaw"], check=True)
    
# (Re)build the BM25 index for the dev segment; the index is written to bm25/dev.
create_bm25_indexes(segment="dev")

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 505/505 [00:01<00:00, 406.41it/s]


2024-01-13 14:01:32,202 INFO  [main] index.IndexCollection (IndexCollection.java:391) - Setting log level to INFO
2024-01-13 14:01:32,203 INFO  [main] index.IndexCollection (IndexCollection.java:394) - Starting indexer...
2024-01-13 14:01:32,203 INFO  [main] index.IndexCollection (IndexCollection.java:396) - DocumentCollection path: tmp
2024-01-13 14:01:32,203 INFO  [main] index.IndexCollection (IndexCollection.java:397) - CollectionClass: JsonCollection
2024-01-13 14:01:32,203 INFO  [main] index.IndexCollection (IndexCollection.java:398) - Generator: DefaultLuceneDocumentGenerator
2024-01-13 14:01:32,204 INFO  [main] index.IndexCollection (IndexCollection.java:399) - Threads: 1
2024-01-13 14:01:32,204 INFO  [main] index.IndexCollection (IndexCollection.java:400) - Language: en
2024-01-13 14:01:32,204 INFO  [main] index.IndexCollection (IndexCollection.java:401) - Stemmer: porter
2024-01-13 14:01:32,204 INFO  [main] index.IndexCollection (IndexCollection.java:402) - Keep stopwords? fal

In [37]:
from pyserini.search import LuceneSearcher
from collections import defaultdict

def predict_bm25(searcher, query, case):
    """Score documents for one query with the given searcher.

    Args:
        searcher: Object exposing ``search(query, k=...)`` that returns hits with
            ``docid`` and ``score`` attributes.
        query: Query passed to the searcher unchanged.
        case: Document id of the query itself; hits with this id are dropped.

    Returns:
        A ``defaultdict`` (default ``0``) mapping each remaining hit's ``docid``
        to the largest score seen for it. Because the running maximum starts at
        the default ``0``, stored scores are never below 0.
    """
    best_by_doc = defaultdict(int)
    for hit in searcher.search(query, k=10000):
        doc_id = hit.docid
        if doc_id == case:
            continue
        best_by_doc[doc_id] = max(hit.score, best_by_doc[doc_id])
    return best_by_doc

def predict_all_bm25(bm25_index_path, eval_segment="test",
                     k1=None, b=None, topk=None):
    """Compute BM25 scores for every query case of a segment.

    The query cases are the keys of ``dataset/{eval_segment}.json``, processed in
    sorted order. Each query text comes from ``get_query(case)`` and is scored
    with ``predict_bm25``.

    Args:
        bm25_index_path: Path of the Lucene index opened with ``LuceneSearcher``.
        eval_segment: Segment name used to locate the label file.
        k1: BM25 ``k1`` parameter. Applied only when both ``k1`` and ``b`` are
            given; ``0`` is accepted as a value.
        b: BM25 ``b`` parameter (see ``k1``).
        topk: If not ``None``, keep only the ``topk`` highest-scoring documents
            per query.

    Returns:
        ``{case: {docid: score}}``. The inner mapping is the ``defaultdict``
        returned by ``predict_bm25`` when ``topk`` is ``None`` and a plain dict
        ordered by descending score otherwise.

    Raises:
        Exception: Whatever ``predict_bm25`` raises is re-raised unchanged after
            the failing case has been printed.
    """
    searcher = LuceneSearcher(bm25_index_path)
    # Compare against None: a truthiness test silently ignored settings such as b=0.
    if k1 is not None and b is not None:
        print(f"k1: {k1}, b: {b}")
        searcher.set_bm25(k1, b)

    with open(f'dataset/{eval_segment}.json', 'r') as fp:
        data_dict = json.load(fp)

    file_list = sorted(data_dict.keys())

    bm25_scores = {}
    for case in tqdm(file_list):
        query = get_query(case)
        try:
            score = predict_bm25(searcher, query, case)
        except Exception:
            # Report which case failed, then let the original error (type, message and
            # traceback) propagate instead of replacing it with a bare Exception.
            print(f"Error: {case}")
            raise
        if topk is not None:
            sorted_score = sorted(score.items(), key=lambda x: x[1], reverse=True)[:topk]
            score = {x[0]: x[1] for x in sorted_score}
        bm25_scores[case] = score

    return bm25_scores

In [38]:
from pygaggle.rerank.base import Query, Text
from pygaggle.rerank.transformer import MonoT5
from utils.misc import load_json
from transformers import AutoConfig, AutoTokenizer, AutoModel, \
    T5ForConditionalGeneration

def predict_monot5(reranker, doc, candidate_cases, config):
    """Score the candidates of one query with a reranker.

    Args:
        reranker: Object exposing ``rerank(query, texts)`` whose results carry
            ``metadata["docid"]`` and ``score``.
        doc: Query text. Lower-cased first when ``config["train_uncased"]`` is truthy.
        candidate_cases: Iterable of candidate ids; the text of each candidate is
            obtained with ``get_summary``.
        config: Mapping that must contain the key ``"train_uncased"``.

    Returns:
        A ``defaultdict`` (default ``0``) mapping candidate id to
        ``exp(score) * 100``, keeping the maximum if an id occurs more than once.
    """
    query_text = doc.lower() if config["train_uncased"] else doc
    query = Query(query_text)

    # Only the query is lower-cased here; candidate summaries are passed through as returned.
    texts = [
        Text(get_summary(case_id), metadata={"docid": case_id})
        for case_id in candidate_cases
    ]

    monot5_score = defaultdict(int)
    for reranked in reranker.rerank(query, texts):
        doc_id = reranked.metadata["docid"]
        # presumably `score` is a log-probability, making this a percentage — TODO confirm
        monot5_score[doc_id] = max(monot5_score[doc_id], np.exp(reranked.score) * 100)
    return monot5_score

def predict_all_monot5(ckpt_path, candidate_dict, eval_segment="test"):
    """Score the candidate lists of every query case of a segment with MonoT5.

    The query cases are the keys of ``dataset/{eval_segment}.json``, processed in
    sorted order. Each query text comes from ``get_query(case)`` and its candidates
    from ``candidate_dict[case]``; scoring is delegated to ``predict_monot5``.

    Args:
        ckpt_path: Model name or checkpoint directory passed to
            ``T5ForConditionalGeneration.from_pretrained``. When the string
            contains ``"ckpt"``, the run configuration is loaded from
            ``configs/monot5-large-10k_hns.json``; otherwise
            ``{"train_uncased": False}`` is used.
        candidate_dict: Mapping from query case to its list of candidate ids.
        eval_segment: Segment name used to locate the label file.

    Returns:
        ``{case: scores}`` where ``scores`` is the mapping returned by
        ``predict_monot5`` for that case.

    Raises:
        Exception: Whatever ``predict_monot5`` raises is re-raised unchanged after
            the failing case has been printed.
    """
    # Use the GPU when there is one; fall back to CPU instead of failing outright.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = T5ForConditionalGeneration.from_pretrained(ckpt_path).to(device).eval()
    reranker = MonoT5(model=model)

    if "ckpt" not in ckpt_path:
        config = {"train_uncased": False}
    else:
        # NOTE(review): the config file is fixed and not derived from ckpt_path, so it only
        # matches checkpoints trained with that configuration — confirm for other runs.
        config_path = "configs/monot5-large-10k_hns.json"
        config = load_json(config_path)

    with open(f'dataset/{eval_segment}.json', 'r') as fp:
        data_dict = json.load(fp)

    file_list = sorted(data_dict.keys())

    monot5_scores = {}
    for case in tqdm(file_list):
        query = get_query(case)
        candidates = candidate_dict[case]
        try:
            score = predict_monot5(reranker, query, candidates, config)
        except Exception:
            # Report which case failed, then let the original error (type, message and
            # traceback) propagate instead of replacing it with a bare Exception.
            print(f"Error: {case}")
            raise
        monot5_scores[case] = score

    return monot5_scores

In [None]:
def eval_bm25_end_model_ranking(bm25_scores, scores, candidate_dict, top_k=1, margin=0, alpha=1,
                                eval_segment="test"):
    """Turn BM25 and model scores into predictions and compute micro F1 / precision / recall.

    For every query case in ``dataset/{eval_segment}.json`` each candidate gets a
    final score:

    * ``alpha == 1``: the model score, or ``0`` if the candidate is not among the
      BM25 hits of that query;
    * otherwise: ``alpha * model_score + (1 - alpha) * bm25_score`` (BM25 score
      ``0`` when missing).

    The best-scoring candidate is always predicted. Each of the other candidates
    within the ``top_k`` highest is predicted as well when its final score is less
    than ``margin`` below the best one.

    Args:
        bm25_scores: ``{case: {docid: bm25_score}}``.
        scores: ``{case: {docid: model_score}}``.
        candidate_dict: ``{case: [candidate docid, ...]}``.
        top_k: Number of highest-scoring candidates considered per query.
        margin: Maximum (exclusive) score gap to the best candidate.
        alpha: Weight of the model score in the interpolation.
        eval_segment: Segment name used to locate the label file.

    Returns:
        ``[f1, precision, recall]``. Any value whose denominator is zero is
        reported as ``0.0`` (previously ``nan`` or a ``ZeroDivisionError``).
    """
    print(f"\n[{eval_segment}] k: {top_k} - margin: {margin} - alpha: {alpha}")

    # The label file is read once; the handle gets its own name so it no longer shares
    # `fp` with the false-positive counter.
    with open(f'dataset/{eval_segment}.json', 'r') as label_file:
        label_data = json.load(label_file)

    # NOTE(review): false negatives are counted only among the candidates of each query, so
    # relevant documents missing from candidate_dict do not lower recall — confirm intended.
    tp, fp, fn = 0, 0, 0
    for case, relevant in label_data.items():
        bm25_score = bm25_scores[case]
        score = scores[case]
        candidate_cases = candidate_dict[case]
        if not candidate_cases:
            # Nothing to rank for this query (this used to raise IndexError).
            continue

        if alpha == 1:
            final_score = [score[cand] if cand in bm25_score else 0 for cand in candidate_cases]
        else:
            final_score = [alpha * score[cand] + (1 - alpha) * bm25_score.get(cand, 0)
                           for cand in candidate_cases]

        label = np.array([1 if cand in relevant else 0 for cand in candidate_cases])

        # Indices of the top_k candidates in ascending score order; the last one is the best.
        top_ind = np.argsort(final_score)[-top_k:]
        best = top_ind[-1]
        pred = np.zeros_like(label)
        pred[best] = 1
        for i in top_ind[:-1]:
            if final_score[best] - final_score[i] < margin:
                pred[i] = 1

        tp += int(np.sum((pred == 1) & (label == 1)))
        fp += int(np.sum((pred == 1) & (label == 0)))
        fn += int(np.sum((pred == 0) & (label == 1)))

    p = tp / (tp + fp) if (tp + fp) else 0.0
    r = tp / (tp + fn) if (tp + fn) else 0.0
    f1 = 2 * ((p * r) / (p + r)) if (p + r) else 0.0

    return [f1, p, r]

def eval_bm25_end_model(bm25_index_path, candidate_dict, ckpt_path=None, top_k=None, margin=None, alpha=None,
                        eval_segment="test", model_class="monot5"):
    """Score a segment with BM25 and a reranking model, then evaluate or tune the ranking rule.

    Args:
        bm25_index_path: Lucene index used for the BM25 scores of ``eval_segment``.
        candidate_dict: ``{case: [candidate docid, ...]}`` for ``eval_segment``.
        ckpt_path: Checkpoint handed to the model scoring function.
        top_k: If given, evaluate exactly this ``(top_k, margin, alpha)`` setting.
            If ``None``, grid-search ``top_k`` in 1..10, ``margin`` in 0..9 and
            ``alpha`` in 0.5..1 and keep the setting with the highest F1.
        margin: Score margin used when ``top_k`` is given.
        alpha: Interpolation weight used when ``top_k`` is given.
        eval_segment: Segment to evaluate. With ``"dev"`` and a grid search, the best
            setting is additionally evaluated on the test segment using the
            ``bm25/test`` index and ``dataset/c2023/bm25_candidates_test_50.json``.
        model_class: Only ``"monot5"`` is supported.

    Returns:
        * ``top_k`` given: ``[f1, precision, recall]``;
        * grid search on ``"dev"``: ``(best_metric, test_metric, best_config)``;
        * grid search otherwise: ``(best_metric, best_config)``.

        ``best_config`` is ``[top_k, margin, alpha]``.

    Raises:
        ValueError: If ``model_class`` is not supported.
    """
    # Validate the model class first so a typo fails before the slow BM25 pass.
    if model_class == "monot5":
        predict_func = predict_all_monot5
    else:
        raise ValueError(model_class)

    bm25_scores = predict_all_bm25(bm25_index_path, eval_segment)
    scores = predict_func(ckpt_path, candidate_dict, eval_segment)

    if top_k is not None:
        return eval_bm25_end_model_ranking(bm25_scores, scores, candidate_dict, top_k, margin, alpha, eval_segment)

    list_k = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    list_margin = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    list_alpha = [0.5, 0.6, 0.7, 0.8, 0.9, 1]

    best_metric = [0, 0, 0]
    best_config = []

    # Separate loop names keep the `margin` / `alpha` parameters from being overwritten.
    for k in list_k:
        for grid_margin in list_margin:
            for grid_alpha in list_alpha:
                res = eval_bm25_end_model_ranking(bm25_scores, scores, candidate_dict, k, grid_margin, grid_alpha, eval_segment)
                if res[0] > best_metric[0]:
                    best_metric = res
                    best_config = [k, grid_margin, grid_alpha]

    if not best_config:
        # No setting reached F1 > 0: fall back to the first grid point so the returned
        # configuration is always usable (best_config[0] below used to raise IndexError).
        best_config = [list_k[0], list_margin[0], list_alpha[0]]
    print(f"[{eval_segment}] Best metrics: {best_metric} - {best_config}")

    if eval_segment == "dev":
        # Apply the setting tuned on dev to the test segment.
        bm25_test_index_path = "bm25/test"
        bm25_test_scores = predict_all_bm25(bm25_test_index_path, eval_segment="test")
        with open('dataset/c2023/bm25_candidates_test_50.json', 'r') as fp:
            test_candidate_dict = json.load(fp)

        test_scores = predict_func(ckpt_path, test_candidate_dict, eval_segment="test")
        test_metric = eval_bm25_end_model_ranking(bm25_test_scores, test_scores, test_candidate_dict,
            best_config[0], best_config[1], best_config[2], "test")
        print(f"[test] Best metrics: {test_metric} - {best_config}")
        return best_metric, test_metric, best_config

    return best_metric, best_config

In [19]:
# Settings for one evaluation run on the test segment.
bm25_index_path='bm25/test'
ckpt_path='train_logs/monot5-large-10k_hns/ckpt/checkpoint-521'
top_k=2
margin=2
alpha=0.7
eval_segment='test'
model_class='monot5'

bm25_scores = predict_all_bm25(bm25_index_path, eval_segment)
# NOTE(review): predict_all_monot5 is defined in this notebook as
# (ckpt_path, candidate_dict, eval_segment="test"), so this call passes eval_segment in the
# candidate_dict position. It matches an earlier version of the function and must be updated
# (pass the candidate mapping) before the cell can be re-run.
scores = predict_all_monot5(ckpt_path, eval_segment)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1335/1335 [03:47<00:00,  5.87it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1335/1335 [36:30<00:00,  1.64s/it]


In [26]:
# NOTE(review): eval_bm25_end_model_ranking is defined in this notebook as
# (bm25_scores, scores, candidate_dict, top_k=1, margin=0, alpha=1, eval_segment="test"), so
# these positional arguments are shifted by one (top_k lands in candidate_dict). The call
# matches an earlier version of the function and must be updated before re-running.
eval_bm25_end_model_ranking(bm25_scores, scores, top_k, margin, alpha, eval_segment)


[test] k: 2 - margin: 2 - alpha: 0.7
[test] Metrics: [0.19759450171821305, 0.19658119658119658, 0.19861830742659758] - [2, 2, 0.7]


[0.19759450171821305, 0.19658119658119658, 0.19861830742659758]

In [53]:
# Candidate lists for the dev queries.
with open('dataset/c2023/bm25_candidates_dev_50.json', 'r') as fp:
    candidate_dict = json.load(fp)

# top_k is left at its default (None), so this runs the grid search on dev and, because
# eval_segment is 'dev', also evaluates the best setting on the test segment.
eval_bm25_end_model(
    bm25_index_path='bm25/dev',
    candidate_dict=candidate_dict,
    ckpt_path='train_logs/monot5-large-10k_hns/ckpt/checkpoint-938',
    eval_segment='dev',
    model_class='monot5'
)

  0%|                                                                                                                         | 0/96 [00:00<?, ?it/s]

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 96/96 [00:06<00:00, 13.87it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 96/96 [02:43<00:00,  1.70s/it]



[dev] k: 1 - margin: 0 - alpha: 0.5
[dev] Metrics: [0.21809744779582366, 0.4895833333333333, 0.14029850746268657] - [1, 0, 0.5]

[dev] k: 1 - margin: 0 - alpha: 0.6
[dev] Metrics: [0.23201856148491876, 0.5208333333333334, 0.14925373134328357] - [1, 0, 0.6]

[dev] k: 1 - margin: 0 - alpha: 0.7
[dev] Metrics: [0.24129930394431554, 0.5416666666666666, 0.15522388059701492] - [1, 0, 0.7]

[dev] k: 1 - margin: 0 - alpha: 0.8
[dev] Metrics: [0.24129930394431554, 0.5416666666666666, 0.15522388059701492] - [1, 0, 0.8]

[dev] k: 1 - margin: 0 - alpha: 0.9
[dev] Metrics: [0.24129930394431554, 0.5416666666666666, 0.15522388059701492] - [1, 0, 0.9]

[dev] k: 1 - margin: 0 - alpha: 1
[dev] Metrics: [0.12064965197215777, 0.2708333333333333, 0.07761194029850746] - [1, 0, 1]

[dev] k: 1 - margin: 1 - alpha: 0.5
[dev] Metrics: [0.21809744779582366, 0.4895833333333333, 0.14029850746268657] - [1, 1, 0.5]

[dev] k: 1 - margin: 1 - alpha: 0.6
[dev] Metrics: [0.23201856148491876, 0.5208333333333334, 0.14925

  0%|                                                                                                                        | 0/319 [00:00<?, ?it/s]

[dev] Metrics: [0.3290155440414508, 0.2906178489702517, 0.37910447761194027] - [5, 6, 0.6]

[dev] k: 5 - margin: 6 - alpha: 0.7
[dev] Metrics: [0.3384615384615384, 0.2966292134831461, 0.3940298507462687] - [5, 6, 0.7]

[dev] k: 5 - margin: 6 - alpha: 0.8
[dev] Metrics: [0.33541927409261574, 0.28879310344827586, 0.4] - [5, 6, 0.8]

[dev] k: 5 - margin: 6 - alpha: 0.9
[dev] Metrics: [0.3165644171779141, 0.26875, 0.3850746268656716] - [5, 6, 0.9]

[dev] k: 5 - margin: 6 - alpha: 1
[dev] Metrics: [0.2576687116564417, 0.21875, 0.31343283582089554] - [5, 6, 1]

[dev] k: 5 - margin: 7 - alpha: 0.5
[dev] Metrics: [0.3311603650586701, 0.29398148148148145, 0.37910447761194027] - [5, 7, 0.5]

[dev] k: 5 - margin: 7 - alpha: 0.6
[dev] Metrics: [0.327319587628866, 0.28798185941043086, 0.37910447761194027] - [5, 7, 0.6]

[dev] k: 5 - margin: 7 - alpha: 0.7
[dev] Metrics: [0.337992376111817, 0.2942477876106195, 0.3970149253731343] - [5, 7, 0.7]

[dev] k: 5 - margin: 7 - alpha: 0.8
[dev] Metrics: [0.3

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 319/319 [00:53<00:00,  5.99it/s]
 40%|███████████████████████████████████████████▊                                                                  | 127/319 [03:28<05:16,  1.65s/it]