In [1]:
import os
os.environ['GROQ_API_KEY'] = open('api_key.txt', 'r').read()

In [2]:
from search_engine_versions.bm25_abstract_only import SearchEngine as bm25_abstract_only_search_engine
from search_engine_versions.semantic_abstract_only import SearchEngine as semantic_abstract_only_search_engine
from search_engine_versions.hybrid_abstract_only import SearchEngine as hybrid_abstract_only_search_engine
from search_engine_versions.max_sim_per_context_sentences_abstract_only import SearchEngine as max_sim_per_context_sentences_abstract_only_search_engine
from search_engine_versions.max_sim_cross_context_sentences_abstract_only import SearchEngine as max_sim_cross_context_sentences_abstract_only_search_engine
from search_engine_versions.max_sim_per_context_chunks_abstract_only import SearchEngine as max_sim_per_context_chunks_abstract_only_search_engine
from search_engine_versions.max_sim_cross_context_chunks_abstract_only import SearchEngine as max_sim_cross_context_chunks_abstract_only_search_engine
from search_engine_versions.bm25_title_only import SearchEngine as bm25_title_only_search_engine
from search_engine_versions.bm25_abstract_and_title import SearchEngine as bm25_title_and_abstract_search_engine
from search_engine_versions.semantic_abstract_and_title import SearchEngine as semantic_title_and_abstract_search_engine
from search_engine_versions.hybrid_abstract_and_title import SearchEngine as hybrid_title_and_abstract_search_engine
from search_engine_versions.max_sim_coss_context_chunks_abstract_and_title import SearchEngine as max_sim_cross_context_chunks_title_and_abstract_search_engine
from search_engine_versions.max_sim_per_context_chunks_abstract_and_title import SearchEngine as max_sim_per_context_chunks_title_and_abstract_search_engine
from search_engine_versions.max_sim_cross_context_sentences_abstract_and_title import SearchEngine as max_sim_cross_context_sentences_title_and_abstract_search_engine
from search_engine_versions.max_sim_per_context_sentences_abstract_and_title import SearchEngine as max_sim_per_context_sentences_title_and_abstract_search_engine
# ...

from query_evaluator import evaluate_articles_boolean, evaluate_articles_levels
from query_generator import generate_and_save_queries
import numpy as np
import pandas as pd
import json


# For queries gen
# path = './app/data/example.json'
# with open(path, 'r', encoding='utf-8') as f:
#     data = f.readlines()
#     data = [json.loads(line) for line in data]
# generate_and_save_queries(data, num_queries=100, output_path='./app/data/queries.json')

DATA_DIR = "./data/"
DATA_FILES = ["example.json"]
SPLIT_SIZE_LIMIT = 100

search_engines = {}
'''
Alguns dos nossos modelos serão avaliados utilizando: 
    1. Apenas o abstract como informação para o vespa. O título de um dos
        documentos servirá como query
    2. O título e o abstract como informação para o vespa. Vamos utilizar
        queries geradas por LLM.
    3. Apenas o título como informação para o vespa. O título de um dos
        documentos servirá como query. Esse caso só será avaliado com o BM25.
Para facilitar a execução dos experimentos, criamos um dicionário com os 
modelos que serão avaliados em cada caso.
'''
models = {
    'title_query': [
        "bm25_abstract_only",
        "semantic_abstract_only",
        "hybrid_abstract_only",
        "max_sim_per_context_sentences_abstract_only",
        "max_sim_cross_context_sentences_abstract_only",
        "max_sim_per_context_chunks_abstract_only",
        "max_sim_cross_context_chunks_abstract_only",
    ],
    'llm_query': [
        "bm25_title_only",
        "bm25_title_and_abstract",
        "semantic_title_and_abstract",
        "hybrid_title_and_abstract",
        "max_sim_per_context_sentences_title_and_abstract",
        "max_sim_cross_context_sentences_title_and_abstract",
        "max_sim_per_context_chunks_title_and_abstract",
        "max_sim_cross_context_chunks_title_and_abstract",
    ]
}
search_engines_dict = {
        # "bm25_abstract_only": bm25_abstract_only_search_engine,
        # "semantic_abstract_only": semantic_abstract_only_search_engine,
        # "hybrid_abstract_only": hybrid_abstract_only_search_engine,
        # "max_sim_per_context_sentences_abstract_only": max_sim_per_context_sentences_abstract_only_search_engine,
        # "max_sim_cross_context_sentences_abstract_only": max_sim_cross_context_sentences_abstract_only_search_engine,
        # "max_sim_per_context_chunks_abstract_only": max_sim_per_context_chunks_abstract_only_search_engine,
        # "max_sim_cross_context_chunks_abstract_only": max_sim_cross_context_chunks_abstract_only_search_engine,
        "bm25_title_only": bm25_title_only_search_engine,
        "bm25_title_and_abstract": bm25_title_and_abstract_search_engine,
        "semantic_title_and_abstract": semantic_title_and_abstract_search_engine,
        "hybrid_title_and_abstract": hybrid_title_and_abstract_search_engine,
        "max_sim_per_context_sentences_title_and_abstract": max_sim_per_context_sentences_title_and_abstract_search_engine,
        "max_sim_cross_context_sentences_title_and_abstract": max_sim_cross_context_sentences_title_and_abstract_search_engine,
        "max_sim_per_context_chunks_title_and_abstract": max_sim_per_context_chunks_title_and_abstract_search_engine,
        "max_sim_cross_context_chunks_title_and_abstract": max_sim_cross_context_chunks_title_and_abstract_search_engine,
}

from sheets_connection import SaveMetrics

save_metrics = SaveMetrics()


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def average_precision(relevances):
    relevant = np.where(relevances == 1)[0] + 1  # get positions of relevant docs, +1 because index is zero-based
    if len(relevant) == 0:
        return 0
    precision_at_k = np.cumsum(relevances) / (np.arange(len(relevances)) + 1)
    return np.sum(precision_at_k[relevances == 1]) / len(relevant)

def dcg(relevances):
    k = len(relevances)
    relevances = np.array(relevances, dtype=float)
    if relevances.size:
        return relevances[0] + np.sum(relevances[1:] / np.log2(np.arange(2, relevances.size + 1)))
    return 0

def ndcg(relevances):
    k = len(relevances)
    dcg_val = dcg(relevances)
    ideal_relevances = sorted(relevances, reverse=True)
    idcg_val = dcg(ideal_relevances)
    return dcg_val / idcg_val if idcg_val > 0 else 0

with open(DATA_DIR + "queries.json") as f:
    queries = f.readlines()
    queries = [json.loads(line) for line in queries]


def test_search_engine(SearchEngine, name):
    search_engine = SearchEngine()
    search_engine.feed_json(DATA_DIR, DATA_FILES, SPLIT_SIZE_LIMIT)
    search_engines[name] = search_engine

    aps = {}
    ndcgs_levels = {}
    ndcgs_boolean = {}

    for query_obj in queries[:10]:
        query = query_obj['query']
        search_results = search_engine.search(query, 10)
        search_results.rename(columns={'body': 'abstract'}, inplace=True)

        evaluated_results_boolean = evaluate_articles_boolean(search_results.to_dict(orient="records"), query, verbose=False)
        evaluated_results_boolean = pd.DataFrame(evaluated_results_boolean)
        evaluated_results_boolean['eval'] = pd.to_numeric(evaluated_results_boolean['eval'])
        print(evaluated_results_boolean)

        evaluated_results_levels = evaluate_articles_levels(search_results.to_dict(orient="records"), query, verbose=False)
        evaluated_results_levels = pd.DataFrame(evaluated_results_levels)
        evaluated_results_levels['eval'] = pd.to_numeric(evaluated_results_levels['eval'])
        print(evaluated_results_levels)

        aps[query_obj['id']] = average_precision(evaluated_results_boolean['eval'])
        ndcgs_levels[query_obj['id']] = ndcg(evaluated_results_levels['eval'])
        ndcgs_boolean[query_obj['id']] = ndcg(evaluated_results_boolean['eval'])
    
    metrics = {
        'aps': aps,
        'ndcgs_levels': ndcgs_levels,
        'ndcgs_boolean': ndcgs_boolean
    }
    return metrics

def get_search_engine_results(SearchEngine, name):
    search_engine = SearchEngine()
    search_engine.feed_json(DATA_DIR, DATA_FILES, SPLIT_SIZE_LIMIT)
    search_engines[name] = search_engine

    # Cria o arquivo com o nome da search engine
    with open(f"{DATA_DIR}search_engine_results/{name}.json", "w") as f:
        for query_obj in queries[:20]:
            query = query_obj['query']
            search_results = search_engine.search(query, 10)
            search_results.rename(columns={'body': 'abstract'}, inplace=True)
            # f.write(query)
            # f.write("\n")
            f.write(search_results.to_json(orient="records"))
            f.write("\n")


def evaluate_and_save_metrics_from_json(search_results, name):
    for i, query_obj in enumerate(queries[:20]):
        query = query_obj['query']
        search_result = search_results[i]

        evaluated_results_boolean = evaluate_articles_boolean(search_result, query, verbose=False)
        evaluated_results_boolean = pd.DataFrame(evaluated_results_boolean)
        evaluated_results_boolean['eval'] = pd.to_numeric(evaluated_results_boolean['eval'])
        print(evaluated_results_boolean)

        evaluated_results_levels = evaluate_articles_levels(search_result, query, verbose=False)
        evaluated_results_levels = pd.DataFrame(evaluated_results_levels)
        evaluated_results_levels['eval'] = pd.to_numeric(evaluated_results_levels['eval'])
        print(evaluated_results_levels)

        # Avarage Precision
        range_binary_map_llm = save_metrics.set_metric_of_query_and_model(
            type_of_metric='binary_map_llm',
            query=query_obj['id'],
            model_name=name,
            result=average_precision(evaluated_results_boolean['eval'])
        )
        print(f"Resultado salvo para o modelo {name} e query {query_obj['id']} no local {range_binary_map_llm}")
        # NDCG Boolean
        range_binary_ndcg_llm = save_metrics.set_metric_of_query_and_model(
            type_of_metric='binary_ndcg_llm',
            query=query_obj['id'],
            model_name=name,
            result=ndcg(evaluated_results_boolean['eval'])
        )
        print(f"Resultado salvo para o modelo {name} e query {query_obj['id']} no local {range_binary_ndcg_llm}")
        # NDCG Levels
        range_4_levels_ndcg_llmm = save_metrics.set_metric_of_query_and_model(
            type_of_metric='4_levels_ndcg_llm',
            query=query_obj['id'],
            model_name=name,
            result=ndcg(evaluated_results_levels['eval'])
        )
        print(f"Resultado salvo para o modelo {name} e query {query_obj['id']} no local {range_4_levels_ndcg_llmm}")


In [4]:

for name, _ in search_engines_dict.items():
    try:
        with open(f"{DATA_DIR}{name}.json", "r") as f:
            search_results = f.readlines()
            search_results = [json.loads(line) for line in search_results]
        print('Começando a rodar para: ', name)
        evaluate_and_save_metrics_from_json(search_results, name)
        print('Terminou de rodar para: ', name)
    except Exception as e:
        print(f"Provavelmente você n tem o arquivo: ./data/{name}.json")
        print(e)
        continue


Começando a rodar para:  bm25_title_only


Evaluating articles:   0%|          | 0/10 [00:00<?, ?it/s]




Provavelmente você n tem o arquivo: ./data/bm25_title_only.json
'abstract'
Começando a rodar para:  bm25_title_and_abstract


Evaluating articles:  10%|█         | 1/10 [00:00<00:05,  1.77it/s]

id: 0704.0001


Evaluating articles:  20%|██        | 2/10 [00:00<00:03,  2.08it/s]

id: 0704.0060


Evaluating articles:  30%|███       | 3/10 [00:01<00:03,  2.28it/s]

id: 0704.0031


Evaluating articles:  40%|████      | 4/10 [00:01<00:02,  2.41it/s]

id: 0704.0092


Evaluating articles:  50%|█████     | 5/10 [00:02<00:02,  2.08it/s]

id: 0704.0055


Evaluating articles:  60%|██████    | 6/10 [00:02<00:01,  2.24it/s]

id: 0704.0056


Evaluating articles:  70%|███████   | 7/10 [00:03<00:01,  2.31it/s]

id: 0704.0077


Evaluating articles:  80%|████████  | 8/10 [00:04<00:01,  1.70it/s]

id: 0704.0093


Evaluating articles:  90%|█████████ | 9/10 [00:04<00:00,  1.64it/s]

id: 0704.0044


Evaluating articles: 100%|██████████| 10/10 [00:08<00:00,  1.19it/s]


id: 0704.0020
          id                                              title  \
0  0704.0001  Calculation of prompt diphoton production cros...   
1  0704.0060  Coulomb excitation of unstable nuclei at inter...   
2  0704.0031  Crystal channeling of LHC forward protons with...   
3  0704.0092  Energy density for chiral lattice fermions wit...   
4  0704.0055  Potassium intercalation in graphite: A van der...   
5  0704.0056            Phase diagram of Gaussian-core nematics   
6  0704.0077       Universal Forces and the Dark Energy Problem   
7  0704.0093  Aspects of Electron-Phonon Self-Energy Reveale...   
8  0704.0044  Astrophysical gyrokinetics: kinetic and fluid ...   
9  0704.0020  Measurement of the Hadronic Form Factor in D0 ...   

                                            abstract  eval  
0    A fully differential calculation in perturba...     1  
1    We investigate the Coulomb excitation of low...     0  
2    We show that crystal can trap a broad (x, x'...     1  
3   

Evaluating articles:  50%|█████     | 5/10 [00:52<00:51, 10.29s/it]

Error when parsing feedback: Expecting value: line 1 column 1 (char 0)
Error when parsing feedback: Expecting value: line 1 column 1 (char 0)


Evaluating articles:  80%|████████  | 8/10 [01:34<00:24, 12.10s/it]

Error when parsing feedback: Expecting value: line 1 column 1 (char 0)


Evaluating articles: 100%|██████████| 10/10 [02:10<00:00, 13.09s/it]


                                               title  \
0  Calculation of prompt diphoton production cros...   
1  Coulomb excitation of unstable nuclei at inter...   
2  Crystal channeling of LHC forward protons with...   
3  Energy density for chiral lattice fermions wit...   
4  Potassium intercalation in graphite: A van der...   
5            Phase diagram of Gaussian-core nematics   
6       Universal Forces and the Dark Energy Problem   
7  Aspects of Electron-Phonon Self-Energy Reveale...   
8  Astrophysical gyrokinetics: kinetic and fluid ...   
9  Measurement of the Hadronic Form Factor in D0 ...   

                                            abstract  eval  
0    A fully differential calculation in perturba...     3  
1    We investigate the Coulomb excitation of low...     1  
2    We show that crystal can trap a broad (x, x'...     1  
3    We study a recently proposed formulation of ...     0  
4    Potassium intercalation in graphite is inves...     0  
5    We study a s

Evaluating articles:  10%|█         | 1/10 [00:00<00:03,  2.51it/s]

id: 0704.0002


Evaluating articles:  20%|██        | 2/10 [00:03<00:18,  2.26s/it]

id: 0704.0010


Evaluating articles:  30%|███       | 3/10 [00:07<00:20,  2.90s/it]

id: 0704.0098


Evaluating articles:  40%|████      | 4/10 [00:10<00:16,  2.82s/it]

id: 0704.0049


Evaluating articles:  50%|█████     | 5/10 [00:14<00:16,  3.21s/it]

id: 0704.0062


Evaluating articles:  60%|██████    | 6/10 [00:19<00:15,  3.90s/it]

id: 0704.0072


Evaluating articles:  70%|███████   | 7/10 [00:23<00:11,  3.89s/it]

id: 0704.0079


Evaluating articles:  80%|████████  | 8/10 [00:27<00:07,  3.90s/it]

id: 0704.0029


Evaluating articles:  90%|█████████ | 9/10 [00:28<00:03,  3.18s/it]

id: 0704.0054


Evaluating articles: 100%|██████████| 10/10 [00:33<00:00,  3.37s/it]


id: 0704.0090
          id                                              title  \
0  0704.0002           Sparsity-certifying Graph Decompositions   
1  0704.0010  Partial cubes: structures, characterizations, ...   
2  0704.0098  Sparsely-spread CDMA - a statistical mechanics...   
3  0704.0049  An algorithm for the classification of smooth ...   
4  0704.0062  On-line Viterbi Algorithm and Its Relationship...   
5  0704.0072  The decomposition method and Maple procedure f...   
6  0704.0079  Operator algebras associated with unitary comm...   
7  0704.0029  Understanding the Flavor Symmetry Breaking and...   
8  0704.0054            The Hardy-Lorentz Spaces $H^{p,q}(R^n)$   
9  0704.0090          Real Options for Project Schedules (ROPS)   

                                            abstract  eval  
0    We describe a new algorithm, the $(k,\ell)$-...     1  
1    Partial cubes are isometric subgraphs of hyp...     0  
2    Sparse Code Division Multiple Access (CDMA),...     1  
3   

Evaluating articles: 100%|██████████| 10/10 [01:42<00:00, 10.29s/it]


                                               title  \
0           Sparsity-certifying Graph Decompositions   
1  Partial cubes: structures, characterizations, ...   
2  Sparsely-spread CDMA - a statistical mechanics...   
3  An algorithm for the classification of smooth ...   
4  On-line Viterbi Algorithm and Its Relationship...   
5  The decomposition method and Maple procedure f...   
6  Operator algebras associated with unitary comm...   
7  Understanding the Flavor Symmetry Breaking and...   
8            The Hardy-Lorentz Spaces $H^{p,q}(R^n)$   
9          Real Options for Project Schedules (ROPS)   

                                            abstract  eval  
0    We describe a new algorithm, the $(k,\ell)$-...     3  
1    Partial cubes are isometric subgraphs of hyp...     1  
2    Sparse Code Division Multiple Access (CDMA),...     1  
3    We present an algorithm that produces the cl...     1  
4    In this paper, we introduce the on-line Vite...     0  
5    In present p

Evaluating articles:  10%|█         | 1/10 [00:01<00:16,  1.87s/it]

id: 0704.0003


Evaluating articles:  20%|██        | 2/10 [00:03<00:13,  1.73s/it]

id: 0704.0077


Evaluating articles:  30%|███       | 3/10 [00:08<00:21,  3.06s/it]

id: 0704.0082


Evaluating articles:  40%|████      | 4/10 [00:13<00:22,  3.81s/it]

id: 0704.0094


Evaluating articles:  50%|█████     | 5/10 [00:18<00:22,  4.52s/it]

id: 0704.0057


Evaluating articles:  60%|██████    | 6/10 [00:23<00:17,  4.43s/it]

id: 0704.0045


Evaluating articles:  70%|███████   | 7/10 [00:30<00:16,  5.42s/it]

id: 0704.0044


Evaluating articles:  80%|████████  | 8/10 [00:33<00:09,  4.58s/it]

id: 0704.0070


Evaluating articles:  80%|████████  | 8/10 [00:37<00:09,  4.68s/it]


KeyboardInterrupt: 