In [1]:
import os
import sys

# Make the project package importable and run from the project root so that
# the relative paths used later in the notebook resolve.
PROJECT_ROOT = "/home/jovyan/work/sem-covid/"
sys.path.append(PROJECT_ROOT)
# De-duplicate sys.path while KEEPING its order: the import system searches the
# entries in sequence, so going through set() (arbitrary order) could change
# which module wins when two entries provide the same name.
sys.path = list(dict.fromkeys(sys.path))

os.chdir(PROJECT_ROOT)

import pickle
from more_itertools import unique_everseen

import faiss
import pandas as pd
import numpy as np

from sem_covid.entrypoints.notebooks.legal_radar.services.split_documents_pipeline import DOCUMENT_ID_SOURCE
from sem_covid.services.store_registry import store_registry
from sem_covid.services.model_registry import embedding_registry
from sem_covid import config
from sem_covid.services.sc_wrangling.feature_selector import reduce_array_column

In [11]:
# MinIO bucket the serialized FAISS indexes are read from.
FAISS_BUCKET_NAME = 'faiss-index'
#FAISS_INDEX_FINREG_NAME = 'faiss_index_finreg.pkl'
# Object name of the default FAISS index; also used as the name prefix for the
# per-experiment indexes.
FAISS_INDEX_FINREG_NAME = 'faiss_index_finreg'
# Elasticsearch index holding the split documents; also used as the name prefix
# for the per-experiment split indexes.
FIN_REG_SPLITTED_ES_INDEX = 'ds_finreg_splitted'
# Name of the document date column that is converted to plain dates on load.
DATES_DOCUMENT = 'dates_document'
# NOTE(review): HTML_LINKS, DEFAULT_SEARCH and TEXT_PIECE are not referenced in
# the visible cells (the code uses the literal 'text_piece') - confirm whether
# they are still needed.
HTML_LINKS = 'htmls_to_download'
DEFAULT_SEARCH = """The Semantic Interoperability Community develops solutions to help European public administrations perform seamless and meaningful cross-border and cross-domain data exchanges."""
TEXT_PIECE = 'text_piece'

In [3]:
def load_documents():
    """Fetch the FinReg documents from Elasticsearch as a DataFrame.

    The ``DATES_DOCUMENT`` column is parsed with ``pd.to_datetime`` and then
    reduced to plain ``datetime.date`` values.

    Returns:
        pd.DataFrame: the documents with the normalised date column
    """
    frame = store_registry.es_index_store().get_dataframe(
        index_name=config.EU_FINREG_CELLAR_ELASTIC_SEARCH_INDEX_NAME)
    parsed_dates = pd.to_datetime(frame[DATES_DOCUMENT])
    frame[DATES_DOCUMENT] = parsed_dates.dt.date
    return frame


def load_splitted_documents():
    """Fetch the split FinReg documents from Elasticsearch.

    Returns:
        the content of the ``FIN_REG_SPLITTED_ES_INDEX`` index, as returned by
        the store's ``get_dataframe``
    """
    store = store_registry.es_index_store()
    splitted = store.get_dataframe(index_name=FIN_REG_SPLITTED_ES_INDEX)
    return splitted


def load_emb_model():
    """Return the embedding model registered as ``sent2vec_universal_sent_encoding``."""
    encoder = embedding_registry.sent2vec_universal_sent_encoding()
    return encoder


def load_faiss_index():
    """Fetch the pickled FAISS index from MinIO and rebuild the index object.

    Returns:
        the index produced by ``faiss.deserialize_index``
    """
    store = store_registry.minio_object_store(minio_bucket=FAISS_BUCKET_NAME)
    pickled_index = store.get_object(object_name=FAISS_INDEX_FINREG_NAME)
    # NOTE(review): unpickling can execute arbitrary code embedded in the
    # payload, so this is only safe while the bucket content is trusted.
    serialized_index = pickle.loads(pickled_index)
    return faiss.deserialize_index(serialized_index)


# Module-level state read by semantic_search(): the full documents, the
# embedding model, the split documents and the FAISS index.
documents = load_documents()
model = load_emb_model()
splitted_documents = load_splitted_documents()
faiss_index = load_faiss_index()


100% (5791 of 5791) |####################| Elapsed Time: 0:00:04 Time:  0:00:04
100% (69866 of 69866) |##################| Elapsed Time: 0:00:24 Time:  0:00:24
INFO:absl:Using /tmp/tfhub_modules to cache modules.


In [16]:

def semantic_search(user_input: str, num_results: int = 100) -> pd.DataFrame:
    """Run a semantic search for the query and return the best hit per document.

    Reads the module-level ``model``, ``faiss_index``, ``splitted_documents``
    and ``documents`` objects.

    Args:
        user_input (str): the query text to embed and search for
        num_results (int, optional): number of nearest document parts requested
            from the FAISS index. Defaults to 100.

    Returns:
        pd.DataFrame: one row per matched document, in hit order, with the
            extra columns 'similarity' (the raw score returned by the index)
            and 'text_piece' (the first matching part of that document)
    """
    query_vectors = np.array(model.encode(sentences=[user_input])).astype("float32")
    scores, positions = faiss_index.search(query_vectors, k=num_results)
    scores, positions = scores.flatten(), positions.flatten()

    # FAISS pads the result with index -1 when it holds fewer than k vectors.
    # Without this filter iloc[-1] would silently return the LAST slice as a hit.
    found = positions >= 0
    scores, positions = scores[found], positions[found]

    # assumes the FAISS index rows follow the row order of splitted_documents,
    # since hits are mapped back by position - TODO confirm
    document_parts = splitted_documents.iloc[positions.tolist()].copy()
    document_parts['similarity'] = scores.tolist()
    # Hits are in the order returned by the index; keep the first hit per document.
    document_parts = document_parts.drop_duplicates(DOCUMENT_ID_SOURCE).reset_index(drop=True)

    # assumes `documents` is indexed by the DOCUMENT_ID_SOURCE values - TODO confirm
    documents_id = document_parts[DOCUMENT_ID_SOURCE].values
    result_documents = documents.loc[documents_id].reset_index(drop=True)
    # Both frames now share a fresh 0..n-1 index, so these assignments align row by row.
    result_documents['similarity'] = document_parts['similarity']
    result_documents['text_piece'] = document_parts['text_piece']

    return result_documents


In [5]:
# Test bed: sample questions with their reference CELEX number and source
# links; rows without a question / text extract are discarded.
sample_questions = (
    pd.read_csv('sem_covid/entrypoints/notebooks/legal_radar/docs/sample_questions_v4.csv')
    .loc[lambda frame: frame['Questions/Text Extracts'].notna()]
)

In [6]:

def find_part_in_search_result(result_set: pd.DataFrame, reference_dataset_celex_number: str,
                               result_set_celex_number: str = 'celex_numbers') -> tuple:
    """Locate the reference CELEX number among the document parts of a search result.

    Args:
        result_set (pd.DataFrame): the result dataset from semantic search execution
        reference_dataset_celex_number (str): celex number from the test bed dataset
        result_set_celex_number (str, optional): name of the result set column holding
            the celex numbers. Defaults to 'celex_numbers'.

    Returns:
        tuple: (positions, similarities) - the 0-based row positions of the matches
            and their scores converted with 1 / (1 + score)
    """

    def _to_similarity(score):
        return 1 / (1 + score)

    # presumably reduce_array_column expands the array-valued celex column to
    # one value per row - verify against its implementation
    flattened = reduce_array_column(result_set, result_set_celex_number).reset_index(drop=True)
    is_reference = flattened[result_set_celex_number].isin([reference_dataset_celex_number])
    matching_rows = flattened[is_reference].index.to_list()

    position = flattened['text_piece'].loc[matching_rows].index.to_list()
    converted_scores = flattened['similarity'].apply(_to_similarity)
    similarity = converted_scores.loc[matching_rows].to_list()

    return position, similarity


def find_document_in_search_result(result_set: pd.DataFrame, reference_dataset_celex_number: str,
                                   result_set_celex_number: str = 'celex_numbers') -> list:
    """Locate the reference CELEX number among the documents of a search result.

    Args:
        result_set (pd.DataFrame): the result dataset from semantic search execution
        reference_dataset_celex_number (str): celex number from the test bed dataset
        result_set_celex_number (str, optional): name of the result set column holding
            the celex numbers. Defaults to 'celex_numbers'.

    Returns:
        list: the 0-based row positions of the matching documents
    """
    # presumably reduce_array_column expands the array-valued celex column to
    # one value per row - verify against its implementation
    flattened = reduce_array_column(result_set, result_set_celex_number).reset_index(drop=True)
    is_reference = flattened[result_set_celex_number].isin([reference_dataset_celex_number])
    matching_rows = flattened[is_reference].index.to_list()

    titles_at_matches = flattened['title'].loc[matching_rows]
    return titles_at_matches.index.to_list()


def evaluate_parts(test_bed: pd.DataFrame) -> list:
    """Run every test bed query through the semantic search and record where the
        expected document shows up in the result set.

    Args:
        test_bed (pd.DataFrame): test dataset with the input queries
            ('Questions/Text Extracts') and the expected celex number ('Celex No')

    Returns:
        list: one dict per query with the keys 'position_part',
            'position_document' and 'similarity'
    """

    def _evaluate_query(row) -> dict:
        # One search per query; both lookups reuse the same result set.
        hits = semantic_search(row['Questions/Text Extracts'])
        part_positions, part_similarities = find_part_in_search_result(hits, row['Celex No'])
        document_positions = find_document_in_search_result(hits, row['Celex No'])
        return {
            'position_part': part_positions,
            'position_document': document_positions,
            'similarity': part_similarities
        }

    return [_evaluate_query(row) for _, row in test_bed.iterrows()]


def merge_test_bed_with_result_set(test_bed: pd.DataFrame, result_set: list) -> pd.DataFrame:
    """Merge the test bed dataframe and the result set list into a single dataframe

    Besides the raw positions and similarities, boolean summary columns are added:
    in_top_5_slices / in_top_10_slices, in_top_5_documents / in_top_10_documents
    and in_q3 (any similarity >= 0.75).

    Args:
        test_bed (pd.DataFrame): test dataset with the input queries and comparable celex number
        result_set (list): the result from evaluation part, one dict per test bed row
            with the keys 'position_part', 'position_document' and 'similarity'

    Returns:
        pd.DataFrame: merged dataframe from test bed and evaluation part
    """

    def _ranked_within(top_n: int):
        # Positions are 0-based, so the first `top_n` results are 0 .. top_n - 1.
        # The comparison is therefore strict; `<= top_n` would count top_n + 1 results.
        return lambda positions: any(np.array(positions) < top_n)

    result = pd.DataFrame(result_set)
    result = result.assign(in_top_5_slices=result['position_part'].apply(_ranked_within(5)),
                           in_top_10_slices=result['position_part'].apply(_ranked_within(10)),
                           in_top_5_documents=result['position_document'].apply(_ranked_within(5)),
                           in_top_10_documents=result['position_document'].apply(_ranked_within(10)),
                           in_q3=result['similarity'].apply(lambda x: any(np.array(x) >= 0.75)))

    # The test bed index is passed as the join key for BOTH frames, which pairs
    # row i of the test bed with row i of the result (it shows up as 'key_0').
    # NOTE(review): this relies on the test bed index being unique - confirm.
    return pd.merge(test_bed, result, on=test_bed.index, how="inner")


In [7]:
# Baseline run: evaluate the sample questions against the objects loaded above
# and attach the outcome to the test bed rows.
evaluation = evaluate_parts(sample_questions)
result = merge_test_bed_with_result_set(sample_questions, evaluation)

In [8]:
# Show the merged baseline evaluation table.
result

Unnamed: 0,key_0,No,Questions/Text Extracts,Source Document,Article No,Celex No,Work Reference,ELI,position_part,position_document,similarity,in_top_5_slices,in_top_10_slices,in_top_5_documents,in_top_10_documents,in_q3
0,0,1,‘clearing’ means the process of establishing p...,REGULATION (EU) No 648/2012 OF THE EUROPEAN PA...,2(3),32012R0648,http://publications.europa.eu/resource/cellar/...,http://data.europa.eu/eli/reg/2012/648/oj,[27],[27],[0.4546839568469639],False,False,False,False,False
1,1,2,Incentives to promote the use of CCPs have not...,REGULATION (EU) No 648/2012 OF THE EUROPEAN PA...,Recital 13,32012R0648,http://publications.europa.eu/resource/cellar/...,http://data.europa.eu/eli/reg/2012/648/oj,[0],[0],[0.641506835303643],True,True,True,True,False
2,2,3,Ensuring that the clearing obligation reduces ...,REGULATION (EU) No 648/2012 OF THE EUROPEAN PA...,Recital 15,32012R0648,http://publications.europa.eu/resource/cellar/...,http://data.europa.eu/eli/reg/2012/648/oj,[0],[0],[0.659670742152862],True,True,True,True,False
3,3,4,In order to foster financial stability within ...,REGULATION (EU) No 648/2012 OF THE EUROPEAN PA...,Recital 23,32012R0648,http://publications.europa.eu/resource/cellar/...,http://data.europa.eu/eli/reg/2012/648/oj,[0],[0],[0.5587572455664014],True,True,True,True,False
4,4,5,\t\n\nIn order to allow for a comprehensive o...,REGULATION (EU) No 648/2012 OF THE EUROPEAN PA...,Recital 43,32012R0648,http://publications.europa.eu/resource/cellar/...,http://data.europa.eu/eli/reg/2012/648/oj,[0],[0],[0.5837262744871272],True,True,True,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
344,345,346,The person having suffered damage may bring hi...,Regulation (EC) No 864/2007 of the European Pa...,18,32007R0864,http://publications.europa.eu/resource/cellar/...,http://data.europa.eu/eli/reg/2007/864/oj,[1],[1],[0.5443353963542554],True,True,True,True,False
345,346,347,Where a person (the creditor) has a non-contra...,Regulation (EC) No 864/2007 of the European Pa...,19,32007R0864,http://publications.europa.eu/resource/cellar/...,http://data.europa.eu/eli/reg/2007/864/oj,[1],[1],[0.6015299322632307],True,True,True,True,False
346,347,348,"For the purposes of this Regulation, the habit...",Regulation (EC) No 864/2007 of the European Pa...,23.2,32007R0864,http://publications.europa.eu/resource/cellar/...,http://data.europa.eu/eli/reg/2007/864/oj,[0],[0],[0.5061504925302508],True,True,True,True,False
347,348,349,Where a State comprises several territorial un...,Regulation (EC) No 864/2007 of the European Pa...,25.1,32007R0864,http://publications.europa.eu/resource/cellar/...,http://data.europa.eu/eli/reg/2007/864/oj,[1],[1],[0.5840021464770055],True,True,True,True,False


In [9]:
# One (split_window_size, split_window_step) pair per experiment; the pair is
# used to build the names of the split-document index and of the FAISS index.
EXPERIMENT_CONFIGS = [
    (1, 1),
    (2, 1),
    (5, 2),
    (10, 5),
    (20, 10),
    (50, 25),
    (100, 50)
]

In [15]:
# Re-run the evaluation once per split configuration.
# NOTE: this cell rebinds the module-level `documents`, `splitted_documents`
# and `faiss_index` that semantic_search() reads, so after it has run they
# refer to the LAST experiment configuration.
es_store = store_registry.es_index_store()
documents = es_store.get_dataframe(index_name=config.EU_FINREG_CELLAR_ELASTIC_SEARCH_INDEX_NAME)
documents[DATES_DOCUMENT] = pd.to_datetime(documents[DATES_DOCUMENT]).dt.date
results = []
for split_window_size, split_window_step in EXPERIMENT_CONFIGS:
    fin_reg_es_index_name = f'{FIN_REG_SPLITTED_ES_INDEX}_{split_window_size}_{split_window_step}'
    # NOTE(review): the underscore before '.pkl' is kept on purpose - it is what
    # the original name construction produced; confirm against the stored objects.
    faiss_index_finreg_name = f'{FAISS_INDEX_FINREG_NAME}_{split_window_size}_{split_window_step}_.pkl'
    print(fin_reg_es_index_name, faiss_index_finreg_name)

    splitted_documents = es_store.get_dataframe(index_name=fin_reg_es_index_name)
    minio_store = store_registry.minio_object_store(minio_bucket=FAISS_BUCKET_NAME)
    # NOTE(review): unpickling is only safe while the bucket content is trusted.
    data = pickle.loads(minio_store.get_object(object_name=faiss_index_finreg_name))
    faiss_index = faiss.deserialize_index(data)

    evaluation = evaluate_parts(sample_questions)
    results.append(merge_test_bed_with_result_set(sample_questions, evaluation))

100% (5791 of 5791) |####################| Elapsed Time: 0:00:04 Time:  0:00:04


ds_finreg_splitted_1_1 faiss_index_finreg_1_1_.pkl


100% (696 of 696) |######################| Elapsed Time: 0:00:00 Time:  0:00:00


['0bf9397f8cc09647a33d1b23ab761d9ea814352d50c3151534a1275a6926c2d0'
 '0c26d7145a630b2b5abd1366f87e65ed161f8818f0aff06a3642ac53cf011735'
 '0bc6b68e203413ca66815e465acda3f90fae1695b09c8bd51bbcd9311ad00764'
 '0b851f858f6b2dda07973691d55c42078662737f5462c49f06ad10561e333350'
 '0bde6a1b44ce5c3e6fd84c58e8fbe9ace27a6f5f4c620c8d5f60e577c0e05663'
 '0c50d815fcd96032ea8193b19bbd7e1efd78cce79836390c98c4deddc1c0cb2f'
 '0c1996abe4d127f6b910c9e1c4b46fa90edfff6601b076236df4a5f1b066e3cb'
 '0c28564b3cd61ec2c897bd118b7e2db4d0e0ef7bbb37a324bee69a607eedb718'
 '0bdbb8e97c0857c24437c61035e013bfad79d93de8b3616859cbbbb915950cdb'
 '0c2affa5dcd380da7f39d5ee103607e1950a5912a22c8b984c0c2ab03dd537b3']
['0b851f858f6b2dda07973691d55c42078662737f5462c49f06ad10561e333350'
 '0c50d815fcd96032ea8193b19bbd7e1efd78cce79836390c98c4deddc1c0cb2f'
 '0c26d7145a630b2b5abd1366f87e65ed161f8818f0aff06a3642ac53cf011735'
 '0bf9397f8cc09647a33d1b23ab761d9ea814352d50c3151534a1275a6926c2d0'
 '0bdbb8e97c0857c24437c61035e013bfad79d93de8b36

100% (733 of 733) |######################| Elapsed Time: 0:00:00 Time:  0:00:00


['0bf9397f8cc09647a33d1b23ab761d9ea814352d50c3151534a1275a6926c2d0'
 '0bc6b68e203413ca66815e465acda3f90fae1695b09c8bd51bbcd9311ad00764'
 '0c26d7145a630b2b5abd1366f87e65ed161f8818f0aff06a3642ac53cf011735'
 '0b851f858f6b2dda07973691d55c42078662737f5462c49f06ad10561e333350'
 '0bde6a1b44ce5c3e6fd84c58e8fbe9ace27a6f5f4c620c8d5f60e577c0e05663'
 '0c50d815fcd96032ea8193b19bbd7e1efd78cce79836390c98c4deddc1c0cb2f'
 '0bdbb8e97c0857c24437c61035e013bfad79d93de8b3616859cbbbb915950cdb'
 '0c1996abe4d127f6b910c9e1c4b46fa90edfff6601b076236df4a5f1b066e3cb'
 '0c28564b3cd61ec2c897bd118b7e2db4d0e0ef7bbb37a324bee69a607eedb718']
['0b851f858f6b2dda07973691d55c42078662737f5462c49f06ad10561e333350'
 '0bf9397f8cc09647a33d1b23ab761d9ea814352d50c3151534a1275a6926c2d0'
 '0c26d7145a630b2b5abd1366f87e65ed161f8818f0aff06a3642ac53cf011735'
 '0bc6b68e203413ca66815e465acda3f90fae1695b09c8bd51bbcd9311ad00764'
 '0c50d815fcd96032ea8193b19bbd7e1efd78cce79836390c98c4deddc1c0cb2f'
 '0bde6a1b44ce5c3e6fd84c58e8fbe9ace27a6f5f4c620

100% (361 of 361) |######################| Elapsed Time: 0:00:00 Time:  0:00:00


['0bf9397f8cc09647a33d1b23ab761d9ea814352d50c3151534a1275a6926c2d0'
 '0b851f858f6b2dda07973691d55c42078662737f5462c49f06ad10561e333350'
 '0c26d7145a630b2b5abd1366f87e65ed161f8818f0aff06a3642ac53cf011735'
 '0bc6b68e203413ca66815e465acda3f90fae1695b09c8bd51bbcd9311ad00764'
 '0c50d815fcd96032ea8193b19bbd7e1efd78cce79836390c98c4deddc1c0cb2f'
 '0bde6a1b44ce5c3e6fd84c58e8fbe9ace27a6f5f4c620c8d5f60e577c0e05663'
 '0bdbb8e97c0857c24437c61035e013bfad79d93de8b3616859cbbbb915950cdb'
 '0c1996abe4d127f6b910c9e1c4b46fa90edfff6601b076236df4a5f1b066e3cb'
 '0c28564b3cd61ec2c897bd118b7e2db4d0e0ef7bbb37a324bee69a607eedb718']
['0b851f858f6b2dda07973691d55c42078662737f5462c49f06ad10561e333350'
 '0c26d7145a630b2b5abd1366f87e65ed161f8818f0aff06a3642ac53cf011735'
 '0c50d815fcd96032ea8193b19bbd7e1efd78cce79836390c98c4deddc1c0cb2f'
 '0bc6b68e203413ca66815e465acda3f90fae1695b09c8bd51bbcd9311ad00764'
 '0bdbb8e97c0857c24437c61035e013bfad79d93de8b3616859cbbbb915950cdb'
 '0bf9397f8cc09647a33d1b23ab761d9ea814352d50c31

100% (143 of 143) |######################| Elapsed Time: 0:00:00 Time:  0:00:00


['0bf9397f8cc09647a33d1b23ab761d9ea814352d50c3151534a1275a6926c2d0'
 '0bc6b68e203413ca66815e465acda3f90fae1695b09c8bd51bbcd9311ad00764'
 '0c50d815fcd96032ea8193b19bbd7e1efd78cce79836390c98c4deddc1c0cb2f'
 '0c26d7145a630b2b5abd1366f87e65ed161f8818f0aff06a3642ac53cf011735'
 '0bde6a1b44ce5c3e6fd84c58e8fbe9ace27a6f5f4c620c8d5f60e577c0e05663'
 '0b851f858f6b2dda07973691d55c42078662737f5462c49f06ad10561e333350'
 '0bdbb8e97c0857c24437c61035e013bfad79d93de8b3616859cbbbb915950cdb'
 '0c1996abe4d127f6b910c9e1c4b46fa90edfff6601b076236df4a5f1b066e3cb'
 '0c2affa5dcd380da7f39d5ee103607e1950a5912a22c8b984c0c2ab03dd537b3'
 '0c28564b3cd61ec2c897bd118b7e2db4d0e0ef7bbb37a324bee69a607eedb718']
['0c50d815fcd96032ea8193b19bbd7e1efd78cce79836390c98c4deddc1c0cb2f'
 '0b851f858f6b2dda07973691d55c42078662737f5462c49f06ad10561e333350'
 '0c26d7145a630b2b5abd1366f87e65ed161f8818f0aff06a3642ac53cf011735'
 '0bf9397f8cc09647a33d1b23ab761d9ea814352d50c3151534a1275a6926c2d0'
 '0bc6b68e203413ca66815e465acda3f90fae1695b09c8

100% (69 of 69) |########################| Elapsed Time: 0:00:00 Time:  0:00:00


['0bf9397f8cc09647a33d1b23ab761d9ea814352d50c3151534a1275a6926c2d0'
 '0bc6b68e203413ca66815e465acda3f90fae1695b09c8bd51bbcd9311ad00764'
 '0c26d7145a630b2b5abd1366f87e65ed161f8818f0aff06a3642ac53cf011735'
 '0bdbb8e97c0857c24437c61035e013bfad79d93de8b3616859cbbbb915950cdb'
 '0b851f858f6b2dda07973691d55c42078662737f5462c49f06ad10561e333350'
 '0bde6a1b44ce5c3e6fd84c58e8fbe9ace27a6f5f4c620c8d5f60e577c0e05663'
 '0c50d815fcd96032ea8193b19bbd7e1efd78cce79836390c98c4deddc1c0cb2f'
 '0c2affa5dcd380da7f39d5ee103607e1950a5912a22c8b984c0c2ab03dd537b3'
 '0c1996abe4d127f6b910c9e1c4b46fa90edfff6601b076236df4a5f1b066e3cb'
 '0c28564b3cd61ec2c897bd118b7e2db4d0e0ef7bbb37a324bee69a607eedb718']
['0b851f858f6b2dda07973691d55c42078662737f5462c49f06ad10561e333350'
 '0c26d7145a630b2b5abd1366f87e65ed161f8818f0aff06a3642ac53cf011735'
 '0c50d815fcd96032ea8193b19bbd7e1efd78cce79836390c98c4deddc1c0cb2f'
 '0bc6b68e203413ca66815e465acda3f90fae1695b09c8bd51bbcd9311ad00764'
 '0bdbb8e97c0857c24437c61035e013bfad79d93de8b36

100% (26 of 26) |########################| Elapsed Time: 0:00:00 Time:  0:00:00


['0bc6b68e203413ca66815e465acda3f90fae1695b09c8bd51bbcd9311ad00764'
 '0bde6a1b44ce5c3e6fd84c58e8fbe9ace27a6f5f4c620c8d5f60e577c0e05663'
 '0c1996abe4d127f6b910c9e1c4b46fa90edfff6601b076236df4a5f1b066e3cb'
 '0c50d815fcd96032ea8193b19bbd7e1efd78cce79836390c98c4deddc1c0cb2f'
 '0c26d7145a630b2b5abd1366f87e65ed161f8818f0aff06a3642ac53cf011735'
 '0bf9397f8cc09647a33d1b23ab761d9ea814352d50c3151534a1275a6926c2d0'
 '0b851f858f6b2dda07973691d55c42078662737f5462c49f06ad10561e333350'
 '0c2affa5dcd380da7f39d5ee103607e1950a5912a22c8b984c0c2ab03dd537b3'
 '0bdbb8e97c0857c24437c61035e013bfad79d93de8b3616859cbbbb915950cdb'
 '0c28564b3cd61ec2c897bd118b7e2db4d0e0ef7bbb37a324bee69a607eedb718']
ds_finreg_splitted_50_25 faiss_index_finreg_50_25_.pkl
['0bf9397f8cc09647a33d1b23ab761d9ea814352d50c3151534a1275a6926c2d0'
 '0bc6b68e203413ca66815e465acda3f90fae1695b09c8bd51bbcd9311ad00764'
 '0c26d7145a630b2b5abd1366f87e65ed161f8818f0aff06a3642ac53cf011735'
 '0bdbb8e97c0857c24437c61035e013bfad79d93de8b3616859cbbbb915

100% (14 of 14) |########################| Elapsed Time: 0:00:00 Time:  0:00:00


['0c26d7145a630b2b5abd1366f87e65ed161f8818f0aff06a3642ac53cf011735'
 '0bf9397f8cc09647a33d1b23ab761d9ea814352d50c3151534a1275a6926c2d0'
 '0c50d815fcd96032ea8193b19bbd7e1efd78cce79836390c98c4deddc1c0cb2f'
 '0bde6a1b44ce5c3e6fd84c58e8fbe9ace27a6f5f4c620c8d5f60e577c0e05663'
 '0c1996abe4d127f6b910c9e1c4b46fa90edfff6601b076236df4a5f1b066e3cb'
 '0bc6b68e203413ca66815e465acda3f90fae1695b09c8bd51bbcd9311ad00764'
 '0bdbb8e97c0857c24437c61035e013bfad79d93de8b3616859cbbbb915950cdb'
 '0b851f858f6b2dda07973691d55c42078662737f5462c49f06ad10561e333350'
 '0c28564b3cd61ec2c897bd118b7e2db4d0e0ef7bbb37a324bee69a607eedb718'
 '0c2affa5dcd380da7f39d5ee103607e1950a5912a22c8b984c0c2ab03dd537b3']
['0bdbb8e97c0857c24437c61035e013bfad79d93de8b3616859cbbbb915950cdb'
 '0bde6a1b44ce5c3e6fd84c58e8fbe9ace27a6f5f4c620c8d5f60e577c0e05663'
 '0bc6b68e203413ca66815e465acda3f90fae1695b09c8bd51bbcd9311ad00764'
 '0bf9397f8cc09647a33d1b23ab761d9ea814352d50c3151534a1275a6926c2d0'
 '0c1996abe4d127f6b910c9e1c4b46fa90edfff6601b07

In [19]:
# Boolean summary columns reported for every experiment.
RESULT_COLUMNS = [
    'in_top_5_slices',
    'in_top_10_slices',
    'in_top_5_documents',
    'in_top_10_documents',
    'in_q3',
]

In [22]:
# Print, per experiment configuration, the share of True / False values of
# every summary column.
for index, experiment_config in enumerate(EXPERIMENT_CONFIGS):
    print(experiment_config)
    experiment_result = results[index]
    for result_columns in RESULT_COLUMNS:
        print(experiment_result[result_columns].value_counts(normalize=True))


(1, 1)
False    1.0
Name: in_top_5_slices, dtype: float64
False    1.0
Name: in_top_10_slices, dtype: float64
False    1.0
Name: in_top_5_documents, dtype: float64
False    1.0
Name: in_top_10_documents, dtype: float64
False    1.0
Name: in_q3, dtype: float64
(2, 1)
False    1.0
Name: in_top_5_slices, dtype: float64
False    1.0
Name: in_top_10_slices, dtype: float64
False    1.0
Name: in_top_5_documents, dtype: float64
False    1.0
Name: in_top_10_documents, dtype: float64
False    1.0
Name: in_q3, dtype: float64
(5, 2)
False    1.0
Name: in_top_5_slices, dtype: float64
False    1.0
Name: in_top_10_slices, dtype: float64
False    1.0
Name: in_top_5_documents, dtype: float64
False    1.0
Name: in_top_10_documents, dtype: float64
False    1.0
Name: in_q3, dtype: float64
(10, 5)
False    1.0
Name: in_top_5_slices, dtype: float64
False    1.0
Name: in_top_10_slices, dtype: float64
False    1.0
Name: in_top_5_documents, dtype: float64
False    1.0
Name: in_top_10_documents, dtype: float64
