# Combine BioASQ with Our IR

In [1]:
import pandas as pd
import os
import json
import torch
import time
from opensearchpy import OpenSearch
from sentence_transformers import SentenceTransformer

import json
import time
from tqdm import tqdm  # Import tqdm for the progress bar

## Setting for queries

In [9]:
host = '3.23.103.76' #host = 'localhost' 
port = 9200
auth =('admin','IVIngi2024!') #auth = ('admin','admin') 
client_lexical = OpenSearch(
    hosts = [{'host': host, 'port': port}],
    http_auth = auth,
    use_ssl = True,
    verify_certs = False,
    ssl_assert_hostname = False,
    ssl_show_warn = False,
    timeout=500, 
    max_retries=1
    #connection_class=RequestsHttpConnection 
#    http_compress = True, # enables gzip compression for request bodies
#    use_ssl = False,
#   verify_certs = False,
#    ssl_assert_hostname = False,
#    ssl_show_warn = False
)
print("Connection opened...")

Connection opened...


In [10]:

query_text={
  "query": {
    "match": {
      "pmid": "15858239"
    }
  },
  "_source": ["text"]  
}

# Execute the query
response = client_lexical.search(
    index="medline-faiss-hnsw-lexical",
    body=query_text
)
print(response)

{'took': 1, 'timed_out': False, '_shards': {'total': 4, 'successful': 4, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 1, 'relation': 'eq'}, 'max_score': 15.348576, 'hits': [{'_index': 'medline-faiss-hnsw-lexical', '_id': '2289988', '_score': 15.348576, '_source': {'text': 'The role of ret gene in the pathogenesis of Hirschsprung disease. Hirschsprung disease is a congenital disorder with the incidence of 1 per 5000 live births, characterized by the absence of intestinal ganglion cells. In the etiology of Hirschsprung disease various genes play a role; these are: RET, EDNRB, GDNF, EDN3 and SOX10, NTN3, ECE1, Mutations in these genes may result in dominant, recessive or multifactorial patterns of inheritance. Diverse models of inheritance, co-existence of numerous genetic disorders and detection of numerous chromosomal aberrations together with involvement of various genes confirm the genetic heterogeneity of Hirschsprung disease. Hirschsprung disease might well serve as a mod

## Prepare data for generarive component

In [11]:
#Get text from our IR for given pmid

def get_document_text(pmid, client):
    # Format the query text with the correct PubMed ID
    query_text = {
        "query": {
            "match": {
                "pmid": pmid
            }
        },
        "_source": ["text"]
    }

    # Use the provided OpenSearch client to execute the query
    response = client.search(
        index="medline-faiss-hnsw-lexical",
        body=query_text
    )

    # Check if the response has hits and return the text
    if response['hits']['hits']:
        # Assuming that the first hit contains the relevant document
        return response['hits']['hits'][0]['_source']['text']
    else:
        # Return a default message or handle as needed
        return "No document found for PMID: {}".format(pmid)

In [5]:
#checking one example
pmid = "36237717"  # Example PubMed ID
document_text = get_document_text(pmid, client_lexical)
print(document_text)

Leigh Syndrome Mimicking Wernicke's Encephalopathy: A Case Report. Leigh syndrome or subacute necrotizing encephalomyelopathy is a rare, rapidly progressive neurodegenerative disorder. In general, symptoms such as shortness of breath and decreased cardiac function usually occur within 1 year of life. It is a serious disease with a mortality rate of 75% in 2-3 years. The cause of Leigh syndrome is DNA mutation. Approximately 75% of patients have nuclear DNA mutations while 25% have mitochondrial DNA mutations. Clinical symptoms vary depending on the affected brain area. Neuroimaging plays an important role in diagnosing patients with Leigh syndrome. Late-onset Leigh syndrome is rarer and progresses more slowly compared to the classic form. Here, we report a case of late-onset Leigh's syndrome mimicking Wernicke's encephalopathy.


In [7]:
json_structure = {
    "questions": [
        {
            "question": "Question here",
            "type": "summary",
            "exact_answer": ["Exact answer here"],
            "ideal_answer": "Ideal answer here",
            "abstracts": []
        }
    ]
}

In [6]:
"""def populate_abstracts(question, pmids, client):
   
    #Populate the 'abstracts' field for a given question with texts fetched using PubMed IDs. 
    #Args:
    #question (dict): The question dictionary to populate.
    #pmids (list of str): List of PubMed IDs.
    #client (OpenSearchClient): The OpenSearch client instance.
   
    # Clear existing abstracts (if this function is to be reused)
    question['abstracts'] = []

    for pmid in tqdm(pmids, desc="Fetching texts", leave=False):
        text = get_document_text(pmid, client)
        question['abstracts'].append({
            "id": pmid,
            "text": text
        })
"""
def populate_abstracts(question, pmids, client):
    # Initialize the list with None to reserve order
    abstracts = [None] * len(pmids)

    # Using tqdm to show progress for fetching document texts
    for index, pmid in enumerate(tqdm(pmids, desc="Fetching texts", leave=False)):
        text = get_document_text(pmid, client)
        abstracts[index] = {
            "id": pmid,
            "text": text
        }

    # Filter out None values in case any text could not be fetched
    question['abstracts'] = [abstract for abstract in abstracts if abstract]


In [7]:

import json
#evaluation_file = 'training12b_new.json'
#with open(evaluation_file, 'r') as f:
 #   data = json.load(f)

#print(len(data['questions']))
BioASQ_filepath = 'training12b_new.json'
#BioASQ_filepath = 'BioASQ_small.json'
def read_bioasq_file(filepath):
    """
    Read the BioASQ JSON file.
    
    Args:
    filepath (str): The path to the BioASQ file.
    
    Returns:
    list: A list of questions from the BioASQ file.
    """
    with open(filepath, 'r') as file:
        bioasq_data = json.load(file)
    return bioasq_data['questions']

In [20]:


def create_custom_json_structure(bioasq_questions, client):
    """
    Create a custom JSON structure based on BioASQ data.
    
    Args:
    bioasq_questions (list): List of questions from BioASQ data.
    client (OpenSearchClient): The OpenSearch client instance.
    
    Returns:
    dict: The custom JSON structure populated with abstracts.
    """
    json_structure = {"questions": []}

    for question in tqdm(bioasq_questions, desc="Processing questions"):
        # Create a new question entry
        new_question = {
            "id": question.get("id", "No ID"), 
            "question": question.get("body", "No question body"),
            "type": question.get("type", ""),
            "exact_answer": question.get("exact_answer", []),
            "ideal_answer": question.get("ideal_answer", "No ideal answer"),
            "abstracts": []
        }

        # Extract PubMed IDs from the 'documents' URLs
        pmids = [doc.split('/')[-1] for doc in question.get('documents', [])]
        
        # Populate abstracts
        populate_abstracts(new_question, pmids, client)
        
        # Add the populated question to the json structure
        json_structure['questions'].append(new_question)

    return json_structure

## Create custom BioASQ-ALL - with abstract's text from our IR for pmid from bioASQ

In [21]:
# Example usage
#bioasq_filepath = 'path_to_your_BioAsq.json'  # Set the path to your BioASQ file
start_time = time.time()

bioasq_questions = read_bioasq_file(BioASQ_filepath)
custom_json = create_custom_json_structure(bioasq_questions, client_lexical)

end_time = time.time()
elapsed_time = end_time - start_time
# Save the new JSON structure to a file
with open('custom_bioasq_output_ALL.json', 'w') as outfile:
    json.dump(custom_json, outfile, indent=4)

print(f"Time taken to create the custom JSON: {elapsed_time:.2f} seconds")

Processing questions: 100%|██████████| 5049/5049 [1:53:49<00:00,  1.35s/it]  


Time taken to create the custom JSON: 6829.41 seconds


## Create json file for BioASQ questions with our retrieval results - for 5 BioASW question with 5 or more abstracts


## Create custom BioASQ with only one unique Ideal answer

In [25]:

def process_ideal_answers(input_filepath, output_filepath):
    # Load the data from the input JSON file
    with open(input_filepath, 'r') as file:
        data = json.load(file)
    
    processed_questions = []

    for question in data['questions']:
        ideal_answers = question['ideal_answer']

        # Check if there's more than one ideal answer
        if isinstance(ideal_answers, list) and len(ideal_answers) > 1:
            # Use a set to identify unique answers
            unique_answers = set(ideal_answers)

            if len(unique_answers) == 1:
                # If all ideal answers are the same, keep one
                question['ideal_answer'] = unique_answers.pop()
            else:
                # If there are different ideal answers, skip this question
                continue
        # If there's only one ideal answer, or it has been processed to one, add to processed questions
        processed_questions.append(question)

    # Prepare the final structure with processed questions
    final_data = {'questions': processed_questions}

    # Save the processed data to a new JSON file
    with open(output_filepath, 'w') as outfile:
        json.dump(final_data, outfile, indent=4)

    print(f"Processed data has been saved to {output_filepath}")
    print("The number of question with one Ideal answer", len(processed_questions))

# Define file paths
input_filepath = 'custom_bioasq_output_ALL.json'
output_filepath = 'custom_bioasq_output_One_ideal.json'

# Process the file
process_ideal_answers(input_filepath, output_filepath)


Processed data has been saved to custom_bioasq_output_One_ideal.json
The number of question with one Ideal answer 3634


### <font color='red'>Kraj pripreme podataka za generativnu komponentu</font>

# Create subseto of BioASQ - question with 10 or more relevant abstract

In [32]:

def filter_questions_with_ten_or_more_documents(input_filepath, output_filepath):
    # Load the data from the BioASQ JSON file
    with open(input_filepath, 'r') as file:
        data = json.load(file)
    
    filtered_questions = []

    # Filter questions based on the number of documents
    for question in data['questions']:
        # Check if there are 10 or more documents
        if len(question['documents']) >= 10:
            filtered_questions.append(question)

    # Prepare the final structure with filtered questions
    final_data = {'questions': filtered_questions}
    # Save the filtered data to a new JSON file
    with open(output_filepath, 'w') as outfile:
        json.dump(final_data, outfile, indent=4)

    print(f"Filtered data has been saved to {output_filepath}")
    print("Number od questions with 10 or more abstracts:", len(filtered_questions))
# Define file paths
input_filepath = 'training12b_new.json'  # Set the correct path to your BioASQ file
output_filepath = 'BioASQ_10_or_more.json'

# Process the file
filter_questions_with_ten_or_more_documents(input_filepath, output_filepath)


Filtered data has been saved to BioASQ_10_or_more.json
Number od questions with 10 or more abstracts: 1787


In [8]:
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams
from qdrant_client.http.models import PointStruct
from qdrant_client.http import models


# 3.145.52.195
#client_semantic = QdrantClient(host, port=6333, timeout = 60)

verifai_ip='3.23.103.76'
qdrant_port=6333
TIMEOUT=60
url = f"https://{verifai_ip}:{qdrant_port}"
qdrant_api="8da7725d78141e19a9bf3d878f4cb333fedb56eed9727904b46ce4b32e1ce085"
client_semantic = QdrantClient(url=url, api_key=qdrant_api, timeout=TIMEOUT, https=True,**{'verify': False})
#client_semantic = QdrantClient(host, port=6333, timeout = 60)

# Select the type of lexical indexing: 

In [9]:
lexical_pmid = False

if lexical_pmid:
    index_name_lexical = 'medline-faiss-hnsw-lexical-pmid'
else:
    index_name_lexical ='medline-faiss-hnsw-lexical'

coll_name_semantic = "medline-faiss-hnsw"

In [10]:
model_card = 'sentence-transformers/msmarco-distilbert-base-tas-b'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device {device}")

Device cpu


In [11]:
model = SentenceTransformer(model_card)

  return self.fget.__get__(instance, owner)()


## Query

In [12]:
import nltk

# Ensure that the necessary NLTK data is downloaded
nltk.download('punkt')
nltk.download('stopwords')

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import pandas as pd




class QueryProcessor:
    def __init__(self, index_lexical:str = "medline-faiss-hnsw-lexical",lexical_pmid = False, index_name_semantic ="medline-faiss-hnsw", rescore = False, model=None, lexical_client=None, semantic_client=None, output_file_path="queries/queries.tsv", stopwords=set([])):
        self.index_lexical_name = index_lexical
        self.index_name_semantic = index_name_semantic
        # 2 index name (?)
        self.model = model
        #self.lexical_pmid = lexical_pmid
        self.lexical_client = lexical_client
        self.semantic_client = semantic_client
        self.output_file_path = output_file_path
        self.stop_words = stopwords
        self.query_result = []
        self.rescore = rescore
        self.lexical_query = self.lexical_query_pmid if lexical_pmid else self.lexical_query
    
    def set_rescore(self, rescore):
        self.rescore = rescore

    def preprocess_query(self, query_str):
        return ' '.join([word for word in word_tokenize(query_str) if word.lower() not in self.stop_words])

    def save_results(self):
        with open(self.output_file_path, "w") as file:
            json.dump(self.query_result, file, indent=4)
      
    
    def reorder_pmid(self, retrived_documents):
        pmid_scores = {}
        
        # Iterate through the set data
        for _, value in retrived_documents.items():
            pmid = value['pmid']
            score = value['score']
            
            # Check if pmid already exists in the dictionary
            if pmid in pmid_scores:
                pmid_scores[pmid] += score
            else:
                pmid_scores[pmid] = score
           
        return pmid_scores
    
    def lexical_query(self, query_str, limit=10):
        if self.lexical_client == None:
            raise ValueError("No Lexical client defined")
        
        query = {
                "size": limit,
                "query": {
                    "multi_match": {
                        "query": query_str,
                        "fields": ["text"]
                    }
                }
            }
       
        results = self.lexical_client.search(index=self.index_lexical_name, body=query) 
        retrived_documents = {}
        max_score = results['hits']['max_score']
     
        for hit in results["hits"]["hits"]:
            
            pmid = hit["_source"]["pmid"]
            score = hit["_score"]
            auto_id = hit["_id"]
            
            
            
            retrived_documents[auto_id] = {
                "score": round(score/max_score, 5),
                "pmid": pmid
                }
        
        retrived_documents = self.reorder_pmid(retrived_documents)
        return retrived_documents #adjust the return 
    
    def lexical_query_pmid(self, query_str, limit=10):
        #print("Lexical = ",query_str)
        if self.lexical_client == None:
            raise ValueError("No Lexical client defined")
        
        query = {
                "size": limit,
                "query": {
                    "multi_match": {
                        "query": query_str,
                        "fields": ["full_text"]
                    }
                }
            }
        
        results = self.lexical_client.search(index=self.index_lexical_name, body=query) 
        
        retrieved_documents = {}
        max_score = results['hits']['max_score']
        for hit in results["hits"]["hits"]:
            
            pmid = hit["_source"]["pmid"]
            score = hit["_score"] / max_score
            
            retrieved_documents[pmid] = score
            
        return retrieved_documents #adjust the return 

    def semantic_query(self, query, limit=10):
        #print("semantic = ",query)
        if self.semantic_client == None:
            raise ValueError("No Semantic client defined")
        if self.model == None:
            raise ValueError("No model defined")
        
        query_vector = self.model.encode(query).tolist()
    
        search_params=models.SearchParams(
            quantization=models.QuantizationSearchParams(rescore=self.rescore)
            )
        results = self.semantic_client.search(collection_name=self.index_name_semantic,query_vector=query_vector,search_params=search_params, limit=limit)
    
        #results = self.semantic_client.search(collection_name=self.index_name_semantic,query_vector=query_vector, limit=limit)
        
        retrived_documents = {}
        max_score = None
        for i,document in enumerate(results):
            
            pmid = document.payload['pmid']
            score = document.score
            if i == 0:
                # first score is the max
                max_score = score
            retrived_documents[document.id] = { 'pmid': pmid, 'score': round(score / max_score, 5) } 

        retrived_documents = self.reorder_pmid(retrived_documents)
        
        return retrived_documents
    

    def hybrid_query(self, query_lexical, query_semantic, lex_parameter = 0.5, semantic_parameter = 0.5, limit=10):
        if (lex_parameter + semantic_parameter) > 1:
            raise ValueError("Uncorrect parameters for Hybrid Queries")
        lexical_results = self.lexical_query(query_lexical, limit = limit) 
        semantic_results = self.semantic_query(query_semantic, limit)
        max_score = 0
        retrived_documents = {}
        
        for lex_pmid in lexical_results:
            score = lexical_results[lex_pmid] * lex_parameter
            if lex_pmid in semantic_results:
                score += semantic_results[lex_pmid] * semantic_parameter

            retrived_documents[lex_pmid] = score
            max_score = max(max_score, score)
            

        for semantic_pmid in semantic_results:
            if semantic_pmid not in lexical_results:
                score = semantic_results[semantic_pmid] * semantic_parameter
                retrived_documents[semantic_pmid] = score
                max_score = max(max_score, score)
                
        return retrived_documents # just to have a starting point


    def execute_query(self, query_str, query_type='lexical', lex_parameter = 0.5, semantic_parameter = 0.5,limit = 10,save = True, stopwords_preprocessing=True):
        #print("Before = ",query_str)
        text_query = self.preprocess_query(query_str) if stopwords_preprocessing else query_str
        
        if query_type == 'lexical':
            results = self.lexical_query(text_query, limit=limit) 
        
        elif query_type == 'semantic':
            results = self.semantic_query(query_str, limit=limit)

        elif query_type == 'hybrid':
            results = self.hybrid_query(text_query, query_str, lex_parameter, semantic_parameter, limit=limit)
        else:
            raise ValueError("Invalid query type specified. Choose 'lexical', 'semantic', or 'hybrid'.")
        
        
        
        document_retrived = sorted(results.items(), key=lambda x: x[1], reverse=True)
        document_retrived = document_retrived[:limit+1] # in the hybrid search we can return more documents
        #print("Results ", document_retrived)
        if save:
            self.process_results(document_retrived, query_str, query_type)

        return document_retrived
    
    # needs to be rewrited
    def process_results(self, results, query_str,query_type):
        
        retrieved_documents = []
        for element in results:
            
            pmid,_ = element
            query = {
                    "query": {
                        "term": {
                        "pmid": int(pmid)
                        }
                    }
                }

            results = self.lexical_client.search(index=self.index_lexical_name, body=query) 
            full_text = results['hits']['hits'][0]["_source"]['full_text']
            pmid = results['hits']['hits'][0]["_source"]['pmid']

            retrieved_documents.append({
                "pmid": pmid,
                "text": full_text
            })

        dict_to_save = {'query': query_str, 'query_type': query_type, 'abstracts' : retrieved_documents}
        self.query_result.append(dict_to_save)  
        
        



[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Lenovo\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Lenovo\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


## Query parser


Doing some queries

In [13]:
model = SentenceTransformer(model_card)

In [14]:
query_parser = QueryProcessor(index_lexical=index_name_lexical, lexical_pmid=lexical_pmid, index_name_semantic = coll_name_semantic, model= model, lexical_client=client_lexical, semantic_client=client_semantic)

In [15]:
query_parser.execute_query(query_str="Which gene is responsible for disfunction in speech for children?", query_type='hybrid')

[('10797929', 0.5),
 ('15877281', 0.5),
 ('37560515', 0.49687),
 ('29597095', 0.494725),
 ('20870045', 0.49326),
 ('29922639', 0.49277),
 ('9462748', 0.490925),
 ('22303795', 0.49066),
 ('24914010', 0.490565),
 ('19332160', 0.49053),
 ('22106036', 0.49053)]

## Evaluation

## Loading the evaluation file - BioASQ_10_or_more.json

In [16]:
import json
evaluation_file = 'BioASQ_10_or_more.json'

with open(evaluation_file, 'r') as f:
    data = json.load(f)

print(len(data['questions']))

1787


## Evaluation Function

In [18]:
import numpy as np


def clean_documents(documents):
    output_documents = []  ## Changed from output_documents = set()
    for doc in documents:
        output_documents.append((doc.replace("http://www.ncbi.nlm.nih.gov/pubmed/","")))
    return output_documents

def average_precision(retrived_doc, true_doc):
    # Initialize variables
    precision_sum = 0
    num_retrieved_docs = 0
    
    # Calculate precision at each relevant document position
    for i, retrived in enumerate(retrived_doc, start=1):
        pmid,_ = retrived
        if pmid in true_doc:  # Check if the document is relevant
            num_retrieved_docs += 1
            precision_sum += num_retrieved_docs / i  # Calculate precision at cutoff i

    # Calculate average precision
    if num_retrieved_docs == 0:
        return 0
    else:
        return precision_sum / num_retrieved_docs


def evaluation(query_parser, data, query_type,alpha=0.5, beta=0.5, stopwords_preprocessing = False, path = "query_result.json"):
    avg_precisions_sum = [] # sum all average precision and divide with number of queries 
    precisions_sum = []
    queries_time = []
    for i,question in enumerate(data['questions']):
        dict_to_save = {}
        query = question['body']
        dict_to_save['query'] = query
        dict_to_save['query_type'] = query_type
        #relevant_documents = clean_documents(question['documents'])
        #(NEW_EVAL)Return only the first 10 abstracts
        relevant_documents = clean_documents(question['documents'])[:10]
        start_time = time.time()
        
        #results = query_parser.execute_query(query,query_type = query_type, lex_parameter = alpha, semantic_parameter = beta,limit = len(relevant_documents), save=False, stopwords_preprocessing = stopwords_preprocessing)
       
        #(NEW_EVAL)In the next line, limit is set to 10 for retrieving from our IR
        results = query_parser.execute_query(query,query_type = query_type, lex_parameter = alpha, semantic_parameter = beta,limit = 10, save=False, stopwords_preprocessing = stopwords_preprocessing)

        queries_time.append(time.time() - start_time)
        
        #results = [('20598273',1), ('4',1), ('6650562',1), ('2',1),('21995290',1),('15617541',1),('23001136',1),('8896569',1), ('12239580',1)]
        dict_to_save['true_documents'] = relevant_documents
        dict_to_save['retrieved_documents'] = results
       

    
        number_retrieved_documents = 0
        for pmid,_ in results:
            if pmid in relevant_documents:
                number_retrieved_documents +=1

        precision = number_retrieved_documents / len(relevant_documents)
        recall = number_retrieved_documents / len(relevant_documents)
        avg_precision = average_precision(results, relevant_documents)
        
        precisions_sum.append(precision)
        #recalls.append(recall)
        
        avg_precisions_sum.append(avg_precision)
        
        dict_to_save['precision'] = precision
        #dict_to_save['recall'] = recall
        dict_to_save['avg_precision'] = avg_precision
        with open(path, 'a') as output_file:
            output_file.write(json.dumps(dict_to_save) + '\n')
        if (i+1) % 500 == 0:
            print(f"Analyzed {i+1} queries")
            print("Actual Results...")
            print(f"Mean precision = {np.mean(precisions_sum):.3f}")
            #print(f"Mean recall = {np.mean(recalls):.3f}")
            print(f"Mean Average Precision = {np.mean(avg_precisions_sum):.3f}")
            print(f"Mean Time needed to execute a query = {np.mean(queries_time):.3f}")
    print("FINAL RESULTS ")
    print(f"Mean precision = {np.mean(precisions_sum):.3f}")
    #print(f"Mean recall = {np.mean(recalls):.3f}")
    print(f"Mean Average Precision = {np.mean(avg_precisions_sum):.3f}")
    print(f"Mean Time needed to execute a query = {np.mean(queries_time):.3f}")
    print('Relevantni',relevant_documents)
    print('Vraćeni',results)
    

## Result for Lexical Auto-id Stopwords False

In [20]:
evaluation(query_parser,data, query_type="lexical", path = "Eval_results_BioASQ_subset/lexical_results.json")


Analyzed 500 queries
Actual Results...
Mean precision = 0.139
Mean Average Precision = 0.289
Mean Time needed to execute a query = 0.222
Analyzed 1000 queries
Actual Results...
Mean precision = 0.142
Mean Average Precision = 0.289
Mean Time needed to execute a query = 0.218
Analyzed 1500 queries
Actual Results...
Mean precision = 0.165
Mean Average Precision = 0.321
Mean Time needed to execute a query = 0.217
FINAL RESULTS 
Mean precision = 0.162
Mean Average Precision = 0.322
Mean Time needed to execute a query = 0.217
Relevantni ['33848465', '27158445', '31138766', '35365636', '30709919', '28388412', '30148498', '31766571', '30344099', '32839552']
Vraćeni [('31138766', 1.0), ('31490656', 0.93217), ('34698396', 0.92664), ('28388412', 0.92544), ('30136239', 0.91816), ('35365636', 0.8957), ('34764207', 0.8733), ('37511074', 0.87262), ('37922978', 0.86799), ('33848465', 0.86518)]


## Result for Lexical Pmid Stopwords False

In [21]:
index_name_lexical = "medline-faiss-hnsw-lexical-pmid"
query_parser_pmid = QueryProcessor(index_lexical=index_name_lexical, lexical_pmid=True, index_name_semantic = coll_name_semantic, model= model, lexical_client=client_lexical, semantic_client=client_semantic)

In [22]:
evaluation(query_parser_pmid,data, query_type="lexical", path = "Eval_results_BioASQ_subset/lexical_results_pmid.json")

Analyzed 500 queries
Actual Results...
Mean precision = 0.143
Mean Average Precision = 0.294
Mean Time needed to execute a query = 0.202
Analyzed 1000 queries
Actual Results...
Mean precision = 0.146
Mean Average Precision = 0.296
Mean Time needed to execute a query = 0.204
Analyzed 1500 queries
Actual Results...
Mean precision = 0.168
Mean Average Precision = 0.330
Mean Time needed to execute a query = 0.206
FINAL RESULTS 
Mean precision = 0.166
Mean Average Precision = 0.331
Mean Time needed to execute a query = 0.208
Relevantni ['33848465', '27158445', '31138766', '35365636', '30709919', '28388412', '30148498', '31766571', '30344099', '32839552']
Vraćeni [('31138766', 1.0), ('31490656', 0.9305325716698989), ('34698396', 0.9270808026361854), ('28388412', 0.9269893363414594), ('30136239', 0.9132803947609708), ('35365636', 0.8995505315158622), ('37511074', 0.8739989553059898), ('34764207', 0.8685736036942159), ('37922978', 0.8652094389215201), ('37365800', 0.862840845313047)]


## Result lexical pmid with stopwords

In [20]:
english_stopwords = set(stopwords.words('english'))
print(english_stopwords)

{'more', 'those', 'not', "won't", 'these', "shouldn't", 'over', 'were', 'o', 'should', 'her', 'whom', 'too', 'when', 'no', 'our', "don't", "didn't", "mightn't", 'i', 'during', 'why', 'd', 'shan', 'then', 'by', 'is', 'can', 'any', "hasn't", 'against', 'll', 'there', "weren't", 'once', 'he', 'yourselves', "doesn't", 'and', 'just', 'about', 'here', 'such', 'ours', 'ourselves', "you'd", 'nor', 'of', "aren't", "mustn't", 'or', 'myself', "haven't", 'my', 'have', 'only', 'do', 'so', 've', 'own', 'was', 'what', 'again', 'until', 'ma', 'yours', 'each', 'wouldn', 'above', 'that', 'been', 'shouldn', 'below', 'a', "shan't", 'from', 'further', 'doing', 'few', 'did', 'with', 'mightn', "you'll", "isn't", 'but', 's', "that'll", "hadn't", "wasn't", 'to', "wouldn't", 'theirs', 'hadn', 'if', 'for', 'couldn', 'himself', 'does', 'into', "needn't", 'its', 'same', 'are', 'itself', 'she', 'ain', 'weren', 'most', 'it', 'after', 'out', "couldn't", 'had', 're', 'how', 'some', 'an', 'hasn', 'under', 'very', 'who'

In [21]:
index_name_lexical = "medline-faiss-hnsw-lexical-pmid"
lexical_pmid = True
query_parser_stopwords = QueryProcessor(index_lexical=index_name_lexical, lexical_pmid=lexical_pmid, index_name_semantic = coll_name_semantic, model= model, lexical_client=client_lexical, semantic_client=client_semantic, stopwords=english_stopwords)


In [25]:
evaluation(query_parser_stopwords,data, query_type="lexical", path = "Eval_results_BioASQ_subset/lex_results_stopwords.json",stopwords_preprocessing=True)

Analyzed 500 queries
Actual Results...
Mean precision = 0.146
Mean Average Precision = 0.308
Mean Time needed to execute a query = 0.204
Analyzed 1000 queries
Actual Results...
Mean precision = 0.148
Mean Average Precision = 0.305
Mean Time needed to execute a query = 0.196
Analyzed 1500 queries
Actual Results...
Mean precision = 0.169
Mean Average Precision = 0.335
Mean Time needed to execute a query = 0.195
FINAL RESULTS 
Mean precision = 0.165
Mean Average Precision = 0.335
Mean Time needed to execute a query = 0.193
Relevantni ['33848465', '27158445', '31138766', '35365636', '30709919', '28388412', '30148498', '31766571', '30344099', '32839552']
Vraćeni [('31138766', 1.0), ('31490656', 0.9304463543344844), ('34698396', 0.9269975080842887), ('28388412', 0.9268917074116443), ('30136239', 0.9132068197774056), ('35365636', 0.8994613475709672), ('37511074', 0.8739083591091045), ('34764207', 0.8684875684070689), ('37922978', 0.8651145640364755), ('37365800', 0.8627494312692764)]


## Result lexical autoid stopword True

In [26]:
index_name_lexical = "medline-faiss-hnsw-lexical"
lexical_pmid = False
query_parser_stopwords = QueryProcessor(index_lexical=index_name_lexical, lexical_pmid=lexical_pmid, index_name_semantic = coll_name_semantic, model= model, lexical_client=client_lexical, semantic_client=client_semantic, stopwords=english_stopwords)
evaluation(query_parser_stopwords,data, query_type="lexical", path = "Eval_results_BioASQ_subset/lex_results_stopwords_auto_id.json",stopwords_preprocessing=True)

Analyzed 500 queries
Actual Results...
Mean precision = 0.143
Mean Average Precision = 0.300
Mean Time needed to execute a query = 0.195
Analyzed 1000 queries
Actual Results...
Mean precision = 0.145
Mean Average Precision = 0.297
Mean Time needed to execute a query = 0.188
Analyzed 1500 queries
Actual Results...
Mean precision = 0.165
Mean Average Precision = 0.326
Mean Time needed to execute a query = 0.186
FINAL RESULTS 
Mean precision = 0.162
Mean Average Precision = 0.325
Mean Time needed to execute a query = 0.185
Relevantni ['33848465', '27158445', '31138766', '35365636', '30709919', '28388412', '30148498', '31766571', '30344099', '32839552']
Vraćeni [('31138766', 1.0), ('31490656', 0.93192), ('34698396', 0.92639), ('28388412', 0.92514), ('30136239', 0.91794), ('35365636', 0.89543), ('34764207', 0.87305), ('37511074', 0.87235), ('37922978', 0.86771), ('37365800', 0.86354)]


## Result for Semantic without rescore

In [27]:
evaluation(query_parser,data, query_type="semantic", path = "Eval_results_BioASQ_subset/semantic_results.json")

Analyzed 500 queries
Actual Results...
Mean precision = 0.086
Mean Average Precision = 0.216
Mean Time needed to execute a query = 0.201
Analyzed 1000 queries
Actual Results...
Mean precision = 0.089
Mean Average Precision = 0.220
Mean Time needed to execute a query = 0.199
Analyzed 1500 queries
Actual Results...
Mean precision = 0.096
Mean Average Precision = 0.238
Mean Time needed to execute a query = 0.197
FINAL RESULTS 
Mean precision = 0.099
Mean Average Precision = 0.245
Mean Time needed to execute a query = 0.197
Relevantni ['33848465', '27158445', '31138766', '35365636', '30709919', '28388412', '30148498', '31766571', '30344099', '32839552']
Vraćeni [('28388403', 1.0), ('29076500', 0.98992), ('16377620', 0.9868), ('32931899', 0.98448), ('22351266', 0.97915), ('22157748', 0.97701), ('15221519', 0.97636), ('33687702', 0.97629), ('20010783', 0.97575), ('16023831', 0.97531)]


# Result Semantic with rescore

In [28]:
query_parser.set_rescore(True)
evaluation(query_parser,data, query_type="semantic", path = "Eval_results_BioASQ_subset/semantic_results.json")
query_parser.set_rescore(False) # re insert the rescore to False

Analyzed 500 queries
Actual Results...
Mean precision = 0.087
Mean Average Precision = 0.230
Mean Time needed to execute a query = 0.205
Analyzed 1000 queries
Actual Results...
Mean precision = 0.090
Mean Average Precision = 0.232
Mean Time needed to execute a query = 0.205
Analyzed 1500 queries
Actual Results...
Mean precision = 0.097
Mean Average Precision = 0.249
Mean Time needed to execute a query = 0.202
FINAL RESULTS 
Mean precision = 0.100
Mean Average Precision = 0.253
Mean Time needed to execute a query = 0.204
Relevantni ['33848465', '27158445', '31138766', '35365636', '30709919', '28388412', '30148498', '31766571', '30344099', '32839552']
Vraćeni [('28388403', 1.0), ('29076500', 0.9851), ('32931899', 0.98391), ('16377620', 0.98089), ('16023831', 0.97981), ('22157748', 0.97606), ('33687702', 0.9755), ('22351266', 0.97539), ('20010783', 0.975), ('10570775', 0.97352)]


## Result for Hybrid with lexical autoid Stopwords False

In [29]:
evaluation(query_parser, data, query_type="hybrid", path = "Eval_results_BioASQ_subset/hybrid_results.json")

Analyzed 500 queries
Actual Results...
Mean precision = 0.114
Mean Average Precision = 0.285
Mean Time needed to execute a query = 0.699
Analyzed 1000 queries
Actual Results...
Mean precision = 0.115
Mean Average Precision = 0.287
Mean Time needed to execute a query = 0.667
Analyzed 1500 queries
Actual Results...
Mean precision = 0.125
Mean Average Precision = 0.315
Mean Time needed to execute a query = 0.650
FINAL RESULTS 
Mean precision = 0.126
Mean Average Precision = 0.318
Mean Time needed to execute a query = 0.641
Relevantni ['33848465', '27158445', '31138766', '35365636', '30709919', '28388412', '30148498', '31766571', '30344099', '32839552']
Vraćeni [('31138766', 0.5), ('28388403', 0.5), ('29076500', 0.49496), ('16377620', 0.4934), ('32931899', 0.49224), ('22351266', 0.489575), ('22157748', 0.488505), ('15221519', 0.48818), ('33687702', 0.488145), ('20010783', 0.487875), ('16023831', 0.487655)]


## Result for Hybrid with lexical pmid Stopwords False

In [30]:
evaluation(query_parser_pmid, data, query_type="hybrid", path = "Eval_results_BioASQ_subset/hybrid_results_pmid.json")

Analyzed 500 queries
Actual Results...
Mean precision = 0.112
Mean Average Precision = 0.287
Mean Time needed to execute a query = 0.569
Analyzed 1000 queries
Actual Results...
Mean precision = 0.114
Mean Average Precision = 0.289
Mean Time needed to execute a query = 0.567
Analyzed 1500 queries
Actual Results...
Mean precision = 0.124
Mean Average Precision = 0.322
Mean Time needed to execute a query = 0.542
FINAL RESULTS 
Mean precision = 0.126
Mean Average Precision = 0.326
Mean Time needed to execute a query = 0.543
Relevantni ['33848465', '27158445', '31138766', '35365636', '30709919', '28388412', '30148498', '31766571', '30344099', '32839552']
Vraćeni [('31138766', 0.5), ('28388403', 0.5), ('29076500', 0.49496), ('16377620', 0.4934), ('32931899', 0.49224), ('22351266', 0.489575), ('22157748', 0.488505), ('15221519', 0.48818), ('33687702', 0.488145), ('20010783', 0.487875), ('16023831', 0.487655)]


## Result Hybrid pmid with stopwords 

In [31]:
index_name_lexical = "medline-faiss-hnsw-lexical-pmid"
lexical_pmid = True
query_parser_stopwords = QueryProcessor(index_lexical=index_name_lexical, lexical_pmid=lexical_pmid, index_name_semantic = coll_name_semantic, model= model, lexical_client=client_lexical, semantic_client=client_semantic, stopwords=english_stopwords)
evaluation(query_parser_stopwords, data, query_type="hybrid",alpha=0.5, beta=0.5, path = "Eval_results_BioASQ_subset/hybrid_results_pmid_stopwords.json", stopwords_preprocessing=True)

Analyzed 500 queries
Actual Results...
Mean precision = 0.112
Mean Average Precision = 0.297
Mean Time needed to execute a query = 0.476
Analyzed 1000 queries
Actual Results...
Mean precision = 0.114
Mean Average Precision = 0.298
Mean Time needed to execute a query = 0.457
Analyzed 1500 queries
Actual Results...
Mean precision = 0.124
Mean Average Precision = 0.324
Mean Time needed to execute a query = 0.450
FINAL RESULTS 
Mean precision = 0.126
Mean Average Precision = 0.327
Mean Time needed to execute a query = 0.446
Relevantni ['33848465', '27158445', '31138766', '35365636', '30709919', '28388412', '30148498', '31766571', '30344099', '32839552']
Vraćeni [('31138766', 0.5), ('28388403', 0.5), ('29076500', 0.49496), ('16377620', 0.4934), ('32931899', 0.49224), ('22351266', 0.489575), ('22157748', 0.488505), ('15221519', 0.48818), ('33687702', 0.488145), ('20010783', 0.487875), ('16023831', 0.487655)]


## Result Hybrid autoid with stopwords 

In [33]:
index_name_lexical = "medline-faiss-hnsw-lexical"
lexical_pmid = False
query_parser_stopwords = QueryProcessor(index_lexical=index_name_lexical, lexical_pmid=lexical_pmid, index_name_semantic = coll_name_semantic, model= model, lexical_client=client_lexical, semantic_client=client_semantic, stopwords=english_stopwords)
evaluation(query_parser_stopwords, data, query_type="hybrid",alpha=0.5, beta=0.5, path = "Eval_results_BioASQ_subset/hybrid_results_autoid_stopwords.json", stopwords_preprocessing=True)

Analyzed 500 queries
Actual Results...
Mean precision = 0.114
Mean Average Precision = 0.294
Mean Time needed to execute a query = 0.542
Analyzed 1000 queries
Actual Results...
Mean precision = 0.115
Mean Average Precision = 0.293
Mean Time needed to execute a query = 0.499
Analyzed 1500 queries
Actual Results...
Mean precision = 0.124
Mean Average Precision = 0.321
Mean Time needed to execute a query = 0.480
FINAL RESULTS 
Mean precision = 0.127
Mean Average Precision = 0.322
Mean Time needed to execute a query = 0.479
Relevantni ['33848465', '27158445', '31138766', '35365636', '30709919', '28388412', '30148498', '31766571', '30344099', '32839552']
Vraćeni [('31138766', 0.5), ('28388403', 0.5), ('29076500', 0.49496), ('16377620', 0.4934), ('32931899', 0.49224), ('22351266', 0.489575), ('22157748', 0.488505), ('15221519', 0.48818), ('33687702', 0.488145), ('20010783', 0.487875), ('16023831', 0.487655)]


## Result Hybrid pmid with rescore

In [22]:
index_name_lexical = "medline-faiss-hnsw-lexical-pmid"
lexical_pmid = True
query_parser_stopwords = QueryProcessor(index_lexical=index_name_lexical, lexical_pmid=lexical_pmid, index_name_semantic = coll_name_semantic, model= model, lexical_client=client_lexical, semantic_client=client_semantic, stopwords=english_stopwords)
query_parser_stopwords.set_rescore(True)
evaluation(query_parser_stopwords, data, query_type="hybrid",alpha=0.5, beta=0.5, path = "Eval_results_BioASQ_subset/hybrid_results_pmid_recore.json", stopwords_preprocessing=True)
query_parser_stopwords.set_rescore(False)

Analyzed 500 queries
Actual Results...
Mean precision = 0.115
Mean Average Precision = 0.300
Mean Time needed to execute a query = 0.482
Analyzed 1000 queries
Actual Results...
Mean precision = 0.116
Mean Average Precision = 0.298
Mean Time needed to execute a query = 0.480
Analyzed 1500 queries
Actual Results...
Mean precision = 0.126
Mean Average Precision = 0.326
Mean Time needed to execute a query = 0.479
FINAL RESULTS 
Mean precision = 0.128
Mean Average Precision = 0.327
Mean Time needed to execute a query = 0.480
Relevantni ['33848465', '27158445', '31138766', '35365636', '30709919', '28388412', '30148498', '31766571', '30344099', '32839552']
Vraćeni [('31138766', 0.5), ('28388403', 0.5), ('29076500', 0.49255), ('32931899', 0.491955), ('16377620', 0.490445), ('16023831', 0.489905), ('22157748', 0.48803), ('33687702', 0.48775), ('22351266', 0.487695), ('20010783', 0.4875), ('10570775', 0.48676)]


## Result Hybrid 0.6 lexical and 0.4

In [23]:
index_name_lexical = "medline-faiss-hnsw-lexical-pmid"
lexical_pmid = True
query_parser_stopwords = QueryProcessor(index_lexical=index_name_lexical, lexical_pmid=lexical_pmid, index_name_semantic = coll_name_semantic, model= model, lexical_client=client_lexical, semantic_client=client_semantic, stopwords=english_stopwords)
query_parser_stopwords.set_rescore(True)
evaluation(query_parser_stopwords, data, query_type="hybrid",alpha=0.6, beta=0.4, path = "Eval_results_BioASQ_subset/hybrid_results_pmid_recore_06-04.json", stopwords_preprocessing=True)
query_parser_stopwords.set_rescore(False)

Analyzed 500 queries
Actual Results...
Mean precision = 0.154
Mean Average Precision = 0.306
Mean Time needed to execute a query = 0.500
Analyzed 1000 queries
Actual Results...
Mean precision = 0.157
Mean Average Precision = 0.308
Mean Time needed to execute a query = 0.481
Analyzed 1500 queries
Actual Results...
Mean precision = 0.178
Mean Average Precision = 0.339
Mean Time needed to execute a query = 0.469
FINAL RESULTS 
Mean precision = 0.175
Mean Average Precision = 0.340
Mean Time needed to execute a query = 0.466
Relevantni ['33848465', '27158445', '31138766', '35365636', '30709919', '28388412', '30148498', '31766571', '30344099', '32839552']
Vraćeni [('31138766', 0.6), ('31490656', 0.5582678126006906), ('34698396', 0.5561985048505732), ('28388412', 0.5561350244469866), ('30136239', 0.5479240918664433), ('35365636', 0.5396768085425803), ('37511074', 0.5243450154654626), ('34764207', 0.5210925410442413), ('37922978', 0.5190687384218853), ('37365800', 0.5176496587615658), ('283884

## Result Hybrid with alpha 0.7 and Beta 0.3

In [24]:
index_name_lexical = "medline-faiss-hnsw-lexical-pmid"
lexical_pmid = True
query_parser_stopwords = QueryProcessor(index_lexical=index_name_lexical, lexical_pmid=lexical_pmid, index_name_semantic = coll_name_semantic, model= model, lexical_client=client_lexical, semantic_client=client_semantic, stopwords=english_stopwords)
query_parser_stopwords.set_rescore(True)
evaluation(query_parser_stopwords, data, query_type="hybrid",alpha=0.7, beta=0.3, path = "Eval_results_BioASQ_subset/hybrid_results_pmid_recore_07-03.json", stopwords_preprocessing=True)
query_parser_stopwords.set_rescore(False)

Analyzed 500 queries
Actual Results...
Mean precision = 0.154
Mean Average Precision = 0.307
Mean Time needed to execute a query = 0.459
Analyzed 1000 queries
Actual Results...
Mean precision = 0.157
Mean Average Precision = 0.310
Mean Time needed to execute a query = 0.460
Analyzed 1500 queries
Actual Results...
Mean precision = 0.179
Mean Average Precision = 0.343
Mean Time needed to execute a query = 0.464
FINAL RESULTS 
Mean precision = 0.176
Mean Average Precision = 0.343
Mean Time needed to execute a query = 0.465
Relevantni ['33848465', '27158445', '31138766', '35365636', '30709919', '28388412', '30148498', '31766571', '30344099', '32839552']
Vraćeni [('31138766', 0.7), ('31490656', 0.651312448034139), ('34698396', 0.648898255659002), ('28388412', 0.6488241951881509), ('30136239', 0.6392447738441839), ('35365636', 0.6296229432996769), ('37511074', 0.6117358513763731), ('34764207', 0.6079412978849482), ('37922978', 0.6055801948255328), ('37365800', 0.6039246018884934), ('28388403

## Result Hybrid with 0.8 and 0.2

In [25]:
index_name_lexical = "medline-faiss-hnsw-lexical-pmid"
lexical_pmid = True
query_parser_stopwords = QueryProcessor(index_lexical=index_name_lexical, lexical_pmid=lexical_pmid, index_name_semantic = coll_name_semantic, model= model, lexical_client=client_lexical, semantic_client=client_semantic, stopwords=english_stopwords)
query_parser_stopwords.set_rescore(True)
evaluation(query_parser_stopwords, data, query_type="hybrid",alpha=0.8, beta=0.2, path = "Eval_results_BioASQ_subset/hybrid_results_pmid_recore_08-02.json", stopwords_preprocessing=True)
query_parser_stopwords.set_rescore(False)

Analyzed 500 queries
Actual Results...
Mean precision = 0.154
Mean Average Precision = 0.309
Mean Time needed to execute a query = 0.460
Analyzed 1000 queries
Actual Results...
Mean precision = 0.157
Mean Average Precision = 0.311
Mean Time needed to execute a query = 0.456
Analyzed 1500 queries
Actual Results...
Mean precision = 0.179
Mean Average Precision = 0.343
Mean Time needed to execute a query = 0.450
FINAL RESULTS 
Mean precision = 0.176
Mean Average Precision = 0.344
Mean Time needed to execute a query = 0.451
Relevantni ['33848465', '27158445', '31138766', '35365636', '30709919', '28388412', '30148498', '31766571', '30344099', '32839552']
Vraćeni [('31138766', 0.8), ('31490656', 0.7443570834675876), ('34698396', 0.741598006467431), ('28388412', 0.7415133659293155), ('30136239', 0.7305654558219246), ('35365636', 0.7195690780567738), ('37511074', 0.6991266872872837), ('34764207', 0.6947900547256551), ('37922978', 0.6920916512291804), ('37365800', 0.6901995450154211), ('2838840

## Result Hybrid with 0.9 lexical and 0.1 semantic

In [26]:
index_name_lexical = "medline-faiss-hnsw-lexical-pmid"
lexical_pmid = True
query_parser_stopwords = QueryProcessor(index_lexical=index_name_lexical, lexical_pmid=lexical_pmid, index_name_semantic = coll_name_semantic, model= model, lexical_client=client_lexical, semantic_client=client_semantic, stopwords=english_stopwords)
query_parser_stopwords.set_rescore(True)
evaluation(query_parser_stopwords, data, query_type="hybrid",alpha=0.8, beta=0.2, path = "Eval_results_BioASQ_subset/hybrid_results_pmid_recore_09-01.json", stopwords_preprocessing=True)
query_parser_stopwords.set_rescore(False)

Analyzed 500 queries
Actual Results...
Mean precision = 0.154
Mean Average Precision = 0.309
Mean Time needed to execute a query = 0.458
Analyzed 1000 queries
Actual Results...
Mean precision = 0.157
Mean Average Precision = 0.311
Mean Time needed to execute a query = 0.454
Analyzed 1500 queries
Actual Results...
Mean precision = 0.179
Mean Average Precision = 0.343
Mean Time needed to execute a query = 0.450
FINAL RESULTS 
Mean precision = 0.176
Mean Average Precision = 0.344
Mean Time needed to execute a query = 0.455
Relevantni ['33848465', '27158445', '31138766', '35365636', '30709919', '28388412', '30148498', '31766571', '30344099', '32839552']
Vraćeni [('31138766', 0.8), ('31490656', 0.7443570834675876), ('34698396', 0.741598006467431), ('28388412', 0.7415133659293155), ('30136239', 0.7305654558219246), ('35365636', 0.7195690780567738), ('37511074', 0.6991266872872837), ('34764207', 0.6947900547256551), ('37922978', 0.6920916512291804), ('37365800', 0.6901995450154211), ('2838840

## Result Hybrid with 0.4 lexical and 0.6 semantic

In [27]:
index_name_lexical = "medline-faiss-hnsw-lexical-pmid"
lexical_pmid = True
query_parser_stopwords = QueryProcessor(index_lexical=index_name_lexical, lexical_pmid=lexical_pmid, index_name_semantic = coll_name_semantic, model= model, lexical_client=client_lexical, semantic_client=client_semantic, stopwords=english_stopwords)
query_parser_stopwords.set_rescore(True)
evaluation(query_parser_stopwords, data, query_type="hybrid",alpha=0.4, beta=0.6, path = "Eval_results_BioASQ_subset/hybrid_results_pmid_recore_04-06.json", stopwords_preprocessing=True)
query_parser_stopwords.set_rescore(False)

Analyzed 500 queries
Actual Results...
Mean precision = 0.109
Mean Average Precision = 0.263
Mean Time needed to execute a query = 0.461
Analyzed 1000 queries
Actual Results...
Mean precision = 0.111
Mean Average Precision = 0.258
Mean Time needed to execute a query = 0.471
Analyzed 1500 queries
Actual Results...
Mean precision = 0.120
Mean Average Precision = 0.278
Mean Time needed to execute a query = 0.479
FINAL RESULTS 
Mean precision = 0.122
Mean Average Precision = 0.281
Mean Time needed to execute a query = 0.480
Relevantni ['33848465', '27158445', '31138766', '35365636', '30709919', '28388412', '30148498', '31766571', '30344099', '32839552']
Vraćeni [('28388403', 0.6), ('29076500', 0.5910599999999999), ('32931899', 0.5903459999999999), ('16377620', 0.588534), ('16023831', 0.5878859999999999), ('22157748', 0.585636), ('33687702', 0.5853), ('22351266', 0.5852339999999999), ('20010783', 0.585), ('10570775', 0.584112), ('31138766', 0.4)]


## Result Hybrid with 0.3 lexical and 0.7 semantic

In [28]:
index_name_lexical = "medline-faiss-hnsw-lexical-pmid"
lexical_pmid = True
query_parser_stopwords = QueryProcessor(index_lexical=index_name_lexical, lexical_pmid=lexical_pmid, index_name_semantic = coll_name_semantic, model= model, lexical_client=client_lexical, semantic_client=client_semantic, stopwords=english_stopwords)
query_parser_stopwords.set_rescore(True)
evaluation(query_parser_stopwords, data, query_type="hybrid",alpha=0.3, beta=0.7, path = "Eval_results_BioASQ_subset/hybrid_results_pmid_recore_03-07.json", stopwords_preprocessing=True)
query_parser_stopwords.set_rescore(False)

Analyzed 500 queries
Actual Results...
Mean precision = 0.109
Mean Average Precision = 0.263
Mean Time needed to execute a query = 0.496
Analyzed 1000 queries
Actual Results...
Mean precision = 0.111
Mean Average Precision = 0.258
Mean Time needed to execute a query = 0.479
Analyzed 1500 queries
Actual Results...
Mean precision = 0.120
Mean Average Precision = 0.277
Mean Time needed to execute a query = 0.476
FINAL RESULTS 
Mean precision = 0.122
Mean Average Precision = 0.279
Mean Time needed to execute a query = 0.471
Relevantni ['33848465', '27158445', '31138766', '35365636', '30709919', '28388412', '30148498', '31766571', '30344099', '32839552']
Vraćeni [('28388403', 0.7), ('29076500', 0.6895699999999999), ('32931899', 0.6887369999999999), ('16377620', 0.686623), ('16023831', 0.6858669999999999), ('22157748', 0.683242), ('33687702', 0.68285), ('22351266', 0.682773), ('20010783', 0.6825), ('10570775', 0.681464), ('31138766', 0.3)]


## Result Hybrid with 0.2 lexical and 0.8 semantic

In [29]:
index_name_lexical = "medline-faiss-hnsw-lexical-pmid"
lexical_pmid = True
query_parser_stopwords = QueryProcessor(index_lexical=index_name_lexical, lexical_pmid=lexical_pmid, index_name_semantic = coll_name_semantic, model= model, lexical_client=client_lexical, semantic_client=client_semantic, stopwords=english_stopwords)
query_parser_stopwords.set_rescore(True)
evaluation(query_parser_stopwords, data, query_type="hybrid",alpha=0.2, beta=0.8, path = "Eval_results_BioASQ_subset/hybrid_results_pmid_recore_02-08.json", stopwords_preprocessing=True)
query_parser_stopwords.set_rescore(False)

Analyzed 500 queries
Actual Results...
Mean precision = 0.109
Mean Average Precision = 0.263
Mean Time needed to execute a query = 0.497
Analyzed 1000 queries
Actual Results...
Mean precision = 0.111
Mean Average Precision = 0.257
Mean Time needed to execute a query = 0.480
Analyzed 1500 queries
Actual Results...
Mean precision = 0.120
Mean Average Precision = 0.277
Mean Time needed to execute a query = 0.470
FINAL RESULTS 
Mean precision = 0.122
Mean Average Precision = 0.279
Mean Time needed to execute a query = 0.490
Relevantni ['33848465', '27158445', '31138766', '35365636', '30709919', '28388412', '30148498', '31766571', '30344099', '32839552']
Vraćeni [('28388403', 0.8), ('29076500', 0.78808), ('32931899', 0.787128), ('16377620', 0.7847120000000001), ('16023831', 0.783848), ('22157748', 0.7808480000000001), ('33687702', 0.7804000000000001), ('22351266', 0.780312), ('20010783', 0.78), ('10570775', 0.7788160000000001), ('31138766', 0.2)]


## Result Hybrid with 0.1 lexical and 0.9 semantic

In [30]:
index_name_lexical = "medline-faiss-hnsw-lexical-pmid"
lexical_pmid = True
query_parser_stopwords = QueryProcessor(index_lexical=index_name_lexical, lexical_pmid=lexical_pmid, index_name_semantic = coll_name_semantic, model= model, lexical_client=client_lexical, semantic_client=client_semantic, stopwords=english_stopwords)
query_parser_stopwords.set_rescore(True)
evaluation(query_parser_stopwords, data, query_type="hybrid",alpha=0.1, beta=0.9, path = "Eval_results_BioASQ_subset/hybrid_results_pmid_recore_01-09.json", stopwords_preprocessing=True)
query_parser_stopwords.set_rescore(False)

Analyzed 500 queries
Actual Results...
Mean precision = 0.109
Mean Average Precision = 0.260
Mean Time needed to execute a query = 0.529
Analyzed 1000 queries
Actual Results...
Mean precision = 0.111
Mean Average Precision = 0.256
Mean Time needed to execute a query = 0.531
Analyzed 1500 queries
Actual Results...
Mean precision = 0.120
Mean Average Precision = 0.278
Mean Time needed to execute a query = 0.528
FINAL RESULTS 
Mean precision = 0.122
Mean Average Precision = 0.279
Mean Time needed to execute a query = 0.521
Relevantni ['33848465', '27158445', '31138766', '35365636', '30709919', '28388412', '30148498', '31766571', '30344099', '32839552']
Vraćeni [('28388403', 0.9), ('29076500', 0.88659), ('32931899', 0.885519), ('16377620', 0.8828010000000001), ('16023831', 0.881829), ('22157748', 0.8784540000000001), ('33687702', 0.87795), ('22351266', 0.877851), ('20010783', 0.8775), ('10570775', 0.8761680000000001), ('31138766', 0.1)]


# Evaluation based on PubMed website

In [31]:
from Bio import Entrez
# Always tell NCBI who you are (your email address)
Entrez.email = "adela.ljajic@ivi.ac.rs"
def search_pubmed(query, limit = 10, mesh=True):
    if not mesh:
        query += "[Title/Abstract]"
    # Use Entrez.esearch to search for articles matching the query in PubMed
    handle = Entrez.esearch(db="pubmed", term=query, retmax=limit, sort="relevance",)
    
    record = Entrez.read(handle)
    handle.close()
    # Get the list of Ids returned by the search
    id_list = record["IdList"]
    return id_list

def fetch_details(id_list):
    # Use Entrez.efetch to get the article details from the list of Ids
    ids = ','.join(id_list)
    handle = Entrez.efetch(db="pubmed", id=ids, retmode="xml")
    records = Entrez.read(handle)
    handle.close()
    return records
# Example usage

query = "Is the protein Papilin secreted?"
id_list = search_pubmed(query)
print("Id list = ",id_list)


Id list =  ['11076767', '15094110', '38284126', '22242013', '30388611', '22937083', '30661986', '12666201', '36997062', '29678203']


In [32]:
query = "Is the protein Papilin secreted?"
id_list = search_pubmed(query, mesh=False)
print("Id list = ",id_list)

Id list =  ['38284126', '11076767', '15094122', '21784067', '7515725']


In [33]:
def evaluation_pubmed(query_type, data,mesh=True,path = "query_result.json"):
    avg_precisions_sum = [] # sum all average precision and divide with number of queries 
    precisions_sum = []
    queries_time = []
    for i,question in enumerate(data['questions']):
        dict_to_save = {}
        query = question['body']
        dict_to_save['query'] = query
        dict_to_save['query_type'] = query_type
        relevant_documents = clean_documents(question['documents'])
        start_time = time.time()
        
        results = search_pubmed(query, limit = len(relevant_documents),mesh=mesh)
        queries_time.append(time.time() - start_time)
        
        dict_to_save['true_documents'] = list(relevant_documents)
        dict_to_save['retrieved_documents'] = results
       
        number_retrieved_documents = 0
        for pmid in results:
            if pmid in relevant_documents:
                number_retrieved_documents +=1

        precision = number_retrieved_documents / len(relevant_documents)
        recall = number_retrieved_documents / len(relevant_documents)
        avg_precision = average_precision(results, relevant_documents)
       
        precisions_sum.append(precision)
        #recalls.append(recall)
        
        avg_precisions_sum.append(avg_precision)
        
        dict_to_save['precision'] = precision
        #dict_to_save['recall'] = recall
        dict_to_save['avg_precision'] = avg_precision
        with open(path, 'a') as output_file:
            output_file.write(json.dumps(dict_to_save) + '\n')
        if (i+1) % 500 == 0:
            print(f"Analyzed {i+1} queries")
            print("Actual Results...")
            print(f"Mean precision = {np.mean(precisions_sum):.3f}")
            #print(f"Mean recall = {np.mean(recalls):.3f}")
            print(f"Mean Average Precision = {np.mean(avg_precisions_sum):.3f}")
            print(f"Mean Time needed to execute a query = {np.mean(queries_time):.3f}")
    print("FINAL RESULTS ")
    print(f"Mean precision = {np.mean(precisions_sum):.3f}")
    #print(f"Mean recall = {np.mean(recalls):.3f}")
    print(f"Mean Average Precision = {np.mean(avg_precisions_sum):.3f}")
    print(f"Mean Time needed to execute a query = {np.mean(queries_time):.3f}")

## Evaluation Pubmed with Mesh Term

The mesh terms are applied automatically by PubMed website

In [34]:
evaluation_pubmed(data=data, query_type="PubMed website", path="Eval_results_BioASQ_subset/Pubmed_mesh.json")

ValueError: too many values to unpack (expected 2)

## Evaluation PubMed without mesh

In [None]:
evaluation_pubmed(data=data, query_type="PubMed website no mash", mesh = False, path="Eval_results_BioASQ_subset/Pubmed_no_mesh.json")