# Hybrid Query

In [1]:
import pandas as pd
import os
import json
import torch
import time
from opensearchpy import OpenSearch
from sentence_transformers import SentenceTransformer

In [2]:
model_card = 'sentence-transformers/msmarco-distilbert-base-tas-b'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device {device}")

Device cpu


## Setting for queries

In [3]:
host = '3.145.52.195' #host = 'localhost' 
port = 9200
auth =('admin','IVIngi2024!') #auth = ('admin','admin') 
client_lexical = OpenSearch(
    hosts = [{'host': host, 'port': port}],
    http_auth = auth,
    use_ssl = True,
    verify_certs = False,
    ssl_assert_hostname = False,
    ssl_show_warn = False,
    timeout=500, 
    max_retries=1
    #connection_class=RequestsHttpConnection 
#    http_compress = True, # enables gzip compression for request bodies
#    use_ssl = False,
#   verify_certs = False,
#    ssl_assert_hostname = False,
#    ssl_show_warn = False
)
print("Connection opened...")

Connection opened...


In [4]:
query_body = {
    "size": 10,
    "query": {
        "multi_match": {
            "query": "What are the Cancer Causes",
            "fields": ["text"]
        }
    }
}

# Execute the query
response = client_lexical.search(
    index="medline-faiss-hnsw-lexical",
    body=query_body
)
print(response)

{'took': 31, 'timed_out': False, '_shards': {'total': 4, 'successful': 4, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 10000, 'relation': 'gte'}, 'max_score': 18.928892, 'hits': [{'_index': 'medline-faiss-hnsw-lexical', '_id': '2298154', '_score': 18.928892, '_source': {'pmid': '15868453', 'auto_id': 2298154, 'text': "What do cancer survivors believe causes cancer? (United States). OBJECTIVE To describe cancer survivors' beliefs about the causes of prostate, colorectal or breast cancers. METHODS A survey of beliefs about cancer causation was completed by 670 cancer survivors (416 with breast cancer, 165 with prostate cancer and 89 with colorectal cancer) enrolled in a population-based study in Colorado. Categorical analysis was used to describe associations between participant's beliefs about the cause of their cancer type, both in themselves and in others, and personal characteristics, including gender, age, and familial cancer risk. RESULTS Cancer survivors most frequently

In [5]:
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams
from qdrant_client.http.models import PointStruct
from qdrant_client.http import models


# 3.145.52.195
client_semantic = QdrantClient(host, port=6333, timeout = 60)

# Select the type of lexical indexing: 

In [6]:
lexical_pmid = False

if lexical_pmid:
    index_name_lexical = 'medline-faiss-hnsw-lexical-pmid'
else:
    index_name_lexical ='medline-faiss-hnsw-lexical'

coll_name_semantic = "medline-faiss-hnsw"

In [7]:
model = SentenceTransformer(model_card)

## Query

In [10]:
import nltk

# Ensure that the necessary NLTK data is downloaded
nltk.download('punkt')
nltk.download('stopwords')

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import pandas as pd




class QueryProcessor:
    def __init__(self, index_lexical:str = "medline-faiss-hnsw-lexical",lexical_pmid = False, index_name_semantic ="medline-faiss-hnsw", rescore = False, model=None, lexical_client=None, semantic_client=None, output_file_path="queries/queries.tsv", stopwords=set([])):
        self.index_lexical_name = index_lexical
        self.index_name_semantic = index_name_semantic
        # 2 index name (?)
        self.model = model
        #self.lexical_pmid = lexical_pmid
        self.lexical_client = lexical_client
        self.semantic_client = semantic_client
        self.output_file_path = output_file_path
        self.stop_words = stopwords
        self.query_result = []
        self.rescore = rescore
        self.lexical_query = self.lexical_query_pmid if lexical_pmid else self.lexical_query
    
    def set_rescore(self, rescore):
        self.rescore = rescore

    def preprocess_query(self, query_str):
        return ' '.join([word for word in word_tokenize(query_str) if word.lower() not in self.stop_words])

    def save_results(self):
        with open(self.output_file_path, "w") as file:
            json.dump(self.query_result, file, indent=4)
      
    
    def reorder_pmid(self, retrived_documents):
        pmid_scores = {}
        
        # Iterate through the set data
        for _, value in retrived_documents.items():
            pmid = value['pmid']
            score = value['score']
            
            # Check if pmid already exists in the dictionary
            if pmid in pmid_scores:
                pmid_scores[pmid] += score
            else:
                pmid_scores[pmid] = score
           
        return pmid_scores
    
    def lexical_query(self, query_str, limit=10):
        if self.lexical_client == None:
            raise ValueError("No Lexical client defined")
        
        query = {
                "size": limit,
                "query": {
                    "multi_match": {
                        "query": query_str,
                        "fields": ["text"]
                    }
                }
            }
       
        results = self.lexical_client.search(index=self.index_lexical_name, body=query) 
        retrived_documents = {}
        max_score = results['hits']['max_score']
     
        for hit in results["hits"]["hits"]:
            
            pmid = hit["_source"]["pmid"]
            score = hit["_score"]
            auto_id = hit["_id"]
            
            
            
            retrived_documents[auto_id] = {
                "score": round(score/max_score, 5),
                "pmid": pmid
                }
        
        retrived_documents = self.reorder_pmid(retrived_documents)
        return retrived_documents #adjust the return 
    
    def lexical_query_pmid(self, query_str, limit=10):
        #print("Lexical = ",query_str)
        if self.lexical_client == None:
            raise ValueError("No Lexical client defined")
        
        query = {
                "size": limit,
                "query": {
                    "multi_match": {
                        "query": query_str,
                        "fields": ["full_text"]
                    }
                }
            }
        
        results = self.lexical_client.search(index=self.index_lexical_name, body=query) 
        
        retrieved_documents = {}
        max_score = results['hits']['max_score']
        for hit in results["hits"]["hits"]:
            
            pmid = hit["_source"]["pmid"]
            score = hit["_score"] / max_score
            
            retrieved_documents[pmid] = score
            
        return retrieved_documents #adjust the return 

    def semantic_query(self, query, limit=10):
        #print("semantic = ",query)
        if self.semantic_client == None:
            raise ValueError("No Semantic client defined")
        if self.model == None:
            raise ValueError("No model defined")
        
        query_vector = self.model.encode(query).tolist()
    
        search_params=models.SearchParams(
            quantization=models.QuantizationSearchParams(rescore=self.rescore)
            )
        results = self.semantic_client.search(collection_name=self.index_name_semantic,query_vector=query_vector,search_params=search_params, limit=limit)
    
        #results = self.semantic_client.search(collection_name=self.index_name_semantic,query_vector=query_vector, limit=limit)
        
        retrived_documents = {}
        max_score = None
        for i,document in enumerate(results):
            
            pmid = document.payload['pmid']
            score = document.score
            if i == 0:
                # first score is the max
                max_score = score
            retrived_documents[document.id] = { 'pmid': pmid, 'score': round(score / max_score, 5) } 

        retrived_documents = self.reorder_pmid(retrived_documents)
        
        return retrived_documents
    

    def hybrid_query(self, query_lexical, query_semantic, lex_parameter = 0.5, semantic_parameter = 0.5, limit=10):
        if (lex_parameter + semantic_parameter) > 1:
            raise ValueError("Uncorrect parameters for Hybrid Queries")
        lexical_results = self.lexical_query(query_lexical, limit = limit) 
        semantic_results = self.semantic_query(query_semantic, limit)
        max_score = 0
        retrived_documents = {}
        
        for lex_pmid in lexical_results:
            score = lexical_results[lex_pmid] * lex_parameter
            if lex_pmid in semantic_results:
                score += semantic_results[lex_pmid] * semantic_parameter

            retrived_documents[lex_pmid] = score
            max_score = max(max_score, score)
            

        for semantic_pmid in semantic_results:
            if semantic_pmid not in lexical_results:
                score = semantic_results[semantic_pmid] * semantic_parameter
                retrived_documents[semantic_pmid] = score
                max_score = max(max_score, score)
                
        return retrived_documents # just to have a starting point


    def execute_query(self, query_str, query_type='lexical', lex_parameter = 0.5, semantic_parameter = 0.5,limit = 10,save = True, stopwords_preprocessing=True):
        #print("Before = ",query_str)
        text_query = self.preprocess_query(query_str) if stopwords_preprocessing else query_str
        
        if query_type == 'lexical':
            results = self.lexical_query(text_query, limit=limit) 
        
        elif query_type == 'semantic':
            results = self.semantic_query(query_str, limit=limit)

        elif query_type == 'hybrid':
            results = self.hybrid_query(text_query, query_str, lex_parameter, semantic_parameter, limit=limit)
        else:
            raise ValueError("Invalid query type specified. Choose 'lexical', 'semantic', or 'hybrid'.")
        
        
        
        document_retrived = sorted(results.items(), key=lambda x: x[1], reverse=True)
        document_retrived = document_retrived[:limit+1] # in the hybrid search we can return more documents
        #print("Results ", document_retrived)
        if save:
            self.process_results(document_retrived, query_str, query_type)

        return document_retrived
    
    # needs to be rewrited
    def process_results(self, results, query_str,query_type):
        
        retrieved_documents = []
        for element in results:
            
            pmid,_ = element
            query = {
                    "query": {
                        "term": {
                        "pmid": int(pmid)
                        }
                    }
                }

            results = self.lexical_client.search(index=self.index_lexical_name, body=query) 
            full_text = results['hits']['hits'][0]["_source"]['full_text']
            pmid = results['hits']['hits'][0]["_source"]['pmid']

            retrieved_documents.append({
                "pmid": pmid,
                "text": full_text
            })

        dict_to_save = {'query': query_str, 'query_type': query_type, 'abstracts' : retrieved_documents}
        self.query_result.append(dict_to_save)  
        
        



[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\lcass\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\lcass\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


## Query parser


Doing some queries

In [48]:
model = SentenceTransformer(model_card)

In [50]:
query_parser = QueryProcessor(index_lexical=index_name_lexical, lexical_pmid=lexical_pmid, index_name_semantic = coll_name_semantic, model= model, lexical_client=client_lexical, semantic_client=client_semantic)

In [20]:
query_parser.execute_query(query_str="Which gene is responsible for disfunction in speech for children?", query_type='hybrid')

[('10797929', 0.5),
 ('15877281', 0.5),
 ('37560515', 0.49687),
 ('29597095', 0.494725),
 ('20870045', 0.49326),
 ('29922639', 0.49277),
 ('9462748', 0.490925),
 ('22303795', 0.49066),
 ('24914010', 0.490565),
 ('19332160', 0.49053),
 ('22106036', 0.49053)]

## Evaluation

## Loading the evaluation file

In [3]:
import json
evaluation_file = 'training12b_new.json'

with open(evaluation_file, 'r') as f:
    data = json.load(f)

print(len(data['questions']))

5049


## Evaluation Function

In [2]:
import numpy as np


def clean_documents(documents):
    output_documents = set()
    for doc in documents:
        output_documents.add((doc.replace("http://www.ncbi.nlm.nih.gov/pubmed/","")))
    return output_documents

def average_precision(retrived_doc, true_doc):
    # Initialize variables
    precision_sum = 0
    num_retrieved_docs = 0
    
    # Calculate precision at each relevant document position
    for i, retrived in enumerate(retrived_doc, start=1):
        pmid,_ = retrived
        if pmid in true_doc:  # Check if the document is relevant
            num_retrieved_docs += 1
            precision_sum += num_retrieved_docs / i  # Calculate precision at cutoff i

    # Calculate average precision
    if num_retrieved_docs == 0:
        return 0
    else:
        return precision_sum / num_retrieved_docs


def evaluation(query_parser, data, query_type,alpha=0.5, beta=0.5, stopwords_preprocessing = False, path = "query_result.json"):
    avg_precisions_sum = [] # sum all average precision and divide with number of queries 
    precisions_sum = []
    queries_time = []
    for i,question in enumerate(data['questions']):
        dict_to_save = {}
        query = question['body']
        dict_to_save['query'] = query
        dict_to_save['query_type'] = query_type
        relevant_documents = clean_documents(question['documents'])
        start_time = time.time()
        results = query_parser.execute_query(query,query_type = query_type, lex_parameter = alpha, semantic_parameter = beta,limit = len(relevant_documents), save=False, stopwords_preprocessing = stopwords_preprocessing)
        queries_time.append(time.time() - start_time)
        
        #results = [('20598273',1), ('4',1), ('6650562',1), ('2',1),('21995290',1),('15617541',1),('23001136',1),('8896569',1), ('12239580',1)]
        dict_to_save['true_documents'] = list(relevant_documents)
        dict_to_save['retrieved_documents'] = results
       

    
        number_retrieved_documents = 0
        for pmid,_ in results:
            if pmid in relevant_documents:
                number_retrieved_documents +=1

        precision = number_retrieved_documents / len(relevant_documents)
        recall = number_retrieved_documents / len(relevant_documents)
        avg_precision = average_precision(results, relevant_documents)
        
        precisions_sum.append(precision)
        #recalls.append(recall)
        
        avg_precisions_sum.append(avg_precision)
        
        dict_to_save['precision'] = precision
        #dict_to_save['recall'] = recall
        dict_to_save['avg_precision'] = avg_precision
        with open(path, 'a') as output_file:
            output_file.write(json.dumps(dict_to_save) + '\n')
        if (i+1) % 500 == 0:
            print(f"Analyzed {i+1} queries")
            print("Actual Results...")
            print(f"Mean precision = {np.mean(precisions_sum):.3f}")
            #print(f"Mean recall = {np.mean(recalls):.3f}")
            print(f"Mean Average Precision = {np.mean(avg_precisions_sum):.3f}")
            print(f"Mean Time needed to execute a query = {np.mean(queries_time):.3f}")
    print("FINAL RESULTS ")
    print(f"Mean precision = {np.mean(precisions_sum):.3f}")
    #print(f"Mean recall = {np.mean(recalls):.3f}")
    print(f"Mean Average Precision = {np.mean(avg_precisions_sum):.3f}")
    print(f"Mean Time needed to execute a query = {np.mean(queries_time):.3f}")

## Result for Lexical Auto-id Stopwords False

In [41]:
evaluation(query_parser,data, query_type="lexical", path = "lexical_results.json")


Analyzed 500 queries
Actual Results...
Mean precision = 0.222
Mean Average Precision = 0.387
Mean Time needed to execute a query = 0.206
Analyzed 1000 queries
Actual Results...
Mean precision = 0.230
Mean Average Precision = 0.382
Mean Time needed to execute a query = 0.204
Analyzed 1500 queries
Actual Results...
Mean precision = 0.227
Mean Average Precision = 0.373
Mean Time needed to execute a query = 0.204
Analyzed 2000 queries
Actual Results...
Mean precision = 0.239
Mean Average Precision = 0.382
Mean Time needed to execute a query = 0.203
Analyzed 2500 queries
Actual Results...
Mean precision = 0.261
Mean Average Precision = 0.400
Mean Time needed to execute a query = 0.200
Analyzed 3000 queries
Actual Results...
Mean precision = 0.263
Mean Average Precision = 0.393
Mean Time needed to execute a query = 0.198
Analyzed 3500 queries
Actual Results...
Mean precision = 0.268
Mean Average Precision = 0.392
Mean Time needed to execute a query = 0.196
Analyzed 4000 queries
Actual Result

## Result for Lexical Pmid Stopwords False

In [19]:
index_name_lexical = "medline-faiss-hnsw-lexical-pmid"
query_parser_pmid = QueryProcessor(index_lexical=index_name_lexical, lexical_pmid=True, index_name_semantic = coll_name_semantic, model= model, lexical_client=client_lexical, semantic_client=client_semantic)

In [45]:
evaluation(query_parser_pmid,data, query_type="lexical", path = "lexical_results_pmid.json")

Analyzed 500 queries
Actual Results...
Mean precision = 0.224
Mean Average Precision = 0.394
Mean Time needed to execute a query = 0.226
Analyzed 1000 queries
Actual Results...
Mean precision = 0.233
Mean Average Precision = 0.388
Mean Time needed to execute a query = 0.219
Analyzed 1500 queries
Actual Results...
Mean precision = 0.230
Mean Average Precision = 0.379
Mean Time needed to execute a query = 0.215
Analyzed 2000 queries
Actual Results...
Mean precision = 0.243
Mean Average Precision = 0.388
Mean Time needed to execute a query = 0.212
Analyzed 2500 queries
Actual Results...
Mean precision = 0.265
Mean Average Precision = 0.406
Mean Time needed to execute a query = 0.208
Analyzed 3000 queries
Actual Results...
Mean precision = 0.268
Mean Average Precision = 0.399
Mean Time needed to execute a query = 0.204
Analyzed 3500 queries
Actual Results...
Mean precision = 0.272
Mean Average Precision = 0.396
Mean Time needed to execute a query = 0.201
Analyzed 4000 queries
Actual Result

## Result lexical pmid with stopwords

In [10]:
english_stopwords = set(stopwords.words('english'))
print(english_stopwords)

{'why', 'didn', 'd', 'of', 'their', 'any', 'on', 'few', 'me', 'having', 'o', 'such', 'through', 'shan', 'll', 'during', 're', 'hadn', 'this', 'should', 'mustn', "couldn't", 'yourselves', 'so', 'wouldn', 'who', 'y', 'before', 'only', 'its', "you're", "isn't", 'mightn', 'be', 'nor', 'further', 'and', 'herself', 'where', 'until', 'aren', 'an', 'himself', 'out', 'our', 'at', 'am', 'most', "that'll", 'needn', 'after', 'will', 'other', 'you', "hasn't", "wouldn't", "hadn't", 'whom', "shouldn't", 'shouldn', 'more', "she's", "doesn't", 'which', 'doing', 'very', 'i', 'his', 'down', 'being', 'by', 'above', "wasn't", 'hasn', "mightn't", 'yourself', 'do', 'weren', 'did', 'these', 'hers', 'for', 'him', "it's", 'here', "don't", "won't", 'yours', 'to', 'them', 'she', 'is', 'both', "needn't", 'with', 'haven', 'he', 'a', 'doesn', "weren't", 'm', 'ourselves', "aren't", 'does', 'themselves', 'below', 'as', 'the', 'couldn', 'my', 'not', 'we', 'were', 'has', 'when', 'wasn', 'are', 'but', 'about', 'in', "mus

In [84]:
index_name_lexical = "medline-faiss-hnsw-lexical-pmid"
lexical_pmid = True
query_parser_stopwords = QueryProcessor(index_lexical=index_name_lexical, lexical_pmid=lexical_pmid, index_name_semantic = coll_name_semantic, model= model, lexical_client=client_lexical, semantic_client=client_semantic, stopwords=english_stopwords)


In [85]:
evaluation(query_parser_stopwords,data, query_type="lexical", path = "lex_results_stopwords.json",stopwords_preprocessing=True)

Analyzed 500 queries
Actual Results...
Mean precision = 0.234
Mean Average Precision = 0.412
Mean Time needed to execute a query = 0.203
Analyzed 1000 queries
Actual Results...
Mean precision = 0.240
Mean Average Precision = 0.399
Mean Time needed to execute a query = 0.202
Analyzed 1500 queries
Actual Results...
Mean precision = 0.239
Mean Average Precision = 0.391
Mean Time needed to execute a query = 0.202
Analyzed 2000 queries
Actual Results...
Mean precision = 0.250
Mean Average Precision = 0.399
Mean Time needed to execute a query = 0.205
Analyzed 2500 queries
Actual Results...
Mean precision = 0.271
Mean Average Precision = 0.416
Mean Time needed to execute a query = 0.200
Analyzed 3000 queries
Actual Results...
Mean precision = 0.274
Mean Average Precision = 0.409
Mean Time needed to execute a query = 0.194
Analyzed 3500 queries
Actual Results...
Mean precision = 0.277
Mean Average Precision = 0.405
Mean Time needed to execute a query = 0.189
Analyzed 4000 queries
Actual Result

## Result lexical autoid stopword True

In [86]:
index_name_lexical = "medline-faiss-hnsw-lexical"
lexical_pmid = False
query_parser_stopwords = QueryProcessor(index_lexical=index_name_lexical, lexical_pmid=lexical_pmid, index_name_semantic = coll_name_semantic, model= model, lexical_client=client_lexical, semantic_client=client_semantic, stopwords=english_stopwords)
evaluation(query_parser_stopwords,data, query_type="lexical", path = "lex_results_stopwords_auto_id.json",stopwords_preprocessing=True)

Analyzed 500 queries
Actual Results...
Mean precision = 0.228
Mean Average Precision = 0.402
Mean Time needed to execute a query = 0.258
Analyzed 1000 queries
Actual Results...
Mean precision = 0.235
Mean Average Precision = 0.391
Mean Time needed to execute a query = 0.258
Analyzed 1500 queries
Actual Results...
Mean precision = 0.234
Mean Average Precision = 0.381
Mean Time needed to execute a query = 0.256
Analyzed 2000 queries
Actual Results...
Mean precision = 0.245
Mean Average Precision = 0.389
Mean Time needed to execute a query = 0.259
Analyzed 2500 queries
Actual Results...
Mean precision = 0.267
Mean Average Precision = 0.408
Mean Time needed to execute a query = 0.250
Analyzed 3000 queries
Actual Results...
Mean precision = 0.270
Mean Average Precision = 0.401
Mean Time needed to execute a query = 0.240
Analyzed 3500 queries
Actual Results...
Mean precision = 0.274
Mean Average Precision = 0.400
Mean Time needed to execute a query = 0.230
Analyzed 4000 queries
Actual Result

## Result for Semantic without rescore

In [58]:
evaluation(query_parser,data, query_type="semantic", path = "semantic_results.json")

Analyzed 500 queries
Actual Results...
Mean precision = 0.134
Mean Average Precision = 0.276
Mean Time needed to execute a query = 0.261
Analyzed 1000 queries
Actual Results...
Mean precision = 0.126
Mean Average Precision = 0.265
Mean Time needed to execute a query = 0.262
Analyzed 1500 queries
Actual Results...
Mean precision = 0.126
Mean Average Precision = 0.262
Mean Time needed to execute a query = 0.261
Analyzed 2000 queries
Actual Results...
Mean precision = 0.130
Mean Average Precision = 0.268
Mean Time needed to execute a query = 0.260
Analyzed 2500 queries
Actual Results...
Mean precision = 0.137
Mean Average Precision = 0.272
Mean Time needed to execute a query = 0.254
Analyzed 3000 queries
Actual Results...
Mean precision = 0.139
Mean Average Precision = 0.266
Mean Time needed to execute a query = 0.252
Analyzed 3500 queries
Actual Results...
Mean precision = 0.138
Mean Average Precision = 0.259
Mean Time needed to execute a query = 0.251
Analyzed 4000 queries
Actual Result

# Result Semantic with rescore

In [20]:
query_parser.set_rescore(True)
evaluation(query_parser,data, query_type="semantic", path = "semantic_results.json")
query_parser.set_rescore(False) # re insert the rescore to False

Analyzed 500 queries
Actual Results...
Mean precision = 0.135
Mean Average Precision = 0.282
Mean Time needed to execute a query = 0.369
Analyzed 1000 queries
Actual Results...
Mean precision = 0.129
Mean Average Precision = 0.268
Mean Time needed to execute a query = 0.358
Analyzed 1500 queries
Actual Results...
Mean precision = 0.130
Mean Average Precision = 0.266
Mean Time needed to execute a query = 0.352
Analyzed 2000 queries
Actual Results...
Mean precision = 0.134
Mean Average Precision = 0.273
Mean Time needed to execute a query = 0.349
Analyzed 2500 queries
Actual Results...
Mean precision = 0.139
Mean Average Precision = 0.277
Mean Time needed to execute a query = 0.336
Analyzed 3000 queries
Actual Results...
Mean precision = 0.143
Mean Average Precision = 0.271
Mean Time needed to execute a query = 0.323
Analyzed 3500 queries
Actual Results...
Mean precision = 0.142
Mean Average Precision = 0.263
Mean Time needed to execute a query = 0.310
Analyzed 4000 queries
Actual Result

## Result for Hybrid with lexical autoid Stopwords False

In [48]:
evaluation(query_parser, data, query_type="hybrid", path = "hybrid_results.json")

Analyzed 500 queries
Actual Results...
Mean precision = 0.184
Mean Average Precision = 0.388
Mean Time needed to execute a query = 0.703
Analyzed 1000 queries
Actual Results...
Mean precision = 0.181
Mean Average Precision = 0.381
Mean Time needed to execute a query = 0.691
Analyzed 1500 queries
Actual Results...
Mean precision = 0.180
Mean Average Precision = 0.368
Mean Time needed to execute a query = 0.697
Analyzed 2000 queries
Actual Results...
Mean precision = 0.189
Mean Average Precision = 0.380
Mean Time needed to execute a query = 0.698
Analyzed 2500 queries
Actual Results...
Mean precision = 0.211
Mean Average Precision = 0.396
Mean Time needed to execute a query = 0.684
Analyzed 3000 queries
Actual Results...
Mean precision = 0.220
Mean Average Precision = 0.393
Mean Time needed to execute a query = 0.666
Analyzed 3500 queries
Actual Results...
Mean precision = 0.230
Mean Average Precision = 0.393
Mean Time needed to execute a query = 0.648
Analyzed 4000 queries
Actual Result

## Result for Hybrid with lexical pmid Stopwords False

In [49]:
evaluation(query_parser_pmid, data, query_type="hybrid", path = "hybrid_results_pmid.json")

Analyzed 500 queries
Actual Results...
Mean precision = 0.184
Mean Average Precision = 0.392
Mean Time needed to execute a query = 0.701
Analyzed 1000 queries
Actual Results...
Mean precision = 0.180
Mean Average Precision = 0.383
Mean Time needed to execute a query = 0.695
Analyzed 1500 queries
Actual Results...
Mean precision = 0.180
Mean Average Precision = 0.370
Mean Time needed to execute a query = 0.690
Analyzed 2000 queries
Actual Results...
Mean precision = 0.189
Mean Average Precision = 0.384
Mean Time needed to execute a query = 0.689
Analyzed 2500 queries
Actual Results...
Mean precision = 0.211
Mean Average Precision = 0.399
Mean Time needed to execute a query = 0.673
Analyzed 3000 queries
Actual Results...
Mean precision = 0.221
Mean Average Precision = 0.397
Mean Time needed to execute a query = 0.652
Analyzed 3500 queries
Actual Results...
Mean precision = 0.229
Mean Average Precision = 0.396
Mean Time needed to execute a query = 0.632
Analyzed 4000 queries
Actual Result

## Result Hybrid pmid with stopwords 

In [18]:
index_name_lexical = "medline-faiss-hnsw-lexical-pmid"
lexical_pmid = True
query_parser_stopwords = QueryProcessor(index_lexical=index_name_lexical, lexical_pmid=lexical_pmid, index_name_semantic = coll_name_semantic, model= model, lexical_client=client_lexical, semantic_client=client_semantic, stopwords=english_stopwords)
evaluation(query_parser_stopwords, data, query_type="hybrid",alpha=0.5, beta=0.5, path = "hybrid_results_pmid_stopwords.json", stopwords_preprocessing=True)

Analyzed 500 queries
Actual Results...
Mean precision = 0.190
Mean Average Precision = 0.406
Mean Time needed to execute a query = 0.501
Analyzed 1000 queries
Actual Results...
Mean precision = 0.185
Mean Average Precision = 0.390
Mean Time needed to execute a query = 0.499
Analyzed 1500 queries
Actual Results...
Mean precision = 0.184
Mean Average Precision = 0.378
Mean Time needed to execute a query = 0.498
Analyzed 2000 queries
Actual Results...
Mean precision = 0.193
Mean Average Precision = 0.390
Mean Time needed to execute a query = 0.498
Analyzed 2500 queries
Actual Results...
Mean precision = 0.214
Mean Average Precision = 0.404
Mean Time needed to execute a query = 0.484
Analyzed 3000 queries
Actual Results...
Mean precision = 0.224
Mean Average Precision = 0.402
Mean Time needed to execute a query = 0.468
Analyzed 3500 queries
Actual Results...
Mean precision = 0.231
Mean Average Precision = 0.399
Mean Time needed to execute a query = 0.454
Analyzed 4000 queries
Actual Result

## Result Hybrid autoid with stopwords 

In [20]:
index_name_lexical = "medline-faiss-hnsw-lexical"
lexical_pmid = False
query_parser_stopwords = QueryProcessor(index_lexical=index_name_lexical, lexical_pmid=lexical_pmid, index_name_semantic = coll_name_semantic, model= model, lexical_client=client_lexical, semantic_client=client_semantic, stopwords=english_stopwords)
evaluation(query_parser_stopwords, data, query_type="hybrid",alpha=0.5, beta=0.5, path = "hybrid_results_autoid_stopwords.json", stopwords_preprocessing=True)

Analyzed 500 queries
Actual Results...
Mean precision = 0.192
Mean Average Precision = 0.406
Mean Time needed to execute a query = 0.652
Analyzed 1000 queries
Actual Results...
Mean precision = 0.187
Mean Average Precision = 0.387
Mean Time needed to execute a query = 0.642
Analyzed 1500 queries
Actual Results...
Mean precision = 0.185
Mean Average Precision = 0.375
Mean Time needed to execute a query = 0.625
Analyzed 2000 queries
Actual Results...
Mean precision = 0.194
Mean Average Precision = 0.386
Mean Time needed to execute a query = 0.625
Analyzed 2500 queries
Actual Results...
Mean precision = 0.215
Mean Average Precision = 0.403
Mean Time needed to execute a query = 0.604
Analyzed 3000 queries
Actual Results...
Mean precision = 0.225
Mean Average Precision = 0.400
Mean Time needed to execute a query = 0.578
Analyzed 3500 queries
Actual Results...
Mean precision = 0.234
Mean Average Precision = 0.399
Mean Time needed to execute a query = 0.556
Analyzed 4000 queries
Actual Result

## Result Hybrid with rescore

In [21]:
index_name_lexical = "medline-faiss-hnsw-lexical-pmid"
lexical_pmid = True
query_parser_stopwords = QueryProcessor(index_lexical=index_name_lexical, lexical_pmid=lexical_pmid, index_name_semantic = coll_name_semantic, model= model, lexical_client=client_lexical, semantic_client=client_semantic, stopwords=english_stopwords)
query_parser_stopwords.set_rescore(True)
evaluation(query_parser_stopwords, data, query_type="hybrid",alpha=0.5, beta=0.5, path = "hybrid_results_pmid_recore.json", stopwords_preprocessing=True)
query_parser_stopwords.set_rescore(False)

Analyzed 500 queries
Actual Results...
Mean precision = 0.192
Mean Average Precision = 0.403
Mean Time needed to execute a query = 0.538
Analyzed 1000 queries
Actual Results...
Mean precision = 0.189
Mean Average Precision = 0.389
Mean Time needed to execute a query = 0.542
Analyzed 1500 queries
Actual Results...
Mean precision = 0.187
Mean Average Precision = 0.378
Mean Time needed to execute a query = 0.542
Analyzed 2000 queries
Actual Results...
Mean precision = 0.197
Mean Average Precision = 0.392
Mean Time needed to execute a query = 0.548
Analyzed 2500 queries
Actual Results...
Mean precision = 0.218
Mean Average Precision = 0.405
Mean Time needed to execute a query = 0.533
Analyzed 3000 queries
Actual Results...
Mean precision = 0.228
Mean Average Precision = 0.403
Mean Time needed to execute a query = 0.516
Analyzed 3500 queries
Actual Results...
Mean precision = 0.235
Mean Average Precision = 0.400
Mean Time needed to execute a query = 0.501
Analyzed 4000 queries
Actual Result

## Result Hybrid with alpha 0.7 and Beta 0.3

In [22]:
index_name_lexical = "medline-faiss-hnsw-lexical-pmid"
lexical_pmid = True
query_parser_stopwords = QueryProcessor(index_lexical=index_name_lexical, lexical_pmid=lexical_pmid, index_name_semantic = coll_name_semantic, model= model, lexical_client=client_lexical, semantic_client=client_semantic, stopwords=english_stopwords)
query_parser_stopwords.set_rescore(True)
evaluation(query_parser_stopwords, data, query_type="hybrid",alpha=0.7, beta=0.3, path = "hybrid_results_pmid_recore.json", stopwords_preprocessing=True)
query_parser_stopwords.set_rescore(False)

Analyzed 500 queries
Actual Results...
Mean precision = 0.246
Mean Average Precision = 0.412
Mean Time needed to execute a query = 0.560
Analyzed 1000 queries
Actual Results...
Mean precision = 0.253
Mean Average Precision = 0.406
Mean Time needed to execute a query = 0.543
Analyzed 1500 queries
Actual Results...
Mean precision = 0.251
Mean Average Precision = 0.397
Mean Time needed to execute a query = 0.539
Analyzed 2000 queries
Actual Results...
Mean precision = 0.262
Mean Average Precision = 0.408
Mean Time needed to execute a query = 0.539
Analyzed 2500 queries
Actual Results...
Mean precision = 0.285
Mean Average Precision = 0.424
Mean Time needed to execute a query = 0.526
Analyzed 3000 queries
Actual Results...
Mean precision = 0.291
Mean Average Precision = 0.420
Mean Time needed to execute a query = 0.512
Analyzed 3500 queries
Actual Results...
Mean precision = 0.296
Mean Average Precision = 0.418
Mean Time needed to execute a query = 0.496
Analyzed 4000 queries
Actual Result

## Result Hybrid with 0.8 and 0.2

In [23]:
index_name_lexical = "medline-faiss-hnsw-lexical-pmid"
lexical_pmid = True
query_parser_stopwords = QueryProcessor(index_lexical=index_name_lexical, lexical_pmid=lexical_pmid, index_name_semantic = coll_name_semantic, model= model, lexical_client=client_lexical, semantic_client=client_semantic, stopwords=english_stopwords)
query_parser_stopwords.set_rescore(True)
evaluation(query_parser_stopwords, data, query_type="hybrid",alpha=0.8, beta=0.2, path = "hybrid_results_pmid_recore.json", stopwords_preprocessing=True)
query_parser_stopwords.set_rescore(False)

Analyzed 500 queries
Actual Results...
Mean precision = 0.246
Mean Average Precision = 0.413
Mean Time needed to execute a query = 0.551
Analyzed 1000 queries
Actual Results...
Mean precision = 0.253
Mean Average Precision = 0.406
Mean Time needed to execute a query = 0.561
Analyzed 1500 queries
Actual Results...
Mean precision = 0.251
Mean Average Precision = 0.398
Mean Time needed to execute a query = 0.550
Analyzed 2000 queries
Actual Results...
Mean precision = 0.262
Mean Average Precision = 0.409
Mean Time needed to execute a query = 0.549
Analyzed 2500 queries
Actual Results...
Mean precision = 0.285
Mean Average Precision = 0.424
Mean Time needed to execute a query = 0.536
Analyzed 3000 queries
Actual Results...
Mean precision = 0.292
Mean Average Precision = 0.420
Mean Time needed to execute a query = 0.522
Analyzed 3500 queries
Actual Results...
Mean precision = 0.296
Mean Average Precision = 0.418
Mean Time needed to execute a query = 0.508
Analyzed 4000 queries
Actual Result

# Evaluation based on PubMed website

In [41]:
from Bio import Entrez
# Always tell NCBI who you are (your email address)
Entrez.email = "lcassano00@gmail.com"
def search_pubmed(query, limit = 10, mesh=True):
    if not mesh:
        query += "[Title/Abstract]"
    # Use Entrez.esearch to search for articles matching the query in PubMed
    handle = Entrez.esearch(db="pubmed", term=query, retmax=limit, sort="relevance",)
    
    record = Entrez.read(handle)
    handle.close()
    # Get the list of Ids returned by the search
    id_list = record["IdList"]
    return id_list

def fetch_details(id_list):
    # Use Entrez.efetch to get the article details from the list of Ids
    ids = ','.join(id_list)
    handle = Entrez.efetch(db="pubmed", id=ids, retmode="xml")
    records = Entrez.read(handle)
    handle.close()
    return records
# Example usage

query = "Is the protein Papilin secreted?"
id_list = search_pubmed(query)
print("Id list = ",id_list)


Id list =  ['11076767', '30388611', '30661986', '15094110', '38284126', '12666201', '36997062', '22242013', '22937083', '19297413']


In [42]:
query = "Is the protein Papilin secreted?"
id_list = search_pubmed(query, mesh=False)
print("Id list = ",id_list)

Id list =  ['11076767', '38284126', '15094122', '21784067', '7515725']


In [46]:
def evaluation_pubmed(query_type, data,mesh=True,path = "query_result.json"):
    avg_precisions_sum = [] # sum all average precision and divide with number of queries 
    precisions_sum = []
    queries_time = []
    for i,question in enumerate(data['questions']):
        dict_to_save = {}
        query = question['body']
        dict_to_save['query'] = query
        dict_to_save['query_type'] = query_type
        relevant_documents = clean_documents(question['documents'])
        start_time = time.time()
        
        results = search_pubmed(query, limit = len(relevant_documents),mesh=mesh)
        queries_time.append(time.time() - start_time)
        
        dict_to_save['true_documents'] = list(relevant_documents)
        dict_to_save['retrieved_documents'] = results
       
        number_retrieved_documents = 0
        for pmid in results:
            if pmid in relevant_documents:
                number_retrieved_documents +=1

        precision = number_retrieved_documents / len(relevant_documents)
        recall = number_retrieved_documents / len(relevant_documents)
        avg_precision = average_precision(results, relevant_documents)
       
        precisions_sum.append(precision)
        #recalls.append(recall)
        
        avg_precisions_sum.append(avg_precision)
        
        dict_to_save['precision'] = precision
        #dict_to_save['recall'] = recall
        dict_to_save['avg_precision'] = avg_precision
        with open(path, 'a') as output_file:
            output_file.write(json.dumps(dict_to_save) + '\n')
        if (i+1) % 500 == 0:
            print(f"Analyzed {i+1} queries")
            print("Actual Results...")
            print(f"Mean precision = {np.mean(precisions_sum):.3f}")
            #print(f"Mean recall = {np.mean(recalls):.3f}")
            print(f"Mean Average Precision = {np.mean(avg_precisions_sum):.3f}")
            print(f"Mean Time needed to execute a query = {np.mean(queries_time):.3f}")
    print("FINAL RESULTS ")
    print(f"Mean precision = {np.mean(precisions_sum):.3f}")
    #print(f"Mean recall = {np.mean(recalls):.3f}")
    print(f"Mean Average Precision = {np.mean(avg_precisions_sum):.3f}")
    print(f"Mean Time needed to execute a query = {np.mean(queries_time):.3f}")

## Evaluation Pubmed with Mesh Term

The mesh terms are applied automatically by PubMed website

In [47]:
evaluation_pubmed(data=data, query_type="PubMed website", path="Pubmed_mesh.json")

Analyzed 500 queries
Actual Results...
Mean precision = 0.087
Mean Average Precision = 0.172
Mean Time needed to execute a query = 0.744
Analyzed 1000 queries
Actual Results...
Mean precision = 0.086
Mean Average Precision = 0.175
Mean Time needed to execute a query = 0.748
Analyzed 1500 queries
Actual Results...
Mean precision = 0.084
Mean Average Precision = 0.165
Mean Time needed to execute a query = 0.746
Analyzed 2000 queries
Actual Results...
Mean precision = 0.093
Mean Average Precision = 0.174
Mean Time needed to execute a query = 0.745
Analyzed 2500 queries
Actual Results...
Mean precision = 0.108
Mean Average Precision = 0.188
Mean Time needed to execute a query = 0.741
Analyzed 3000 queries
Actual Results...
Mean precision = 0.109
Mean Average Precision = 0.184
Mean Time needed to execute a query = 0.743
Analyzed 3500 queries
Actual Results...
Mean precision = 0.109
Mean Average Precision = 0.180
Mean Time needed to execute a query = 0.739
Analyzed 4000 queries
Actual Result

## Evaluation PubMed without mesh

In [48]:
evaluation_pubmed(data=data, query_type="PubMed website no mash", mesh = False, path="Pubmed_no_mesh.json")

Analyzed 500 queries
Actual Results...
Mean precision = 0.058
Mean Average Precision = 0.128
Mean Time needed to execute a query = 0.775
Analyzed 1000 queries
Actual Results...
Mean precision = 0.059
Mean Average Precision = 0.131
Mean Time needed to execute a query = 0.739
Analyzed 1500 queries
Actual Results...
Mean precision = 0.056
Mean Average Precision = 0.124
Mean Time needed to execute a query = 0.723
Analyzed 2000 queries
Actual Results...
Mean precision = 0.068
Mean Average Precision = 0.137
Mean Time needed to execute a query = 0.719
Analyzed 2500 queries
Actual Results...
Mean precision = 0.081
Mean Average Precision = 0.150
Mean Time needed to execute a query = 0.714
Analyzed 3000 queries
Actual Results...
Mean precision = 0.083
Mean Average Precision = 0.146
Mean Time needed to execute a query = 0.708
Analyzed 3500 queries
Actual Results...
Mean precision = 0.085
Mean Average Precision = 0.145
Mean Time needed to execute a query = 0.705
Analyzed 4000 queries
Actual Result