In [9]:
import os

import pandas as pd
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk

### Import sentence transformer model

In [10]:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('sentence-transformers/msmarco-MiniLM-L-12-v3')
query_embedding = model.encode("What are results of combining two or more drugs?")
#print(query_embedding)

In [11]:
df = pd.read_json('../data/pubmed23n0001.json')
#pmids=df[:1000]["pmid"]

### Create dataframe for 1000 abstracts

In [12]:
df_abstracts = df[['pmid', 'abstract']].copy()
df_abstracts=df_abstracts[0:1000]
df_abstracts.tail()

Unnamed: 0,pmid,abstract
995,996,The pH-dependence of the kinetic parameters fo...
996,997,An enzyme system from Datura innoxia roots oxi...
997,998,1. 2-Oxoaldehyde dehydrogenase was purified fr...
998,999,A number of parameters affecting the adsorptio...
999,1000,The NADP-specific glutamate dehydrogenase of N...


In [5]:
#df_abstracts[df_abstracts["abstract"]!=""]

### Create the Elasticsearch client `es` and connect to the cluster

In [13]:
es = Elasticsearch("http://localhost:9200", basic_auth=("elastic","fcN0n1_TzrIc8TCZP_3y"))

### Create mapping schema

In [64]:
settings= {
    "number_of_shards": 1,
  }
mappings_python = {
    "properties": {
         "embedding_abstract": { 
                        "type": "dense_vector",
                        "dims": 384,
                        "index": True,
                        "similarity": "cosine" },
          "abstract":{"type":"text"},
          "pmid":{"type":"text"}
    }
  }




 ### Create index

In [70]:
es.indices.create(index="medline_python", settings=settings, mappings=mappings_python)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'medline_python'})

In [72]:
for i, row in df_abstracts.iterrows():
    embedding = model.encode(row["abstract"])
    doc = {
        "pmid": row["pmid"],
        "abstract": row["abstract"],
        "embedding_abstract":embedding
    }
            
    es.index(index="medline_python", id=i, document=doc)

### Define query and its embedding

In [15]:
query="What are results of combining two or more drugs?"
query_embedding = model.encode(query)
#query_embedding

### Define the semantic (kNN) and lexical (match) queries

In [26]:
query_semantic = {
    "field": "embedding_abstract",
    "query_vector": query_embedding,
    "k": 10,
    "num_candidates": 100,
    }

query_lexical={
   "match": {
       "abstract": query
}
}

### Lexical search

In [25]:
es.search(index="medline_python", query=query_lexical,source=["pmid","abstract"])

ObjectApiResponse({'took': 3, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 482, 'relation': 'eq'}, 'max_score': 9.742066, 'hits': [{'_index': 'medline_python', '_id': '724', '_score': 9.742066, '_source': {'pmid': 725, 'abstract': '78 patients suffering from various functional abdominal complaints have been trated in a 2 x 2 double-blind design: (a) psychotherapy with Ro 5-3350 (TH/Ro); (b) psychotherapy with placebo (TH/P); (c) Ro 5-3350 without psychotherapy (NIH/Ro); (d) placebo without psychotherapy (NTH/P). Results show that a considerable amount of improvement cannot be ascribed to the two critical factors or the interaction of both, but are due to unspecific influences in the course of treatment. Some of the results concerning the combination of TH and the psychotropic drug pose interesting questions for further research and bare implications for double-blind trials of psychotropic drugs. The results suggest

### Semantic search

In [27]:
es.search(index="medline_python", knn=query_semantic,source=["pmid","abstract"])

ObjectApiResponse({'took': 3, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 10, 'relation': 'eq'}, 'max_score': 0.6711177, 'hits': [{'_index': 'medline_python', '_id': '933', '_score': 0.6711177, '_source': {'pmid': 934, 'abstract': 'The rates at which pentobarbital, salicylate, antipyrine, and quinine were transferred from the rumen of intact, conscious goats were measured. The rates at which the same drugs diffused from the blood plasma (under conditions of constant drug concentration) into the ruminal solution were also evaluated. These compounds were absorbed by simple diffusion, and the rates of transfer were a function of pH of the intraruminal solution. The diffusion of drugs from plasma into the reticulorumen allowed steady-state distributions to be established in some goats. The theoretical and observed steady-state distributions were compared. There were good correlations for pentobarbital and antipyrine, 

## Hybrid search - combined Semantic and Lexical search

In [29]:
es.search(index="medline_python",query=query_lexical, knn=query_semantic,source=["pmid","abstract"], rank={"rrf":{}})

ObjectApiResponse({'took': 13, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 482, 'relation': 'eq'}, 'max_score': None, 'hits': [{'_index': 'medline_python', '_id': '724', '_score': None, '_rank': 1, '_source': {'pmid': 725, 'abstract': '78 patients suffering from various functional abdominal complaints have been trated in a 2 x 2 double-blind design: (a) psychotherapy with Ro 5-3350 (TH/Ro); (b) psychotherapy with placebo (TH/P); (c) Ro 5-3350 without psychotherapy (NIH/Ro); (d) placebo without psychotherapy (NTH/P). Results show that a considerable amount of improvement cannot be ascribed to the two critical factors or the interaction of both, but are due to unspecific influences in the course of treatment. Some of the results concerning the combination of TH and the psychotropic drug pose interesting questions for further research and bare implications for double-blind trials of psychotropic drugs. The results su