### MeSH Based Query Search

This notebook shows how to search for documents based on the MeSH descriptors of the diseases listed in the CVD (cardiovascular disease) tree.

In [1]:
import csv

In [2]:
import pandas as pd
import json
from neo4j import GraphDatabase

#### Authentication to access covidgraph.org graph

In [3]:
# Endpoints of the covidgraph.org graph: the browser URL is kept for reference,
# the bolt URL is the one the driver connects to.
covid_browser = "https://covid.petesis.com:7473"
covid_url = "bolt://covid.petesis.com:7687"

# Credentials handed to the driver as a (user, password) pair.
user, password = "public", "corona"

# Shared driver; the cells below open their sessions from it.
driver = GraphDatabase.driver(uri=covid_url, auth=(user, password))

#### Create a dictionary that maps each MeSH descriptor to its entity list
Ex. `C14.260.500: [syphilis, cardiovascular]`

In [5]:
# Build MeSH_id_dict: the row's ID column -> lower-cased search term(s) taken
# from the row's name column. Comma-separated names such as
# "Syphilis, Cardiovascular" become a list of terms; names that hold a single
# term are stored as a plain string.
MeSH_id_dict = {}
# newline='' is what the csv module asks for when it is given a file object.
with open('input/mesh/cvd.csv', mode='r', newline='') as csv_file:
    for row in csv.DictReader(csv_file):
        disease = row['name'].strip().lower()
        # str.strip() returns a new string, so the cleaned pieces must be
        # collected (calling it and discarding the result changes nothing).
        # Splitting on "," alone also covers commas not followed by a space.
        entities = [part.strip() for part in disease.split(",")]
        # Drop empty pieces, e.g. the one produced by a trailing comma.
        entities = [part for part in entities if part]
        if len(entities) > 1:
            MeSH_id_dict[row['ID']] = entities
        else:
            # Zero or one usable term: keep the single-string representation.
            MeSH_id_dict[row['ID']] = entities[0] if entities else disease

#### Display the MeSH descriptor to entity list dictionary created above (uncomment the line below to show it)

In [6]:
#MeSH_id_dict

#### MeSH mapping
- Create a dictionary where the key is a MeSH descriptor and the value is a list of papers (publications) whose body text mentions all of that descriptor's MeSH terms
- Each paper is represented as a dictionary that maps each attribute name of the paper (cord_uid, journal, title, etc.) to its value

##### Example of a paper node in the covid graph

In [13]:
# Fetch a single Paper node and print the raw record so the available
# properties of a paper can be inspected.
Data = []
paper_query = "MATCH (n:Paper) RETURN n LIMIT 1"
with driver.session() as session:
    for record in session.run(paper_query):
        print(record)

<Record n=<Node id=3198 labels={'Paper'} properties={'cord_uid': 'zrmkq3mz', 'cord19-fulltext_hash': '41c7a01f11ed47591d99f45774e43e45aeba0619', 'journal': 'BMC Microbiol', 'publish_time': '2009-08-12', 'source': 'PMC', 'title': 'CAPIH: A Web interface for comparative analyses and visualization of host-HIV protein-protein interactions', '_hash_id': '3c4b2ee1430dc9ac53aca87c0fc0f7eb', 'url': 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2782265/'}>>


In [14]:
# Output file for the search results. The handle is left open on purpose: the
# search cell below writes to it and closes it when it is done.
ff = open("data/mesh_search/mesh_search.json", mode="w")

In [15]:
# Search the graph once per MeSH descriptor and collect the matching papers.
# A paper matches when one of its BodyText nodes contains *every* term of the
# descriptor (lower-cased substring match), which is the same condition the
# former AND-chain expressed.
#
# The terms are passed as the query parameter $terms instead of being
# concatenated into the Cypher text. That keeps the query valid for terms that
# contain an apostrophe and for entity lists of any length (a one-element list
# used to produce a query without WHERE).
query = (
    "MATCH (p:Paper)-[:PAPER_HAS_BODYTEXTCOLLECTION]-(:BodyTextCollection)-"
    "[:BODYTEXTCOLLECTION_HAS_BODYTEXT]-(a:BodyText) "
    "WHERE all(term IN $terms WHERE LOWER(a.text) CONTAINS term) "
    "RETURN DISTINCT p"
)

MeSH_to_result = {}
for desc, entities in MeSH_id_dict.items():
    # MeSH_id_dict values are either a list of terms or a single string.
    terms = entities if isinstance(entities, list) else [entities]
    # An empty term would match every text, and an empty list makes all(...)
    # true for every node, so descriptors without usable terms get no papers.
    terms = [term for term in terms if term]

    MeSH_result = []
    if terms:
        with driver.session() as session:
            for record in session.run(query, terms=terms):
                # Copy the node's properties into a plain, JSON-friendly dict.
                MeSH_result.append(dict(record["p"].items()))

    MeSH_to_result[desc] = MeSH_result

# Write the whole mapping as ONE JSON document. Dumping each result list
# separately produced back-to-back arrays, which is not valid JSON and loses
# the descriptor each list belongs to.
json.dump(MeSH_to_result, ff)
ff.close()

#### Display the MeSH descriptor to publication list dictionary created above

In [None]:
# Show every (descriptor, list of paper dicts) pair collected by the search
# cell above.
# NOTE(review): this renders the complete result set, which may be very large
# — consider displaying only a slice; confirm against the actual result size.
MeSH_to_result.items()