### MeSH Based Query Search

In this notebook, we will discuss how to search documents based on the signs and symptoms of coronavirus.

In [1]:
import pandas as pd
import json
from neo4j import GraphDatabase
import csv

#### Authentication to access covidgraph.org graph

In [2]:
covid_browser = "https://covid.petesis.com:7473"
covid_url = "bolt://covid.petesis.com:7687"
user = "public"
password = "corona"

#driver = GraphDatabase.driver(uri, auth=(user, password))
driver = GraphDatabase.driver(uri = covid_url,\
                              auth = (user,password))

#### MeSH descriptor to its entity list
- Ex. ```C01.925.782.600.550.200.360: [feline infectious peritonitis]```
- Pandas Dataframe is very convenient for handeling a CSV file specifically for data transformation with ```lambda``` mapping functon.

In [3]:
MeSH = pd.read_csv("../Input/mesh/signs.csv")
MeSH = MeSH.set_index('ID')
MeSH.head()

Unnamed: 0_level_0,name
ID,Unnamed: 1_level_1
C01.748.561,Pharyngitis
C01.757.800,"Shock, Septic"
C05.651.542,Myalgia
C06.552.308.500,Liver Failure
C07.550.781,Pharyngitis


- Implementing ```lambda``` function to map one column to another column

In [4]:
MeSH['phrases'] = MeSH['name'].apply(lambda x: x.lower().strip())

In [5]:
MeSH.head()

Unnamed: 0_level_0,name,phrases
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
C01.748.561,Pharyngitis,pharyngitis
C01.757.800,"Shock, Septic","shock, septic"
C05.651.542,Myalgia,myalgia
C06.552.308.500,Liver Failure,liver failure
C07.550.781,Pharyngitis,pharyngitis


In [6]:
MeSH['phrases'] = MeSH['phrases'].apply(lambda x:x.split(','))

In [7]:
for x in MeSH['phrases']:
    for val in x:
        val = val.strip()

In [8]:
MeSH.head()

Unnamed: 0_level_0,name,phrases
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
C01.748.561,Pharyngitis,[pharyngitis]
C01.757.800,"Shock, Septic","[shock, septic]"
C05.651.542,Myalgia,[myalgia]
C06.552.308.500,Liver Failure,[liver failure]
C07.550.781,Pharyngitis,[pharyngitis]


In [9]:
MeSH.index[0]

'C01.748.561'

In [10]:
id2name = {}
for ID,name in zip(MeSH.index, MeSH['name']):
    id2name.update({ID:name})
    
with open("../Data/mesh/statistics/corona_signs_id2name.json", 'w') as wf:
    json.dump(id2name,wf)

#### MeSH to Doc Mapping
- Create a dictionary where the key is a MeSH descriptor, and the value is a list of papers (publications) that contains mention of the MeSH terms in its body text
- Each paper is represented as dictionary linking each attribute name in the paper (cord_uid, journal, title, etc.) with its actual information

##### Example of a paper node in the covid graph

In [11]:
paper_query = "MATCH (n:Paper) RETURN n LIMIT 1"
Data = []
with driver.session() as session:
    info = session.run(paper_query)
    for item in info:
        print(item)

<Record n=<Node id=2385529 labels={'Paper'} properties={'cord_uid': 'ocp6yodg', 'cord19-fulltext_hash': 'b8957d48b6bcf17b7b51e004d19314ce77f653a1', 'journal': 'BMC Infect Dis', 'publish_time': '2011-12-28', 'source': 'PMC', 'title': 'Timeliness of contact tracing among flight passengers for influenza A/H1N1 2009', '_hash_id': '84b069ab23fb0ecebe6925af9c2b18ae', 'url': 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3265549/'}>>


#### Writes data to file

In [13]:
MeSH_to_result = {}
MeSH_number = {}
for desc, entities  in zip(MeSH.index, MeSH['phrases']):
    #Builds each part of the query based on the MeSH descriptor entity list
    query = "MATCH (p:Paper)-[:PAPER_HAS_BODYTEXTCOLLECTION]-(:BodyTextCollection)-" \
                                        "[:BODYTEXTCOLLECTION_HAS_BODYTEXT]-(a:BodyText) WHERE ("
    for i in range(len(entities)):
        if i == len(entities)-1:
            query += "LOWER(a.text) CONTAINS '" + entities[i] + "') RETURN DISTINCT p"
        else:
            query += "LOWER(a.text) CONTAINS '" + entities[i] + "' AND "

    MeSH_result = []
        
    with driver.session() as session:
        info = session.run(query)
        for item in info:
            try:
                node_keys = list((item.values(0)[0]).keys())
                node_values = list((item.values(0)[0]).values())
                for i in range(len(node_keys)):
                    if (node_keys[i] == 'cord_uid'):
                        MeSH_result.append(node_values[i])    
            except:
                continue
    
    try:
        name = id2name[desc]
        MeSH_to_result[name] = MeSH_result
        MeSH_number[desc] = len(MeSH_result)
    except:
        continue

In [14]:
ff = open("../Data/mesh/corona_signs_paper_ids.json", 'w')

json.dump(MeSH_to_result, ff)

ff.close()

#### Prints MeSH descriptor to publication list dictionary created above

In [15]:
MeSH_number.items()

dict_items([('C01.748.561', 75), ('C01.757.800', 300), ('C05.651.542', 227), ('C06.552.308.500', 119), ('C07.550.781', 75), ('C08.381.348', 45), ('C08.381.742', 166), ('C08.618.248', 792), ('C08.618.326', 285), ('C08.618.846', 51), ('C08.730.561', 75), ('C09.775.649', 75), ('C10.597.606.337', 223), ('C10.597.751.600', 0), ('C10.668.491.525', 227), ('C12.777.419.780.050', 108), ('C13.351.968.419.780.050', 108), ('C14.907.514', 191), ('C15.378.553.546', 133), ('C15.378.553.546.184.564', 135), ('C23.550.414.896', 45), ('C23.550.470.790.500.800', 300), ('C23.550.835.525', 101), ('C23.550.835.900.712', 300), ('C23.888.119.344', 1694), ('C23.888.208', 128), ('C23.888.369', 248), ('C23.888.592.604.339', 223), ('C23.888.592.612.054', 158), ('C23.888.592.612.233', 110), ('C23.888.592.612.441', 365), ('C23.888.592.612.547.249', 227), ('C23.888.592.763.237', 74), ('C23.888.592.763.550', 0), ('C23.888.821.030', 158), ('C23.888.821.108', 148), ('C23.888.821.214', 700), ('C23.888.821.712', 199), ('C