# Example queries on COVID-19 Knowledge Graph

In [1]:
import os
from py2neo import Graph

In [2]:
# Open the connection to the Neo4j database that holds the knowledge graph.
# The password is taken from the NEO4J_PASSWORD environment variable so the
# notebook can be pointed at a differently-secured database without editing
# the code; when the variable is unset it falls back to "neo4jbinder", the
# value that used to be hard-coded here, so existing setups behave as before.
# NOTE(review): no URI/host is passed, so py2neo's default connection
# settings are used — confirm that matches the deployment.
graph = Graph(password=os.environ.get("NEO4J_PASSWORD", "neo4jbinder"))

### How many people have died of COVID-19?

In [3]:
# Total death count: sum the `deaths` property stored on every OCCURED_IN
# relationship that links the outbreak node named 'COVID-19' to a Country node.
# NOTE(review): the relationship type is written OCCURED_IN (single R) in all
# queries in this notebook — presumably the spelling used by the graph schema;
# do not "correct" it without checking the database.
query = """
MATCH (o:InfectiousDiseaseOutbreak)-[i:OCCURED_IN]->(:Country)
WHERE o.name = 'COVID-19'
RETURN sum(i.deaths) as Deaths
"""
# Run the Cypher query; the resulting data frame is the cell's displayed output.
graph.run(query).to_data_frame()

Unnamed: 0,Deaths
0,2007


### How many cases have been confirmed, and how many patients have died or recovered, in each country?

In [4]:
# Per-country case counts: for each Country linked to the 'COVID-19' outbreak
# by an OCCURED_IN relationship, return the country name together with the
# `confirmed`, `deaths` and `recovered` properties read from that relationship.
# Rows are sorted by confirmed cases, largest first.
query = """
MATCH (o:InfectiousDiseaseOutbreak)-[i:OCCURED_IN]->(c:Country)
WHERE o.name = 'COVID-19'
RETURN c.name as Country, i.confirmed as Confirmed, i.deaths as Deaths, i.recovered as Recovered
ORDER BY i.confirmed DESC
"""
# Run the Cypher query; the resulting data frame is the cell's displayed output.
graph.run(query).to_data_frame()

Unnamed: 0,Country,Confirmed,Deaths,Recovered
0,China,74139,2002,14199
1,Others,542,0,0
2,Singapore,81,0,29
3,Japan,74,1,13
4,Hong Kong,62,1,2
5,Thailand,35,0,15
6,South Korea,31,0,12
7,USA,29,0,3
8,Malaysia,22,0,13
9,Taiwan,22,1,2


### Which pathogen causes the COVID-19 outbreak?

In [5]:
# Causative pathogen: find every Pathogen node that has a CAUSES relationship
# to the outbreak named 'COVID-19' and return its `name` and `taxonomyId`.
query = """
MATCH (p:Pathogen)-[:CAUSES]->(o:InfectiousDiseaseOutbreak)
WHERE o.name = 'COVID-19'
RETURN p.name as Pathogen, p.taxonomyId as TaxonomyId
"""
# Run the Cypher query; the resulting data frame is the cell's displayed output.
graph.run(query).to_data_frame()

Unnamed: 0,Pathogen,TaxonomyId
0,Wuhan seafood market pneumonia virus,269749


### What are the known strains of the COVID-19 virus?

In [6]:
# Known strains: start from the 'COVID-19' outbreak, step back to the Pathogen
# that CAUSES it, then follow HAS_STRAIN to each Strain node and return the
# strain names (one row per matched strain; no ordering is requested).
query = """
MATCH (s:Strain)<-[:HAS_STRAIN]-(:Pathogen)-[:CAUSES]->(o:InfectiousDiseaseOutbreak)
WHERE o.name = 'COVID-19'
RETURN s.name as Strain
"""
# Run the Cypher query; the resulting data frame is the cell's displayed output.
graph.run(query).to_data_frame()

Unnamed: 0,Strain
0,Singapore/2/2020
1,Germany/BavPat1/2020
2,Kanagawa/1/2020
3,HongKong/VM20001988/2020
4,England/02/2020
...,...
149,Foshan/20SF211/2020
150,Wuhan/IPBCAMS-WH-05/2020
151,Guangzhou/20SF206/2020
152,Chongqing/YC01/2020


### Which strains are found in each state?

In [7]:
# Strains by state: same Pathogen -> Strain traversal as the strain listing,
# extended with FOUND_IN to reach the State node each strain is linked to.
# Returns one (State, Strain) row per match, sorted alphabetically by state name.
query = """
MATCH (st:State)<-[:FOUND_IN]-(s:Strain)<-[:HAS_STRAIN]-(:Pathogen)-[:CAUSES]->(o:InfectiousDiseaseOutbreak)
WHERE o.name = 'COVID-19'
RETURN st.name as State, s.name as Strain
ORDER BY st.name
"""
# Run the Cypher query; the resulting data frame is the cell's displayed output.
graph.run(query).to_data_frame()

Unnamed: 0,State,Strain
0,Arizona,USA/AZ1/2020
1,Beijing,Beijing/IVDC-BJ-005/2020
2,California,USA/CA5/2020
3,California,USA/CA2/2020
4,California,USA/CA6/2020
...,...,...
88,Yunnan,Yunnan/IVDC-YN-003/2020
89,Zhejiang,Zhejiang/WZ-02/2020
90,Zhejiang,Zhejiang/WZ-01/2020
91,Zhejiang,Hangzhou/HZ-1/2020


### Which strain is the most deadly?
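Unfortunately, we can't answer that question: patient data are not available for specific strains. The query below is only a rough proxy. It lists, for each strain, the case counts of the state in which that strain was found, so every strain found in the same state shows identical numbers and ratios.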

In [8]:
# State-level outcome ratios listed per strain.
# The pattern joins two things on the State node: the OCCURED_IN relationship
# from the 'COVID-19' outbreak (which carries confirmed/deaths/recovered) and
# the Strain nodes that are FOUND_IN that state. The counts therefore come from
# the outbreak->state relationship, not from the strain, so all strains of one
# state share the same numbers.
# The WHERE clause keeps only states where confirmed, deaths and recovered are
# all > 0; this also guarantees the two divisions below never divide by zero.
# toFloat() is applied to both operands so the ratios are floating-point
# values rather than integer quotients.
# Rows are sorted by the death/recovered ratio, highest first.
query = """
MATCH (o:InfectiousDiseaseOutbreak)-[i:OCCURED_IN]->(st:State)<-[:FOUND_IN]-(s:Strain)
WHERE o.name = 'COVID-19' AND i.confirmed > 0 AND i.deaths > 0 AND i.recovered > 0
RETURN st.name as State,  s.name as Strain, i.confirmed as Confirmed, i.deaths as Death, i.recovered as Recovered, toFloat(i.deaths)/toFloat(i.confirmed) as `Death/Confirmed Ratio`, toFloat(i.deaths)/toFloat(i.recovered) as `Death/Recovered Ratio`
ORDER BY toFloat(i.deaths)/toFloat(i.recovered) DESC
"""
# Run the Cypher query; the resulting data frame is the cell's displayed output.
graph.run(query).to_data_frame()

Unnamed: 0,State,Strain,Confirmed,Death,Recovered,Death/Confirmed Ratio,Death/Recovered Ratio
0,Taiwan,Taiwan/NTU01/2020,22,1,2,0.045455,0.500000
1,Taiwan,Taiwan/NTU02/2020,22,1,2,0.045455,0.500000
2,Taiwan,Taiwan/2/2020,22,1,2,0.045455,0.500000
3,Hubei,Wuhan/WH03/2020,61682,1921,9128,0.031144,0.210451
4,Hubei,Wuhan/IVDC-HB-04/2020,61682,1921,9128,0.031144,0.210451
...,...,...,...,...,...,...,...
59,Guangdong,Guangdong/20SF174/2020,1328,4,565,0.003012,0.007080
60,Guangdong,Foshan/20SF211/2020,1328,4,565,0.003012,0.007080
61,Guangdong,Guangzhou/20SF206/2020,1328,4,565,0.003012,0.007080
62,Shanghai,Shanghai/IVDC-SH-001/2020,333,1,177,0.003003,0.005650


### What is the number of confirmed cases in China as a function of the distance from Hubei?
There is no clear relationship based on distance. We need to include other factors, such as travel information.

In [9]:
# Confirmed cases versus distance from Hubei.
# First MATCH: every State that the outbreak OCCURED_IN and that is LOCATED_IN
# a Country; the WHERE clause restricts this to the 'COVID-19' outbreak and to
# the country named 'China'.
# Second MATCH: binds `ref` to the State node named "Hubei", which serves as
# the reference point for the distance calculation.
# The returned distance is divided by 1000.0 and rounded, and is labelled
# `Distance [km]` — presumably distance() yields metres; confirm against the
# Neo4j version in use.
# The ORDER BY key is the rounded distance *without* the /1000.0 conversion,
# so rows are sorted ascending by the finer-grained value.
query = """
MATCH (o:InfectiousDiseaseOutbreak)-[i:OCCURED_IN]->(st:State)-[:LOCATED_IN]->(k:Country)
MATCH (ref:State)
WHERE ref.name = "Hubei" AND k.name = 'China' AND o.name = 'COVID-19'
RETURN st.name as State, round(distance(ref.location, st.location)/1000.0) AS `Distance [km]`, i.confirmed as Confirmed
ORDER by round(distance(ref.location, st.location))
"""
# Run the Cypher query; the resulting data frame is the cell's displayed output.
graph.run(query).to_data_frame()

Unnamed: 0,State,Distance [km],Confirmed
0,Hubei,0.0,61682
1,Henan,347.0,1257
2,Hunan,379.0,1007
3,Chongqing,434.0,555
4,Anhui,480.0,982
5,Jiangxi,502.0,933
6,Shaanxi,566.0,240
7,Guizhou,701.0,146
8,Jiangsu,714.0,629
9,Shanxi,735.0,131
