# Example queries on COVID-19 Knowledge Graph

In [1]:
import os
from py2neo import Graph

In [2]:
# Connect to the Neo4j database backing the knowledge graph.
# The password is taken from the NEO4J_PASSWORD environment variable so the
# notebook can be pointed at another deployment without editing the code; when
# the variable is unset it falls back to the original demo password.
graph = Graph(password=os.environ.get("NEO4J_PASSWORD", "neo4jbinder"))

### How many people have died of COVID-19?

In [3]:
# Total deaths: sum the `deaths` property over every OCCURED_IN relationship
# that links the outbreak named 'COVID-19' to a Country node.
# NOTE(review): `OCCURED_IN` is spelled exactly as in the query; presumably it
# matches the relationship type stored in the graph, so do not "correct" it here.
query = """
MATCH (o:InfectiousDiseaseOutbreak)-[i:OCCURED_IN]->(:Country)
WHERE o.name = 'COVID-19'
RETURN sum(i.deaths) as Deaths
"""
# Execute the Cypher query and display the result as a data frame.
graph.run(query).to_data_frame()

Unnamed: 0,Deaths
0,3348


### How many cases have been confirmed, and how many patients have died or recovered, in each country?

In [4]:
# Per-country breakdown: for each Country linked to the 'COVID-19' outbreak,
# return the confirmed / deaths / recovered values stored on the OCCURED_IN
# relationship, with the highest confirmed count first.
query = """
MATCH (o:InfectiousDiseaseOutbreak)-[i:OCCURED_IN]->(c:Country)
WHERE o.name = 'COVID-19'
RETURN c.name as Country, i.confirmed as Confirmed, i.deaths as Deaths, i.recovered as Recovered
ORDER BY i.confirmed DESC
"""
# Execute the Cypher query and display the result as a data frame.
graph.run(query).to_data_frame()

Unnamed: 0,Country,Confirmed,Deaths,Recovered
0,China,80422,3013,52240
1,South Korea,6088,35,41
2,Italy,3858,148,414
3,Iran,3513,107,739
4,Others,706,6,10
...,...,...,...,...
85,Tunisia,1,0,0
86,North Macedonia,1,0,0
87,Cambodia,1,0,1
88,South Africa,1,0,0


### Which pathogen causes the COVID-19 outbreak?

In [5]:
# Find the Pathogen node(s) with a CAUSES relationship to the 'COVID-19'
# outbreak and return each pathogen's name and taxonomyId property.
query = """
MATCH (p:Pathogen)-[:CAUSES]->(o:InfectiousDiseaseOutbreak)
WHERE o.name = 'COVID-19'
RETURN p.name as Pathogen, p.taxonomyId as TaxonomyId
"""
# Execute the Cypher query and display the result as a data frame.
graph.run(query).to_data_frame()

Unnamed: 0,Pathogen,TaxonomyId
0,Wuhan seafood market pneumonia virus,269749


### What are the known strains of the COVID-19 virus?

In [6]:
# List strain names: start from the pathogen that CAUSES the 'COVID-19'
# outbreak and follow its HAS_STRAIN relationships to Strain nodes.
query = """
MATCH (s:Strain)<-[:HAS_STRAIN]-(:Pathogen)-[:CAUSES]->(o:InfectiousDiseaseOutbreak)
WHERE o.name = 'COVID-19'
RETURN s.name as Strain
"""
# Execute the Cypher query and display the result as a data frame.
graph.run(query).to_data_frame()

Unnamed: 0,Strain
0,Korea/KCDC12/2020
1,Japan/TY-WK-012/2020
2,Switzerland/1000477377/2020
3,Brazil/SPBR-01/2020
4,Germany/Baden-Wuerttemberg-1/2020
...,...
204,Wuhan/IPBCAMS-WH-05/2020
205,Australia/VIC01/2020
206,pangolin/Guangdong/P2S/2019
207,China/WH-09/2020


### Which strains are found in each state?

In [7]:
# Strains per state: extend the pathogen -> strain path with the FOUND_IN
# relationship from each Strain to a State, returning one (state, strain) row
# per match, sorted alphabetically by state name.
query = """
MATCH (st:State)<-[:FOUND_IN]-(s:Strain)<-[:HAS_STRAIN]-(:Pathogen)-[:CAUSES]->(o:InfectiousDiseaseOutbreak)
WHERE o.name = 'COVID-19'
RETURN st.name as State, s.name as Strain
ORDER BY st.name
"""
# Execute the Cypher query and display the result as a data frame.
graph.run(query).to_data_frame()

Unnamed: 0,State,Strain
0,Anhui,Hefei/2/2020
1,Anhui,Anhui/SZ005/2020
2,Arizona,USA/AZ1/2020
3,Beijing,Beijing/IVDC-BJ-005/2020
4,British Columbia,Canada/BC_37_0-2/2020
...,...,...
132,Yunnan,Yunnan/IVDC-YN-003/2020
133,Zhejiang,Hangzhou/HZ-1/2020
134,Zhejiang,Hangzhou/HZCDC0001/2020
135,Zhejiang,Zhejiang/WZ-01/2020


### Which strain is the most deadly?
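Unfortunately, we can't answer that question: patient data are not available for specific strains. The query below can only attach state-level case counts to every strain found in that state, so all strains from the same state show identical ratios.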

In [8]:
# Join state-level outbreak statistics with the strains found in each state.
# The counts (confirmed, deaths, recovered) come from the outbreak -> State
# OCCURED_IN relationship, not from the strain, so every strain in a given state
# is returned with the same numbers and ratios.
# The WHERE clause keeps only states where all three counts are positive, which
# also keeps both ratio denominators non-zero.
# toFloat() is applied to both operands before dividing; rows are sorted by the
# deaths/recovered ratio, highest first.
# NOTE(review): the deaths column is aliased `Death` here, whereas the earlier
# queries use `Deaths`.
query = """
MATCH (o:InfectiousDiseaseOutbreak)-[i:OCCURED_IN]->(st:State)<-[:FOUND_IN]-(s:Strain)
WHERE o.name = 'COVID-19' AND i.confirmed > 0 AND i.deaths > 0 AND i.recovered > 0
RETURN st.name as State,  s.name as Strain, i.confirmed as Confirmed, i.deaths as Death, i.recovered as Recovered, toFloat(i.deaths)/toFloat(i.confirmed) as `Death/Confirmed Ratio`, toFloat(i.deaths)/toFloat(i.recovered) as `Death/Recovered Ratio`
ORDER BY toFloat(i.deaths)/toFloat(i.recovered) DESC
"""
# Execute the Cypher query and display the result as a data frame.
graph.run(query).to_data_frame()

Unnamed: 0,State,Strain,Confirmed,Death,Recovered,Death/Confirmed Ratio,Death/Recovered Ratio
0,Washington,USA/WA2/2020,70,11,1,0.157143,11.00000
1,Washington,USA/WA6-UW3/2020,70,11,1,0.157143,11.00000
2,Washington,USA/WA8-UW5/2020,70,11,1,0.157143,11.00000
3,Washington,USA/WA9-UW6/2020,70,11,1,0.157143,11.00000
4,Washington,USA/WA4-UW2/2020,70,11,1,0.157143,11.00000
...,...,...,...,...,...,...,...
114,Jiangxi,Jiangxi/IVDC-JX-002/2020,935,1,901,0.001070,0.00111
115,Zhejiang,Hangzhou/HZCDC0001/2020,1215,1,1124,0.000823,0.00089
116,Zhejiang,Zhejiang/WZ-01/2020,1215,1,1124,0.000823,0.00089
117,Zhejiang,Hangzhou/HZ-1/2020,1215,1,1124,0.000823,0.00089


### What is the number of confirmed cases in China as a function of the distance from Hubei?
There is no clear relationship based on distance. We need to include other factors, such as travel information.

In [9]:
# Confirmed cases per Chinese state versus distance from Hubei.
# The first MATCH finds states with 'COVID-19' outbreak data that are
# LOCATED_IN a Country; the WHERE clause restricts that country to 'China'.
# The second MATCH is not connected to the first pattern: it binds `ref` to the
# State node named "Hubei", which serves as the reference point for every row.
# distance() is computed between the `location` properties of `ref` and each
# state; dividing by 1000.0 yields the value reported as `Distance [km]`
# (assumes distance() returns metres for these points — TODO confirm).
# Rows are ordered by the rounded, unscaled distance, nearest first.
query = """
MATCH (o:InfectiousDiseaseOutbreak)-[i:OCCURED_IN]->(st:State)-[:LOCATED_IN]->(k:Country)
MATCH (ref:State)
WHERE ref.name = "Hubei" AND k.name = 'China' AND o.name = 'COVID-19'
RETURN st.name as State, round(distance(ref.location, st.location)/1000.0) AS `Distance [km]`, i.confirmed as Confirmed
ORDER by round(distance(ref.location, st.location))
"""
# Execute the Cypher query and display the result as a data frame.
graph.run(query).to_data_frame()

Unnamed: 0,State,Distance [km],Confirmed
0,Hubei,0.0,67466
1,Henan,347.0,1272
2,Hunan,379.0,1018
3,Chongqing,434.0,576
4,Anhui,480.0,990
5,Jiangxi,502.0,935
6,Shaanxi,566.0,245
7,Guizhou,701.0,146
8,Jiangsu,714.0,631
9,Shanxi,735.0,133
