# Example queries on COVID-19 Knowledge Graph

In [1]:
import os
from py2neo import Graph

In [2]:
# Connect to the Neo4j database that backs the knowledge graph.
# The password is taken from the NEO4J_PASSWORD environment variable when it
# is set, so credentials need not be edited into the notebook; otherwise the
# original default ("neo4jbinder") is used and behavior is unchanged.
graph = Graph(password=os.environ.get("NEO4J_PASSWORD", "neo4jbinder"))

### How many people have died of COVID-19?

In [3]:
# Total deaths: add up the `deaths` property on every OCCURED_IN relationship
# that links the outbreak named 'COVID-19' to a Country node.
query = """
MATCH (o:InfectiousDiseaseOutbreak)-[i:OCCURED_IN]->(:Country)
WHERE o.name = 'COVID-19'
RETURN sum(i.deaths) as Deaths
"""
cursor = graph.run(query)
# Last expression of the cell: the tabulated result is what gets displayed.
cursor.to_data_frame()

Unnamed: 0,Deaths
0,1115


### How many cases have been confirmed, and how many patients have died or recovered, in each country?

In [4]:
# Per-country figures: one row per Country reached by an OCCURED_IN
# relationship from the 'COVID-19' outbreak, reporting the relationship's
# `confirmed`, `deaths` and `recovered` properties, highest confirmed first.
query = """
MATCH (o:InfectiousDiseaseOutbreak)-[i:OCCURED_IN]->(c:Country)
WHERE o.name = 'COVID-19'
RETURN c.name as Country, i.confirmed as Confirmed, i.deaths as Deaths, i.recovered as Recovered
ORDER BY i.confirmed DESC
"""
cursor = graph.run(query)
# Last expression of the cell: the tabulated result is what gets displayed.
cursor.to_data_frame()

Unnamed: 0,Country,Confirmed,Deaths,Recovered
0,China,44641,1113,4730
1,Others,135,0,0
2,Hong Kong,49,1,0
3,Singapore,47,0,9
4,Thailand,33,0,10
5,South Korea,28,0,4
6,Japan,26,0,9
7,Malaysia,18,0,3
8,Taiwan,18,0,1
9,Germany,16,0,0


### Which pathogen causes the COVID-19 outbreak?

In [5]:
# Pathogen lookup: follow CAUSES relationships into the 'COVID-19' outbreak
# and report each Pathogen's `name` and `taxonomyId`.
query = """
MATCH (p:Pathogen)-[:CAUSES]->(o:InfectiousDiseaseOutbreak)
WHERE o.name = 'COVID-19'
RETURN p.name as Pathogen, p.taxonomyId as TaxonomyId
"""
cursor = graph.run(query)
# Last expression of the cell: the tabulated result is what gets displayed.
cursor.to_data_frame()

Unnamed: 0,Pathogen,TaxonomyId
0,Wuhan seafood market pneumonia virus,269749


### What are the known strains of the COVID-19 virus?

In [6]:
# Strain listing: start from any Pathogen that CAUSES the 'COVID-19' outbreak
# and return the name of every Strain it is linked to via HAS_STRAIN.
query = """
MATCH (s:Strain)<-[:HAS_STRAIN]-(:Pathogen)-[:CAUSES]->(o:InfectiousDiseaseOutbreak)
WHERE o.name = 'COVID-19'
RETURN s.name as Strain
"""
cursor = graph.run(query)
# Last expression of the cell: the tabulated result is what gets displayed.
cursor.to_data_frame()

Unnamed: 0,Strain
0,France/IDF0372/2020
1,Finland/1/2020
2,Belgium/GHB-03021/2020
3,Singapore/3/2020
4,Japan/TY-WK-012/2020
...,...
96,Shenzhen/SZTH-001/2020
97,Wuhan/IPBCAMS-WH-05/2020
98,Shenzhen/SZTH-003/2020
99,Yunnan/IVDC-YN-003/2020


### Which strains are found in each state?

In [7]:
# Strains per state: same Pathogen -> Strain traversal as the strain listing,
# extended by the FOUND_IN relationship from each Strain to a State.
# Rows are (state name, strain name), sorted by state name.
query = """
MATCH (st:State)<-[:FOUND_IN]-(s:Strain)<-[:HAS_STRAIN]-(:Pathogen)-[:CAUSES]->(o:InfectiousDiseaseOutbreak)
WHERE o.name = 'COVID-19'
RETURN st.name as State, s.name as Strain
ORDER BY st.name
"""
cursor = graph.run(query)
# Last expression of the cell: the tabulated result is what gets displayed.
cursor.to_data_frame()

Unnamed: 0,State,Strain
0,Arizona,USA/AZ1/2020
1,Beijing,Beijing/IVDC-BJ-005/2020
2,California,USA/CA6/2020
3,California,USA/CA3/2020
4,California,USA/CA1/2020
...,...,...
75,Wisconsin,USA/WI1/2020
76,Yunnan,Yunnan/IVDC-YN-003/2020
77,Zhejiang,Zhejiang/WZ-01/2020
78,Zhejiang,Zhejiang/Hangzhou-1/2020


### Which strain is the most deadly?
Unfortunately, we can't answer that question. Patient data are not available for specific strains.

In [8]:
# Pairs every Strain FOUND_IN a State with that State's outbreak figures.
# The counts come from the outbreak -> State OCCURED_IN relationship, not from
# the strain, so all strains found in the same state carry identical numbers.
# Requiring confirmed, deaths and recovered to be > 0 keeps both ratio
# denominators non-zero. Rows are sorted by deaths/recovered, highest first.
query = """
MATCH (o:InfectiousDiseaseOutbreak)-[i:OCCURED_IN]->(st:State)<-[:FOUND_IN]-(s:Strain)
WHERE o.name = 'COVID-19' AND i.confirmed > 0 AND i.deaths > 0 AND i.recovered > 0
RETURN st.name as State,  s.name as Strain, i.confirmed as Confirmed, i.deaths as Death, i.recovered as Recovered, toFloat(i.deaths)/toFloat(i.confirmed) as `Death/Confirmed Ratio`, toFloat(i.deaths)/toFloat(i.recovered) as `Death/Recovered Ratio`
ORDER BY toFloat(i.deaths)/toFloat(i.recovered) DESC
"""
cursor = graph.run(query)
# Last expression of the cell: the tabulated result is what gets displayed.
cursor.to_data_frame()

Unnamed: 0,State,Strain,Confirmed,Death,Recovered,Death/Confirmed Ratio,Death/Recovered Ratio
0,Hubei,Wuhan/HBCDC-HB-01/2019,33366,1068,2639,0.032009,0.404699
1,Hubei,Wuhan/IVDC-HB-envF13-21/2020,33366,1068,2639,0.032009,0.404699
2,Hubei,Wuhan/IPBCAMS-WH-04/2019,33366,1068,2639,0.032009,0.404699
3,Hubei,Wuhan/IPBCAMS-WH-01/2019,33366,1068,2639,0.032009,0.404699
4,Hubei,Wuhan/IVDC-HB-04/2020,33366,1068,2639,0.032009,0.404699
5,Hubei,Wuhan/IPBCAMS-WH-02/2019,33366,1068,2639,0.032009,0.404699
6,Hubei,Wuhan/WH04/2020,33366,1068,2639,0.032009,0.404699
7,Hubei,Wuhan/WH05/2020,33366,1068,2639,0.032009,0.404699
8,Hubei,Wuhan/WIV05/2019,33366,1068,2639,0.032009,0.404699
9,Hubei,Wuhan/WIV07/2019,33366,1068,2639,0.032009,0.404699


### What is the number of confirmed cases in China as a function of the distance from Hubei?
There is no clear relationship based on distance. We need to include other factors, such as travel information.

In [9]:
# Confirmed cases versus distance from Hubei.
# First MATCH: every State with a 'COVID-19' OCCURED_IN relationship that is
# LOCATED_IN the Country named 'China'. Second MATCH: the reference State
# named "Hubei". The distance between the two `location` properties is divided
# by 1000.0 and rounded for the `Distance [km]` column; rows are ordered by
# the rounded, undivided distance.
query = """
MATCH (o:InfectiousDiseaseOutbreak)-[i:OCCURED_IN]->(st:State)-[:LOCATED_IN]->(k:Country)
MATCH (ref:State)
WHERE ref.name = "Hubei" AND k.name = 'China' AND o.name = 'COVID-19'
RETURN st.name as State, round(distance(ref.location, st.location)/1000.0) AS `Distance [km]`, i.confirmed as Confirmed
ORDER by round(distance(ref.location, st.location))
"""
cursor = graph.run(query)
# Last expression of the cell: the tabulated result is what gets displayed.
cursor.to_data_frame()

Unnamed: 0,State,Distance [km],Confirmed
0,Hubei,0.0,33366
1,Henan,347.0,1135
2,Hunan,379.0,946
3,Chongqing,434.0,505
4,Anhui,480.0,889
5,Jiangxi,502.0,844
6,Shaanxi,566.0,219
7,Guizhou,701.0,131
8,Jiangsu,714.0,543
9,Shanxi,735.0,124
