# Example queries on the COVID-19 Knowledge Graph

In [1]:
import os
from py2neo import Graph

In [2]:
# Open the connection to the Neo4j database that holds the knowledge graph.
# The password is taken from the NEO4J_PASSWORD environment variable so the
# notebook can be pointed at another deployment without editing the code.
# The original password stays as the fallback, so behavior is unchanged when
# the variable is not set.
graph = Graph(password=os.environ.get("NEO4J_PASSWORD", "neo4jbinder"))

### How many people have died of COVID-19?

In [3]:
# Sum the `deaths` property over every OCCURED_IN relationship that links
# the COVID-19 outbreak to a Country node; the result is a single row.
query = """
MATCH (o:InfectiousDiseaseOutbreak)-[i:OCCURED_IN]->(:Country)
WHERE o.name = 'COVID-19'
RETURN sum(i.deaths) as Deaths
"""
cursor = graph.run(query)
cursor.to_data_frame()

Unnamed: 0,Deaths
0,1868


### How many patients are confirmed, have died, or have recovered by country?

In [4]:
# One row per country: confirmed, deaths and recovered counts are read from
# the OCCURED_IN relationship between the COVID-19 outbreak and the country.
# Rows are sorted by confirmed count, largest first.
query = """
MATCH (o:InfectiousDiseaseOutbreak)-[i:OCCURED_IN]->(c:Country)
WHERE o.name = 'COVID-19'
RETURN c.name as Country, i.confirmed as Confirmed, i.deaths as Deaths, i.recovered as Recovered
ORDER BY i.confirmed DESC
"""
cursor = graph.run(query)
cursor.to_data_frame()

Unnamed: 0,Country,Confirmed,Deaths,Recovered
0,China,72364,1863,12455
1,Others,454,0,0
2,Singapore,77,0,24
3,Japan,66,1,12
4,Hong Kong,60,1,2
5,Thailand,35,0,15
6,South Korea,30,0,10
7,Malaysia,22,0,7
8,Taiwan,22,1,2
9,Germany,16,0,1


### Which pathogen causes the COVID-19 outbreak?

In [5]:
# Follow the CAUSES relationship back from the COVID-19 outbreak to its
# Pathogen node and report the pathogen's name and taxonomy id.
query = """
MATCH (p:Pathogen)-[:CAUSES]->(o:InfectiousDiseaseOutbreak)
WHERE o.name = 'COVID-19'
RETURN p.name as Pathogen, p.taxonomyId as TaxonomyId
"""
cursor = graph.run(query)
cursor.to_data_frame()

Unnamed: 0,Pathogen,TaxonomyId
0,Wuhan seafood market pneumonia virus,269749


### What are the known strains of the COVID-19 virus?

In [6]:
# List every Strain attached (via HAS_STRAIN) to a pathogen that CAUSES the
# COVID-19 outbreak. No ORDER BY is given, so row order is whatever the
# database returns.
query = """
MATCH (s:Strain)<-[:HAS_STRAIN]-(:Pathogen)-[:CAUSES]->(o:InfectiousDiseaseOutbreak)
WHERE o.name = 'COVID-19'
RETURN s.name as Strain
"""
cursor = graph.run(query)
cursor.to_data_frame()

Unnamed: 0,Strain
0,France/IDF0626/2020
1,Philippines/026N/2020
2,HongKong/VM20001218/2020
3,Italy/INMI2/2020
4,HongKong/VM20002343/2020
...,...
141,USA/CA2/2020
142,Wuhan/IPBCAMS-WH-01/2019
143,Taiwan/NTU01/2020
144,Shenzhen/SZTH-004/2020


### Which strains are found in each state?

In [7]:
# Extend the strain lookup with the FOUND_IN relationship so each strain is
# paired with the State it was found in; rows are sorted by state name.
query = """
MATCH (st:State)<-[:FOUND_IN]-(s:Strain)<-[:HAS_STRAIN]-(:Pathogen)-[:CAUSES]->(o:InfectiousDiseaseOutbreak)
WHERE o.name = 'COVID-19'
RETURN st.name as State, s.name as Strain
ORDER BY st.name
"""
cursor = graph.run(query)
cursor.to_data_frame()

Unnamed: 0,State,Strain
0,Arizona,USA/AZ1/2020
1,Beijing,Beijing/IVDC-BJ-005/2020
2,California,USA/CA3/2020
3,California,USA/CA6/2020
4,California,USA/CA1/2020
...,...,...
85,Yunnan,bat/Yunnan/RaTG13/2013
86,Zhejiang,Zhejiang/WZ-02/2020
87,Zhejiang,Zhejiang/WZ-01/2020
88,Zhejiang,Hangzhou/HZCDC0001/2020


### Which strain is the most deadly?
Unfortunately, we can't answer that question. Patient data are not available for specific strains.

In [8]:
# Pair each strain with the case counts of the State it was found in.
# The counts live on the outbreak-to-State relationship, not on the strain,
# so every strain found in the same state repeats that state's numbers.
# Requiring confirmed, deaths and recovered to be > 0 keeps both ratios
# defined (no division by zero). Rows are sorted by the death/recovered
# ratio, highest first.
query = """
MATCH (o:InfectiousDiseaseOutbreak)-[i:OCCURED_IN]->(st:State)<-[:FOUND_IN]-(s:Strain)
WHERE o.name = 'COVID-19' AND i.confirmed > 0 AND i.deaths > 0 AND i.recovered > 0
RETURN st.name as State,  s.name as Strain, i.confirmed as Confirmed, i.deaths as Death, i.recovered as Recovered, toFloat(i.deaths)/toFloat(i.confirmed) as `Death/Confirmed Ratio`, toFloat(i.deaths)/toFloat(i.recovered) as `Death/Recovered Ratio`
ORDER BY toFloat(i.deaths)/toFloat(i.recovered) DESC
"""
cursor = graph.run(query)
cursor.to_data_frame()

Unnamed: 0,State,Strain,Confirmed,Death,Recovered,Death/Confirmed Ratio,Death/Recovered Ratio
0,Taiwan,Taiwan/NTU02/2020,22,1,2,0.045455,0.500000
1,Taiwan,Taiwan/NTU01/2020,22,1,2,0.045455,0.500000
2,Taiwan,Taiwan/2/2020,22,1,2,0.045455,0.500000
3,Hubei,Wuhan/IPBCAMS-WH-01/2019,59989,1789,7862,0.029822,0.227550
4,Hubei,Wuhan/WH03/2020,59989,1789,7862,0.029822,0.227550
...,...,...,...,...,...,...,...
58,Guangdong,Guangdong/20SF014/2020,1322,4,524,0.003026,0.007634
59,Guangdong,Guangdong/20SF028/2020,1322,4,524,0.003026,0.007634
60,Guangdong,Foshan/20SF207/2020,1322,4,524,0.003026,0.007634
61,Shanghai,Shanghai/IVDC-SH-001/2020,333,1,161,0.003003,0.006211


### What is the number of confirmed cases in China as a function of the distance from Hubei?
There is no clear relationship based on distance. We need to include other factors, such as travel information.

In [9]:
# For every State located in China, report its confirmed case count and its
# distance from the State named "Hubei" (the reference node `ref`).
# The distance is divided by 1000.0 and labelled km, so distance() is
# presumably returning metres -- confirm against the Neo4j version in use.
# Sorting uses the rounded, unscaled distance, nearest state first.
query = """
MATCH (o:InfectiousDiseaseOutbreak)-[i:OCCURED_IN]->(st:State)-[:LOCATED_IN]->(k:Country)
MATCH (ref:State)
WHERE ref.name = "Hubei" AND k.name = 'China' AND o.name = 'COVID-19'
RETURN st.name as State, round(distance(ref.location, st.location)/1000.0) AS `Distance [km]`, i.confirmed as Confirmed
ORDER by round(distance(ref.location, st.location))
"""
cursor = graph.run(query)
cursor.to_data_frame()

Unnamed: 0,State,Distance [km],Confirmed
0,Hubei,0.0,59989
1,Henan,347.0,1246
2,Hunan,379.0,1006
3,Chongqing,434.0,553
4,Anhui,480.0,973
5,Jiangxi,502.0,930
6,Shaanxi,566.0,240
7,Guizhou,701.0,146
8,Jiangsu,714.0,626
9,Shanxi,735.0,130
