In [3]:
### Read the connection credentials from a local YAML file.
### NOTE: this cell is meant to be deleted before publishing.
import yaml

with open("../creds.yml", 'r') as ymlfile:
    cfg = yaml.safe_load(ymlfile)

### Unpack the three connection settings stored under "sonar_creds".
uri, user, password = (
    cfg["sonar_creds"][key] for key in ("uri", "user", "pass")
)

## Check Physiology-related topic terms

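Search for the substring "hysiolog" instead of "Physiology" so that every topic term containing it is retrieved, whether the word is capitalised ("Physiologie") or part of a compound ("Neurophysiologie").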


In [4]:
from neo4j import GraphDatabase

# Distinct names of all TopicTerm nodes whose Name contains "hysiolog".
query = """
MATCH (t:TopicTerm)
WHERE t.Name CONTAINS "hysiolog"
RETURN DISTINCT(t.Name)
"""

driver = GraphDatabase.driver(uri, auth=(user, password))

# Convert every returned record into a plain dict while the session is open.
with driver.session() as session:
    result = [record.data() for record in session.run(query)]

# Display the list of dicts as the cell output.
result

[{'(t.Name)': 'Arbeitsphysiologie'},
 {'(t.Name)': 'Neurophysiologie'},
 {'(t.Name)': 'Pathophysiologie'},
 {'(t.Name)': 'Pflanzenphysiologie'},
 {'(t.Name)': 'Physiologie'},
 {'(t.Name)': 'Sinnesphysiologie'},
 {'(t.Name)': 'Tierphysiologie'},
 {'(t.Name)': 'Physiologische Chemie'},
 {'(t.Name)': 'Physiologische Psychologie'},
 {'(t.Name)': 'Sprachphysiologie'},
 {'(t.Name)': 'Sportphysiologie'},
 {'(t.Name)': 'Leistungsphysiologie'},
 {'(t.Name)': 'Physiologische Psychiatrie'},
 {'(t.Name)': 'Elektrophysiologie'},
 {'(t.Name)': 'Altersphysiologie'},
 {'(t.Name)': 'Bewegungsphysiologie'},
 {'(t.Name)': 'Entwicklungsphysiologie'},
 {'(t.Name)': 'Ernährungsphysiologie'},
 {'(t.Name)': 'Ertragsphysiologie'},
 {'(t.Name)': 'Histophysiologie'},
 {'(t.Name)': 'Höhenphysiologie'},
 {'(t.Name)': 'Nacherntephysiologie'},
 {'(t.Name)': 'Physiologische Optik'},
 {'(t.Name)': 'Physiologische Uhr'},
 {'(t.Name)': 'Psychophysiologische Diagnostik'},
 {'(t.Name)': 'Stoffwechselphysiologie'},
 {'(t

With this in mind, we can create a full network that contains every person connected to any kind of physiological topic term. Also, the function `apoc.algo.cover(n)` can be used to retrieve any kind of relationship between the persons connected to physiological topic terms.

In [6]:
from helper_functions.helper_fun import to_nx_graph

# Open the database connection first, then describe what to fetch.
driver = GraphDatabase.driver(uri, auth=(user, password))

# All relationships between a TopicTerm whose Name contains "hysiolog"
# and a PerName node.
query = """
MATCH (t:TopicTerm)-[r]-(n:PerName)
WHERE t.Name CONTAINS "hysiolog"
RETURN *
"""

# Hand driver and query to the project helper; the result is kept in G
# (presumably a networkx graph, judging by how G is used below).
G = to_nx_graph(neo4j_driver=driver, query=query)

Check which topic terms from the name search above are missing from the graph built in the previous cell:

In [7]:
import numpy as np

# Labels of every node in G whose "type" attribute is "TopicTerm".
type_list = [
    G.nodes[node]["label"]
    for node in G.nodes
    if G.nodes[node]["type"] == "TopicTerm"
]

# Names returned by the earlier name query that do not occur among those labels.
np.setdiff1d([d["(t.Name)"] for d in result], type_list)

array(['Altersphysiologie', 'Bewegungsphysiologie',
       'Elektrophysiologische Untersuchung', 'Ertragsphysiologie',
       'Experimentelle Physiologie', 'Histophysiologie',
       'Ignaz-L.-Lieben-Preis für Physik, Chemie und Physiologie',
       'Muskelphysiologie', 'Physiologische Optik', 'Physiologische Uhr',
       'Psychophysiologische Diagnostik', 'Reizphysiologie',
       'Sprachphysiologie', 'Tauchphysiologie', 'Umweltphysiologie',
       'Vergleichende Neurophysiologie', 'Vergleichende Physiologie',
       'Zellphysiologie'], dtype='<U57')

In [7]:
from helper_functions.helper_fun import to_nx_graph
from pyvis.network import Network

# Directed pyvis network, 750px high and full width, in notebook mode.
nt = Network(height='750px', width='100%', directed=True, notebook=True)
nt.from_nx(G)  # load the nodes and edges of G
nt.set_edge_smooth("dynamic")
# Rendering is currently switched off; uncomment to write the HTML file.
#nt.show('./html_networks/physiological_net.html')

# Full network query

In [42]:
driver = GraphDatabase.driver(uri, auth=(user, password))

# Query steps:
#   1. match TopicTerm nodes whose Name contains "hysiolog" together with
#      the PerName nodes related to them;
#   2. collect the ids of both and re-match those nodes as n2;
#   3. from each distinct node, follow the listed relationship types to its
#      neighbours ("friends");
#   4. return the collected nodes per relationship, capped at 100 rows.
# NOTE(review): the recorded run of this cell ended in a KeyboardInterrupt.
query = """
MATCH (t:TopicTerm),
      (n:PerName)-[r]-(t)
WHERE t.Name CONTAINS "hysiolog"
WITH [x in collect(t)+collect(n)|id(x)] as collectedIds MATCH (n2) WHERE id(n2) in collectedIds
WITH DISTINCT(n2) as nodes
MATCH (nodes)-[r:RelationToPerName|RelationToResource|RelationToGeoName|RelationToTopicTerm|SocialRelation]-(friends)
WITH COLLECT(nodes)+COLLECT(friends) as final_nodes, r
RETURN DISTINCT(final_nodes), r
LIMIT 100
"""

# Build G from the query result via the project helper.
G = to_nx_graph(neo4j_driver=driver, query=query)

KeyboardInterrupt: 

In [35]:
# Three successive drafts of the full-network query. Each assignment
# overwrites `query`, so only the LAST draft is passed to to_nx_graph below;
# the first two are kept for reference and are never executed.

# Draft 1: collect the ids of the matching topic terms and their persons,
# then expand one hop along the listed relationship types (max. 1000 rows).
query = """
MATCH (t:TopicTerm), (n:PerName)--(t)
WHERE t.Name CONTAINS "hysiolog"
WITH [x in collect(t)+collect(n)|id(x)] as collectedIds MATCH (n2) WHERE id(n2) in collectedIds
MATCH (startNodes)-[r:RelationToPerName|RelationToResource|RelationToGeoName|RelationToTopicTerm|SocialRelation]-(x)
WHERE id(startNodes) IN collectedIds
RETURN DISTINCT(startNodes), r, x
LIMIT 1000
"""

##########################

# Draft 2: two-hop pattern from the topic term via a PerName ("friends") to
# further nodes, extended by several OPTIONAL MATCH patterns; no LIMIT.
# NOTE(review): this draft and the next filter on "Physiolog" (capital P),
# whereas the earlier cells use "hysiolog". Cypher's CONTAINS is
# case-sensitive, so lowercase compounds such as 'Arbeitsphysiologie' would
# not match here -- confirm that this is intended.
query = """
MATCH (t:TopicTerm)-[rel1:RelationToPerName|RelationToResource|RelationToGeoName|RelationToTopicTerm|SocialRelation]-(friends)- [rel2:RelationToTopicTerm|RelationToGeoName|RelationToCorpName|RelationToMeetName|RelationToUniTitle]-(friendsfriends)
WHERE (t.Name CONTAINS "Physiolog" AND friends:PerName)
OPTIONAL MATCH (t)-[rel1]-(p:PerName)-[rel2]-(p2:PerName)--(t)
OPTIONAL MATCH (t)-[rel1]-(r:Resource)-[rel3:RelationToPerName]-(p)-[rel2]-(p2)--(r:Resource)
OPTIONAL MATCH (t)--(p)--(r)
RETURN *
"""


######################

# Draft 3 (the one actually executed): optional two-hop expansion from every
# topic term whose Name contains "Physiolog", capped at 5000 rows.
query = """
MATCH (t:TopicTerm)
WHERE (t.Name CONTAINS "Physiolog")
OPTIONAL MATCH (t)-[rel1:RelationToPerName|RelationToResource|RelationToGeoName|RelationToTopicTerm|SocialRelation]-(friends)- [rel2:RelationToTopicTerm|RelationToGeoName|RelationToCorpName|RelationToMeetName|RelationToUniTitle]-(friendsfriends)
RETURN *
LIMIT 5000
"""


# A new driver is created for this cell and handed to the project helper,
# which fills G from the result of the last `query` above.
driver = GraphDatabase.driver(uri, auth=(user, password))

G = to_nx_graph(neo4j_driver = driver, 
                query = query)

In [38]:
from helper_functions.helper_fun import to_nx_graph
from pyvis.network import Network

# Directed pyvis network, 750px high and full width, in notebook mode.
nt = Network(height='750px', width='100%', directed=True, notebook=True)
nt.from_nx(G)  # load the nodes and edges of G
nt.set_edge_smooth("dynamic")
# Rendering is currently switched off; uncomment to write the HTML file.
#nt.show('./html_networks/test.html')

In [36]:
# Print the edge count and the node count of G, one per line.
for caption, counter in (("edges", G.number_of_edges), ("nodes", G.number_of_nodes)):
    print(caption, counter())

edges 5002
nodes 4671


In [31]:
# Print the edge count and the node count of G, one per line.
for caption, counter in (("edges", G.number_of_edges), ("nodes", G.number_of_nodes)):
    print(caption, counter())

edges 4070
nodes 2044


## Cypher Projection of Full Network

In [None]:
# Template for a Graph Data Science Cypher projection. The three arguments
# ('name', 'MATCH QUERY', 'MATCH QUERY RETURN *') are placeholders that still
# have to be filled in; the string is only assigned here, not executed.
# NOTE(review): per the GDS documentation the node query is expected to
# return `id` and the relationship query `source`/`target` -- confirm when
# replacing the placeholders.
query = """
CALL gds.graph.create.cypher(
    'name',
    'MATCH QUERY',
    'MATCH QUERY RETURN *'
)
"""

## Betweenness Centrality