In [1]:
from neo4j import GraphDatabase, basic_auth
from dotenv import load_dotenv
import os

load_dotenv()

neo4jUser = os.getenv("NEO4J_USER")
neo4jPwd = os.getenv("NEO4J_PASSWORD_DS")
neo4jUrl = os.getenv("NEO4j_BOLT_DS")

driver = GraphDatabase.driver(neo4jUrl,auth=basic_auth(neo4jUser, neo4jPwd))

## Create the graph projection

Create a projection of analyses connected with brain region and cell type, per specie

For simplicity, we first add a special named relationship between the analyzes and brain region, cell type and specie.
This relationship is cleaned up after the algorithm is complete.

In [4]:
with driver.session() as session:
    session.run("""
        MATCH (n:Analysis)-->(:DataType)-->(:RegionRecord)-->(b:BrainRegion)
        MERGE (n)-[:SIMILARITY_ALGORITHM]->(b)
    """)
    session.run("""
        MATCH (n:Analysis)-->(c:CellType)
        MERGE (n)-[:SIMILARITY_ALGORITHM]->(c)
    """)
    session.run("""
        MATCH (n:Analysis)-->(:Specimen)-->(s:Specie)
        MERGE (n)-[:SIMILARITY_ALGORITHM]->(s)
    """)



In [5]:
with driver.session() as session:
    session.run("""
        CALL gds.graph.create.cypher(
            'analyses-rat',
            'MATCH (n:Analysis)-[:SIMILARITY_ALGORITHM]->(m) WHERE (n)-->(:Specie {id: "1"}) WITH collect(n)+collect(m) as nodes UNWIND nodes as x RETURN id(x) as id',
            'MATCH (n:Analysis)-[:SIMILARITY_ALGORITHM]->(m) WHERE (n)-->(:Specie {id: "1"}) RETURN id(n) AS source, id(m) AS target'
        )
    """)
    session.run("""
        CALL gds.graph.create.cypher(
            'analyses-mouse',
            'MATCH (n:Analysis)-[:SIMILARITY_ALGORITHM]->(m) WHERE (n)-->(:Specie {id: "2"}) WITH collect(n)+collect(m) as nodes UNWIND nodes as x RETURN id(x) as id',
            'MATCH (n:Analysis)-[:SIMILARITY_ALGORITHM]->(m) WHERE (n)-->(:Specie {id: "2"}) RETURN id(n) AS source, id(m) AS target'
        )
    """)



In [19]:
import pandas as pd

def run_similarity_store_database_and_csv(specie_id):
    similarity_rows = []
    projection = "analyses-rat" if specie_id == 1 else "analyses-mouse"
    with driver.session() as session:
        res = session.run("""
            CALL gds.nodeSimilarity.stream(
                '%s',
                {
                    degreeCutoff: 3,
                    similarityCutoff: 1.0
                }
            )
            YIELD node1, node2, similarity
            RETURN gds.util.asNode(node1).id as id1, gds.util.asNode(node2).id as id2, similarity
        """ % projection)
        for record in res:
            similarity_rows.append([record["id1"], record["id1"], record["similarity"]])


    # Stores the result in a CSV file for the genereal database import
    similairty_df = pd.DataFrame(similarity_rows, columns = ["id1", "id2", "score"])
    similairty_df.to_csv("..\Data/csvs/basal_ganglia/regions/analysis_similarity_%s.csv" % ("rat" if specie_id == 1 else "mouse"), index=1)    

run_similarity_store_database_and_csv(1)
run_similarity_store_database_and_csv(2)

<Record id1='65' id2='57' similarity=1.0>
<Record id1='65' id2='70' similarity=1.0>
<Record id1='65' id2='64' similarity=1.0>
<Record id1='65' id2='58' similarity=1.0>
<Record id1='65' id2='66' similarity=1.0>
<Record id1='65' id2='69' similarity=1.0>
<Record id1='69' id2='57' similarity=1.0>
<Record id1='69' id2='70' similarity=1.0>
<Record id1='69' id2='64' similarity=1.0>
<Record id1='69' id2='58' similarity=1.0>
<Record id1='69' id2='66' similarity=1.0>
<Record id1='69' id2='65' similarity=1.0>
<Record id1='66' id2='57' similarity=1.0>
<Record id1='66' id2='70' similarity=1.0>
<Record id1='66' id2='64' similarity=1.0>
<Record id1='66' id2='58' similarity=1.0>
<Record id1='66' id2='69' similarity=1.0>
<Record id1='66' id2='65' similarity=1.0>
<Record id1='57' id2='66' similarity=1.0>
<Record id1='57' id2='70' similarity=1.0>
<Record id1='57' id2='64' similarity=1.0>
<Record id1='57' id2='58' similarity=1.0>
<Record id1='57' id2='69' similarity=1.0>
<Record id1='57' id2='65' similari

<Record id1='277' id2='430' similarity=1.0>
<Record id1='277' id2='429' similarity=1.0>
<Record id1='277' id2='391' similarity=1.0>
<Record id1='277' id2='111' similarity=1.0>
<Record id1='277' id2='186' similarity=1.0>
<Record id1='277' id2='419' similarity=1.0>
<Record id1='277' id2='492' similarity=1.0>
<Record id1='277' id2='278' similarity=1.0>
<Record id1='277' id2='408' similarity=1.0>
<Record id1='277' id2='470' similarity=1.0>
<Record id1='391' id2='430' similarity=1.0>
<Record id1='391' id2='429' similarity=1.0>
<Record id1='391' id2='277' similarity=1.0>
<Record id1='391' id2='111' similarity=1.0>
<Record id1='391' id2='186' similarity=1.0>
<Record id1='391' id2='419' similarity=1.0>
<Record id1='391' id2='492' similarity=1.0>
<Record id1='391' id2='278' similarity=1.0>
<Record id1='391' id2='408' similarity=1.0>
<Record id1='391' id2='470' similarity=1.0>
<Record id1='419' id2='430' similarity=1.0>
<Record id1='419' id2='429' similarity=1.0>
<Record id1='419' id2='277' simi

<Record id1='145' id2='250' similarity=1.0>
<Record id1='474' id2='351' similarity=1.0>
<Record id1='474' id2='275' similarity=1.0>
<Record id1='474' id2='229' similarity=1.0>
<Record id1='474' id2='228' similarity=1.0>
<Record id1='474' id2='472' similarity=1.0>
<Record id1='474' id2='485' similarity=1.0>
<Record id1='474' id2='357' similarity=1.0>
<Record id1='235' id2='681' similarity=1.0>
<Record id1='235' id2='651' similarity=1.0>
<Record id1='235' id2='552' similarity=1.0>
<Record id1='235' id2='742' similarity=1.0>
<Record id1='235' id2='719' similarity=1.0>
<Record id1='235' id2='557' similarity=1.0>
<Record id1='235' id2='701' similarity=1.0>
<Record id1='235' id2='226' similarity=1.0>
<Record id1='235' id2='238' similarity=1.0>
<Record id1='235' id2='690' similarity=1.0>
<Record id1='238' id2='681' similarity=1.0>
<Record id1='238' id2='651' similarity=1.0>
<Record id1='238' id2='552' similarity=1.0>
<Record id1='238' id2='742' similarity=1.0>
<Record id1='238' id2='719' simi

<Record id1='251' id2='145' similarity=1.0>
<Record id1='717' id2='238' similarity=1.0>
<Record id1='717' id2='226' similarity=1.0>
<Record id1='717' id2='701' similarity=1.0>
<Record id1='717' id2='557' similarity=1.0>
<Record id1='717' id2='742' similarity=1.0>
<Record id1='717' id2='552' similarity=1.0>
<Record id1='717' id2='651' similarity=1.0>
<Record id1='717' id2='681' similarity=1.0>
<Record id1='717' id2='235' similarity=1.0>
<Record id1='717' id2='690' similarity=1.0>
<Record id1='524' id2='439' similarity=1.0>
<Record id1='524' id2='434' similarity=1.0>
<Record id1='524' id2='428' similarity=1.0>
<Record id1='524' id2='435' similarity=1.0>
<Record id1='524' id2='407' similarity=1.0>
<Record id1='524' id2='426' similarity=1.0>
<Record id1='524' id2='438' similarity=1.0>
<Record id1='524' id2='427' similarity=1.0>
<Record id1='524' id2='436' similarity=1.0>
<Record id1='524' id2='437' similarity=1.0>
<Record id1='472' id2='485' similarity=1.0>
<Record id1='472' id2='275' simi

<Record id1='229' id2='275' similarity=1.0>
<Record id1='229' id2='351' similarity=1.0>
<Record id1='229' id2='357' similarity=1.0>
<Record id1='229' id2='474' similarity=1.0>
<Record id1='407' id2='439' similarity=1.0>
<Record id1='407' id2='434' similarity=1.0>
<Record id1='407' id2='438' similarity=1.0>
<Record id1='407' id2='426' similarity=1.0>
<Record id1='407' id2='435' similarity=1.0>
<Record id1='407' id2='428' similarity=1.0>
<Record id1='407' id2='524' similarity=1.0>
<Record id1='407' id2='427' similarity=1.0>
<Record id1='407' id2='436' similarity=1.0>
<Record id1='407' id2='437' similarity=1.0>
<Record id1='269' id2='238' similarity=1.0>
<Record id1='269' id2='226' similarity=1.0>
<Record id1='269' id2='701' similarity=1.0>
<Record id1='269' id2='557' similarity=1.0>
<Record id1='269' id2='742' similarity=1.0>
<Record id1='269' id2='552' similarity=1.0>
<Record id1='269' id2='651' similarity=1.0>
<Record id1='269' id2='681' similarity=1.0>
<Record id1='269' id2='235' simi

FileNotFoundError: [Errno 2] No such file or directory: '..\\Data/csvs/basal_ganglia/regions/analysis_similarity_rat.csv'

## Clean-up
We remove the projected graph, and delete the created relationship `SIMILARITY_ALGORITHM`

In [3]:
with driver.session() as session:
    session.run("call gds.graph.drop('analyses-rat')")
    session.run("call gds.graph.drop('analyses-mouse')")
    session.run("""
        MATCH (n:Analysis)-[r:SIMILARITY_ALGORITHM]->()
        DELETE r
    """)

In [4]:
  ### MATCH All these for part 2 (looking at method differences)
with driver.session() as session:   
    session.run("""
        MATCH (n:Analysis)-->(:ReporterIncubation)-->(r:Reporter)
        MERGE (n)-[:ANA_DATA {strength: 1}]->(r)
    """)
  
    session.run("""
        MATCH (n:Analysis)-->(:DataType)-->(:RegionRecord)-->(b:BrainRegion)
        MERGE (n)-[:ANA_DATA {strength: 1}]->(b)
    """)
    session.run("""
        MATCH (n:Analysis)-->(:DataType)-->(r:CellularRegion)
        MERGE (n)-[:ANA_DATA {strength: 1}]->(r)
    """)
    session.run("""
        MATCH (n:Analysis)-->(:Experiment)-[:ANAESTHETIC]->(r:Solution)
        MERGE (n)-[:ANESTHETIC]->(r)
    """)
    session.run("""
        MATCH (n:Analysis)-->(:Experiment)-[:PERFUSION_FIX_MEDIUM]->(r:Solution)
        MERGE (n)-[:PERFUSION]->(r)
    """)
    session.run("""
        MATCH (n:Analysis)-->(:Experiment)-->(:Specimen)-->(s:Specie)
        MERGE (n)-[:ANA_DATA]->(s)
    """)
    session.run("""
        MATCH (n:Analysis)-->(:DataType)-->(s:Software)
        MERGE (n)-[:ANA_DATA]->(s)
    """)
    session.run("""
        MATCH (n:Analysis)-->(:DataType)-->(s:RegionZone)
        MERGE (n)-[:ANA_DATA]->(s)
    """)

Failed to read from defunct connection Address(host='100.26.227.192', port=34124) (Address(host='100.26.227.192', port=34124))


ServiceUnavailable: Failed to read from defunct connection Address(host='100.26.227.192', port=34124) (Address(host='100.26.227.192', port=34124))

In [15]:
## Get "real" analysis similarity
# This is ONLY to see if we find anything interesting.
# The "real" similarities will be set on the website
# the "_all.csv" are based on Reporter, CellTYpe and BrainRegion only
import pandas as pd

df = pd.read_csv("Data/csvs/graph/analyses_similarity_mouse_all.csv")

new_table = []
for index, row in df.iterrows():
    if(pd.isnull(row["id1"])):
        continue
    
    name1 = row["analysis1"].split("_")[:2]
    name2 = row["analysis2"].split("_")[:2]
    if(name1 != name2 and float(row["similarity"] > 0.5)):
        new_table.append([row["id1"], row["id2"], row["analysis1"], row["analysis2"], row["similarity"]])

# remove duplicated
for row1 in new_table:
    for row2 in new_table:
        if(row1[0] == row2[0] and row1[1] == row2[1] and row1[4] == row2[4]):
            new_table.remove(row2)
            
        
new_df =  pd.DataFrame(new_table, columns=["id1", "id2", "analysis1", "analysis2", "similarity"])
new_df.to_csv("Data/csvs/graph/reffined_analyses_similarity_mouse_all.csv")


In [2]:
## Is there somehting in the methods used that make a difference in the results?

cell_ids = []
region_ids = []

with driver.session() as session:    
    res = session.run("""
        Match (b:BrainRegion)
        WHERE size((:Analysis)-[:DATA_TYPE]->(:Quantitation)-[:REGION_RECORD]->(:RegionRecord)-[:PRIMARY_REGION]->(b)) >=20
        return b.id
    
    """)
    for record in res:
        region_ids.append(record["b.id"])
    res = session.run("""
        Match (c:CellType)
        WHERE size((c)<-[:CELL_TYPE_PUTATIVE]-(:Analysis)) >=20
        return c.id
    """)
    for record in res:
        cell_ids.append(record["c.id"])

print(len(cell_ids), len(region_ids))
queries = []
        
for cell_id in cell_ids:
    for region_id in region_ids:
        q = """
        MATCH (c:CellType)<-[:CELL_TYPE_PUTATIVE]-(n:Analysis)-[:DATA_TYPE]->(:Quantitation)-[:REGION_RECORD]->(:RegionRecord)-[:PRIMARY_REGION]->(b:BrainRegion)
        WHERE c.id ="%s" AND b.id = "%s"
        return COUNT(DISTINCT n.id) as analysisCount, c.id, c.name, b.id, b.name
        
        """ % (cell_id, region_id)
        queries.append(q)
        
print(len(queries), queries[0])
results = []
for query in queries:
    with driver.session() as session:  
        print(".", end =" ")
        res = session.run(query)
        for rec in res:
            if(rec["analysisCount"] > 20):
                print(rec["analysisCount"], rec["c.id"], rec["c.name"], rec["b.id"], rec["b.name"])
                results.append((rec["analysisCount"], rec["c.id"], rec["c.name"], rec["b.id"], rec["b.name"]))
            
print(results)



8 11
88 
        MATCH (c:CellType)<-[:CELL_TYPE_PUTATIVE]-(n:Analysis)-[:DATA_TYPE]->(:Quantitation)-[:REGION_RECORD]->(:RegionRecord)-[:PRIMARY_REGION]->(b:BrainRegion)
        WHERE c.id ="8" AND b.id = "1"
        return COUNT(DISTINCT n.id) as analysisCount, c.id, c.name, b.id, b.name
        
        
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 72 14 Tyrosine hydroxylase expressing 10 Pars compacta
. . . . . 25 14 Tyrosine hydroxylase expressing 26 Substantia nigra
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 37 42 Neuron 1 Caudoputamen
. 29 42 Neuron 2 Caudoputamen
. . . . 27 42 Neuron 10 Pars compacta
. . . . . [(72, '14', 'Tyrosine hydroxylase expressing', '10', 'Pars compacta'), (25, '14', 'Tyrosine hydroxylase expressing', '26', 'Substantia nigra'), (37, '42', 'Neuron', '1', 'Caudoputamen'), (29, '42', 'Neuron', '2', 'Caudoputamen'), (27, '42', 'Neuron', '10', 'Pars compacta')]


In [None]:
results = [
    (72, '14', 'Tyrosine hydroxylase expressing', '10', 'Pars compacta'), 
    (25, '14', 'Tyrosine hydroxylase expressing', '26', 'Substantia nigra'), 
    (37, '42', 'Neuron', '1', 'Caudoputamen'), 
    (29, '42', 'Neuron', '2', 'Caudoputamen'), 
    (27, '42', 'Neuron', '10', 'Pars compacta')
]

analyses_groups = []

with driver.session() as session:    
    for res in results:
        _, cellId, _, regId, _ = res
        ## I'M Making these into subgraphs in neo3j directly instead and perform node similarity, as it's only really two..
        analyses_query = """
        MATCH (a:Analysis)-->(r:Reporter)
        MATCH (a)-->(s:Strain)
        MATCH (a)-->(v:VisualizationProtocol)
        MATCH (a)-->(soft:Software)
        MATCH (a)-->()-->(m:Microscope)
        MATCH (a)-[:PERFUSION]->(perf:Solution)
        MATCH (a)-[:ANESTHETIC]->(ane:Solution)
        WHERE (a)-->(:CellType {id: "%s"}) AND (a)-->(:BrainRegion {id: "%s"})
        RETURN a.id, a.name, s.name, r.name
        """ % (cellId, regId)
        neo_res = session.run(query)
        ana = []
        for rec in neo_res:
            ana.push((rec["a.id"], rec[a.name])
        analyses_groups.push(ana)
                     

for group in analyses_groups:
    
    




In [None]:
## Graph for networkx
from neo4j import GraphDatabase, basic_auth
from dotenv import load_dotenv
import os

load_dotenv()

neo4jUser = os.getenv("NEO4J_USER")
neo4jPwd = os.getenv("NEO4J_PASSWORD")
neo4jUrl = os.getenv("NEO4j_BOLT")

driver = GraphDatabase.driver(neo4jUrl,auth=basic_auth(neo4jUser, neo4jPwd))

  ### MATCH All these for part 2 (looking at method differences)
with driver.session() as session:   
    session.run("""
        MATCH (n:Analysis)-->(i:ReporterIncubation)-->(r:Reporter)
        MERGE (n)-[:ANA_DATA:USED {weight: 1, type: i.Order}]->(r)
    """)
  
    session.run("""
        MATCH (n:Analysis)-->(:DataType)-->(:RegionRecord)-[:PRIMARY_REGION]->(b:BrainRegion)
        MERGE (n)-[:ANA_DATA:OBSERVED_REGION {weight: 1, type: "primary"}]->(b)
    """)
    session.run("""
        MATCH (n:Analysis)-->(:DataType)-->(:RegionRecord)-[:SECONDARY_REGION]->(b:BrainRegion)
        MERGE (n)-[:ANA_DATA:OBSERVED_REGION {weight: 1, type: "secondary"}]->(b)
    """)
    session.run("""
        MATCH (n:Analysis)-->(:DataType)-->(r:CellularRegion)
        MERGE (n)-[:ANA_DATA:CELLULAR_REGION {weight: 1}]->(r)
    """)
    session.run("""
        MATCH (n:Analysis)-->(:Experiment)-[:ANAESTHETIC]->(r:Solution)
        MERGE (n)-[:ANA_DATA:ANESTHETIC]->(r)
    """)
    session.run("""
        MATCH (n:Analysis)-->(:Experiment)-[:PERFUSION_FIX_MEDIUM]->(r:Solution)
        MERGE (n)-[:ANA_DATA:PERFUSION]->(r)
    """)
    session.run("""
        MATCH (n:Analysis)-->(:Experiment)-->(:Specimen)-->(s:Specie)
        MERGE (n)-[:ANA_DATA]->(s)
    """)
    session.run("""
        MATCH (n:Analysis)-->(:DataType)-->(s:Software)
        MERGE (n)-[:ANA_DATA]->(s)
    """)
    session.run("""
        MATCH (n:Analysis)-->(:DataType)-->(s:RegionZone)
        MERGE (n)-[:ANA_DATA]->(s)
    """)