In [4]:
from neo4j import GraphDatabase, basic_auth
from dotenv import load_dotenv
import os

# Load variables from a local .env file into the process environment.
load_dotenv()

# Connection settings are taken from the environment rather than hard-coded.
# A variable that is not set yields None here.
neo4jUser = os.environ.get("NEO4J_USER")
neo4jPwd = os.environ.get("NEO4J_PASSWORD_DS")
# NOTE(review): this key is spelled with a lowercase "j" ("NEO4j_..."), unlike
# the two keys above -- confirm it matches the name used in the .env file.
neo4jUrl = os.environ.get("NEO4j_BOLT_DS")

# One shared driver for the notebook; the cells below open sessions from it.
driver = GraphDatabase.driver(
    neo4jUrl,
    auth=basic_auth(neo4jUser, neo4jPwd),
)

In [5]:
## Prepare Analysis Similarity Algorithm

# Connect Analysis to quantitation, distribution and morphology properties:
# every Analysis gets a direct ANALYSIS_REL relationship (strength 1) to each
# Reporter, BrainRegion and CellularRegion it reaches through the intermediate
# nodes matched below. MERGE only creates a relationship that does not exist yet.

with driver.session() as session:
    for cypher in (
        # Analysis -> ReporterIncubation -> Reporter
        """
        MATCH (n:Analysis)-->(:ReporterIncubation)-->(r:Reporter)
        MERGE (n)-[:ANALYSIS_REL {strength: 1}]->(r)
    """,
        # Analysis -> DataType -> RegionRecord -> BrainRegion
        """
        MATCH (n:Analysis)-->(:DataType)-->(:RegionRecord)-->(b:BrainRegion)
        MERGE (n)-[:ANALYSIS_REL {strength: 1}]->(b)
    """,
        # Analysis -> DataType -> CellularRegion
        """
        MATCH (n:Analysis)-->(:DataType)-->(r:CellularRegion)
        MERGE (n)-[:ANALYSIS_REL {strength: 1}]->(r)
    """,
    ):
        session.run(cypher)


In [None]:
  ### Not match on these yet
  # NOTE(review): this cell has no execution count, i.e. it has not been run.
  # Its statements are indented and use `session` as if they were inside a
  # `with driver.session() as session:` block, which this cell does not open;
  # it cannot run on its own as written.
  # Unlike the executed ANALYSIS_REL merges, the relationships created here
  # carry no `strength` property.
    
    # Analysis -> Experiment -[:ANAESTHETIC]-> Solution
    session.run("""
        MATCH (n:Analysis)-->(:Experiment)-[:ANAESTHETIC]->(r:Solution)
        MERGE (n)-[:ANALYSIS_REL]->(r)
    """)
    # Analysis -> Experiment -[:PERFUSION_FIX_MEDIUM]-> Solution
    session.run("""
        MATCH (n:Analysis)-->(:Experiment)-[:PERFUSION_FIX_MEDIUM]->(r:Solution)
        MERGE (n)-[:ANALYSIS_REL]->(r)
    """)
    # Analysis -> Experiment -> Specimen -> Specie
    session.run("""
        MATCH (n:Analysis)-->(:Experiment)-->(:Specimen)-->(s:Specie)
        MERGE (n)-[:ANALYSIS_REL]->(s)
    """)
    # Analysis -> DataType -> Software
    session.run("""
        MATCH (n:Analysis)-->(:DataType)-->(s:Software)
        MERGE (n)-[:ANALYSIS_REL]->(s)
    """)
    # Analysis -> DataType -> RegionZone
    session.run("""
        MATCH (n:Analysis)-->(:DataType)-->(s:RegionZone)
        MERGE (n)-[:ANALYSIS_REL]->(s)
    """)

In [15]:
## Get "real" analysis similarity
# This is ONLY to see if we find anything interesting.
# The "real" similarities will be set on the website
# the "_all.csv" are based on Reporter, CellType and BrainRegion only
import pandas as pd

df = pd.read_csv("Data/csvs/graph/analyses_similarity_mouse_all.csv")

# Keep a pair when:
#   * id1 is present,
#   * the two analysis names differ in their first two "_"-separated parts,
#   * the similarity value is above 0.5.
# A pair is identified by (id1, id2, similarity); only its first occurrence is
# kept. The previous version removed items from the list while iterating over
# it (and every row matched itself), which dropped unique rows as well.
new_table = []
seen_pairs = set()
for index, row in df.iterrows():
    if pd.isnull(row["id1"]):
        continue

    name1 = row["analysis1"].split("_")[:2]
    name2 = row["analysis2"].split("_")[:2]
    if name1 == name2:
        continue
    # Convert the value first, then compare (the parenthesis used to wrap the
    # whole comparison, so float() was applied to a boolean).
    if not float(row["similarity"]) > 0.5:
        continue

    pair_key = (row["id1"], row["id2"], row["similarity"])
    if pair_key in seen_pairs:
        continue
    seen_pairs.add(pair_key)
    new_table.append([row["id1"], row["id2"], row["analysis1"], row["analysis2"], row["similarity"]])

new_df = pd.DataFrame(new_table, columns=["id1", "id2", "analysis1", "analysis2", "similarity"])
new_df.to_csv("Data/csvs/graph/reffined_analyses_similarity_mouse_all.csv")


In [2]:
## Is there something in the methods used that makes a difference in the results?
#
# 1. Collect the ids of BrainRegions and CellTypes that pass a size pre-filter.
# 2. For every (cell type, brain region) combination, count the distinct
#    analyses connected to both and keep the combinations with a count > 20.
#
# NOTE(review): the pre-filters use ">= 20" while the final check uses "> 20";
# confirm the difference is intended.

cell_ids = []
region_ids = []

with driver.session() as session:    
    res = session.run("""
        Match (b:BrainRegion)
        WHERE size((:Analysis)-[:DATA_TYPE]->(:Quantitation)-[:REGION_RECORD]->(:RegionRecord)-[:PRIMARY_REGION]->(b)) >=20
        return b.id
    
    """)
    region_ids = [record["b.id"] for record in res]
    res = session.run("""
        Match (c:CellType)
        WHERE size((c)<-[:CELL_TYPE_PUTATIVE]-(:Analysis)) >=20
        return c.id
    """)
    cell_ids = [record["c.id"] for record in res]

print(len(cell_ids), len(region_ids))

# One parameterised query instead of one interpolated string per combination:
# the ids are sent as query parameters rather than pasted into the Cypher text.
pair_query = """
        MATCH (c:CellType)<-[:CELL_TYPE_PUTATIVE]-(n:Analysis)-[:DATA_TYPE]->(:Quantitation)-[:REGION_RECORD]->(:RegionRecord)-[:PRIMARY_REGION]->(b:BrainRegion)
        WHERE c.id = $cell_id AND b.id = $region_id
        return COUNT(DISTINCT n.id) as analysisCount, c.id, c.name, b.id, b.name
        """

# `queries` holds one parameter set per (cell type, brain region) combination.
# str() keeps the comparison identical to the former "%s" interpolation.
queries = [
    {"cell_id": str(cell_id), "region_id": str(region_id)}
    for cell_id in cell_ids
    for region_id in region_ids
]

# Printing the template avoids indexing queries[0], which failed when either
# id list was empty.
print(len(queries), pair_query)

results = []
# A single session is reused for all combinations instead of opening one each.
with driver.session() as session:
    for params in queries:
        print(".", end =" ")
        res = session.run(pair_query, params)
        for rec in res:
            if rec["analysisCount"] > 20:
                hit = (rec["analysisCount"], rec["c.id"], rec["c.name"], rec["b.id"], rec["b.name"])
                print(*hit)
                results.append(hit)

print(results)



8 11
88 
        MATCH (c:CellType)<-[:CELL_TYPE_PUTATIVE]-(n:Analysis)-[:DATA_TYPE]->(:Quantitation)-[:REGION_RECORD]->(:RegionRecord)-[:PRIMARY_REGION]->(b:BrainRegion)
        WHERE c.id ="8" AND b.id = "1"
        return COUNT(DISTINCT n.id) as analysisCount, c.id, c.name, b.id, b.name
        
        
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 72 14 Tyrosine hydroxylase expressing 10 Pars compacta
. . . . . 25 14 Tyrosine hydroxylase expressing 26 Substantia nigra
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 37 42 Neuron 1 Caudoputamen
. 29 42 Neuron 2 Caudoputamen
. . . . 27 42 Neuron 10 Pars compacta
. . . . . [(72, '14', 'Tyrosine hydroxylase expressing', '10', 'Pars compacta'), (25, '14', 'Tyrosine hydroxylase expressing', '26', 'Substantia nigra'), (37, '42', 'Neuron', '1', 'Caudoputamen'), (29, '42', 'Neuron', '2', 'Caudoputamen'), (27, '42', 'Neuron', '10', 'Pars compacta')]


In [None]:
# Hard-coded (analysisCount, c.id, c.name, b.id, b.name) tuples, identical to
# the list printed by the cell above -- presumably kept so that following cells
# can be run without repeating the Neo4j queries.
results = [
    # cell type 14
    (72, "14", "Tyrosine hydroxylase expressing", "10", "Pars compacta"),
    (25, "14", "Tyrosine hydroxylase expressing", "26", "Substantia nigra"),
    # cell type 42
    (37, "42", "Neuron", "1", "Caudoputamen"),
    (29, "42", "Neuron", "2", "Caudoputamen"),
    (27, "42", "Neuron", "10", "Pars compacta"),
]


