In [1]:
import os

import numpy as np

from utils.graph_utils import Neo4jConnection


def fetch_imaging_sessions_by_mouse(session_count_min=1, contains_tbd=False):
    """Fetch one row per mouse with its imaging-session count and filepaths.

    Args:
        session_count_min: Minimum number of distinct ImagingSession nodes a
            mouse must have to be included.
        contains_tbd: When False, mice whose ``uid`` contains 'TBD' or 'nan'
            are excluded; when True, only those mice are kept.

    Returns:
        Whatever ``neocon.query_to_dataframe`` returns for the query; the
        query yields the columns ``uid``, ``session_count`` and ``filepaths``,
        ordered by ``session_count`` descending.
    """
    # An empty prefix keeps the CONTAINS test as written; "NOT" inverts it.
    negation = "" if contains_tbd else "NOT"
    # Note: session_count counts DISTINCT filepaths, while filepaths is
    # collected without DISTINCT, so the two can disagree for a mouse.
    cypher = f"""
        MATCH (m:Mouse)<-[i:IMAGED]-(ims:ImagingSession)
        WITH m, COUNT(DISTINCT ims) AS session_count
        WHERE session_count >= {session_count_min}
        AND {negation} ( m.uid CONTAINS 'TBD' or m.uid CONTAINS 'nan' ) 
        MATCH (m)<-[i:IMAGED]-(ims)  // Re-match after filtering
        RETURN m.uid AS uid, COUNT(DISTINCT ims.filepath) AS session_count, COLLECT(ims.filepath) AS filepaths
        ORDER BY session_count DESC 
    """
    return neocon.query_to_dataframe(cypher)

def fetch_imaging_session_count_across_mice(session_count_min=1, contains_tbd=False, tbd_filter=True):
    """Count distinct imaging-session filepaths across the selected mice.

    Args:
        session_count_min: Minimum number of distinct ImagingSession nodes a
            mouse must have for its sessions to be counted.
        contains_tbd: Only used when ``tbd_filter`` is True. False excludes
            mice whose ``uid`` contains 'TBD' or 'nan'; True keeps only them.
        tbd_filter: When False, no uid filter is applied and ``contains_tbd``
            is ignored.

    Returns:
        Whatever ``neocon.query_to_value`` returns for the query, whose single
        returned column is ``COUNT(DISTINCT ims.filepath)``.
    """
    if tbd_filter:
        # An empty prefix keeps the CONTAINS test as written; "NOT" inverts it.
        negation = "" if contains_tbd else "NOT"
        uid_clause = f"        AND {negation} ( m.uid CONTAINS 'TBD' or m.uid CONTAINS 'nan' )"
    else:
        uid_clause = ""
    cypher = f"""
        MATCH (m:Mouse)<-[i:IMAGED]-(ims:ImagingSession)
        WITH m, COUNT(DISTINCT ims) AS session_count
        WHERE session_count >= {session_count_min}
        {uid_clause}
        MATCH (m)<-[i:IMAGED]-(ims)  // Re-match after filtering
        RETURN COUNT(DISTINCT ims.filepath) AS session_count
    """
    return neocon.query_to_value(cypher)

def push_dataframe_to_label_matches(df,
                                    label_col='Label',
                                    property_cols=None,
                                    match_cols=None):
    """Forward a dataframe to ``neocon.push_dataframe``.

    Args:
        df: The dataframe to push.
        label_col: Name of the column passed on as ``label_col``.
        property_cols: Column names passed on as ``property_cols``. ``None``
            (the default) is replaced by a fresh empty list.
        match_cols: Column names passed on as ``match_cols``. ``None`` (the
            default) is replaced by a fresh empty list.

    Returns:
        Whatever ``neocon.push_dataframe`` returns.
    """
    # A fresh list per call: a literal [] default would be one shared object,
    # so any mutation by the callee would leak into every later call.
    return neocon.push_dataframe(
        df=df,
        label_col=label_col,
        property_cols=[] if property_cols is None else property_cols,
        match_cols=[] if match_cols is None else match_cols,
    )


# Connection settings for the Neo4j instance. Each value can be overridden
# through an environment variable so credentials do not have to be edited
# into the notebook; the fallbacks are the original hard-coded values.
# NOTE(review): the fallback password is still stored here in plain text —
# set NEO4J_PASSWORD (and change this password) for any shared database.
db_config = {
    'uri': os.environ.get('NEO4J_URI', 'bolt://localhost:7689'),
    'user': os.environ.get('NEO4J_USER', 'neo4j'),
    'password': os.environ.get('NEO4J_PASSWORD', 'neo4jiscool'),
    'database': os.environ.get('NEO4J_DATABASE', 'neo4j'),
}

# Module-level connection object used by every helper in this notebook.
neocon = Neo4jConnection(**db_config)
# Last expression of the cell, so its result is displayed as the cell output.
neocon.test_connection()

Connection successful!


True

In [2]:
# Nested summary of the graph contents:
#   d['Mouse']['ImagingSessions'] -> per-mouse session tables
#   d['ImagingSession']           -> session counts across mice
d = {}

d['Mouse'] = {}
d['Mouse']['ImagingSessions'] = {}
# 'unique': mice whose uid does not contain 'TBD'/'nan'; 'ambiguity': the rest.
d['Mouse']['ImagingSessions']['unique'] = fetch_imaging_sessions_by_mouse(contains_tbd=False)
d['Mouse']['ImagingSessions']['ambiguity'] = fetch_imaging_sessions_by_mouse(contains_tbd=True)

d['ImagingSession'] = {}
# Total without any uid filter, then the two complementary filtered counts.
d['ImagingSession']['count'] = fetch_imaging_session_count_across_mice(session_count_min=1, tbd_filter=False, contains_tbd=True)
d['ImagingSession']['count_w_unique_mouse_uid'] = fetch_imaging_session_count_across_mice(session_count_min=1, tbd_filter=True, contains_tbd=False)
d['ImagingSession']['count_w_ambiguous_mouse_uid'] = fetch_imaging_session_count_across_mice(session_count_min=1, tbd_filter=True, contains_tbd=True)

# Consistency check: the two filtered counts should add up to the total.
count_total = d['ImagingSession']['count']
count_combined = (
    d['ImagingSession']['count_w_unique_mouse_uid']
    + d['ImagingSession']['count_w_ambiguous_mouse_uid']
)
if count_total != count_combined:
    print("WARNING: Counts not matching!")
    print(f"            TOTAL: {count_total:5d}")
    # Report the combined value here (the total was previously printed twice).
    print(f"         COMBINED: {count_combined:5d}")
else:
    print("Success! (Counts match)")
    for _k, _v in d['ImagingSession'].items():
        print(f"{_k:>30s}: {_v:5d}")



Success! (Counts match)
                         count:  2765
      count_w_unique_mouse_uid:  1818
   count_w_ambiguous_mouse_uid:   947


In [3]:
# Start from the per-mouse table of mice with an unambiguous uid.
mice = d['Mouse']['ImagingSessions']['unique']

# Sanity check: each mouse's filepath list should be as long as its reported
# session count. _filt is True for the rows where the two agree.
_filt = mice['filepaths'].map(len) == mice['session_count']
if (~_filt).any():
    print("WARNING: Session counts do not match found filepaths.")

# Keep only mice with at least three sessions and renumber the rows.
mice = mice.loc[mice['session_count'] >= 3].reset_index(drop=True)

## An example pull, enrich, and then push

In [4]:
def fetch_imaging_session_by_mouse_and_date(session_count_min=1, contains_tbd=False, tbd_filter=True):
    """Fetch one row per (mouse, imaging session) with the session's date.

    Args:
        session_count_min: Minimum number of distinct ImagingSession nodes a
            mouse must have for its sessions to be returned.
        contains_tbd: Only used when ``tbd_filter`` is True. False excludes
            mice whose ``uid`` contains 'TBD' or 'nan'; True keeps only them.
        tbd_filter: When False, no uid filter is applied and ``contains_tbd``
            is ignored.

    Returns:
        Whatever ``neocon.query_to_dataframe`` returns for the query; the
        query yields the columns ``uid`` (from ``m.uid``), ``date`` (from
        ``ims.Date``) and ``filepath`` (from ``ims.filepath``).
    """
    if tbd_filter:
        # An empty prefix keeps the CONTAINS test as written; "NOT" inverts it.
        negation = "" if contains_tbd else "NOT"
        uid_clause = f"        AND {negation} ( m.uid CONTAINS 'TBD' or m.uid CONTAINS 'nan' )"
    else:
        uid_clause = ""
    cypher = f"""
        MATCH (m:Mouse)<-[i:IMAGED]-(ims:ImagingSession)
        WITH m, COUNT(DISTINCT ims) AS session_count
        WHERE session_count >= {session_count_min}
        {uid_clause}
        MATCH (m)<-[i:IMAGED]-(ims)  // Re-match after filtering
        RETURN m.uid AS uid, ims.Date AS date, ims.filepath AS filepath
    """
    return neocon.query_to_dataframe(cypher)

# Pull one row per (mouse, session) for mice with at least three sessions,
# then attach the columns that describe the measurement being pushed back.
sessions = fetch_imaging_session_by_mouse_and_date(session_count_min=3).assign(**{
    'Label': "Analysis",
    'is': 'ml_model_12345_v2',  # change this per measurement
    'value': lambda frame: np.random.rand(len(frame)),  # placeholder: replace with the values to push back in
    'units': 'probability (0-1)',  # edit this to describe the measurement clearly
})

# Last expression of the cell, so the push result is shown as the cell output.
push_dataframe_to_label_matches(
    sessions,
    label_col='Label',
    property_cols=['uid', 'date', 'is', 'value', 'units'],
    match_cols=['uid', 'date', 'is'],
)

In [5]:
# Add the column that the push_and_link_dataframe call below passes as
# node_match_label; every row gets the value 'ImagingSession'.
sessions['Link_Label'] = 'ImagingSession'

In [8]:
# Push the rows with the same label/property/match columns as before and,
# presumably, link each pushed node to an existing node (label taken from the
# 'Link_Label' column) through an ANALYZED relationship — confirm against
# Neo4jConnection.push_and_link_dataframe.
# NOTE(review): the link targets are matched on 'uid' and 'date', but in the
# fetch query those columns come from m.uid (a Mouse property) and ims.Date.
# Verify that ImagingSession nodes really carry 'uid'/'date' properties;
# otherwise matching on 'filepath' may be what is intended.
neocon.push_and_link_dataframe(
    sessions,
    label_col='Label',
    property_cols=['uid', 'date', 'is', 'value', 'units'],
    match_cols=['uid', 'date', 'is'],
    node_match_label='Link_Label',
    node_match_properties=['uid', 'date'],
    node_match_relationship_type='ANALYZED',
)

NOTE: This cell can be run repeatedly with the same `match_cols` and `property_cols`, following the rules of Cypher.