In [1]:
import numpy as np
import matplotlib.pyplot as plt

from zfa.core.default_dirs import INTER_ANIMAL_RESULTS_DIR ,INTER_ANIMAL_RESULTS_DIR_2

import pickle


In [2]:
INTER_ANIMAL_RESULTS_DIR_2

'/om/weka/yanglab/leokoz8/zfa/inter_animal_results_81723/'

In [4]:
import re

def parse_string(s):
    # Use regex to extract all key=value patterns
    matches = re.findall(r'([a-zA-Z0-9_]+)=([a-zA-Z0-9]+)', s)

    # Convert matches into a dictionary
    result = dict(matches)

    # Convert values that are digits into integers
    for key, value in result.items():
        if value.isdigit():
            result[key] = int(value)

    # Adjust keys to remove undesired underscores and adjust 'jobID' to 'job_ID'
    keys_to_adjust = ['_target_cell_type', '_jobID', '_source_animal', '_target_animal']
    adjustments = {
        '_target_cell_type': 'target_cell_type',
        '_jobID': 'job_ID',
        '_source_animal': 'source_animal',
        '_target_animal': 'target_animal'
    }
    for old_key, new_key in adjustments.items():
        if old_key in result:
            result[new_key] = result.pop(old_key)

    return result


def contains_substring(s,sub_string = "source_cell_type"):
    return sub_string in s


In [5]:
# Test
s = 'source_cell_type=neural_target_cell_type=neural_jobID=10_source_animal=1_target_animal=0_inter-animal-consistency.pickle'
split_filename = parse_string(s)
print(split_filename)

{'source_cell_type': 'neural', 'target_cell_type': 'neural', 'job_ID': 10, 'source_animal': 1, 'target_animal': 0}


In [6]:
import os

def extract_file_paths(directory):
    file_dicts = []

    # Loop through each file in the directory
    for root, dirs, files in os.walk(directory):
        for file in files:
            # Construct the full path of the file
            #full_path = os.path.join(root, file)
            if contains_substring(file,sub_string = "source_cell_type"):
                file_dict = parse_string(file)
                file_dict['filename'] = file
                file_dicts.append(file_dict)


    return file_dicts

# Test the function
directory_path = INTER_ANIMAL_RESULTS_DIR_2  # replace with your directory path
file_dicts = extract_file_paths(directory_path)
for dictionary in file_dicts:
    print(dictionary)


{'source_cell_type': 'neural', 'target_cell_type': 'neural', 'job_ID': 7, 'source_animal': 1, 'target_animal': 0, 'filename': 'source_cell_type=neural_target_cell_type=neural_jobID=7_source_animal=1_target_animal=0_inter-animal-consistency.pickle'}
{'source_cell_type': 'neural', 'target_cell_type': 'neural', 'job_ID': 20, 'source_animal': 0, 'target_animal': 1, 'filename': 'source_cell_type=neural_target_cell_type=neural_jobID=20_source_animal=0_target_animal=1_inter-animal-consistency.pickle'}
{'source_cell_type': 'neural', 'target_cell_type': 'glial', 'job_ID': 25, 'source_animal': 1, 'target_animal': 0, 'filename': 'source_cell_type=neural_target_cell_type=glial_jobID=25_source_animal=1_target_animal=0_inter-animal-consistency.pickle'}
{'source_cell_type': 'glial', 'target_cell_type': 'neural', 'job_ID': 22, 'source_animal': 0, 'target_animal': 1, 'filename': 'source_cell_type=glial_target_cell_type=neural_jobID=22_source_animal=0_target_animal=1_inter-animal-consistency.pickle'}
{'

In [8]:
def pickle_load(file_path):
    with open(file_path, 'rb') as handle:
        data = pickle.load(handle)
    return data

def recursive_items(dictionary):
    for key, value in dictionary.items():
        if type(value) is dict:
            yield (key, value)
            yield from recursive_items(value)
        else:
            yield (key, value)

def get_keys_for_data(data):
    keys_for_data = []
    for key, value in recursive_items(data):
        keys_for_data.append(key)
    return keys_for_data

from itertools import groupby

def group_dictionaries(dicts):
    # First, sort the dictionaries based on the key fields
    key_fields = ["source_cell_type", "target_cell_type", "source_animal", "target_animal"]
    sorted_dicts = sorted(dicts, key=lambda d: tuple(d[k] for k in key_fields))

    # Then, group the dictionaries by the key fields
    grouped_dicts = {}
    for key, group in groupby(sorted_dicts, key=lambda d: tuple(d[k] for k in key_fields)):
        grouped_dicts[key] = list(group)

    return grouped_dicts




In [9]:
from tqdm import tqdm

grouped_dicts = group_dictionaries(file_dicts)

condition_and_consistencies = []
key_list = list(grouped_dicts.keys())
for key in tqdm(key_list):        
    consistency = []
    for job_dict in grouped_dicts[key]:
        full_file_path = INTER_ANIMAL_RESULTS_DIR_2 + job_dict['filename']
        data = pickle_load(full_file_path)
        kfd = get_keys_for_data(data)
        median_consistency = np.nanmedian(data[kfd[0]][kfd[1]][kfd[2]]['test']['r_xy_n_sb'])
        consistency.append(median_consistency)
    condition_and_consistencies.append(np.nanmedian(np.asarray(consistency)))
    condition_and_consistencies.append(key)


100%|██████████| 10/10 [00:51<00:00,  5.13s/it]


In [10]:
condition_and_consistencies

[0.9990432883946876,
 ('glial', 'glial', 0, 1),
 1.0021058719604938,
 ('glial', 'glial', 1, 0),
 0.9848951403927206,
 ('glial', 'neural', 0, 1),
 0.9890895870124468,
 ('glial', 'neural', 1, 0),
 1.0110662276828077,
 ('neural', 'glial', 0, 0),
 1.0000538421544674,
 ('neural', 'glial', 0, 1),
 1.0042896208409315,
 ('neural', 'glial', 1, 0),
 1.000461866025634,
 ('neural', 'glial', 1, 1),
 0.9937596129687196,
 ('neural', 'neural', 0, 1),
 1.0004515515451449,
 ('neural', 'neural', 1, 0)]

In [19]:
INTER_ANIMAL_RESULTS_DIR_2

'/om/weka/yanglab/leokoz8/zfa/inter_animal_results_81723/'

In [18]:
condition_and_consistencies

[1.0042896208409315,
 ('neural', 'glial', 1, 0),
 0.9937586332735941,
 ('neural', 'neural', 0, 1)]

In [32]:
data = pickle_load(full_file_path)

In [8]:
condition_and_consistencies

[0.9989841770879964,
 ('glial', 'glial', 0, 1),
 0.9960665666480155,
 ('glial', 'glial', 1, 0),
 0.9888643563077528,
 ('glial', 'neural', 0, 1),
 0.9898118954874808,
 ('glial', 'neural', 1, 0),
 0.059522575458537466,
 ('glial', 'neural', 1, 1),
 0.9993677293525316,
 ('neural', 'glial', 0, 0),
 1.0000538421544674,
 ('neural', 'glial', 0, 1),
 0.9803588899601834,
 ('neural', 'glial', 1, 0),
 0.9939857433754704,
 ('neural', 'glial', 1, 1),
 0.9898003590319604,
 ('neural', 'neural', 0, 1),
 0.989439909055687,
 ('neural', 'neural', 1, 0)]

In [52]:
keys_for_data

['sub-20170228-3_ses-20170228T165730_ophys',
 (('map_type', 'percentile'),),
 '(90000, 95000)',
 'train',
 'test']

0.9930127579129043

In [87]:
full_file_path = INTER_ANIMAL_RESULTS_DIR + file_dicts[1]['filename']

In [86]:
file_dicts[1]

{'source_cell_type': 'neural',
 'target_cell_type': 'neural',
 'job_ID': 13,
 'source_animal': 0,
 'target_animal': 1,
 'filename': 'source_cell_type=neural_target_cell_type=neural_jobID=13_source_animal=0_target_animal=1_inter-animal-consistency.pickle'}

In [29]:
with open(INTER_ANIMAL_RESULTS_DIR + file_dicts[0]['filename'], 'rb') as handle:
    neural_to_glial = pickle.load(handle)

with open(INTER_ANIMAL_RESULTS_DIR + file_dicts[1]['filename'], 'rb') as handle:
    neural_to_neural = pickle.load(handle)



In [106]:
neural_to_glial_test_scores = neural_to_glial['sub-20170228-4_ses-20170228T185002_ophys'][(('map_type', 'percentile'),)]['(30000, 35000)']['test']['r_xy_n_sb']
neural_to_glial_test_scores_median = np.nanmedian(neural_to_glial_test_scores)


neural_to_neural_test_scores = neural_to_neural['sub-20170228-4_ses-20170228T185002_ophys'][(('map_type', 'percentile'),)]['(65000, 70000)']['test']['r_xy_n_sb']
neural_to_neural_test_scores_median = np.nanmedian(neural_to_neural_test_scores)


print(f'Neural to glial median test score is  {neural_to_glial_test_scores_median} and neural to neural median test score is {neural_to_neural_test_scores_median}')

Neural to glial median test score is  0.992942680128476 and neural to neural median test score is 0.9903531531805126


In [107]:
neural_to_glial['sub-20170228-4_ses-20170228T185002_ophys'][(('map_type', 'percentile'),)]['(30000, 35000)']['test'].keys()

dict_keys(['r_xy_n_sb', 'r_xx', 'r_xx_sb', 'r_yy', 'r_yy_sb', 'r_xy', 'denom_sb'])