## Compare the partitioning of the cell-type latent factors

The latent factors are compared using their cell-type and model-type.

In [None]:
# print the current date and time by running the shell `date` command
!date

#### import libraries

In [None]:
from pandas import read_csv, DataFrame
from json import load as json_load

#### set notebook variables

In [None]:
# parameters
project = 'aging_phase2'

# directories: results and figures both sit directly under the working directory
wrk_dir = '/labshare/raph/datasets/adrd_neuro/brain_aging/phase2'
results_dir = wrk_dir + '/results'
figures_dir = wrk_dir + '/figures'

# in files: latent factor age association results, read with read_csv
assoc_file = '/'.join((results_dir, f'{project}.latent.age_glm.csv'))

# out files: path reserved for the partition comparison heatmap
heatmap_figure = '/'.join((
    figures_dir,
    f'{project}.latent.partitioned_factors_compare.heatmap.png',
))

# constants and variables
DEBUG = True  # when True, cells display extra diagnostic output
ALPHA = 0.05  # cutoff compared against the fdr_bh column

### load input data

#### load the latent factor age associations

In [None]:
# read the age association results; the first CSV column becomes the index
age_glm_df = read_csv(assoc_file, index_col=0)
print(f'shape of age_glm_df is {age_glm_df.shape}')
# give every row a '<cell_type>:<feature>' key
age_glm_df['key_name'] = age_glm_df['cell_type'] + ':' + age_glm_df['feature']
if DEBUG:
    # preview four random rows and report the number of distinct keys
    display(age_glm_df.sample(n=4))
    n_keys = age_glm_df['key_name'].nunique()
    print(f'age_glm_df has {n_keys} keys')

#### subset the latent factors to only those with a statistically significant age association

In [None]:
# keep only the rows whose `fdr_bh` value is at or below ALPHA
age_glm_df = age_glm_df.loc[age_glm_df.fdr_bh <= ALPHA]
print(f'shape of age_glm_df is {age_glm_df.shape}')
if DEBUG:
    # sample() raises ValueError when asked for more rows than exist, which can
    # happen after filtering, so cap the preview at the number of rows left
    display(age_glm_df.sample(min(4, len(age_glm_df))))

In [None]:
if DEBUG:
    # tally the rows per model type within each cell type
    counts_by_cell_type = age_glm_df.groupby('cell_type')['model_type'].value_counts()
    display(counts_by_cell_type)

### load the partitioned latent factors and fill in an adjacency matrix

In [None]:
# one row and one column per distinct cell type, every count starting at zero
names = age_glm_df['cell_type'].unique()
adjacency_df = DataFrame(data=0, index=names, columns=names)
print(f'shape of empty adjacency_df is {adjacency_df.shape}')
if DEBUG:
    display(adjacency_df.head())

In [None]:
# number of distinct cell types, i.e. the adjacency matrix dimension;
# `cell_types` is only assigned inside the later partition loop, so referencing
# it here fails with NameError on a fresh top-to-bottom run
len(names)

In [None]:
from itertools import combinations

# For every model type, load its partitioned latent factor groups and count how
# often cell types fall into the same group.
for model_type in age_glm_df.model_type.unique():
    print(f'### {model_type} ###')
    communities_file = f'{figures_dir}/{project}.latents.{model_type}.partitioned_factors.json'
    # only the JSON parse needs the file handle; release it before processing
    with open(communities_file, 'r') as in_file:
        partitioned_factors = json_load(in_file)
    for group, factors in partitioned_factors.items():
        print(group, factors)
        # the text before the first ':' of each factor is taken as its cell type;
        # duplicates are kept so repeated cell types in a group add to the diagonal
        cell_types = [element.split(':')[0] for element in factors]
        if not cell_types:
            # an empty group contributes nothing (and has no first element to pair)
            continue
        if len(cell_types) > 1:
            pairs = list(combinations(cell_types, 2))
        else:
            # single element identity pair
            pairs = [(cell_types[0], cell_types[0])]
        print(pairs)
        # add the pairs count to the adjacency matrix; co-occurrence has no
        # direction, so mirror off-diagonal counts to keep the matrix symmetric
        # regardless of the order in which factors are listed in the file
        for type1, type2 in pairs:
            adjacency_df.at[type1, type2] += 1
            if type1 != type2:
                adjacency_df.at[type2, type1] += 1

In [None]:
# the matrix has been populated by this point, so do not report it as empty
print(f'shape of filled adjacency_df is {adjacency_df.shape}')
if DEBUG:
    display(adjacency_df.head())

In [None]:
from seaborn import clustermap
from matplotlib import pyplot as plt
from matplotlib.pyplot import rc_context

# draw a clustered heatmap of the cell-type adjacency counts
cluster_grid = clustermap(adjacency_df, cmap='Purples', linecolor='lightgrey', linewidths=0.01)
# heatmap_figure is declared as this notebook's output file but was never
# written; save the figure there before displaying it
cluster_grid.savefig(heatmap_figure)
plt.show()