## Create a network of the latent factor analysis

In [None]:
# shell escape: run the system `date` command so the notebook output records when it was executed
!date

#### import libraries

In [None]:
from pandas import read_csv
from pickle import load as pkl_load
from networkx import Graph, draw, spring_layout, write_gml, write_graphml
import matplotlib.pyplot as plt
from IPython.display import Image
from re import match

#### set notebook variables

In [None]:
# run-time switches
DEBUG = True   # when True, cells display sample rows and extra counts
ALPHA = 0.05   # threshold applied to the fdr_bh column of the association results

# analysis identifiers used to build the file names below
project = 'aging_phase2'
model_type = 'nmf'

# directory layout under the working directory
wrk_dir = '/labshare/raph/datasets/adrd_neuro/brain_aging/phase2'
figures_dir = f'{wrk_dir}/figures'
results_dir = f'{wrk_dir}/results'

# inputs read from the results directory
assoc_file = f'{results_dir}/{project}.latent.age_glm.csv'
metrics_file = f'{results_dir}/{project}.latent.metrics.csv'
loadings_file = f'{results_dir}/{project}.latent.loadings.csv'
loadings_pickle = f'{results_dir}/{project}.latent.loadings.pkl'

# outputs written to the figures directory
image_file = f'{figures_dir}/{project}.latents.{model_type}.png'
gml_file = f'{figures_dir}/{project}.latents.{model_type}.gml'
graphml_file = f'{figures_dir}/{project}.latents.{model_type}.graphml'

### load input data

#### load the latent factor age associations

In [None]:
# read the age association results; the first CSV column becomes the index
age_glm_df = read_csv(assoc_file, index_col=0)
print(f'shape of age_glm_df is {age_glm_df.shape}')
# key_name joins cell_type and feature with ':'; later cells use it as the
# graph node name and as the lookup key into the feature loadings
age_glm_df['key_name'] = age_glm_df.cell_type + ':' + age_glm_df.feature
if DEBUG:
    # clamp the sample size: sample(4) raises ValueError on a table with fewer than 4 rows
    display(age_glm_df.sample(min(4, len(age_glm_df))))
    print(f'age_glm_df has {age_glm_df.key_name.nunique()} keys')

#### load the latent factor's feature loadings

In [None]:
# deserialize the feature loadings container from its pickle (binary mode)
with open(loadings_pickle, mode='rb') as handle:
    feature_loadings = pkl_load(handle)

# report how many entries were loaded
print(f'loadings_pickle has {len(feature_loadings)} entries')

### subset the latent factors to only those with a statistically significant age association

In [None]:
# keep only rows for the selected model type whose BH-FDR value is at or below ALPHA
age_glm_df = age_glm_df.loc[(age_glm_df.fdr_bh <= ALPHA) & (age_glm_df.model_type == model_type)]
print(f'shape of age_glm_df is {age_glm_df.shape}')
if DEBUG:
    # the filter may leave fewer than 4 rows; sample(4) would then raise
    # ValueError, so clamp the sample size to the number of remaining rows
    display(age_glm_df.sample(min(4, len(age_glm_df))))

### convert the latent factor age associations into a weighted graph

In [None]:
age_graph = Graph()

# one node per unique latent factor key
# age_graph.add_node('Age')
age_graph.add_nodes_from(age_glm_df.key_name.unique())

# connect 'Age' to each factor, weighted by the absolute z value of the association;
# the 'Age' node itself is created implicitly by the first edge
age_graph.add_weighted_edges_from(
    ('Age', record.key_name, abs(record.z)) for record in age_glm_df.itertuples()
)

# report node and edge counts
print(age_graph.number_of_nodes())
print(age_graph.number_of_edges())

### add the latent factor feature loadings to the graph as weighted edges

In [None]:
# regex pattern matching the ATAC peak naming format (strings such as 'chr1:100-200')
pattern = r'^chr.*:.*-.*$'
for factor in age_glm_df.key_name.unique():
    loading = feature_loadings.get(factor)
    # .get() returns None for a factor with no stored loadings; report and skip
    # it instead of failing with an AttributeError on None.items()
    if loading is None:
        print(f'no feature loadings found for {factor}, skipping')
        continue
    for feature, weight in loading.items():
        # only add genes, not ATAC peaks
        if match(pattern, feature):
            continue
        # add_edge creates the feature node when it is not in the graph yet;
        # the edge weight is the absolute loading
        age_graph.add_edge(factor, feature, weight=abs(weight))

# report node and edge counts after adding the loadings
print(age_graph.number_of_nodes())
print(age_graph.number_of_edges())

### save the graph

In [None]:
# persist the graph in both formats: GML first, then GraphML
for writer, out_path in ((write_gml, gml_file), (write_graphml, graphml_file)):
    writer(age_graph, out_path)

### draw the graph

In [None]:
%%time
# pos = spring_layout(age_graph)
# kamada_kawai_layout is not among the names imported in the notebook's import cell,
# so it is imported here together with the other layout functions
from networkx import fruchterman_reingold_layout, kamada_kawai_layout, shell_layout, random_layout
# compute the node positions once; the drawing cell reuses `pos`
# NOTE(review): kamada_kawai_layout reads the 'weight' edge attribute by default and
# appears to use it as a path distance, while the edges here carry abs(z) / abs(loading);
# confirm that placing strongly weighted pairs farther apart is intended
pos = kamada_kawai_layout(age_graph)
# draw(age_graph, pos, node_color='purple', edge_color='gray', with_labels=True)
# plt.savefig(image_file, format='PNG', bbox_inches='tight')
# # plt.show()

In [None]:
%%time
draw(age_graph, pos, node_color='purple', edge_color='gray', with_labels=False)

In [None]:
%%time
plt.savefig(image_file, format='PNG', bbox_inches='tight')

In [None]:
# display the PNG read back from image_file, showing what was actually saved
Image(filename=image_file)