## Notebook to generate chord plots for age regression results using Holoviews

In [None]:
# shell escape: print the current date/time at the top of the run
!date

#### import libraries

In [None]:
from pandas import read_csv, DataFrame
from itertools import combinations
import holoviews as hv
from holoviews import opts, dim

# load the Bokeh plotting backend for HoloViews
hv.extension('bokeh')
# set the HoloViews output 'size' option to 200 for the plots rendered below
hv.output(size=200)

#### set notebook variables

In [None]:
# parameters
# modality and REGRESSION_TYPE are substituted into the results file name;
# category selects the 'broad'/'specific' file-name label; modality and
# category also appear in the plot title
modality = 'GEX' # 'GEX' or 'ATAC'
category = 'curated_type' # 'curated_type' for broad and 'cluster_name' for specific
REGRESSION_TYPE = 'glm_tweedie' # 'glm', 'glm_tweedie', or 'rlm'

In [None]:
# parameters
project = 'aging_phase2'
# translate the annotation column name into the granularity label used in file names
if category == 'curated_type':
    prefix_type = 'broad'
elif category == 'cluster_name':
    prefix_type = 'specific'
else:
    # fail here with a clear message rather than a NameError on prefix_type below
    raise ValueError(f"unsupported category '{category}'; "
                     "expected 'curated_type' or 'cluster_name'")

# directories
wrk_dir = '/labshare/raph/datasets/adrd_neuro/brain_aging/phase2'
quants_dir = f'{wrk_dir}/quants'
results_dir = f'{wrk_dir}/results'
figures_dir = f'{wrk_dir}/figures'

# in files
results_file = f'{results_dir}/{project}.{modality}.{prefix_type}.{REGRESSION_TYPE}_fdr_filtered.age.csv'

# out files

# constants
# when True, later cells display their intermediate tables
DEBUG = True

### load the results to visualize

In [None]:
# read the results table selected by the parameters above
results_df = read_csv(results_file)
print(f'results shape {results_df.shape}')
if DEBUG:
    # sample() raises ValueError when asked for more rows than the table holds,
    # so cap the preview at the number of rows available
    display(results_df.sample(min(5, len(results_df))))

### create the nodes: the unique cell-types present in the tissue column

In [None]:
# one row per distinct tissue value; reset_index() moves the row label of each
# tissue's first occurrence into an 'index' column, which serves as the node id
nodes_df = results_df[['tissue']].drop_duplicates(keep='first').reset_index()
# lookup from tissue name to its node id
nodes_dict = nodes_df.set_index('tissue')['index'].to_dict()
# wrap the node table for HoloViews, keyed on the 'index' column
nodes = hv.Dataset(nodes_df, 'index')
if DEBUG:
    display(nodes.data)
    print(nodes_df)
    display(nodes_dict)

### create the links: the number of shared features between each pair of cell-types

In [None]:
# build each cell-type's feature set once, so the pair loop below only
# intersects sets instead of re-scanning results_df for every pair
features_by_tissue = {
    tissue: set(results_df.loc[results_df.tissue == tissue, 'feature'])
    for tissue in nodes_dict
}
# one link per unordered pair of cell-types: [source node id, target node id,
# number of features present in both cell-types]
links_list = [
    [
        nodes_dict[source],
        nodes_dict[target],
        len(features_by_tissue[source] & features_by_tissue[target]),
    ]
    for source, target in combinations(nodes_dict, 2)
]
links_df = DataFrame(data=links_list, columns=['source', 'target', 'value'])
print(f'links shape is {links_df.shape}')
if DEBUG:
    display(links_df)

In [None]:
# build the chord diagram from the links and nodes, then restrict it to the
# 'value' range starting at 5 with no upper bound
chord = hv.Chord((links_df, nodes))
chord = chord.select(value=(5, None))
# label nodes by tissue; color nodes by their index and edges by their source
chord.opts(
    opts.Chord(
        title=f'{modality}: {category}',
        labels='tissue',
        cmap='Set1',
        node_color=dim('index').str(),
        edge_cmap='Set1',
        edge_color=dim('source').str(),
    )
)

In [None]:
# shell escape: print the current date/time at the end of the run
!date