In [7]:
import os
import pandas as pd
from pathlib import Path
import numpy as np
import anndata
import time
import matplotlib.pyplot as plt
import json

from abc_atlas_access.abc_atlas_cache.abc_project_cache import AbcProjectCache

# Raise the column display limit to 500 so wide tables are shown in full in cell output.
pd.set_option('display.max_columns', 500)

In [2]:
# Release whose manifest is loaded below; also used later to name the output directory.
version = '20241115'

# Directory handed to the cache as its download root.
download_base = Path('../../../abc_download_root')

abc_cache = AbcProjectCache.from_s3_cache(download_base)
manifest_path = f'releases/{version}/manifest.json'
abc_cache.load_manifest(manifest_path)

type.compare_manifests('releases/20230830/manifest.json', 'releases/20241115/manifest.json')
To load another version of the dataset, run
type.load_manifest('releases/20241115/manifest.json')


In [10]:
# Both inputs live under the same staging root and use the same dated sub-directory.
staging_metadata_root = Path(
    "/allen/programs/celltypes/workgroups/rnaseqanalysis/lydian/mouse_aging_prep/staging/metadata"
)
staging_subdir = "20241130"

# Per-cluster / per-cell metadata CSVs.
metadata_base = staging_metadata_root / "Zeng-Aging-Mouse-10Xv3" / staging_subdir
# Cluster-to-taxonomy mapping CSVs.
taxonomy_base = staging_metadata_root / "Zeng-Aging-Mouse-WMB-taxonomy" / staging_subdir

Read in cluster annotation term set dataframe

In [6]:
# Term sets from the WMB taxonomy, indexed by label, with the WMB cluster-level
# term set (CCN20230722_CLUS) removed.
inherited_term_sets = (
    abc_cache.get_metadata_dataframe(
        directory='WMB-taxonomy',
        file_name='cluster_annotation_term_set',
    )
    .set_index('label')
    .drop('CCN20230722_CLUS')
)

# Single-row replacement for the cluster level, labelled CS20241021_CLUS.
cluster_level = pd.DataFrame(
    [
        {
            "name": "cluster",
            "description": "The finest level of cell type definition in the mouse whole brain taxonomy. Cells within a cluster share similar characteristics and belong to the same supertype.",
            "order": 4,
        }
    ],
    index=["CS20241021_CLUS"],
)

term_sets = pd.concat([inherited_term_sets, cluster_level])
term_sets

Unnamed: 0,name,description,order
CCN20230722_NEUR,neurotransmitter,Clusters are assigned based on the average exp...,0
CCN20230722_CLAS,class,The top level of cell type definition in the m...,1
CCN20230722_SUBC,subclass,The coarse level of cell type definition in th...,2
CCN20230722_SUPT,supertype,The second finest level of cell type definitio...,3
CS20241021_CLUS,cluster,The finest level of cell type definition in th...,4


In [17]:
# List the staged metadata files. Uses pathlib rather than the `ls` automagic so the
# cell is plain Python, works without a POSIX shell and prints no ANSI colour codes.
sorted(path.name for path in metadata_base.iterdir())

[0m[01;32mcell_annotation_colors.csv[0m*    [01;32mdonor.csv[0m*
[01;32mcell_cluster_annotations.csv[0m*  [01;32mexample_genes_all_cells_expression.csv[0m*
[01;32mcell_metadata.csv[0m*             [01;32mlibrary.csv[0m*
[01;32mcluster.csv[0m*                   [01;32mvalue_sets.csv[0m*


In [14]:
# List the staged taxonomy-mapping files. Uses pathlib rather than the `ls` automagic so
# the cell is plain Python, works without a POSIX shell and prints no ANSI colour codes.
sorted(path.name for path in taxonomy_base.iterdir())

[0m[01;32maging_degenes.csv[0m*                     [01;32mcluster_mapping.csv[0m*
[01;32mcell_cluster_mapping_annotations.csv[0m*  [01;32mcluster_mapping_pivot.csv[0m*
[01;32mcell_cross_mapping_annotations.csv[0m*


In [75]:
# Load the per-cluster table (one row per cluster alias) from the staged metadata.
cluster_csv = metadata_base / 'cluster.csv'
cluster = pd.read_csv(cluster_csv)
cluster

Unnamed: 0,cluster_alias,number_of_cells,cluster_label,cluster_order,cluster_name,cluster_color,number_of_adult_cells,number_of_aged_cells,proportion_adult_cells,proportion_aged_cells,odds_ratio,log2_odds_ratio,cluster_age_bias,cluster_age_bias_color,neurotransmitter_combined_label,neurotransmitter_label,neurotransmitter_color
0,1,851,CS20241021_0001,0,1_CLA-EPd-CTX Car3 Glut_1,#f7c27a,183,668,0.215041,0.784959,1.453797,0.539826,unassigned,#DADEDF,Glut,Glut,#2B93DF
1,2,330,CS20241021_0002,1,2_IT EP-CLA Glut_1,#5c1a34,160,170,0.484848,0.515152,0.421595,-1.246071,unassigned,#DADEDF,Glut,Glut,#2B93DF
2,3,589,CS20241021_0003,2,3_IT EP-CLA Glut_1,#b96cf0,240,349,0.407470,0.592530,0.576896,-0.793616,unassigned,#DADEDF,Glut,Glut,#2B93DF
3,4,207,CS20241021_0004,3,4_IT EP-CLA Glut_1,#bff691,107,100,0.516908,0.483092,0.371035,-1.430373,unassigned,#DADEDF,Glut,Glut,#2B93DF
4,5,2141,CS20241021_0005,4,5_IT EP-CLA Glut_1,#39fbfa,655,1486,0.305932,0.694068,0.900735,-0.150825,unassigned,#DADEDF,Glut,Glut,#2B93DF
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
842,843,2640,CS20241021_0843,842,843_Microglia NN_1,#a88b30,71,2569,0.026894,0.973106,20.304439,4.343723,aging-enriched,#CD6828,No-NT,No-NT,#666666
843,844,771,CS20241021_0844,843,844_Microglia NN_1,#df92c3,284,487,0.368353,0.631647,0.911851,-0.133131,unassigned,#DADEDF,No-NT,No-NT,#666666
844,845,3030,CS20241021_0845,844,845_BAM NN_1,#a1adea,1462,1568,0.482508,0.517492,0.556218,-0.846279,unassigned,#DADEDF,No-NT,No-NT,#666666
845,846,248,CS20241021_0846,845,846_DC NN_1,#9a5824,98,150,0.395161,0.604839,0.814133,-0.296663,unassigned,#DADEDF,No-NT,No-NT,#666666


In [23]:
# Load the long-format table that maps each cluster to its taxonomy annotation terms.
cluster_mapping_csv = taxonomy_base / 'cluster_mapping.csv'
mapping = pd.read_csv(cluster_mapping_csv)
mapping

Unnamed: 0,cluster_alias,cluster_label,cluster_order,cluster_name,cluster_color,cluster_annotation_term_label,cluster_annotation_term_name,cluster_annotation_term_set_label,parent_term_label,parent_term_set_label,term_set_order,term_order,cluster_annotation_term_set_name,color_hex_triplet
0,1,CS20241021_0001,0,1_CLA-EPd-CTX Car3 Glut_1,#f7c27a,CS20230722_SUPT_0001,0001 CLA-EPd-CTX Car3 Glut_1,CCN20230722_SUPT,CS20230722_SUBC_001,CCN20230722_SUBC,3,0,supertype,#99822E
1,2,CS20241021_0002,1,2_IT EP-CLA Glut_1,#5c1a34,CS20230722_SUPT_0003,0003 IT EP-CLA Glut_1,CCN20230722_SUPT,CS20230722_SUBC_002,CCN20230722_SUBC,3,2,supertype,#994563
2,3,CS20241021_0003,2,3_IT EP-CLA Glut_1,#b96cf0,CS20230722_SUPT_0003,0003 IT EP-CLA Glut_1,CCN20230722_SUPT,CS20230722_SUBC_002,CCN20230722_SUBC,3,2,supertype,#994563
3,4,CS20241021_0004,3,4_IT EP-CLA Glut_1,#bff691,CS20230722_SUPT_0003,0003 IT EP-CLA Glut_1,CCN20230722_SUPT,CS20230722_SUBC_002,CCN20230722_SUBC,3,2,supertype,#994563
4,5,CS20241021_0005,4,5_IT EP-CLA Glut_1,#39fbfa,CS20230722_SUPT_0003,0003 IT EP-CLA Glut_1,CCN20230722_SUPT,CS20230722_SUBC_002,CCN20230722_SUBC,3,2,supertype,#994563
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2536,843,CS20241021_0843,842,843_Microglia NN_1,#a88b30,CS20230722_CLAS_34,34 Immune,CCN20230722_CLAS,,,1,33,class,#825f45
2537,844,CS20241021_0844,843,844_Microglia NN_1,#df92c3,CS20230722_CLAS_34,34 Immune,CCN20230722_CLAS,,,1,33,class,#825f45
2538,845,CS20241021_0845,844,845_BAM NN_1,#a1adea,CS20230722_CLAS_34,34 Immune,CCN20230722_CLAS,,,1,33,class,#825f45
2539,846,CS20241021_0846,845,846_DC NN_1,#9a5824,CS20230722_CLAS_34,34 Immune,CCN20230722_CLAS,,,1,33,class,#825f45


In [84]:
# Inspect only the rows that map clusters to class-level terms.
is_class_row = mapping['cluster_annotation_term_set_name'] == 'class'
mapping.loc[is_class_row]

Unnamed: 0,cluster_alias,cluster_label,cluster_order,cluster_name,cluster_color,cluster_annotation_term_label,cluster_annotation_term_name,cluster_annotation_term_set_label,parent_term_label,parent_term_set_label,term_set_order,term_order,cluster_annotation_term_set_name,color_hex_triplet
1694,1,CS20241021_0001,0,1_CLA-EPd-CTX Car3 Glut_1,#f7c27a,CS20230722_CLAS_01,01 IT-ET Glut,CCN20230722_CLAS,,,1,0,class,#FA0087
1695,2,CS20241021_0002,1,2_IT EP-CLA Glut_1,#5c1a34,CS20230722_CLAS_01,01 IT-ET Glut,CCN20230722_CLAS,,,1,0,class,#FA0087
1696,3,CS20241021_0003,2,3_IT EP-CLA Glut_1,#b96cf0,CS20230722_CLAS_01,01 IT-ET Glut,CCN20230722_CLAS,,,1,0,class,#FA0087
1697,4,CS20241021_0004,3,4_IT EP-CLA Glut_1,#bff691,CS20230722_CLAS_01,01 IT-ET Glut,CCN20230722_CLAS,,,1,0,class,#FA0087
1698,5,CS20241021_0005,4,5_IT EP-CLA Glut_1,#39fbfa,CS20230722_CLAS_01,01 IT-ET Glut,CCN20230722_CLAS,,,1,0,class,#FA0087
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2536,843,CS20241021_0843,842,843_Microglia NN_1,#a88b30,CS20230722_CLAS_34,34 Immune,CCN20230722_CLAS,,,1,33,class,#825f45
2537,844,CS20241021_0844,843,844_Microglia NN_1,#df92c3,CS20230722_CLAS_34,34 Immune,CCN20230722_CLAS,,,1,33,class,#825f45
2538,845,CS20241021_0845,844,845_BAM NN_1,#a1adea,CS20230722_CLAS_34,34 Immune,CCN20230722_CLAS,,,1,33,class,#825f45
2539,846,CS20241021_0846,845,846_DC NN_1,#9a5824,CS20230722_CLAS_34,34 Immune,CCN20230722_CLAS,,,1,33,class,#825f45


In [76]:
wmb_term_raw = abc_cache.get_metadata_dataframe(directory='WMB-taxonomy', file_name='cluster_annotation_term')

# Spike in a 'No-NT' neurotransmitter term. The row is built from a dict rather than
# from `term.columns`: `term` is only created in a later cell, so referencing it here
# fails on a fresh kernel. pd.concat aligns on column names and leaves the columns that
# are not listed here as NaN for this row.
other_term = pd.DataFrame([{
    'name': 'No-NT',
    'cluster_annotation_term_set_name': 'neurotransmitter',
    'color_hex_triplet': '#666666',
    'term_set_order': 0,
    'term_order': 9,
}])

# Order terms by level, then by position within the level.
wmb_term = (
    pd.concat([wmb_term_raw, other_term], ignore_index=True)
    .sort_values(['term_set_order', 'term_order'])
)
wmb_term.head()

Unnamed: 0,label,name,cluster_annotation_term_set_label,parent_term_label,parent_term_set_label,term_set_order,term_order,cluster_annotation_term_set_name,color_hex_triplet,first_child_label,first_child_term_set_name
0,CS20230722_NEUR_Glut,Glut,CCN20230722_NEUR,,,0,0,neurotransmitter,#2B93DF,,
1,CS20230722_NEUR_NA,,CCN20230722_NEUR,,,0,1,neurotransmitter,#666666,,
2,CS20230722_NEUR_GABA,GABA,CCN20230722_NEUR,,,0,2,neurotransmitter,#FF3358,,
3,CS20230722_NEUR_Dopa,Dopa,CCN20230722_NEUR,,,0,3,neurotransmitter,#fcf04b,,
4,CS20230722_NEUR_Glut-GABA,Glut-GABA,CCN20230722_NEUR,,,0,4,neurotransmitter,#0a9964,,


In [77]:
# Keep every neurotransmitter term, but for the class / subclass / supertype levels keep
# only the terms that the cluster mapping actually refers to.
term_set_of = wmb_term['cluster_annotation_term_set_name']
is_mapped = wmb_term['label'].isin(mapping['cluster_annotation_term_label'])

kept_levels = [wmb_term[term_set_of == 'neurotransmitter']]
kept_levels += [
    wmb_term[(term_set_of == level) & is_mapped]
    for level in ('class', 'subclass', 'supertype')
]

term = pd.concat(kept_levels).reset_index(drop=True)
term.head()

Unnamed: 0,label,name,cluster_annotation_term_set_label,parent_term_label,parent_term_set_label,term_set_order,term_order,cluster_annotation_term_set_name,color_hex_triplet,first_child_label,first_child_term_set_name
0,CS20230722_NEUR_Glut,Glut,CCN20230722_NEUR,,,0,0,neurotransmitter,#2B93DF,,
1,CS20230722_NEUR_NA,,CCN20230722_NEUR,,,0,1,neurotransmitter,#666666,,
2,CS20230722_NEUR_GABA,GABA,CCN20230722_NEUR,,,0,2,neurotransmitter,#FF3358,,
3,CS20230722_NEUR_Dopa,Dopa,CCN20230722_NEUR,,,0,3,neurotransmitter,#fcf04b,,
4,CS20230722_NEUR_Glut-GABA,Glut-GABA,CCN20230722_NEUR,,,0,4,neurotransmitter,#0a9964,,


In [78]:
# Build cluster-level term rows from the supertype rows of the mapping: each cluster
# becomes a term whose parent is the supertype term it maps to.
supertype_mapping = mapping[mapping['cluster_annotation_term_set_name'] == 'supertype']

cluster_term_set = pd.DataFrame({
    'label': supertype_mapping['cluster_label'],
    'name': supertype_mapping['cluster_name'],
    'cluster_annotation_term_set_label': 'CS20241021_CLUS',
    'parent_term_label': supertype_mapping['cluster_annotation_term_label'],
    'parent_term_set_label': supertype_mapping['cluster_annotation_term_set_label'],
    'term_set_order': 4,
    'term_order': supertype_mapping['cluster_order'],
    'cluster_annotation_term_set_name': 'cluster',
    'color_hex_triplet': supertype_mapping['cluster_color'],
})
cluster_term_set.head()

Unnamed: 0,label,name,cluster_annotation_term_set_label,parent_term_label,parent_term_set_label,term_set_order,term_order,cluster_annotation_term_set_name,color_hex_triplet
0,CS20241021_0001,1_CLA-EPd-CTX Car3 Glut_1,CS20241021_CLUS,CS20230722_SUPT_0001,CCN20230722_SUPT,4,0,cluster,#f7c27a
1,CS20241021_0002,2_IT EP-CLA Glut_1,CS20241021_CLUS,CS20230722_SUPT_0003,CCN20230722_SUPT,4,1,cluster,#5c1a34
2,CS20241021_0003,3_IT EP-CLA Glut_1,CS20241021_CLUS,CS20230722_SUPT_0003,CCN20230722_SUPT,4,2,cluster,#b96cf0
3,CS20241021_0004,4_IT EP-CLA Glut_1,CS20241021_CLUS,CS20230722_SUPT_0003,CCN20230722_SUPT,4,3,cluster,#bff691
4,CS20241021_0005,5_IT EP-CLA Glut_1,CS20241021_CLUS,CS20230722_SUPT_0003,CCN20230722_SUPT,4,4,cluster,#39fbfa


In [79]:
# Append the cluster-level terms and restore the (level, position) ordering.
# NOTE: `term` is rebuilt from itself, so re-running this cell appends the cluster
# terms a second time; re-run the cell that creates `term` first.
all_terms = pd.concat([term, cluster_term_set])
term = all_terms.reset_index(drop=True).sort_values(['term_set_order', 'term_order'])
term

Unnamed: 0,label,name,cluster_annotation_term_set_label,parent_term_label,parent_term_set_label,term_set_order,term_order,cluster_annotation_term_set_name,color_hex_triplet,first_child_label,first_child_term_set_name
0,CS20230722_NEUR_Glut,Glut,CCN20230722_NEUR,,,0,0,neurotransmitter,#2B93DF,,
1,CS20230722_NEUR_NA,,CCN20230722_NEUR,,,0,1,neurotransmitter,#666666,,
2,CS20230722_NEUR_GABA,GABA,CCN20230722_NEUR,,,0,2,neurotransmitter,#FF3358,,
3,CS20230722_NEUR_Dopa,Dopa,CCN20230722_NEUR,,,0,3,neurotransmitter,#fcf04b,,
4,CS20230722_NEUR_Glut-GABA,Glut-GABA,CCN20230722_NEUR,,,0,4,neurotransmitter,#0a9964,,
...,...,...,...,...,...,...,...,...,...,...,...
1482,CS20241021_0843,843_Microglia NN_1,CS20241021_CLUS,CS20230722_SUPT_1194,CCN20230722_SUPT,4,842,cluster,#a88b30,,
1483,CS20241021_0844,844_Microglia NN_1,CS20241021_CLUS,CS20230722_SUPT_1194,CCN20230722_SUPT,4,843,cluster,#df92c3,,
1484,CS20241021_0845,845_BAM NN_1,CS20241021_CLUS,CS20230722_SUPT_1195,CCN20230722_SUPT,4,844,cluster,#a1adea,,
1485,CS20241021_0846,846_DC NN_1,CS20241021_CLUS,CS20230722_SUPT_1197,CCN20230722_SUPT,4,845,cluster,#9a5824,,


Read in cluster annotation term dataframe. Spike in a row to handle unassigned neurotransmitter clusters.

Find and store first child for each term

In [80]:
# For every parent term, take the first child row in the current order of `term`.
child_columns = ['label', 'name', 'term_order', 'cluster_annotation_term_set_name']
has_parent = term['parent_term_label'].notna()
first_child = term.loc[has_parent].groupby('parent_term_label')[child_columns].first()
first_child

Unnamed: 0_level_0,label,name,term_order,cluster_annotation_term_set_name
parent_term_label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CS20230722_CLAS_01,CS20230722_SUBC_001,001 CLA-EPd-CTX Car3 Glut,0,subclass
CS20230722_CLAS_02,CS20230722_SUBC_028,028 L6b/CT ENT Glut,27,subclass
CS20230722_CLAS_03,CS20230722_SUBC_036,036 HPF CR Glut,35,subclass
CS20230722_CLAS_04,CS20230722_SUBC_037,037 DG Glut,36,subclass
CS20230722_CLAS_05,CS20230722_SUBC_045,045 OB-STR-CTX Inh IMN,44,subclass
...,...,...,...,...
CS20230722_SUPT_1193,CS20241021_0833,833_Endo NN_1,832,cluster
CS20230722_SUPT_1194,CS20241021_0840,840_Microglia NN_1,839,cluster
CS20230722_SUPT_1195,CS20241021_0845,845_BAM NN_1,844,cluster
CS20230722_SUPT_1197,CS20241021_0846,846_DC NN_1,845,cluster


In [81]:
# Write each parent's first child onto the parent's own row. Indexing by label lets
# the assignments align on the parent labels held in first_child.index.
term_by_label = term.set_index('label')
parent_labels = first_child.index
term_by_label.loc[parent_labels, 'first_child_label'] = first_child['label']
term_by_label.loc[parent_labels, 'first_child_term_set_name'] = first_child['cluster_annotation_term_set_name']
term = term_by_label.reset_index()

In [82]:
# Preview the first few terms that have a first child recorded.
has_first_child = term['first_child_label'].notna()
term.loc[has_first_child].head(5)

Unnamed: 0,label,name,cluster_annotation_term_set_label,parent_term_label,parent_term_set_label,term_set_order,term_order,cluster_annotation_term_set_name,color_hex_triplet,first_child_label,first_child_term_set_name
11,CS20230722_CLAS_01,01 IT-ET Glut,CCN20230722_CLAS,,,1,0,class,#FA0087,CS20230722_SUBC_001,subclass
12,CS20230722_CLAS_02,02 NP-CT-L6b Glut,CCN20230722_CLAS,,,1,1,class,#61e2a4,CS20230722_SUBC_028,subclass
13,CS20230722_CLAS_03,03 OB-CR Glut,CCN20230722_CLAS,,,1,2,class,#D00000,CS20230722_SUBC_036,subclass
14,CS20230722_CLAS_04,04 DG-IMN Glut,CCN20230722_CLAS,,,1,3,class,#16f2f2,CS20230722_SUBC_037,subclass
15,CS20230722_CLAS_05,05 OB-IMN GABA,CCN20230722_CLAS,,,1,4,class,#1b4332,CS20230722_SUBC_045,subclass


In [91]:
# Class-level term name for each class row of the mapping, as a plain array.
is_class_row = mapping['cluster_annotation_term_set_name'] == 'class'
mapping.loc[is_class_row, 'cluster_annotation_term_name'].to_numpy()

array(['01 IT-ET Glut', '01 IT-ET Glut', '01 IT-ET Glut', '01 IT-ET Glut',
       '01 IT-ET Glut', '01 IT-ET Glut', '01 IT-ET Glut', '01 IT-ET Glut',
       '01 IT-ET Glut', '01 IT-ET Glut', '01 IT-ET Glut', '01 IT-ET Glut',
       '01 IT-ET Glut', '01 IT-ET Glut', '01 IT-ET Glut', '01 IT-ET Glut',
       '01 IT-ET Glut', '01 IT-ET Glut', '01 IT-ET Glut', '01 IT-ET Glut',
       '01 IT-ET Glut', '01 IT-ET Glut', '01 IT-ET Glut', '01 IT-ET Glut',
       '01 IT-ET Glut', '01 IT-ET Glut', '01 IT-ET Glut', '01 IT-ET Glut',
       '01 IT-ET Glut', '01 IT-ET Glut', '01 IT-ET Glut', '01 IT-ET Glut',
       '01 IT-ET Glut', '01 IT-ET Glut', '01 IT-ET Glut', '01 IT-ET Glut',
       '01 IT-ET Glut', '01 IT-ET Glut', '01 IT-ET Glut', '01 IT-ET Glut',
       '01 IT-ET Glut', '01 IT-ET Glut', '01 IT-ET Glut', '01 IT-ET Glut',
       '01 IT-ET Glut', '01 IT-ET Glut', '01 IT-ET Glut', '01 IT-ET Glut',
       '01 IT-ET Glut', '01 IT-ET Glut', '01 IT-ET Glut', '01 IT-ET Glut',
       '01 IT-ET Glut', '

In [92]:
# One row per cluster with the name of its term at every taxonomy level.
cluster_to_cluster_annotation = cluster[['cluster_alias']].copy()
cluster_to_cluster_annotation['neurotransmitter'] = cluster['neurotransmitter_label']

for term_name in ['class', 'subclass', 'supertype']:
    # Look names up by cluster_alias rather than pasting them in positionally, so the
    # result stays correct even if cluster.csv and cluster_mapping.csv are ordered
    # differently.
    level_names = (
        mapping.loc[mapping['cluster_annotation_term_set_name'] == term_name]
        .set_index('cluster_alias')['cluster_annotation_term_name']
    )
    assert level_names.index.is_unique, f"duplicate cluster_alias in {term_name} mapping"
    cluster_to_cluster_annotation[term_name] = cluster['cluster_alias'].map(level_names)
    assert cluster_to_cluster_annotation[term_name].notna().all(), (
        f"some clusters have no {term_name} mapping"
    )

cluster_to_cluster_annotation['cluster'] = cluster['cluster_name']
cluster_to_cluster_annotation = cluster_to_cluster_annotation.sort_values(['class', 'subclass', 'supertype', 'cluster'])
cluster_to_cluster_annotation

Unnamed: 0,cluster_alias,neurotransmitter,class,subclass,supertype,cluster
0,1,Glut,01 IT-ET Glut,001 CLA-EPd-CTX Car3 Glut,0001 CLA-EPd-CTX Car3 Glut_1,1_CLA-EPd-CTX Car3 Glut_1
1,2,Glut,01 IT-ET Glut,002 IT EP-CLA Glut,0003 IT EP-CLA Glut_1,2_IT EP-CLA Glut_1
2,3,Glut,01 IT-ET Glut,002 IT EP-CLA Glut,0003 IT EP-CLA Glut_1,3_IT EP-CLA Glut_1
3,4,Glut,01 IT-ET Glut,002 IT EP-CLA Glut,0003 IT EP-CLA Glut_1,4_IT EP-CLA Glut_1
4,5,Glut,01 IT-ET Glut,002 IT EP-CLA Glut,0003 IT EP-CLA Glut_1,5_IT EP-CLA Glut_1
...,...,...,...,...,...,...
842,843,No-NT,34 Immune,334 Microglia NN,1194 Microglia NN_1,843_Microglia NN_1
843,844,No-NT,34 Immune,334 Microglia NN,1194 Microglia NN_1,844_Microglia NN_1
844,845,No-NT,34 Immune,335 BAM NN,1195 BAM NN_1,845_BAM NN_1
845,846,No-NT,34 Immune,337 DC NN,1197 DC NN_1,846_DC NN_1


Read in the cluster annotation term name pivot table and sort it

Create a name-indexed lookup dataframe for each term set

In [93]:
# Map each term set name to the terms of that set, indexed by term name.
lookup = {
    tag: term[term['cluster_annotation_term_set_name'] == tag].set_index('name')
    for tag in term_sets['name']
}

Helper functions to look up a term attribute and format a cell in the HTML table

In [94]:
def get_value(c, n, v) :
    """Return attribute `v` of the term named `n` in term set `c`.

    Reads the module-level `lookup` dict, which maps a term set name to a
    dataframe indexed by term name.
    """
    term_table = lookup[c]
    return term_table.loc[n, v]

def format_cell(df, c, add_id=False, add_plus=False, add_minus=False):
    """Render column `c` of `df` as HTML snippets, one per row.

    Each snippet is the concatenation, in this order, of:
      * an empty anchor div carrying the term label as its id (if `add_id`),
      * a coloured circle using the term's `color_hex_triplet`,
      * the term name,
      * a "[+]" link to the term's first child (if `add_plus`),
      * a "[-]" link to the term's own entry on its term-set page (if `add_minus`).

    Term attributes are read through `get_value(c, name, attribute)`.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose column `c` holds term names.
    c : str
        Column name; also the term set name passed to `get_value`.
    add_id, add_plus, add_minus : bool
        Switch the optional parts on.

    Returns
    -------
    pandas.Series
        HTML strings indexed like `df` (an empty Series for an empty `df`).
    """

    def attribute(name, field):
        return get_value(c, name, field)

    def link(page, anchor, symbol):
        # A term without a target (e.g. no first child recorded) gets no link
        # instead of a broken "nan.html#nan" one.
        if pd.isna(page) or pd.isna(anchor):
            return ''
        return '<div class="celltext"><a href="%s.html#%s">[%s]</a></div>' % (page, anchor, symbol)

    cells = []
    for name in df[c]:
        parts = []
        if add_id:
            parts.append('<div id="%s"></div>' % attribute(name, 'label'))
        parts.append('<div class="circle" style="background-color:%s"></div>'
                     % attribute(name, 'color_hex_triplet'))
        parts.append('<div class="celltext">%s</div>' % name)
        if add_plus:
            parts.append(link(attribute(name, 'first_child_term_set_name'),
                              attribute(name, 'first_child_label'), '+'))
        if add_minus:
            parts.append(link(attribute(name, 'cluster_annotation_term_set_name'),
                              attribute(name, 'label'), '-'))
        cells.append(''.join(parts))

    # Built directly as a Series so an empty input still yields a Series.
    return pd.Series(cells, index=df.index, dtype=object)


Helper function to create html document

In [95]:
def create_html(df, ts, file, title):
    """Write `df` to `file` as a styled HTML table page.

    Every column of `df` whose name is a term set name (taken from the
    module-level `term_sets['name']`) is rendered with `format_cell`:

      * the column equal to `ts` gets anchor ids, plus a "[+]" link unless it is
        'cluster', 'neurotransmitter' or 'division';
      * every other column except 'neurotransmitter' gets a "[-]" link.

    Parameters
    ----------
    df : pandas.DataFrame
        Table of term names, one column per term set to show.
    ts : str
        Term set the page is about.
    file : str or path-like
        Output path; overwritten if it exists.
    title : str
        Text for the page's <title> element (inserted verbatim, not escaped).
    """
    df_formatted = df.copy()

    for tag in term_sets['name']:
        if tag not in df_formatted.columns:
            continue

        is_page_level = (tag == ts)
        df_formatted[tag] = format_cell(
            df,
            tag,
            add_id=is_page_level,
            add_plus=is_page_level and tag not in ('cluster', 'neurotransmitter', 'division'),
            # Plain comparison: the previous `not in ('neurotransmitter')` tested
            # substring membership in a string, not membership in a tuple.
            add_minus=(not is_page_level) and tag != 'neurotransmitter',
        )

    table = df_formatted.to_html(index=False, na_rep='',
                                 render_links=True, escape=False,
                                 classes="mystyle")

    # Title and table are inserted in a single pass, so '%' or '{}' in either one
    # cannot be misread as a format placeholder.
    html_string = f'''
    <html>
    <head>
    <title>{title}</title>
    <link rel="stylesheet" type="text/css" href="../../simple_style.css"/>
    </head>
    <body>
    {table}
    </body>
    </html>
    '''

    # OUTPUT AN HTML FILE
    with open(file, 'w', encoding='utf-8') as f:
        f.write(html_string)

In [96]:
# The pages go under the _static directory of abc_atlas_access so that the links
# resolve correctly in the jupyter-book/sphinx page.
static_root = '../../_static'
output_directory = os.path.join(static_root, 'Zeng-Aging-Mouse-taxonomy', version)
os.makedirs(output_directory, exist_ok=True)

In [100]:
# One row per class; drives the class-level page.
df_class = cluster_to_cluster_annotation[['class']].drop_duplicates()

file = os.path.join(output_directory, 'class.html')
title = 'Zeng-Aging-Mouse-taxonomy: cell type classes'
create_html(df_class, 'class', file, title)
print(len(df_class))

25


In [101]:
# One row per (class, subclass) pair; drives the subclass-level page.
df_subclass = cluster_to_cluster_annotation[['class', 'subclass']].drop_duplicates()

file = os.path.join(output_directory, 'subclass.html')
title = 'Zeng-Aging-Mouse-taxonomy: cell type subclasses'
create_html(df_subclass, 'subclass', file, title)
print(len(df_subclass))

172


In [102]:
# One row per (class, subclass, supertype) combination; drives the supertype-level page.
df_supertype = cluster_to_cluster_annotation[['class', 'subclass', 'supertype']].drop_duplicates()

file = os.path.join(output_directory, 'supertype.html')
title = 'Zeng-Aging-Mouse-taxonomy: cell type supertypes'
create_html(df_supertype, 'supertype', file, title)
print(len(df_supertype))

432


In [104]:
# One row per cluster with its full lineage and neurotransmitter; drives the cluster-level page.
df_cluster = cluster_to_cluster_annotation[
    ['class', 'subclass', 'supertype', 'cluster', 'neurotransmitter']
].drop_duplicates()

file = os.path.join(output_directory, 'cluster.html')
title = 'Zeng-Aging-Mouse-taxonomy: cell type clusters'
create_html(df_cluster, 'cluster', file, title)
print(len(df_cluster))

847


In [18]:
# NOTE(review): this branch only runs for version '20230630', while this notebook sets
# version = '20241115', so it is never executed here. It also reads `df_sorted`, which
# is not defined anywhere in this notebook and would raise NameError if the branch ran.
# Looks like a leftover from another notebook -- confirm and remove.
if version == '20230630' :
    # One row per division; would drive a division-level page.
    df_supertype = df_sorted[['division']].copy()
    df_supertype.drop_duplicates(inplace=True)

    file = os.path.join(output_directory,'division.html')
    title = 'WMB-taxonmy: cell type division'
    create_html(df_supertype, 'division', file, title)
    print(len(df_supertype))

In [105]:
# One row per neurotransmitter type; drives the neurotransmitter page.
df_neurotransmitter = cluster_to_cluster_annotation[['neurotransmitter']].drop_duplicates()

file = os.path.join(output_directory, 'neurotransmitter.html')
title = 'Zeng-Aging-Mouse-taxonomy: neurotransmitter types'
create_html(df_neurotransmitter, 'neurotransmitter', file, title)
print(len(df_neurotransmitter))

9
