In [1]:
import msgpack
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go


def read_msgpack(path):
    """Read the file at ``path`` in binary mode and return its MessagePack-decoded content."""
    with open(path, 'rb') as handle:
        raw_bytes = handle.read()
    # Decode only after the file has been read completely and closed.
    return msgpack.unpackb(raw_bytes)


In [2]:
# Table stored under the 'data' key of Variables.h5.
data = pd.read_hdf('Variables.h5', key='data')

# One entry per object stored in Coords.h5, keyed by its HDF key with the
# leading '/' removed (e.g. '/L1UMAP' -> 'L1UMAP').
with pd.HDFStore('Coords.h5') as hdf:
    coord_dict = dict(
        (name.lstrip('/'), hdf[name])
        for name in hdf.keys()
    )

# Colour palettes decoded from the MessagePack file; indexed by column name
# in later cells.
palette = read_msgpack('Palette.msg')

In [3]:
# Show the table loaded from Variables.h5; the rendered output below elides
# the middle rows and columns ("...").
data

Unnamed: 0,RegionName,MajorRegion,SubRegion,Region,Slice,CellClass,MajorType,SubType,Replicate,Pos96,...,CCC_Rate,CG_Rate,CG_RateAdj,CH_Rate,CH_RateAdj,FinalReads,InputReads,MappedReads,BamFilteringRate,MappingRate
0,SSp-2,Isocortex,SSp,5B,5,Inh,MGE-Sst,MGE-Sst Dock4,5B-180514,E3,...,0.009010,0.838600,0.837130,0.043650,0.034950,1353411.0,3228396.0,2150515.0,0.629343,0.666125
1,MOs-2,Isocortex,MOs,2B,2,Exc,PT-L5,PT-L5 Kcnh1,2B-180306,C10,...,0.006890,0.779250,0.777720,0.033480,0.026770,1477444.0,3264756.0,2162894.0,0.683087,0.662498
2,PIR-1,OLF,PIR,2D,2,Exc,OLF-Exc,OLF-Exc Bmpr1b,2D-180403,E3,...,0.005600,0.781040,0.779810,0.028120,0.022650,1290745.0,2875510.0,1909788.0,0.675858,0.664156
3,DG-2,HPF,DG,9J,9,Exc,DG,DG dg-all,9J-190212,G6,...,0.003905,0.703140,0.701976,0.008148,0.004260,1519997.0,3775868.0,2571553.0,0.591081,0.681050
4,PAL-1,CNU,PAL,4H,4,Inh,PAL-Inh,PAL-Inh Ptprd,4H-180911,C9,...,0.005510,0.835390,0.834480,0.025420,0.020020,2149117.0,6032772.0,4047727.0,0.530944,0.670956
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103977,DG-1,HPF,DG,8J,8,Exc,DG,DG dg-all,8J-190716,C7,...,0.004336,0.736098,0.734949,0.010825,0.006518,1275344.0,3081404.0,2096008.0,0.608463,0.680212
103978,LSX-1,CNU,LSX,4G,4,Exc,CT-L6,CT-L6 Megf9,4G-181204,C7,...,0.006641,0.833156,0.832041,0.029949,0.023464,1092657.0,3154332.0,2102205.0,0.519767,0.666450
103979,MOp-3,Isocortex,MOp,4B,4,Inh,CGE-Vip,CGE-Vip Galnt17,4B-171213,G3,...,0.008150,0.818810,0.817320,0.030900,0.022940,2427766.0,6231672.0,3847441.0,0.631008,0.617401
103980,CA-3,HPF,CA1-3,10E,10,Exc,CA3,CA3 Cadm2,10E-190625,G11,...,0.006423,0.751613,0.750008,0.027002,0.020712,1579198.0,3970912.0,2635635.0,0.599172,0.663735


In [4]:
# Column of `data` used to colour the points.
hue_name = 'Region'

# Palette entry for that column (presumably a label -> colour mapping; it is
# used as a Plotly discrete colour map elsewhere in this notebook).
hue_palette = palette[hue_name]

# Attach the hue column to the L1UMAP coordinates. `.assign` aligns on the
# index exactly like `plot_df[hue_name] = ...` would, but returns a new frame,
# so the DataFrame stored in `coord_dict` is left unmodified and this cell can
# be re-run (or run with another hue) without accumulating columns there.
plot_df = coord_dict['L1UMAP'].assign(**{hue_name: data[hue_name]})

In [5]:
def categorical_scatter(coord_name, hue_name):
    """Scatter-plot one coordinate set, coloured by a categorical column.

    Parameters
    ----------
    coord_name : str
        Key into the notebook-level ``coord_dict``. The selected frame must
        provide ``x`` and ``y`` columns.
    hue_name : str
        Column of the notebook-level ``data`` table used to colour the points.
        It is also the key into ``palette`` whose value is passed to Plotly as
        the discrete colour map.

    Returns
    -------
    plotly.graph_objects.Figure
        The figure, with the legend hidden, axis titles and tick labels
        removed, and transparent plot/paper backgrounds.

    Raises
    ------
    KeyError
        If ``coord_name`` is not in ``coord_dict`` or ``hue_name`` is not in
        ``palette`` / ``data``.
    """
    hue_palette = palette[hue_name]
    # `.assign` aligns the hue column on the index and returns a new frame, so
    # the DataFrame held in `coord_dict` is not mutated by plotting.
    plot_df = coord_dict[coord_name].assign(**{hue_name: data[hue_name]})

    fig = px.scatter(plot_df,
                     x="x",
                     y="y",
                     color=hue_name,
                     color_discrete_map=hue_palette)
    # Plain dicts are accepted for axis properties, so the function does not
    # need `plotly.graph_objects` to be imported before it is called.
    fig.update_layout(showlegend=False,
                      legend={'itemsizing': 'constant'},
                      xaxis=dict(title='', showticklabels=False),
                      yaxis=dict(title='', showticklabels=False),
                      plot_bgcolor='rgba(0,0,0,0)',
                      paper_bgcolor='rgba(0,0,0,0)')

    fig.update_traces(mode='markers', marker_size=0.01)
    # The original returned the undefined name `figure`, which raised
    # NameError on every call.
    return fig

In [21]:
# Hierarchy drawn in the sunburst, ordered from the root (innermost ring) to
# the leaves (outermost ring).
levels = ['MajorRegion', 'SubRegion', 'Region']

# Exclude rows whose SubType label contains 'Outlier'. This rebinds the
# notebook-level `data`, so cells executed afterwards see the filtered table.
if 'SubType' in data.columns:
    is_outlier = data['SubType'].str.contains('Outlier', regex=False, na=False)
    data = data[~is_outlier]

# Number of rows for every combination of the levels that actually occurs.
# observed=True keeps combinations that never occur out of the table (this
# matters when the level columns are categorical), so the child -> parent
# lookup below is built from real rows only.
count_df = (
    data.groupby(levels, observed=True)
    .size()
    .reset_index(name='Cell Number')
)

# Merge the per-level palettes into a single label -> colour lookup.
total_palette = {}
for level in levels:
    total_palette.update(palette[level])

labels = []
parents = []
values = []
colors = []
# Walk from the deepest level up to the root. The parent of levels[depth] is
# levels[depth - 1]; the previous slicing (`levels[1::-1] + [None]`) paired
# levels correctly only when there were exactly three of them.
for depth in range(len(levels) - 1, -1, -1):
    level = levels[depth]
    parent_level = levels[depth - 1] if depth > 0 else None
    this_level_sum = (
        count_df.groupby(level, observed=True)['Cell Number'].sum().to_dict()
    )
    if parent_level is not None:
        # NOTE: assumes every label has a single parent; if a label occurs
        # under several parents the last row of count_df wins.
        this_parent_dict = count_df.set_index(level)[parent_level].to_dict()
    else:
        # Root-level labels have the empty string as parent.
        this_parent_dict = dict.fromkeys(this_level_sum, '')
    for label, cell_number in this_level_sum.items():
        labels.append(label)
        parents.append(this_parent_dict[label])
        values.append(cell_number)
        # Labels missing from the palette fall back to light grey.
        colors.append(total_palette.get(label, '#D3D3D3'))

fig = go.Figure(go.Sunburst(
    labels=labels,
    parents=parents,
    values=values,
    marker={'colors': colors},
    # Each value already includes the values of the node's descendants.
    branchvalues="total",
))
# Update layout for tight margin
# See https://plotly.com/python/creating-and-updating-figures/
fig.update_layout(margin=dict(t=0, l=0, r=0, b=0))

fig.show()

In [19]:
# Hierarchy drawn in the sunburst, ordered from the root (innermost ring) to
# the leaves (outermost ring).
levels = ['CellClass', 'MajorType', 'SubType']

# Exclude rows whose SubType label contains 'Outlier'. This rebinds the
# notebook-level `data`, so cells executed afterwards see the filtered table.
if 'SubType' in data.columns:
    is_outlier = data['SubType'].str.contains('Outlier', regex=False, na=False)
    data = data[~is_outlier]

# Number of rows for every combination of the levels that actually occurs.
# observed=True keeps combinations that never occur out of the table (this
# matters when the level columns are categorical), so the child -> parent
# lookup below is built from real rows only.
count_df = (
    data.groupby(levels, observed=True)
    .size()
    .reset_index(name='Cell Number')
)

# Merge the per-level palettes into a single label -> colour lookup.
total_palette = {}
for level in levels:
    total_palette.update(palette[level])

labels = []
parents = []
values = []
colors = []
# Walk from the deepest level up to the root. The parent of levels[depth] is
# levels[depth - 1]; the previous slicing (`levels[1::-1] + [None]`) paired
# levels correctly only when there were exactly three of them.
for depth in range(len(levels) - 1, -1, -1):
    level = levels[depth]
    parent_level = levels[depth - 1] if depth > 0 else None
    this_level_sum = (
        count_df.groupby(level, observed=True)['Cell Number'].sum().to_dict()
    )
    if parent_level is not None:
        # NOTE: assumes every label has a single parent; if a label occurs
        # under several parents the last row of count_df wins.
        this_parent_dict = count_df.set_index(level)[parent_level].to_dict()
    else:
        # Root-level labels have the empty string as parent.
        this_parent_dict = dict.fromkeys(this_level_sum, '')
    for label, cell_number in this_level_sum.items():
        labels.append(label)
        parents.append(this_parent_dict[label])
        values.append(cell_number)
        # Labels missing from the palette fall back to light grey.
        colors.append(total_palette.get(label, '#D3D3D3'))

fig = go.Figure(go.Sunburst(
    labels=labels,
    parents=parents,
    values=values,
    marker={'colors': colors},
    # Each value already includes the values of the node's descendants.
    branchvalues="total",
))
# Update layout for tight margin
# See https://plotly.com/python/creating-and-updating-figures/
fig.update_layout(margin=dict(t=0, l=0, r=0, b=0))

fig.show()

(Sunburst({
     'branchvalues': 'total',
     'labels': [ANP anp-dg, ANP anp-olf-cnu, ASC cortex-olf, ..., Exc, Inh, NonN],
     'marker': {'colors': [#8235E4, #A452FF, #F4BD64, ..., #5EC5A9, #E93400,
                           #E06931]},
     'parents': [ANP, ANP, ASC, ..., , , ],
     'values': [121, 210, 406, ..., 67324, 28077, 8167]
 }),)