In [None]:
import numpy as np
import os
import pandas as pd
import sys
from matplotlib.lines import Line2D

import matplotlib.pyplot as plt
from matplotlib import colors
from matplotlib import cm
import matplotlib.patches as mpatches
import json
import h5py

from itertools import combinations


bnsi_path = '/todo_your_path_to_bonsai/Bonsai-data-representation'
sys.path.append(bnsi_path)
from bonsai_scout.bonsai_scout_helpers import Bonvis_figure, Bonvis_settings, Bonvis_metadata, Annotation_info, get_celltype_colors_new

def cbar_to_numb(val, cbar_info=None, min_val=None, max_val=None):
    """Convert a relative colorbar position into the value it stands for.

    ``val`` is rescaled linearly as ``val * (max_val - min_val) + min_val``.
    When ``cbar_info['log']`` is truthy the rescaled number is additionally
    passed through ``np.exp``.

    Parameters
    ----------
    val : float
        Position on the colorbar (the notebook passes values between 0 and 1).
    cbar_info : mapping, optional
        Must provide a ``'log'`` entry. Defaults to the notebook-level
        ``cbar_info`` variable.
    min_val, max_val : float, optional
        Ends of the value range. Default to the notebook-level ``min_val`` and
        ``max_val`` variables.

    Returns
    -------
    float
        The rescaled (and, for log colorbars, exponentiated) value.

    Notes
    -----
    Calling ``cbar_to_numb(val)`` keeps the original behaviour of reading
    everything from global state; passing the arguments explicitly removes
    that hidden dependency on cell execution order.
    """
    notebook_ns = globals()
    info = notebook_ns['cbar_info'] if cbar_info is None else cbar_info
    lower = notebook_ns['min_val'] if min_val is None else min_val
    upper = notebook_ns['max_val'] if max_val is None else max_val

    rescaled = val * (upper - lower) + lower
    if info['log']:
        # presumably vmin/vmax are stored in log space for log colorbars -- TODO confirm
        return np.exp(rescaled)
    return rescaled

In [None]:
# Display the current working directory; the relative figure folder "./figures"
# used further down is resolved against it.
os.getcwd()

In [None]:
# Root folder of the dataset -- replace the placeholder with the real location.
path_to_output = "todo_path_to_dataset"

# Locations of the Sanity and Bonsai results inside the dataset folder.
sanity_out_path = os.path.join(path_to_output, "Sanity")
bonsai_results_folders = os.path.join(path_to_output, "bonsai")

# All figures produced by this notebook are written to this folder.
save_dir = "./figures"
# makedirs(..., exist_ok=True) creates missing parent folders as well and does
# not fail when the folder already exists (no check-then-create race).
os.makedirs(save_dir, exist_ok=True)

In [None]:
## Bonsai posterior estimates of the ltqs
posterior_est_file = "posterior_ltqs_vertByGene.npy"
posterior_est_var_file = "posterior_ltqsVars_vertByGene.npy"

bonsai_dir = os.path.join(path_to_output, "bonsai")

# Folder (inside bonsai_dir) that holds the posterior files for all genes
results_dir = "todo_path_to_posterior_files"
posterior_dir = os.path.join(bonsai_dir, results_dir)

# Cell IDs in their file order; the row number is stored as `cell_id_idx`
# so that this order can be restored after the merge below.
cellid_file = os.path.join(path_to_output, "Sanity", "cellID.txt")
cell_id_true_df = pd.read_csv(cellid_file, header=None, names=["cell_id"])
cell_id_true_df = cell_id_true_df.assign(cell_id_idx=cell_id_true_df.index)

# Posterior values and the second array stored in the "Vars" file
# (file names say vertex-by-gene layout)
ltq_post_est = np.load(os.path.join(posterior_dir, posterior_est_file))
ltq_eb_post_est = np.load(os.path.join(posterior_dir, posterior_est_var_file))

with open(os.path.join(posterior_dir, "metadata.json")) as metadata_fh:
    metadata = json.load(metadata_fh)

vert_info = pd.read_csv(os.path.join(posterior_dir, "vertInfo.txt"), sep="\t")

# Keep only vertices whose name is a cell ID, ordered like cellID.txt
# NOTE(review): this is an inner join -- cell IDs without a matching vertex are dropped silently
vert_info_sub = (
    vert_info
    .merge(cell_id_true_df, left_on="vertName", right_on="cell_id")
    .sort_values("cell_id_idx")
)

# Select the rows of those vertices, then flip to gene-by-cell
ltq_post_est_sub = ltq_post_est[vert_info_sub.vertInd, :].T
print(ltq_post_est_sub.shape)

# Cell-by-gene table: one row per cell (vertName), one column per gene ID
ltq_post_est_sub_df = pd.DataFrame(
    ltq_post_est_sub.T,
    index=vert_info_sub.vertName,
    columns=metadata["geneIds"],
)

ltq_post_est_sub_df.head()


## Sum up counts per transcript and define NK thresholds
Marker genes: CMC1, CST7, GNLY, KLRF1, NKG7, PRF1.


In [None]:
# Columns that belong to the NK marker genes. Some entries are individual
# transcripts and carry a "_trscrpt<N>" suffix.
genes = ["NKG7_trscrpt1", "NKG7_trscrpt2", 
         "GNLY_trscrpt1", "GNLY_trscrpt2", "GNLY_trscrpt3",
         "CMC1", "CST7", "KLRF1_trscrpt1",
         "PRF1"
        ]

## sum transcripts
expr = ltq_post_est_sub_df[genes]

# Leave log space before adding transcripts of the same gene together
expr_exp = np.exp(expr)

# remove transcript suffixes (e.g. "_trscrpt1", "_trscrpt2")
expr_exp.columns = expr_exp.columns.str.replace(r"_trscrpt\d+", "", regex=True)

# Sum all columns that now share a gene name. Grouping the transposed frame
# replaces `groupby(..., axis=1)`, which is deprecated since pandas 2.1.
# As before, the resulting gene columns come out sorted by name.
expr_sum = expr_exp.T.groupby(level=0).sum().T

# Back to log space: one column per gene, one row per cell
df_processed = np.log(expr_sum)


In [None]:
# Inspect the per-gene table (log of the summed transcript values, one column per marker gene)
df_processed

In [None]:

# Decided thresholds. Four are fixed values; GNLY and KLRF1 use the 90th
# percentile of their own distribution over all cells.
NKG7_threshold = -7.5
GNLY_threshold = np.percentile(df_processed["GNLY"], 90)
PRF1_threshold = -9.2
CMC1_threshold = -8.5
CST7_threshold = -8.7
KLRF1_threshold = np.percentile(df_processed["KLRF1"], 90)

thresholds = {
    "CMC1": CMC1_threshold,
    "CST7": CST7_threshold,
    "GNLY": GNLY_threshold,
    "KLRF1": KLRF1_threshold,
    "NKG7": NKG7_threshold,
    "PRF1": PRF1_threshold,
}

# Panel order, left to right
genes = ["NKG7", "GNLY", "KLRF1", "PRF1", "CMC1", "CST7"]

# Passed to hist(log=...) and also written into the output file names
log = True

fig, ax = plt.subplots(1, 6, figsize=(12, 2), dpi=200)
ax = ax.flatten()

for panel, gene in zip(ax, genes):
    cutoff = thresholds[gene]
    panel.hist(df_processed[gene], bins=100, log=log, density=True, alpha=0.5, color="grey")
    # vertical line at the threshold chosen for this gene
    panel.axvline(cutoff, color="black", linewidth=1)
    # two-line header above the panel: gene name, then the threshold value
    panel.text(0.5, 1.20, gene, fontsize=14, ha='center', transform=panel.transAxes)
    panel.text(0.5, 1.04, "threshold= {}".format(np.round(cutoff, 5)), fontsize=10, ha='center', transform=panel.transAxes)

# Axis captions shared by all panels
fig.text(0.5, 0.0, "log transcription quotients", ha='center', va='center')
fig.text(0, 0.5, 'density', ha='center', va='center', rotation='vertical')

plt.tight_layout()

svg_path = '{}/marker_gene_expr-all_cells-histogram-stepfilled-log_{}.svg'.format(save_dir, log)
png_path = '{}/marker_gene_expr-all_cells-histogram-stepfilled-log_{}.png'.format(save_dir, log)
for out_path in (svg_path, png_path):
    plt.savefig(out_path, bbox_inches='tight')

plt.show()



In [None]:
# (gene, report line, threshold) for every marker, in the order they are printed
nk_marker_rules = [
    ("NKG7", "NKG7_threshold: {}", NKG7_threshold),
    ("GNLY", "GNLY_threshold: {}", GNLY_threshold),
    ("PRF1", "PRF1_threshold: {}", PRF1_threshold),
    ("CMC1", "CMC1_threshold: {}", CMC1_threshold),
    ("CST7", "CST7_threshold: {}", CST7_threshold),
    ("KLRF1", "KLRF1_threshold: {}", KLRF1_threshold),
]

for marker, report_line, cutoff in nk_marker_rules:
    print(report_line.format(cutoff))

# A cell is called "NK" only if it lies strictly above the threshold of
# every one of the six marker genes; all remaining cells are "other".
above_every_cutoff = np.logical_and.reduce(
    [df_processed[rule[0]] > rule[2] for rule in nk_marker_rules]
)
ltq_post_est_sub_df["NK_nkg7_gnly_prf1_cmc1_cst7_klrf1"] = np.where(above_every_cutoff, "NK", "other")

In [None]:
# Name of the column holding the NK / other call made from the marker thresholds
NK_def = "NK_nkg7_gnly_prf1_cmc1_cst7_klrf1"

# Rows called "NK" and how many there are
is_nk_cell = ltq_post_est_sub_df[NK_def] == "NK"
tmp = ltq_post_est_sub_df.loc[is_nk_cell]
x = tmp.shape[0]
print("{}: num of NK cells: {}".format(NK_def, x))

## Load bonsai tree

In [None]:
# Load the Bonsai visualisation data and settings, build the figure object and
# save the tree coloured by the NK annotation defined above.
# (Enable the cell magic below to silence the output of this cell.)
# %%capture  
print(bonsai_results_folders)
data_path = os.path.join(bonsai_results_folders, 'bonsai_vis_data.hdf')
settings_path = os.path.join(bonsai_results_folders, 'bonsai_vis_settings.json')

print("read in file: {}".format(data_path))
bonvis_metadata = Bonvis_metadata(data_path)
bonvis_settings = Bonvis_settings(load_settings_path=settings_path)
# Opened read-only and kept open: later cells read datasets from this handle
bonvis_data_hdf = h5py.File(data_path, 'r')

# Set darker edges (105/255 on every RGB channel, fully opaque)
bonvis_settings.edge_style = {"color": [0.4117647058823529, 0.4117647058823529, 0.4117647058823529, 1.0],
                             'linewidth':0.6}


bonvis_fig = Bonvis_figure(bonvis_data_hdf, 
                           bonvis_metadata, 
                           bonvis_data_path=data_path,
                           bonvis_settings=bonvis_settings)
celltype_info = bonvis_fig.bonvis_settings.celltype_info



# NOTE(review): the return value is discarded; presumably this call is needed to
# initialise the figure before update_figure -- confirm
bonvis_fig.create_figure(figsize=(6, 6))

# Here, we set the desired celltype-annotation for the dataset
# NOTE(review): expects an annotation named 'annot_<NK_def>' to be present already
# in the loaded settings; it is not registered anywhere in this notebook -- confirm
node_style = celltype_info.annot_infos['annot_{}'.format(NK_def)].label

# Visualize the tree in the ly_dendrogram layout, with the correct celltype-annotation
geometry = 'flat'
ly_type = "ly_dendrogram_ladderized"


# First pass: re-root/flip the tree and scale the nodes, without renewing the mask
bonvis_fig.update_figure(ly_type=ly_type, 
                         geometry=geometry, 
                         node_style=node_style, 
                         # only show NK cells
                         renew_mask=False,
                          scale_nodes=0.6666666666666666,
                         tweak_inds=1911, # change root # at node internal_1095
                         new_flip_id=True
                        )
# Switch the mask on for the cells annotated 'NK' ...
bonvis_fig.set_mask_for_subset([{'type': 'annot', 'info': 'NK', 'mask_is_on': True}])
# ... and update again, this time with renew_mask=True (node scaling is not repeated)
bonvis_fig.update_figure(ly_type=ly_type, 
                         geometry=geometry, 
                         node_style=node_style, 
                         # only show NK cells
                         renew_mask=True,
                          # scale_nodes=0.6666666666666666,
                         tweak_inds=1911, # change root # at node internal_1095
                         new_flip_id=True
                        )

bonsai_fig_darker_lines = bonvis_fig.create_figure(figsize=(6, 6))

# Written as both vector (svg) and raster (png) output
bonsai_fig_darker_lines.savefig("{}/bonsai_dendogram-NK_annot-darker_lines_full.svg".format(save_dir))
bonsai_fig_darker_lines.savefig("{}/bonsai_dendogram-NK_annot-darker_lines_full.png".format(save_dir))

In [None]:
# Colour the tree by a single gene
gene = "LYZ"  # highly expressed in myeloid cells
print(gene)

geometry = 'flat'
ly_type = "ly_dendrogram_ladderized"

# Take the node values from "data/normalized" and do not use the mask
# NOTE(review): assumes bonvis_fig.bonvis_settings and bonvis_settings are the same object -- confirm
bonvis_fig.bonvis_settings.node_style['feature_path'] = "data/normalized"
bonvis_settings.node_style['use_mask'] = False

gene_view = dict(
    ly_type=ly_type,
    geometry=geometry,
    node_style=gene,
    renew_mask=True,
    tweak_inds=1911,
)
bonvis_fig.update_figure(**gene_view)

outfig = bonvis_fig.create_figure(figsize=(6, 6))

svg_path = "{}/bonsai_dendogram-gene_{}-darker_lines_full.svg".format(save_dir, gene)
png_path = "{}/bonsai_dendogram-gene_{}-darker_lines_full.png".format(save_dir, gene)
for out_path in (svg_path, png_path):
    outfig.savefig(out_path)

In [None]:
# Stand-alone colorbar for the feature drawn in the previous cell
# (`gene` still holds that feature's name).
print(gene)
# Colorbar settings of the annotation currently used as node style
annot_info = bonvis_settings.node_style['annot_info']
cbar_info = annot_info.cbar_info

fig_cbar = plt.figure(figsize=(2, 4))
ax_cbar = fig_cbar.add_subplot(111)

# The bar itself is drawn on a plain 0-1 scale; the data values only appear
# on the tick labels.
norm = colors.Normalize(vmin=0, vmax=1)
mappable = cm.ScalarMappable(norm=norm, cmap=cbar_info['cmap'])
cbar = plt.colorbar(mappable, cax=ax_cbar, orientation='vertical')

print(cbar.ax.get_yticks())
# Five evenly spaced tick positions between the two ends of the bar
tick_list = [mappable.colorbar.vmin + t * (mappable.colorbar.vmax - mappable.colorbar.vmin) for t in [0,0.25, 0.5,0.75,1]]

# cbar_to_numb() reads min_val, max_val and cbar_info from the notebook namespace
min_val = cbar_info['vmin']
max_val = cbar_info['vmax']

# Two significant digits per label. The former '{:.2e}' labels were overwritten
# immediately by this line and are no longer computed.
tick_labels = ['{:.2}'.format(cbar_to_numb(tick)) for tick in tick_list]
cbar.set_ticks(tick_list)
cbar.set_ticklabels(tick_labels)
cbar.set_label('gene expression', fontsize=12, labelpad=10)
ax_cbar.set_title(gene)
fig_cbar.tight_layout()

colorbar_svg = "{}/colorbar-gene_{}.svg".format(save_dir, gene)
print('saving {}'.format(colorbar_svg))
fig_cbar.savefig(colorbar_svg)
fig_cbar.savefig("{}/colorbar-gene_{}.png".format(save_dir, gene))


In [None]:
# Colour the tree by a single transcript
gene = "IL7R_trscrpt2"  # highly expressed in lymphoid cells
print(gene)

geometry = 'flat'
ly_type = "ly_dendrogram_ladderized"

# Take the node values from "data/normalized" and do not use the mask
# NOTE(review): assumes bonvis_fig.bonvis_settings and bonvis_settings are the same object -- confirm
bonvis_fig.bonvis_settings.node_style['feature_path'] = "data/normalized"
bonvis_settings.node_style['use_mask'] = False

gene_view = dict(
    ly_type=ly_type,
    geometry=geometry,
    node_style=gene,
    renew_mask=True,
    tweak_inds=1911,
)
bonvis_fig.update_figure(**gene_view)

outfig = bonvis_fig.create_figure(figsize=(6, 6))

svg_path = "{}/bonsai_dendogram-gene_{}-darker_lines_full.svg".format(save_dir, gene)
png_path = "{}/bonsai_dendogram-gene_{}-darker_lines_full.png".format(save_dir, gene)
for out_path in (svg_path, png_path):
    outfig.savefig(out_path)

In [None]:
# Stand-alone colorbar for the feature drawn in the previous cell
# (`gene` still holds that feature's name).
print(gene)
# Colorbar settings of the annotation currently used as node style
annot_info = bonvis_settings.node_style['annot_info']
cbar_info = annot_info.cbar_info

fig_cbar = plt.figure(figsize=(2, 4))
ax_cbar = fig_cbar.add_subplot(111)

# The bar itself is drawn on a plain 0-1 scale; the data values only appear
# on the tick labels.
norm = colors.Normalize(vmin=0, vmax=1)
mappable = cm.ScalarMappable(norm=norm, cmap=cbar_info['cmap'])
cbar = plt.colorbar(mappable, cax=ax_cbar, orientation='vertical')

print(cbar.ax.get_yticks())
# Five evenly spaced tick positions between the two ends of the bar
tick_list = [mappable.colorbar.vmin + t * (mappable.colorbar.vmax - mappable.colorbar.vmin) for t in [0,0.25, 0.5,0.75,1]]

# cbar_to_numb() reads min_val, max_val and cbar_info from the notebook namespace
min_val = cbar_info['vmin']
max_val = cbar_info['vmax']

# Two significant digits per label. The former '{:.2e}' labels were overwritten
# immediately by this line and are no longer computed.
tick_labels = ['{:.2}'.format(cbar_to_numb(tick)) for tick in tick_list]
cbar.set_ticks(tick_list)
cbar.set_ticklabels(tick_labels)
cbar.set_label('gene expression', fontsize=12, labelpad=10)
ax_cbar.set_title(gene)
fig_cbar.tight_layout()

colorbar_svg = "{}/colorbar-gene_{}.svg".format(save_dir, gene)
print('saving {}'.format(colorbar_svg))
fig_cbar.savefig(colorbar_svg)
fig_cbar.savefig("{}/colorbar-gene_{}.png".format(save_dir, gene))


In [None]:
# Colour the tree by a single antibody (the name is kept in `gene` because the
# colorbar cell that follows reads it from there)
gene = "CD11c"
print(gene)

geometry = 'flat'
ly_type = "ly_dendrogram_ladderized"

# Take the node values from "data/at_cnts_CLR_transformed" and do not use the mask
# NOTE(review): assumes bonvis_fig.bonvis_settings and bonvis_settings are the same object -- confirm
bonvis_fig.bonvis_settings.node_style['feature_path'] = "data/at_cnts_CLR_transformed"
bonvis_settings.node_style['use_mask'] = False

antibody_view = dict(
    ly_type=ly_type,
    geometry=geometry,
    node_style=gene,
    renew_mask=True,
    tweak_inds=1911,
)
bonvis_fig.update_figure(**antibody_view)

outfig = bonvis_fig.create_figure(figsize=(6, 6))

svg_path = "{}/bonsai_dendogram-antibody_{}-darker_lines_full.svg".format(save_dir, gene)
png_path = "{}/bonsai_dendogram-antibody_{}-darker_lines_full.png".format(save_dir, gene)
for out_path in (svg_path, png_path):
    outfig.savefig(out_path)

In [None]:
# Stand-alone colorbar for the antibody drawn in the previous cell
# (`gene` still holds that feature's name).
print(gene)
# Colorbar settings of the annotation currently used as node style
annot_info = bonvis_settings.node_style['annot_info']
cbar_info = annot_info.cbar_info

fig_cbar = plt.figure(figsize=(2, 4))
ax_cbar = fig_cbar.add_subplot(111)

# The bar itself is drawn on a plain 0-1 scale; the data values only appear
# on the tick labels.
norm = colors.Normalize(vmin=0, vmax=1)
mappable = cm.ScalarMappable(norm=norm, cmap=cbar_info['cmap'])
cbar = plt.colorbar(mappable, cax=ax_cbar, orientation='vertical')

print(cbar.ax.get_yticks())
# Five evenly spaced tick positions between the two ends of the bar
tick_list = [mappable.colorbar.vmin + t * (mappable.colorbar.vmax - mappable.colorbar.vmin) for t in [0,0.25, 0.5,0.75,1]]

# cbar_to_numb() reads min_val, max_val and cbar_info from the notebook namespace
min_val = cbar_info['vmin']
max_val = cbar_info['vmax']

# Two significant digits per label. The former '{:.2e}' labels were overwritten
# immediately by this line and are no longer computed.
tick_labels = ['{:.2}'.format(cbar_to_numb(tick)) for tick in tick_list]
cbar.set_ticks(tick_list)
cbar.set_ticklabels(tick_labels)
cbar.set_label('normalized surface protein expression', fontsize=12, labelpad=10)
ax_cbar.set_title(gene)
fig_cbar.tight_layout()

colorbar_svg = "{}/colorbar-antibody_{}.svg".format(save_dir, gene)
print('saving {}'.format(colorbar_svg))
fig_cbar.savefig(colorbar_svg)
fig_cbar.savefig("{}/colorbar-antibody_{}.png".format(save_dir, gene))


In [None]:
# Colour the tree by a single antibody (the name is kept in `gene` because the
# colorbar cell that follows reads it from there)
gene = "CD3"
print(gene)

geometry = 'flat'
ly_type = "ly_dendrogram_ladderized"

# Take the node values from "data/at_cnts_CLR_transformed" and do not use the mask
# NOTE(review): assumes bonvis_fig.bonvis_settings and bonvis_settings are the same object -- confirm
bonvis_fig.bonvis_settings.node_style['feature_path'] = "data/at_cnts_CLR_transformed"
bonvis_settings.node_style['use_mask'] = False

antibody_view = dict(
    ly_type=ly_type,
    geometry=geometry,
    node_style=gene,
    renew_mask=True,
    tweak_inds=1911,
)
bonvis_fig.update_figure(**antibody_view)

outfig = bonvis_fig.create_figure(figsize=(6, 6))

svg_path = "{}/bonsai_dendogram-antibody_{}-darker_lines_full.svg".format(save_dir, gene)
png_path = "{}/bonsai_dendogram-antibody_{}-darker_lines_full.png".format(save_dir, gene)
for out_path in (svg_path, png_path):
    outfig.savefig(out_path)

In [None]:
# Stand-alone colorbar for the antibody drawn in the previous cell
# (`gene` still holds that feature's name).
print(gene)
# Colorbar settings of the annotation currently used as node style
annot_info = bonvis_settings.node_style['annot_info']
cbar_info = annot_info.cbar_info

fig_cbar = plt.figure(figsize=(2, 4))
ax_cbar = fig_cbar.add_subplot(111)

# The bar itself is drawn on a plain 0-1 scale; the data values only appear
# on the tick labels.
norm = colors.Normalize(vmin=0, vmax=1)
mappable = cm.ScalarMappable(norm=norm, cmap=cbar_info['cmap'])
cbar = plt.colorbar(mappable, cax=ax_cbar, orientation='vertical')

print(cbar.ax.get_yticks())
# Five evenly spaced tick positions between the two ends of the bar
tick_list = [mappable.colorbar.vmin + t * (mappable.colorbar.vmax - mappable.colorbar.vmin) for t in [0,0.25, 0.5,0.75,1]]

# cbar_to_numb() reads min_val, max_val and cbar_info from the notebook namespace
min_val = cbar_info['vmin']
max_val = cbar_info['vmax']

# Two significant digits per label. The former '{:.2e}' labels were overwritten
# immediately by this line and are no longer computed.
tick_labels = ['{:.2}'.format(cbar_to_numb(tick)) for tick in tick_list]
cbar.set_ticks(tick_list)
cbar.set_ticklabels(tick_labels)
cbar.set_label('normalized surface protein expression', fontsize=12, labelpad=10)
ax_cbar.set_title(gene)
fig_cbar.tight_layout()

colorbar_svg = "{}/colorbar-antibody_{}.svg".format(save_dir, gene)
print('saving {}'.format(colorbar_svg))
fig_cbar.savefig(colorbar_svg)
fig_cbar.savefig("{}/colorbar-antibody_{}.png".format(save_dir, gene))


#### myeloid and lymphoid clades

In [None]:
# Myeloid clade, described as a "subtree" subset
# NOTE(review): (9008, 14099) presumably identifies the edge/node pair that cuts
# off the clade -- confirm against the bonsai_scout documentation
curr_subset = {'type': "subtree", 'info': (9008, 14099)}

# All cells inside that subtree (not only NK cells, despite the variable names)
nk_cell_inds_group1 = bonvis_fig.get_cell_inds_in_subset(curr_subset)[0]
nk_cell_ids_group1 = np.array(bonvis_metadata.cell_ids)[nk_cell_inds_group1]

# Switch the mask on for the clade and redraw with the current node style
curr_subset['mask_is_on'] = True
bonvis_fig.set_mask_for_subset([curr_subset])

clade_view = dict(
    ly_type=ly_type,
    geometry=geometry,
    node_style=node_style,
    renew_mask=True,
    tweak_inds=1911,  # change root
)
bonvis_fig.update_figure(**clade_view)
bonsai_fig_darker_lines = bonvis_fig.create_figure(figsize=(6, 6))

In [None]:
# Lymphoid clade, described as a "subtree" subset
# NOTE(review): (17988, 26116) presumably identifies the edge/node pair that cuts
# off the clade -- confirm against the bonsai_scout documentation
curr_subset = {'type': "subtree", 'info': (17988, 26116)}

# All cells inside that subtree (not only NK cells, despite the variable names)
nk_cell_inds_group2 = bonvis_fig.get_cell_inds_in_subset(curr_subset)[0]
nk_cell_ids_group2 = np.array(bonvis_metadata.cell_ids)[nk_cell_inds_group2]

# Switch the mask on for the clade and redraw with the current node style
curr_subset['mask_is_on'] = True
bonvis_fig.set_mask_for_subset([curr_subset])

clade_view = dict(
    ly_type=ly_type,
    geometry=geometry,
    node_style=node_style,
    renew_mask=True,
    tweak_inds=1911,  # change root
)
bonvis_fig.update_figure(**clade_view)
bonsai_fig_darker_lines = bonvis_fig.create_figure(figsize=(6, 6))

#### Definition of myeloid and lymphoid NK cells

In [None]:
my_celltype = "NK"
group1 = "NK_myeloid"
group2 = "NK_lymphoid"
group3 = "other"

# Name of the new column
group = "NK_group"

# Sets give constant-time membership tests; `cb in <numpy array>` scans the
# whole array for every single cell.
myeloid_clade_ids = set(nk_cell_ids_group1)
lymphoid_clade_ids = set(nk_cell_ids_group2)

# NK cells are split by the clade they sit in; non-NK cells and NK cells
# outside both clades end up in "other". The myeloid clade is checked first.
ltq_post_est_sub_df[group] = [
    group3 if ct != my_celltype
    else group1 if cb in myeloid_clade_ids
    else group2 if cb in lymphoid_clade_ids
    else group3
    for cb, ct in zip(ltq_post_est_sub_df.index, ltq_post_est_sub_df[NK_def])
]

In [None]:
# Number of cells in each NK_group category
ltq_post_est_sub_df.groupby(group)[NK_def].count()

In [None]:
# Colour scheme: NK_lymphoid -> red, NK_myeloid -> blue, everything else -> grey.
# NOTE: the g1/g2 suffixes number the colours and do not follow group1/group2.

# red
color_NK_g1_html, color_NK_g1_rgba = "#FF0000", (1.0, 0.0, 0.0, 0.8)

# blue
color_NK_g2_html, color_NK_g2_rgba = "#0000FF", (0.0, 0.0, 1.0, 0.8)

# grey
color_other_html = '#808080'
color_other_rgba = (0.5019607843137255, 0.5019607843137255, 0.5019607843137255, 0.5)

# RGBA tuples per category
colors_rgba = dict(
    zip(
        ("NK_lymphoid", "NK_myeloid", "other"),
        (color_NK_g1_rgba, color_NK_g2_rgba, color_other_rgba),
    )
)
# Named matplotlib colours per category
NK_colors = dict(zip(("NK_lymphoid", "NK_myeloid", "other"), ("red", "blue", "grey")))



In [None]:
# Categories that occur in the NK_group column, in order of first appearance
cats = ltq_post_est_sub_df[group].unique().tolist()
print(cats)

# Tree colours: the two NK subgroups keep their RGBA colour, everything else
# is drawn in an almost transparent light gray.
annot_to_color = {subgroup: colors_rgba[subgroup] for subgroup in ('NK_lymphoid', 'NK_myeloid')}
annot_to_color['other'] = (0.8274509803921568, 0.8274509803921568, 0.8274509803921568, 0.1)  # lightgray

# A categorical annotation has no colorbar, so every entry stays None.
# NOTE: this replaces the notebook-level `cbar_info` that cbar_to_numb() reads.
cbar_info = dict.fromkeys(('cmap', 'vmin', 'vmax', 'log'))

label = "NK_group"
info_key = "annot_NK_group"
annot_type = 'cellstates' 
info_object = 'cs_info_dict'

annotation_kwargs = dict(
    cats=cats,
    annot_to_color=annot_to_color,
    label=label,
    cbar_info=cbar_info,
    annot_type=annot_type,
    color_type='categorical',
    info_object=info_object,
    info_key=info_key,
)
cl_annot = Annotation_info(**annotation_kwargs)
cl_annot.to_dict()

In [None]:
# Attach the per-cell NK_group labels to the Bonsai metadata.
# NOTE(review): assumes the rows of ltq_post_est_sub_df are in the cell order the
# metadata expects -- confirm
bonvis_metadata.cs_info['cs_info_dict'][info_key] = ltq_post_est_sub_df[group].tolist()

# Make the new annotation the active one and refresh the cell -> category map.
# The second return value is not needed; it gets a named throw-away variable
# because a top-level `_` would shadow IPython's "last output" variable.
bonvis_settings.set_annot(annot_info=cl_annot)
bonvis_settings.cell_to_celltype, _unused_color_info = bonvis_fig.get_color_info(
    annot_info=bonvis_fig.bonvis_settings.node_style['annot_info']
)

# Register the annotation as one of the selectable alternatives. The guard
# keeps the cell idempotent: re-running it no longer appends a duplicate key.
bonvis_settings.celltype_info.annot_infos[info_key] = cl_annot
if info_key not in bonvis_settings.celltype_info.annot_alts:
    bonvis_settings.celltype_info.annot_alts.append(info_key)

celltype_info = bonvis_fig.bonvis_settings.celltype_info

In [None]:
myannot = "annot_NK_group"
sub1 = 'NK_lymphoid'
sub2 = 'NK_myeloid'

# Position of each subgroup in the annotation's category list
# (an IndexError here means the category does not exist)
annot_cats = celltype_info.annot_infos[myannot].cats
sub_idx1 = np.flatnonzero(np.array(annot_cats) == sub1)[0]
sub_idx2 = np.flatnonzero(np.array(annot_cats) == sub2)[0]

print(sub_idx1)
print(sub_idx2)

selected_annot1 = annot_cats[sub_idx1]
selected_annot2 = annot_cats[sub_idx2]
print(selected_annot1)
print(selected_annot2)

# One 'annot' subset per subgroup, each with its mask switched on
selected_subset1, selected_subset2 = (
    {'type': 'annot', 'info': selected, 'mask_is_on': True}
    for selected in (selected_annot1, selected_annot2)
)

selected_subset = [selected_subset1, selected_subset2]

In [None]:
# Tree coloured by the NK_group annotation, with the mask switched on for the
# two NK subgroups selected above
node_style = celltype_info.annot_infos['annot_NK_group'].label

geometry = 'flat'
ly_type = "ly_dendrogram_ladderized"

bonvis_fig.set_mask_for_subset(selected_subset)

subgroup_view = dict(
    ly_type=ly_type,
    geometry=geometry,
    node_style=node_style,
    renew_mask=True,
    tweak_inds=1911,
)
bonvis_fig.update_figure(**subgroup_view)

nk_new_bonsai_fig_darker_lines = bonvis_fig.create_figure(figsize=(6, 6))

svg_path = "{}/bonsai_dendogram-nk_lymphoid_myeloid-darker_lines_full.svg".format(save_dir)
png_path = "{}/bonsai_dendogram-nk_lymphoid_myeloid-darker_lines_full.png".format(save_dir)
for out_path in (svg_path, png_path):
    nk_new_bonsai_fig_darker_lines.savefig(out_path)

In [None]:
# Stand-alone legend for the NK subgroup colours.
# Fixed the misspelled label "NK_myleoid".
# NOTE(review): the third entry is labelled "rest" here but "other" everywhere else -- confirm
categories = [
    "NK_lymphoid",
    "NK_myeloid",
    "rest"
]

# One colour per legend entry, in the same order as `categories`
colors_nk_subgroups = [color_NK_g1_html, color_NK_g2_html, color_other_html]


# Marker-only handles: a white line with a filled circle in the category colour
legend_elements = [
    Line2D([0], [0], marker='o', color='w', markerfacecolor=color, markersize=10, label=category, alpha=0.8)
    for color, category in zip(colors_nk_subgroups, categories)
]


# Empty axes that carry nothing but the legend
fig, ax = plt.subplots(figsize=(6, 3))  
ax.axis('off') 
legend = ax.legend(handles=legend_elements, loc='center', frameon=False, ncol=1, fontsize=10)
plt.show()

### Antibody expression

In [None]:
# Antibody names are stored as a JSON string in the dataset attribute 'gene_ids'
antibody_names = json.loads(bonvis_data_hdf["data/at_cnts_CLR_transformed"].attrs['gene_ids'])
antibody_name_arr = np.array(antibody_names)

# Index arrays (not scalars) of the CD56 and CD16 entries
cd56_idx = np.nonzero(antibody_name_arr == "CD56")[0]
cd16_idx = np.nonzero(antibody_name_arr == "CD16")[0]

In [None]:
# CD56 vs CD16 scatter with marginal histograms, split by NK group.
# NOTE(review): the boolean masks come from ltq_post_est_sub_df, so its rows must be
# in the same cell order as the columns of the HDF dataset -- confirm
trnfs_antibodies = bonvis_data_hdf["data/at_cnts_CLR_transformed/means"][:]

s = 15  # marker size of the scatter points

# Per-group values of both antibodies
nk_group_col = ltq_post_est_sub_df.NK_group
cd56_by_group = {}
cd16_by_group = {}
for grp in (group1, group2, group3):
    in_group = (nk_group_col == grp).to_numpy()
    cd56_by_group[grp] = trnfs_antibodies[cd56_idx, in_group]
    cd16_by_group[grp] = trnfs_antibodies[cd16_idx, in_group]

fig = plt.figure(figsize=(8, 8))
grid = fig.add_gridspec(4, 4, hspace=0.1, wspace=0.1)

# Main panel: "other" goes first so that the NK groups are drawn on top
main_ax = fig.add_subplot(grid[1:, :-1])
for grp, opacity in ((group3, 0.3), (group1, 0.3), (group2, 0.4)):
    main_ax.scatter(
        x=cd56_by_group[grp],
        y=cd16_by_group[grp],
        label=grp,
        color=NK_colors[grp],
        s=s,
        alpha=opacity,
    )
main_ax.set_xlabel("CD56")
main_ax.set_ylabel("CD16")

# Drawing order and opacity shared by both marginal histograms
marginal_layers = ((group1, 0.5), (group2, 0.5), (group3, 0.6))

# Top histogram (X-axis)
x_hist = fig.add_subplot(grid[0, :-1], sharex=main_ax)
for grp, opacity in marginal_layers:
    x_hist.hist(
        cd56_by_group[grp],
        bins=20,
        label=grp,
        color=NK_colors[grp],
        alpha=opacity,
        density=True,
    )
x_hist.axis("off")

# Side histogram (Y-axis)
y_hist = fig.add_subplot(grid[1:, -1], sharey=main_ax)
for grp, opacity in marginal_layers:
    y_hist.hist(
        cd16_by_group[grp],
        bins=20,
        label=grp,
        color=NK_colors[grp],
        alpha=opacity,
        orientation="horizontal",
        density=True,
    )
y_hist.axis("off")

plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1)

svg_path = '{}/antibody_expr-scatter-stepfilled-antibody_CD56_CD16-with_hist.svg'.format(save_dir)
png_path = '{}/antibody_expr-scatter-stepfilled-antibody_CD56_CD16-with_hist.png'.format(save_dir)
for out_path in (svg_path, png_path):
    plt.savefig(out_path, bbox_inches='tight')

plt.show()


In [None]:
# Copy the NK_group labels onto the per-gene table (pandas aligns the values on
# the row index) so that the histograms below can split df_processed by group.
# NOTE: this adds a column to df_processed in place.
df_processed[group] = ltq_post_est_sub_df[group]

In [None]:
# top 4 marker genes for NK from cord blood
genes = ["NKG7", "GNLY", "CMC1", "CST7"]

# (group label, number of bins, extra hist() options) in drawing order;
# the "other" group uses matplotlib's default histogram type
hist_layers = [
    ("other", 20, {}),
    ("NK_lymphoid", 20, {"histtype": "stepfilled"}),
    ("NK_myeloid", 10, {"histtype": "stepfilled"}),
]

fig, ax = plt.subplots(2, 2, figsize=(6, 4), dpi=300)
ax = ax.flatten()

for panel, gene in zip(ax, genes):
    for label, n_bins, extra_options in hist_layers:
        subset = df_processed[group] == label
        x = df_processed.loc[subset, gene]
        panel.hist(
            x,
            bins=n_bins,
            label=label,
            color=NK_colors[label],
            alpha=0.5,
            density=True,
            **extra_options,
        )
    panel.set_title(gene)

# Axis captions shared by all panels
fig.text(0.5, 0.0, "log transcription quotients", ha='center', va='center')
fig.text(0, 0.5, 'density', ha='center', va='center', rotation='vertical')

plt.tight_layout()

gene_str = "_".join(genes)
# NOTE(review): the svg name says "genes_" while the png name says "gene_";
# kept as is so that existing output paths do not change
svg_path = '{}/marker_gene_expr-histogram-stepfilled-genes_{}.svg'.format(save_dir, gene_str)
png_path = '{}/marker_gene_expr-histogram-stepfilled-gene_{}.png'.format(save_dir, gene_str)
for out_path in (svg_path, png_path):
    plt.savefig(out_path, bbox_inches='tight')

plt.show()

In [None]:

# Genes compared between the two NK subgroups (values are taken straight from
# the posterior table, i.e. these columns are not summed over transcripts)
genes = ["FTL", "CST3", "CTSW", "TRBC2"]

# (group label, number of bins) in drawing order
hist_layers = [
    ("NK_lymphoid", 20),
    ("NK_myeloid", 10),
]

fig, ax = plt.subplots(2, 2, figsize=(6, 4), dpi=300)
ax = ax.flatten()

for panel, gene in zip(ax, genes):
    for label, n_bins in hist_layers:
        subset = ltq_post_est_sub_df[group] == label
        x = ltq_post_est_sub_df.loc[subset, gene]
        panel.hist(
            x,
            bins=n_bins,
            label=label,
            color=NK_colors[label],
            alpha=0.5,
            histtype="stepfilled",
            density=True,
        )
    panel.set_title(gene)

# Axis captions shared by all panels
fig.text(0.5, 0.0, "log transcription quotients", ha='center', va='center')
fig.text(0, 0.5, 'density', ha='center', va='center', rotation='vertical')

plt.tight_layout()

gene_str = "_".join(genes)
# NOTE(review): the svg name says "genes_" while the png name says "gene_";
# kept as is so that existing output paths do not change
svg_path = '{}/marker_gene_expr-histogram-stepfilled-genes_{}.svg'.format(save_dir, gene_str)
png_path = '{}/marker_gene_expr-histogram-stepfilled-gene_{}.png'.format(save_dir, gene_str)
for out_path in (svg_path, png_path):
    plt.savefig(out_path, bbox_inches='tight')

plt.show()