In [None]:
import numpy as np
import os
import pandas as pd
import sys
import matplotlib.pyplot as plt
import json

import h5py
import subprocess
import pandas as pd
import matplotlib.pyplot as plt
import csv

# Location of the local Bonsai checkout whose packages are imported below.
bnsi_path = '/scicore/home/nimwegen/degroo0000/Bonsai-data-representation'
# Only extend sys.path once, so re-running this cell in the same kernel does not
# accumulate duplicate entries.
if bnsi_path not in sys.path:
    sys.path.append(bnsi_path)
# Project-local helpers (presumably found through the sys.path entry added above).
from bonsai_scout.bonsai_scout_helpers import Bonvis_figure, Bonvis_settings, Bonvis_metadata

from downstream_analyses.average_over_groups import run_averaging

## Path to data

In [None]:
# Root directory of the Tabula Muris run; the other data paths in this notebook are built from it.
path_to_output = "/scicore/home/nimwegen/GROUP/Projects/bonsai_runs/bonsai_scout_public/Tabula_Muris"

# Sub-directory of the run that contains the Bonsai results.
bonsai_results = os.path.join(
    path_to_output,
    'Bonsai',
)

### Make first Bonsai tree

In [None]:
# %%capture
# Show which results directory is being used.
print(bonsai_results)

# TODO: maybe change the path to the Bonsai results. Commented-out alternatives:
# data_path = os.path.join(bonsai_results, 'bonsai_vis_data-OLD.hdf')
# settings_path = os.path.join(bonsai_results, 'bonsai_vis_settings-OLD.json')
settings_path = os.path.join(bonsai_results, 'bonsai_vis_settings.json')
data_path = os.path.join(bonsai_results, 'bonsai_vis_data.hdf')

print("read in file: {}".format(data_path))

# Build the three inputs of the figure: metadata, settings and the HDF5 data handle.
bonvis_metadata = Bonvis_metadata(data_path)
bonvis_settings = Bonvis_settings(load_settings_path=settings_path)
# Opened read-only and kept open: later cells read from this handle.
bonvis_data_hdf = h5py.File(data_path, 'r')

bonvis_fig = Bonvis_figure(
    bonvis_data_hdf,
    bonvis_metadata,
    bonvis_data_path=data_path,
    bonvis_settings=bonvis_settings,
)

# Keep a shortcut to the annotation info; later cells look up annotations through it.
celltype_info = bonvis_fig.bonvis_settings.celltype_info
bonvis_fig.create_figure(figsize=(6, 6))

In [None]:
# Display the available annotations; later cells index this mapping by annotation
# name (e.g. 'annot_tissue') and read the entry's `.label` and `.cats`.
celltype_info.annot_infos

In [None]:
# Pick the tissue annotation as the node colouring for this dataset.
tissue_annot = celltype_info.annot_infos['annot_tissue']
node_style = tissue_annot.label

# Redraw the tree with layout 'ly_eq_angle' in hyperbolic geometry, coloured by tissue.
# NOTE(review): the earlier comment called this the "equal-daylight" layout, but the
# layout id passed here is 'ly_eq_angle' - confirm which one is intended.
bonvis_fig.update_figure(
    node_style=node_style,
    zoom=0.6597539553864471,
    ly_type='ly_eq_angle',
    geometry='hyperbolic',
)
a = bonvis_fig.create_figure(figsize=(6, 6))

In [None]:
# Save the tissue-coloured tree as SVG and as a 300-dpi PNG.
# The target directory is created first so the saves do not fail with
# FileNotFoundError on a fresh checkout where "figures/" does not exist yet.
os.makedirs("figures", exist_ok=True)
a.savefig("figures/bonsai_tabula_muris_tissue_annot.svg")
a.savefig("figures/bonsai_tabula_muris_tissue_annot.png", dpi=300)

## Tree with liver cells highlighted

In [None]:
# Annotation to work with, and the category of that annotation to highlight.
myannot, sub = "annot_tissue", 'Liver'

In [None]:
# Find the position of the requested category among the annotation's categories
# (first match), then fetch the category entry stored at that position.
annot_cats = celltype_info.annot_infos[myannot].cats
matches = np.argwhere(np.array(annot_cats) == sub)
sub_idx1 = matches[0][0]
selected_annot1 = annot_cats[sub_idx1]
print(selected_annot1)

In [None]:
# Subset description: select by annotation category, with the mask switched on.
selected_subset1 = {
    'type': 'annot',
    'info': selected_annot1,
    'mask_is_on': True,
}

In [None]:
# Wrap the single subset description in a list, the form passed to set_mask_for_subset below.
selected_subset = [selected_subset1]

In [None]:
# Apply the mask for the selected subset, using the current annotation's label,
# then rebuild the mask and redraw the figure.
annot_label = celltype_info.annot_infos[myannot].label
bonvis_fig.set_mask_for_subset(
    selected_subset,
    curr_categorical_annot=annot_label,
)
bonvis_fig.update_figure(renew_mask=True)

a = bonvis_fig.create_figure(figsize=(6, 6))

In [None]:
# a.savefig("figures/bonsai_tabula_muris_liver_mask.svg")
# a.savefig("figures/bonsai_tabula_muris_liver_mask.png", dpi=300)

In [None]:
# Switch the mask off again by passing an empty subset description.
mask_off = [{'type': None, 'info': None, 'mask_is_on': False}]
annot_label = celltype_info.annot_infos[myannot].label
bonvis_fig.set_mask_for_subset(mask_off, curr_categorical_annot=annot_label)

# Move the view origin to the given coordinates and redraw.
focus_origin = np.array([0.35844828, -0.63362069])
bonvis_fig.update_figure(origin=focus_origin, renew_mask=True)
a = bonvis_fig.create_figure(figsize=(6, 6))

In [None]:
# Save the re-centred tree as SVG and as a 300-dpi PNG.
# The target directory is created first so the saves do not fail with
# FileNotFoundError on a fresh checkout where "figures/" does not exist yet.
os.makedirs("figures", exist_ok=True)
a.savefig("figures/bonsai_tabula_muris_liver_focus.svg")
a.savefig("figures/bonsai_tabula_muris_liver_focus.png", dpi=300)

### Make a legend for the tissue colors

In [None]:
# matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9.
# pyplot.get_cmap provides the same look-up-by-name call in old and new releases,
# so the name `get_cmap` stays available to the cells below.
from matplotlib.pyplot import get_cmap
from matplotlib.lines import Line2D

In [None]:
# Build a stand-alone legend figure: one round marker per tissue category.

# The 12 tissue categories listed in the legend, in display order.
categories = [
    "Bladder",
    "Heart",
    "Kidney",
    "Liver",
    "Lung",
    "Mammary",
    "Marrow",
    "Muscle",
    "Spleen",
    "Thymus",
    "Tongue",
    "Trachea",
]

# Take one tab20 colour per category. Integer arguments index the colour table
# directly, which avoids relying on float rounding (i / 20) to land on entry i.
# plt.get_cmap is used because matplotlib.cm.get_cmap no longer exists in Matplotlib >= 3.9.
cmap = plt.get_cmap("tab20")
colors = [cmap(i) for i in range(len(categories))]

# One legend handle per category: a white line with a filled circular marker.
legend_elements = [
    Line2D([0], [0], marker='o', color='w', markerfacecolor=color, markersize=10, label=category)
    for color, category in zip(colors, categories)
]

# Empty axes that only serve as a canvas for the legend.
fig, ax = plt.subplots(figsize=(6, 3))  # Adjust size as needed
ax.axis('off')

legend = ax.legend(handles=legend_elements, loc='center', frameon=False, ncol=1, fontsize=10)

# Writing the SVG is optional and off by default (it was disabled before); the printed
# message now reflects whether a file was actually written.
output_file = "figures/bonsai_tabula_muris_tissue_legend.svg"
save_legend = False
if save_legend:
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    fig.savefig(output_file, format="svg", bbox_inches='tight')
    print(f"Legend saved as {output_file}")
else:
    print(f"Legend not saved (save_legend is False); target would be {output_file}")

# Display the legend
plt.show()

## Get marker genes for subtree with only liver cells

In [None]:
# Describe the subtree of interest. NOTE(review): the 'info' tuple presumably
# identifies the subtree by a pair of vertex ids - confirm against get_cell_inds_in_subset.
subset = {
    'type': 'subtree',
    'info': (69624, 75562),
    'mask_is_on': False,
}

# Only the first returned value (the cell indices) is kept; the second is discarded.
subtree_inds, _ = bonvis_fig.get_cell_inds_in_subset(
    subset,
    curr_categorical_annot=celltype_info.annot_infos[myannot].label,
    return_vert_inds=False,
)
subtree_inds

In [None]:
# Directory with the Sanity output of this run.
sanity_out_path = os.path.join(path_to_output, 'Sanity')

# Load the two posterior arrays of the final Bonsai tree.
# NOTE(review): file names suggest vertex-by-gene values and their variances - confirm.
deltas_vg, d_deltas_sq_vg = (
    np.load(os.path.join(path_to_output, rel_path))
    for rel_path in (
        "Bonsai/final_bonsai_zscore1.0_tmpStartpremerged_cs_tree/posterior_ltqs_vertByGene.npy",
        "Bonsai/final_bonsai_zscore1.0_tmpStartpremerged_cs_tree/posterior_ltqsVars_vertByGene.npy",
    )
)

In [None]:
# cellIDs = pd.read_csv(os.path.join(sanity_out_path, "cellID.txt"), header=None, names=["cellID"])
# geneIDs = pd.read_csv(os.path.join(sanity_out_path, "geneID.txt"), header=None, names=["geneID"])

In [None]:
# Look up the cell-index -> vertex-index mapping stored in the metadata.
cell_info_dict = bonvis_metadata.cell_info['cell_info_dict']
cell_ind_to_vert_ind = cell_info_dict['cell_ind_to_vert_ind']

In [None]:
# Select, for every cell, the row of its vertex from both per-vertex arrays.
vert_inds_of_cells = np.array(cell_ind_to_vert_ind)
deltas_cg = deltas_vg[vert_inds_of_cells, :]
d_deltas_sq_cg = d_deltas_sq_vg[vert_inds_of_cells, :]

In [None]:
# Split all cell indices into the cells inside the selected subtree ('liver clade')
# and all remaining cells ('rest'); both lists stay in ascending index order.
# A set is built once so each membership check is O(1) instead of a scan over
# subtree_inds for every cell.
subtree_ind_set = set(np.asarray(list(subtree_inds)).ravel().tolist())
clusters = {'liver clade': [], 'rest': []}
for ind in range(bonvis_metadata.n_cells):
    group = 'liver clade' if ind in subtree_ind_set else 'rest'
    clusters[group].append(ind)

In [None]:
# run on deltas and d_delta
avg_activities, avg_deltas, significance = run_averaging(activities=deltas_cg, 
                                                       deltas=np.sqrt(d_deltas_sq_cg), 
                                                       clusters=clusters, 
                                                       wms=bonvis_metadata.gene_ids)

In [None]:
# Per-gene score: squared difference between the two cluster averages, divided by the
# sum over clusters of the squared avg_deltas, then square-rooted (so it is non-negative).
clade_minus_rest = avg_activities.loc['liver clade', :] - avg_activities.loc['rest', :]
summed_sq_deltas = np.sum(avg_deltas ** 2, axis=0)
zscores = np.sqrt(clade_minus_rest ** 2 / summed_sq_deltas)

In [None]:
# Rank genes from highest to lowest score and show the top 20.
ranked_scores = zscores.sort_values(ascending=False)
liver_markers = list(ranked_scores.index)
liver_markers[:20]

# Color by gene

## Gene expression

In [None]:
# The gene ids are stored as a JSON string in the attributes of 'data/normalized'.
gene_ids_json = bonvis_data_hdf['data/normalized'].attrs['gene_ids']
all_genes_list = json.loads(gene_ids_json)
print("number of genes: {}".format(len(all_genes_list)))

In [None]:
# List every gene id that contains the substring 'Gnmt'.
[gene_id for gene_id in all_genes_list if 'Gnmt' in gene_id]

In [None]:
# Build a fresh Bonvis_figure and colour the tree by the expression of one gene.
gene = "Gnmt"

data_path = os.path.join(bonsai_results, 'bonsai_vis_data.hdf')

settings_path = os.path.join(bonsai_results, 'bonsai_vis_settings.json')

# Close the HDF5 handle left over from the previous figure before re-opening the same
# file; otherwise that handle stays open for the rest of the kernel session.
if 'bonvis_data_hdf' in globals():
    bonvis_data_hdf.close()

print("read in file: {}".format(data_path))
bonvis_metadata = Bonvis_metadata(data_path)
bonvis_settings = Bonvis_settings(load_settings_path=settings_path)
bonvis_data_hdf = h5py.File(data_path, 'r')
bonvis_fig = Bonvis_figure(bonvis_data_hdf,
                           bonvis_metadata,
                           bonvis_data_path=data_path,
                           bonvis_settings=bonvis_settings)
# Re-derive the annotation info from the new figure (as done when the first figure was
# created) so this cell does not depend on state from the discarded figure.
celltype_info = bonvis_fig.bonvis_settings.celltype_info
bonvis_fig.create_figure(figsize=(6, 6))


# Point the node colouring at the 'data/normalized' feature set.
bonvis_fig.bonvis_settings.node_style['feature_path'] = "data/normalized"
# Alternative settings that were tried:
# geometry = 'flat'
# ly_type = 'ly_dendrogram'
# ly_type = "ly_dendrogram_ladderized"
ly_type = 'ly_eq_angle'
# Tissue label is kept in `node_style`; the figure below is coloured by `gene` instead.
node_style = celltype_info.annot_infos['annot_tissue'].label
bonvis_fig.update_figure(zoom=0.6597539553864471, origin=np.array([0.35844828, -0.63362069]), node_style=gene, ly_type=ly_type, geometry='hyperbolic')
outfig = bonvis_fig.create_figure(figsize=(6, 6))


In [None]:
# Save the gene-coloured tree as SVG and as a 300-dpi PNG.
# The target directory is created first so the saves do not fail with
# FileNotFoundError on a fresh checkout where "figures/" does not exist yet.
os.makedirs("figures", exist_ok=True)
outfig.savefig("figures/bonsai_tabula_muris_Gnmt_mrkr.svg")
outfig.savefig("figures/bonsai_tabula_muris_Gnmt_mrkr.png", dpi=300)