In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import torch
import anndata
import scanpy as sc

# With PKD

In [None]:
# Read the per-library cell-type count tables (Tangram outputs), one CSV each.
# The path order below matches the order of the names being unpacked.
control, pkd_1, pkd_2, pkd_3 = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/exports/humgen/cnovellarausell/SevtapSpatial/outputs/tangram_outputs_with_PKD/control_v1_sample_healthyMKA/Control_celltype_counts_denoised_zimmermanhumphreysmuto_highres.csv',
        '/exports/humgen/cnovellarausell/SevtapSpatial/outputs/tangram_outputs_with_PKD/pkd_1_v1/pkd_1_celltype_counts_denoised_zimmermanhumphreysmuto_highres.csv',
        '/exports/humgen/cnovellarausell/SevtapSpatial/outputs/tangram_outputs_with_PKD/pkd_2_v1/pkd_2_celltype_counts_denoised_zimmermanhumphreysmuto_highres.csv',
        '/exports/humgen/cnovellarausell/SevtapSpatial/outputs/tangram_outputs_with_PKD/pkd_3_v1/pkd_3_celltype_counts_denoised_zimmermanhumphreysmuto_highres.csv',
    )
)

In [None]:
# Tag every row of each table with the library (sample) it belongs to.
for frame, library_name in (
    (control, 'Control'),
    (pkd_1, 'pkd_1'),
    (pkd_2, 'pkd_2'),
    (pkd_3, 'pkd_3'),
):
    # A scalar is broadcast to every row of the frame.
    frame['library'] = library_name

In [None]:
# Coarse ("low-res") group name -> fine-grained cell-type labels collapsed into it.
# NOTE(review): 'B1 B lymph' appears twice and 'Spp1+ Resident Macrophages '
# ends with a space; both are kept exactly as found -- confirm against the
# labels actually present in the 'cell_type' column.
low_res = {
    'Immune': [
        'Neutro', 'NK', 'Macro', 'NKT1', 'B lymph', 'B1 B lymph', 'T lymph', 'DC',
        'Lyc6 low Macrophages', 'Lyc6 high Macrophages',
        'Spp1+ Resident Macrophages ', 'Mrc1+ Resident Macrophages',
        'Gzma+ NK', 'CD4+ Th17', 'CD4+ T regs', 'B1 B lymph',
        'T3/Follicular B lymph', 'T1 B lymph', 'Gzma low NK', 'CD4+ T lymph',
        'Gzma+ CD8+ T lymph', 'Memory B lymph',
    ],
    'Endo': [
        'Endo', 'Glom-Endo', 'Asc-Vasa-Recta', 'Vas-Efferens',
        'Desc-Vasa-Recta', 'Vas-Afferens',
    ],
    'PT': ['PTS1', 'PTS2', 'PTS3', 'PTS3T2'],
    'LOH': ['TAL', 'ATL', 'DTL', 'DTL-ATL'],
    'DCT': ['DCT', 'DCT-CNT'],
    'Fib': ['Fib', 'Myofib'],
}

# Inverted lookup: fine-grained label -> coarse group name.
low_res_rev = {}
for group_name, member_labels in low_res.items():
    for member_label in member_labels:
        low_res_rev[member_label] = group_name

In [None]:
# Add a 'low_res_cell_type' column to every library table: labels listed in
# `low_res` collapse to their group name, every other label is kept unchanged.
dfs = [control, pkd_1, pkd_2, pkd_3]
for df in dfs:
    # Build the whole column in one assignment. The earlier two-step version
    # finished with `df.low_res_cell_type.fillna(..., inplace=True)`, an
    # in-place fill on a column reached through attribute access; that is a
    # chained assignment which pandas warns about and which does not write
    # back to `df` when Copy-on-Write is enabled, leaving NaN for every
    # unmapped cell type.
    df['low_res_cell_type'] = df['cell_type'].map(low_res_rev).fillna(df['cell_type'])

In [None]:
# Stack the four library tables into one long table (row labels are not reset).
conc = pd.concat(objs=[control, pkd_1, pkd_2, pkd_3])

In [None]:
# Per-library sum of counts, repeated on every row of that library.
conc['total'] = conc.groupby('library')['counts'].transform('sum')

In [None]:
# Fraction of the library total contributed by each row.
conc['freq'] = conc['counts'].div(conc['total'])

In [None]:
# Display the concatenated table (with the 'total' and 'freq' columns) for inspection.
conc

In [None]:
# Sum the counts per (low-res cell type, library) pair into a flat table.
lowres_counts = (
    conc.groupby(['low_res_cell_type', 'library'])['counts']
    .sum()
    .reset_index()
)

# Normalise within each library so that the frequencies of a library add up to 1.
lowres_counts['total'] = lowres_counts.groupby('library')['counts'].transform('sum')
lowres_counts['freq'] = lowres_counts['counts'].div(lowres_counts['total'])

# Wide layout: one row per library, one column per low-res cell type.
low_res_conc = lowres_counts.pivot(
    index='library', columns='low_res_cell_type', values='freq'
)

In [None]:
# Display the library x low-res cell-type frequency table.
low_res_conc

In [None]:
# A (library, cell type) pair with no rows has no value after the pivot; use 0 for those.
low_res_conc = low_res_conc.fillna(0)

In [None]:
# Export the low-res frequency table to an Excel workbook.
low_res_conc.to_excel(
    excel_writer='/exports/humgen/cnovellarausell/SevtapSpatial/Notebooks/figures-ct-frequencies/withPKD/freqs_lowres_healthyMKA_sample.xlsx'
)

In [None]:
import pickle as pkl

# Load the pickled colour palette (indexed by cell-type name in later cells).
# The context manager closes the file handle, which the bare `open(...)` call
# passed straight to `pkl.load` never did.
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# from a trusted location.
with open("/exports/archive/hg-groep-peters/Healthy_Mouse_Atlas_Claudio/KidneyAtlas/predicted_celltype_palette_highres_2.pkl", "rb") as palette_file:
    palette = pkl.load(palette_file)

In [None]:
# Add (or overwrite) palette entries for these cell types.
extra_colours = {
    'Myofib': '#963b00',
    'CD4+ T lymph': '#c4bfeb',
    'Gzma+ CD8+ T lymph': '#88bf35',
    'Memory B lymph': '#a691b4',
    'NKT1': '#01a7f4',
    'Trans-PTC': '#321fa0',
    'PT-FR': '#03655c',
    'PT-R': '#059e3f',
    'PT-AcInj': '#615318',
    'PT-Inj': '#fc362f',
}
for cell_type_name, hex_colour in extra_colours.items():
    palette[cell_type_name] = hex_colour

In [None]:
# Reverse the row (library) order. Not idempotent: each re-run of this cell
# flips the order again.
low_res_conc = low_res_conc.iloc[::-1, :]

In [None]:
# List the cell-type columns; the next cell reorders them by name.
low_res_conc.columns

In [None]:
# Move the "Trans-PTC" column so that it directly follows "PT-AcInj", then draw
# and save the stacked horizontal bar chart of low-res cell-type frequencies.

# get current columns
cols = list(low_res_conc.columns)

# PT-related columns, listed for reference only: the insertion point below is
# anchored on "PT-AcInj" alone
pt_group = ["PT", "PT-R", "PT-FR", "PT-Inj", "PT-AcInj"]

# take Trans-PTC out of its current position ...
cols.remove("Trans-PTC")

# ... and put it back immediately after PT-AcInj
insert_pos = cols.index("PT-AcInj") + 1
cols.insert(insert_pos, "Trans-PTC")

# reorder dataframe
low_res_conc = low_res_conc[cols]

# Colours in column order, kept under their own name. Rebinding `palette`
# itself to this list (as was done before) discarded the name -> colour dict,
# so re-running this cell, or any later cell that indexes `palette` by
# cell-type name, failed with a TypeError.
lowres_colors = [palette[c] for c in cols]

# plot with the row order flipped relative to the current table
ax = low_res_conc.iloc[::-1].plot.barh(
    stacked=True, color=lowres_colors, grid=False, figsize=(20,5)
)
ax.legend(bbox_to_anchor=(1.0, 1.0))
# NOTE(review): 'heatlhy' in the file name looks like a typo for 'healthy';
# left unchanged so that existing output paths keep matching.
plt.savefig(
    '/exports/humgen/cnovellarausell/SevtapSpatial/Notebooks/figures-ct-frequencies/withPKD/freqs_lowres_heatlhyMKA_control_swapped.pdf',
    bbox_inches='tight'
)

In [None]:
# Flip the row (library) order once more and keep the flipped table.
low_res_conc = low_res_conc.iloc[::-1]

# Stacked horizontal bars, one bar per library, coloured through `palette`.
# NOTE(review): the preceding cell already reordered the columns and plotted
# the same rows, so this figure looks identical to the "_swapped" one --
# confirm that is intended.
lowres_bar_ax = low_res_conc.plot.barh(
    stacked=True,
    color=palette,
    grid=False,
    figsize=(20, 5),
)
lowres_bar_ax.legend(bbox_to_anchor=(1.0, 1.0))
plt.savefig(
    '/exports/humgen/cnovellarausell/SevtapSpatial/Notebooks/figures-ct-frequencies/withPKD/freqs_lowres_heatlhyMKA_control.pdf',
    bbox_inches='tight',
)

In [None]:
# Re-read the same four count tables from disk, so the high-res pass starts
# from frames without the columns added during the low-res pass.
control, pkd_1, pkd_2, pkd_3 = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/exports/humgen/cnovellarausell/SevtapSpatial/outputs/tangram_outputs_with_PKD/control_v1_sample_healthyMKA/Control_celltype_counts_denoised_zimmermanhumphreysmuto_highres.csv',
        '/exports/humgen/cnovellarausell/SevtapSpatial/outputs/tangram_outputs_with_PKD/pkd_1_v1/pkd_1_celltype_counts_denoised_zimmermanhumphreysmuto_highres.csv',
        '/exports/humgen/cnovellarausell/SevtapSpatial/outputs/tangram_outputs_with_PKD/pkd_2_v1/pkd_2_celltype_counts_denoised_zimmermanhumphreysmuto_highres.csv',
        '/exports/humgen/cnovellarausell/SevtapSpatial/outputs/tangram_outputs_with_PKD/pkd_3_v1/pkd_3_celltype_counts_denoised_zimmermanhumphreysmuto_highres.csv',
    )
)

In [None]:
# Rename a 'low_res_cell_type' column to 'highres_cell_type' where one exists.
# `DataFrame.rename` with a bare mapping relabels the row index, so the earlier
# calls never touched the columns; `columns=` makes the rename apply to them.
# Frames without that column are returned unchanged (missing labels are ignored).
# NOTE(review): the freshly re-read CSVs may not carry this column at all, in
# which case this cell has no effect -- confirm whether it is still needed.
column_renames = {'low_res_cell_type': 'highres_cell_type'}
control = control.rename(columns=column_renames)
pkd_1 = pkd_1.rename(columns=column_renames)
pkd_2 = pkd_2.rename(columns=column_renames)
pkd_3 = pkd_3.rename(columns=column_renames)

In [None]:
# Tag every row of each re-loaded table with its library (sample) name.
for frame, library_name in (
    (control, 'Control'),
    (pkd_1, 'pkd_1'),
    (pkd_2, 'pkd_2'),
    (pkd_3, 'pkd_3'),
):
    # A scalar is broadcast to every row of the frame.
    frame['library'] = library_name

In [None]:
# Grouping used for the high-res pass. The name `low_res` is reused, but the
# 'Immune' entry is disabled here, so the immune labels are not merged into a
# single group (only NKT1 and NK are merged, under 'NK').
low_res = {
    #'Immune': ['Neutro', 'Fib', 'NK', 'Macro', 'B lymph', 'T lymph', 'DC'],
    'Endo': [
        'Endo', 'Glom-Endo', 'Asc-Vasa-Recta', 'Vas-Efferens',
        'Desc-Vasa-Recta', 'Vas-Afferens',
    ],
    'PT': ['PTS1', 'PTS2', 'PTS3', 'PTS3T2'],
    'LOH': ['TAL', 'ATL', 'DTL', 'DTL-ATL'],
    'DCT': ['DCT', 'DCT-CNT'],
    'NK': ['NKT1', 'NK'],
}

In [None]:
# Inverted lookup: member label -> group name.
low_res_rev = {}
for group_name, member_labels in low_res.items():
    for member_label in member_labels:
        low_res_rev[member_label] = group_name

In [None]:
# Add a 'highres_cell_type' column to every library table: labels listed in
# `low_res` collapse to their group name, every other label is kept unchanged.
dfs = [control, pkd_1, pkd_2, pkd_3]
for df in dfs:
    # Build the whole column in one assignment. The earlier two-step version
    # finished with `df.highres_cell_type.fillna(..., inplace=True)`, an
    # in-place fill on a column reached through attribute access; that is a
    # chained assignment which pandas warns about and which does not write
    # back to `df` when Copy-on-Write is enabled, leaving NaN for every
    # unmapped cell type.
    df['highres_cell_type'] = df['cell_type'].map(low_res_rev).fillna(df['cell_type'])

In [None]:
# Stack the four library tables into one long table (row labels are not reset).
conc = pd.concat(objs=[control, pkd_1, pkd_2, pkd_3])

In [None]:
# Display the concatenated table for inspection.
conc

In [None]:
# Per-library sum of counts, repeated on every row of that library.
conc['total'] = conc.groupby('library')['counts'].transform('sum')

In [None]:
# Fraction of the library total contributed by each row.
conc['freq'] = conc['counts'].div(conc['total'])

In [None]:
# Sum the counts per (high-res cell type, library) pair into a flat table.
highres_counts = (
    conc.groupby(['highres_cell_type', 'library'])['counts']
    .sum()
    .reset_index()
)

# Normalise within each library so that the frequencies of a library add up to 1.
highres_counts['total'] = highres_counts.groupby('library')['counts'].transform('sum')
highres_counts['freq'] = highres_counts['counts'].div(highres_counts['total'])

# Wide layout: one row per library, one column per high-res cell type.
# The result is stored under the existing name `low_res_conc`, which the
# following cells read.
# NOTE(review): unlike the low-res table, no fillna(0) follows in the visible
# cells, so pairs without rows stay NaN in the export -- confirm intended.
low_res_conc = highres_counts.pivot(
    index='library', columns='highres_cell_type', values='freq'
)

In [None]:
# Export the high-res frequency table to an Excel workbook.
low_res_conc.to_excel(
    excel_writer='/exports/humgen/cnovellarausell/SevtapSpatial/Notebooks/figures-ct-frequencies/withPKD/freqs_highres_healthyMKA_sample.xlsx'
)

In [None]:
# Move the "Trans-PTC" column so that it directly follows "PT-AcInj", then draw
# and save the stacked horizontal bar chart of high-res cell-type frequencies.

# get current columns
cols = list(low_res_conc.columns)

# PT-related columns, listed for reference only: the insertion point below is
# anchored on "PT-AcInj" alone
pt_group = ["PT", "PT-R", "PT-FR", "PT-Inj", "PT-AcInj"]

# take Trans-PTC out of its current position ...
cols.remove("Trans-PTC")

# ... and put it back immediately after PT-AcInj
insert_pos = cols.index("PT-AcInj") + 1
cols.insert(insert_pos, "Trans-PTC")

# reorder dataframe
low_res_conc = low_res_conc[cols]

# Colours in column order, kept under their own name so that `palette` is not
# replaced by a list (which made this cell fail with a TypeError when re-run).
# NOTE(review): `palette` must still be the name -> colour mapping at this
# point; if an earlier cell has rebound it to a list, reload the palette first.
highres_colors = [palette[c] for c in cols]

# plot with the row order flipped relative to the current table
ax = low_res_conc.iloc[::-1].plot.barh(
    stacked=True, color=highres_colors, grid=False, figsize=(20,5)
)
ax.legend(bbox_to_anchor=(1.0, 1.0))
plt.savefig('/exports/humgen/cnovellarausell/SevtapSpatial/Notebooks/figures-ct-frequencies/withPKD/freqs_highres_healthyMKA_control_swappedTransPT.pdf', bbox_inches='tight')

In [None]:
# Flip the row (library) order and keep the flipped table.
low_res_conc = low_res_conc.iloc[::-1]

# Stacked horizontal bars, one bar per library, coloured through `palette`.
highres_bar_ax = low_res_conc.plot.barh(
    stacked=True,
    color=palette,
    grid=False,
    figsize=(20, 5),
)
highres_bar_ax.legend(bbox_to_anchor=(1.0, 1.0))
plt.savefig(
    '/exports/humgen/cnovellarausell/SevtapSpatial/Notebooks/figures-ct-frequencies/withPKD/freqs_highres_healthyMKA_control.pdf',
    bbox_inches='tight',
)