In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import scanpy as sc
import anndata as ad
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
adata = ad.read_zarr('/home/daniele/atlases/Human_Atlas_Harmonised.zarr/')

In [None]:
adata.obs['Dataset_ID'] = adata.obs.Dataset.astype(str) + adata.obs.Sample_ID.astype(str)

# Boxplots: per-patient cell-type abundance by radiotherapy status

In [None]:
# Collapse every treatment label into a binary flag: any label that mentions
# 'Radiotherapy' counts as treated, every other label as untreated.
mapping = {
    treatment: ('Radiotherapy' if 'Radiotherapy' in treatment else 'No Radiotherapy')
    for treatment in adata.obs.TreatmentType.unique()
}
adata.obs['Radio'] = adata.obs.TreatmentType.map(mapping)

In [None]:
df = adata.obs.loc[:, ['Dataset_ID','Radio','Level_3']]

In [None]:
palette = {
   "Radiotherapy": '#E99676',
   "No Radiotherapy": '#72B7A1'
}

In [None]:
# Overview figure: per-patient relative abundance of every Level_3 cell type,
# split by radiotherapy status. No cell-type filter is applied in this cell, so
# the unused `cells` list (whose labels did not match the Level_3 names used by
# the later panels) has been removed.

# Number of cells per (patient, cell type).
counts = (
    df.groupby(["Dataset_ID", "Level_3"])
      .size()
      .reset_index(name="count")
)

# Total number of cells per patient, used as the denominator below.
totals = (
    counts.groupby("Dataset_ID")["count"]
          .sum()
          .reset_index(name="total_cells")
)

counts = counts.merge(totals, on="Dataset_ID")
counts["rel_abundance"] = counts["count"] / counts["total_cells"]

# One radiotherapy label per patient, attached to every count row.
radio_per_patient = (
    df[["Dataset_ID", "Radio"]]
      .drop_duplicates()
)

counts = counts.merge(radio_per_patient, on="Dataset_ID", how="left")

plt.figure(figsize=(12, 6))
sns.boxplot(
    data=counts,
    x="Level_3",
    y="rel_abundance",
    hue="Radio",
    showfliers=False,
    # Same colours as the other radiotherapy panels in this notebook.
    palette=palette,
)
plt.xticks(rotation=90)
plt.xlabel("Cell type (Level_3)")
plt.ylabel("Relative abundance per patient")
plt.title("Per-patient Level_3 relative abundances by radiotherapy status")
plt.tight_layout()
plt.savefig('/home/daniele/figures_reply_dieter/barplot_radiotherapy_all_cells.pdf')


In [None]:
# Figure 4 panel: per-patient relative abundance of selected immune populations,
# split by radiotherapy status.
df = adata.obs.loc[:, ['Dataset_ID','Radio','Level_3']]
cells = ['CD4+ T Cell', 'CD8+ T Cell', 'Macrophage', 'Monocyte']

# Number of cells per (patient, cell type).
counts = (
    df.groupby(["Dataset_ID", "Level_3"])
      .size()
      .reset_index(name="count")
)

# The denominator is the total over ALL Level_3 types, so the cell-type filter
# below must stay after this step.
totals = (
    counts.groupby("Dataset_ID")["count"]
          .sum()
          .reset_index(name="total_cells")
)

counts = counts.merge(totals, on="Dataset_ID")
counts["rel_abundance"] = counts["count"] / counts["total_cells"]

radio_per_patient = df[["Dataset_ID", "Radio"]].drop_duplicates()
counts = counts.merge(radio_per_patient, on="Dataset_ID", how="left")

# Keep only the populations of interest and drop datasets whose ID contains "Schalck".
counts = counts[counts["Level_3"].isin(cells)]
counts = counts[~counts.Dataset_ID.str.contains("Schalck")]

plt.figure(figsize=(6, 8))
sns.boxplot(
    data=counts,
    x="Level_3",
    y="rel_abundance",
    hue="Radio",
    showfliers=False,
    palette=palette,
    order=cells
)
sns.stripplot(
    data=counts,
    x="Level_3",
    y="rel_abundance",
    hue="Radio",
    dodge=True,
    order=cells,
    # Explicit form of the gradient seaborn used to derive from color='black'
    # when hue is set; passing color= together with hue= is deprecated.
    palette='dark:black',
    s=2,
    alpha=0.7
)

plt.ylim(0, 0.45)
plt.xticks(rotation=90)
plt.xlabel("Cell type (Level_3)")
plt.ylabel("Relative abundance per patient")
plt.title("Per-patient Level_3 relative abundances by radiotherapy status")

# Both plot calls register legend entries; keep only the first two (boxplot) handles.
handles, labels = plt.gca().get_legend_handles_labels()
plt.legend(handles[:2], labels[:2], bbox_to_anchor=(1.02, 1), loc='upper left')

# Lay out AFTER the legend exists so the outside-anchored legend is accounted for.
plt.tight_layout()

plt.savefig(
    '/mnt/kkf2/Cell/AG-Saur/KKF2/Daniele/pdac_atlas_figures/figure4/barplot_radiotherapy_immune_cells.pdf',
    dpi=300,
    # Guarantees the legend outside the axes is not clipped in the saved file.
    bbox_inches='tight'
)


In [None]:
# Figure 4 panel: per-patient relative abundance of selected immune populations
# plus cancer-associated fibroblasts, split by radiotherapy status.
df = adata.obs.loc[:, ['Dataset_ID','Radio','Level_3']]
cells = ['CD4+ T Cell', 'CD8+ T Cell', 'Macrophage', 'Monocyte', 'Cancer Associated Fibroblast']

# Number of cells per (patient, cell type).
counts = (
    df.groupby(["Dataset_ID", "Level_3"])
      .size()
      .reset_index(name="count")
)

# The denominator is the total over ALL Level_3 types, so the cell-type filter
# below must stay after this step.
totals = (
    counts.groupby("Dataset_ID")["count"]
          .sum()
          .reset_index(name="total_cells")
)

counts = counts.merge(totals, on="Dataset_ID")
counts["rel_abundance"] = counts["count"] / counts["total_cells"]

radio_per_patient = df[["Dataset_ID", "Radio"]].drop_duplicates()
counts = counts.merge(radio_per_patient, on="Dataset_ID", how="left")

# Keep only the populations of interest and drop datasets whose ID contains "Schalck".
counts = counts[counts["Level_3"].isin(cells)]
counts = counts[~counts.Dataset_ID.str.contains("Schalck")]

plt.figure(figsize=(6, 8))
sns.boxplot(
    data=counts,
    x="Level_3",
    y="rel_abundance",
    hue="Radio",
    showfliers=False,
    palette=palette,
    order=cells
)

sns.stripplot(
    data=counts,
    x="Level_3",
    y="rel_abundance",
    hue="Radio",
    dodge=True,
    order=cells,
    # Explicit form of the gradient seaborn used to derive from color='black'
    # when hue is set; passing color= together with hue= is deprecated.
    palette='dark:black',
    s=2,
    alpha=0.7
)

plt.ylim(0, 0.45)
plt.xticks(rotation=90)
plt.xlabel("Cell type (Level_3)")
plt.ylabel("Relative abundance per patient")
plt.title("Per-patient Level_3 relative abundances by radiotherapy status")

# Both plot calls register legend entries; keep only the first two (boxplot) handles.
handles, labels = plt.gca().get_legend_handles_labels()
plt.legend(handles[:2], labels[:2], bbox_to_anchor=(1.02, 1), loc='upper left')

# Lay out AFTER the legend exists so the outside-anchored legend is accounted for.
plt.tight_layout()

plt.savefig(
    '/mnt/kkf2/Cell/AG-Saur/KKF2/Daniele/pdac_atlas_figures/figure4/barplot_radiotherapy_immune_and_caf_cells.pdf',
    dpi=300,
    # Guarantees the legend outside the axes is not clipped in the saved file.
    bbox_inches='tight'
)


In [None]:
# Figure 4 panel: per-patient abundance of endothelial Level_4 subtypes, expressed
# as a fraction of ALL cells of that patient, split by radiotherapy status.
# NOTE(review): unlike the Level_3 immune panels, datasets whose ID contains
# "Schalck" are not removed here - confirm this is intended.
df_all = adata.obs.loc[:, ['Dataset_ID','Radio','Level_4']]

# Denominator: total number of cells per patient, computed before any filtering.
totals = (
    df_all.groupby("Dataset_ID")
          .size()
          .reset_index(name="total_cells")
)

# Keep only cells whose Level_4 label contains "Endoth" (missing labels count as
# no match), then drop the category levels that no longer occur.
df_endo = df_all[df_all["Level_4"].str.contains("Endoth", na=False)].copy()
df_endo["Level_4"] = df_endo["Level_4"].astype("category").cat.remove_unused_categories()

# Number of endothelial cells per (patient, subtype).
counts = (
    df_endo.groupby(["Dataset_ID", "Level_4"])
           .size()
           .reset_index(name="count")
)

counts = counts.merge(totals, on="Dataset_ID")
counts["rel_abundance"] = counts["count"] / counts["total_cells"]

# One radiotherapy label per patient, attached to every count row.
radio_map = df_all[["Dataset_ID", "Radio"]].drop_duplicates()
counts = counts.merge(radio_map, on="Dataset_ID", how="left")

# x-axis order is fixed before the row drop below, so every subtype keeps its slot.
order = sorted(counts["Level_4"].unique())
# Sorts ascending and discards the last row, i.e. the single (patient, subtype)
# entry with the highest relative abundance.
# NOTE(review): presumably an outlier removal - confirm, and consider an explicit
# filter so the dropped sample is visible in the code.
counts = counts.sort_values('rel_abundance')
counts = counts.iloc[:-1]

plt.figure(figsize=(6, 8))

sns.boxplot(
    data=counts,
    x="Level_4",
    y="rel_abundance",
    hue="Radio",
    showfliers=False,
    palette=palette,
    order=order
)
# Individual patients overlaid on the boxes.
sns.stripplot(
    data=counts,
    x="Level_4",
    y="rel_abundance",
    hue="Radio",
    dodge=True,
    order=order,
    color='black',
    s=2,
    alpha=0.7
)

plt.xticks(rotation=90)
plt.xlabel("Endothelial subtype (Level_4)")
plt.ylabel("Relative abundance among all cells per patient")
plt.title("Per-patient endothelial abundances by radiotherapy status")
plt.ylim(0, 0.15)
# Both plot calls register legend entries; only the first two handles are kept.
handles, labels = plt.gca().get_legend_handles_labels()
plt.legend(handles[:2], labels[:2], bbox_to_anchor=(1.02, 1), loc='upper left')

plt.tight_layout()
plt.savefig(
    '/mnt/kkf2/Cell/AG-Saur/KKF2/Daniele/pdac_atlas_figures/figure4/barplot_radiotherapy_endothelial_cells.pdf',
    dpi=300
)
plt.show()


### Trajectory analysis (T cells)

In [None]:
# Select cells whose Level_3 label contains the substring 'T Cel' or 'Th', and
# write the subset to disk.
level3_labels = adata.obs.Level_3
is_t_like = level3_labels.str.contains('T Cel') | level3_labels.str.contains('Th')
t_cells = adata[is_t_like].copy()
t_cells.write_zarr('t_cells.zarr')

# Within that subset, cells whose Level_4 label contains 'Double' are stored
# separately.
is_double_positive = t_cells.obs.Level_4.str.contains('Double')
DP_T_cells = t_cells[is_double_positive].copy()
DP_T_cells.write_zarr('DP_T_cells.zarr')

In [None]:
t_cells = ad.read_zarr('t_cells.zarr')
DP_T_cells = ad.read_zarr('DP_T_cells.zarr')

In [None]:
from sklearn_ann.kneighbors.annoy import AnnoyTransformer  # noqa: F401


In [None]:
sc.pp.neighbors(t_cells, use_rep='scanvi_extended_atlas_emb', transformer=AnnoyTransformer(30))

In [None]:
sc.tl.diffmap(t_cells)

In [None]:
sc.pp.neighbors(t_cells, use_rep='X_diffmap', transformer=AnnoyTransformer(30))

In [None]:
sc.tl.paga(t_cells, groups='Level_4')

In [None]:
# Draw the PAGA graph of Level_4 groups in a circular tree layout.
# Note: set_figure_params changes scanpy's figure defaults for every later plot
# in this session, not just this one.
sc.set_figure_params(figsize=(4,4))   # global
sc.pl.paga(
    t_cells,
    color=["Level_4"],
    node_size_scale=0.2,
    edge_width_scale=0.2,
    fontoutline=0.2,
    layout='rt_circular',
    # NOTE(review): root=1 picks the root node for the tree layout - presumably
    # an index into the Level_4 groups; confirm which population this is.
    root=1,
)

In [None]:
sc.tl.umap(t_cells)

In [None]:
cells = ['CD4+ Naive T Cell', 'CD8+ Naive T Cell', 'Double Positive CD4+CD8+ T Cell']

In [None]:

sc.pl.diffmap(t_cells, color=['Level_3'], components = ['2,6'])
sc.pl.diffmap(t_cells, color=['Level_4'], groups = cells, components =  ['2,6'])

In [None]:
embedding_key = "t_diffmap_density_Level_4"

In [None]:
# Store a 2-D embedding built from columns 1 and 5 (0-based) of the diffusion map.
# NOTE(review): these appear to be the components plotted as '2,6' with
# sc.pl.diffmap - confirm the 0-based/1-based correspondence.
t_cells.obsm['X_diffmap_sel'] = t_cells.obsm['X_diffmap'][:, [1,5]]

In [None]:
sc.tl.embedding_density(t_cells, basis='diffmap_sel', groupby = "Level_4", key_added=embedding_key)

In [None]:
# One density panel per selected Level_4 population, each saved to its own PNG.
# The loop variable keeps the name `cell`; it stays bound in the notebook
# namespace after the loop finishes.
density_plot_options = dict(
    basis='diffmap_sel',
    key=embedding_key,
    add_outline=True,
    frameon=False,
    color_map='viridis',
    na_color='white',
    fg_dotsize=5,
    bg_dotsize=1,
    show=False,
)
for cell in cells:
    sc.pl.embedding_density(t_cells, group=cell, **density_plot_options)
    # Spaces in the population name are replaced by underscores in the file name.
    save_path = f'/mnt/kkf2/Cell/AG-Saur/KKF2/Daniele/pdac_atlas_figures/figure2/diffmap_density_{cell.replace(" ","_")}.png'
    plt.savefig(save_path, dpi=300)

# DP T cells similarity with mouse DP T cells

In [None]:
adata_mouse = ad.read_zarr('/home/daniele/atlases/Mouse_Atlas_Harmonised.zarr/')

In [None]:
DP_T_cells_mouse = adata_mouse[adata_mouse.obs.Level_4.str.contains('Double Positive')].copy()


In [None]:
# Upper-case the mouse gene names so they can be intersected with the human
# gene names, then restrict both objects to the shared names (same order).
# NOTE(review): this is a purely name-based match, not an ortholog mapping, and
# upper-casing could in principle create duplicate var_names - verify.
DP_T_cells_mouse.var_names = DP_T_cells_mouse.var_names.str.upper()
common_genes = DP_T_cells_mouse.var_names.intersection(DP_T_cells.var_names)
DP_T_cells = DP_T_cells[:, common_genes].copy()
DP_T_cells_mouse = DP_T_cells_mouse[:, common_genes].copy()

In [None]:
import numpy as np
from scipy.sparse import issparse

def get_score_vectorised(adata, gene_set):
    """Score every cell by its mean signed expression over a marker gene set.

    The score of a cell is ``(sum of positive markers - sum of negative markers)``
    divided by the number of markers that are present in ``adata.var_names``.
    Markers missing from ``adata.var_names`` are ignored.

    Parameters
    ----------
    adata
        AnnData-like object exposing ``var_names``, ``n_obs`` and
        ``adata[:, genes].X`` (dense array, ``np.matrix`` or scipy sparse).
    gene_set
        Mapping with the keys ``"positive"`` and ``"negative"``, each a list of
        gene names.

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``adata.n_obs``. All zeros when none of the
        requested genes is present.
    """

    def _summed_expression(genes):
        # Row sums over the requested genes as a flat float vector. np.asarray +
        # ravel normalises the (n, 1) np.matrix that sparse / matrix sums return,
        # so sparse input never has to be densified.
        if not genes:
            return np.zeros(adata.n_obs, dtype=float)
        row_sums = adata[:, genes].X.sum(axis=1)
        return np.asarray(row_sums, dtype=float).ravel()

    pos = [g for g in gene_set["positive"] if g in adata.var_names]
    neg = [g for g in gene_set["negative"] if g in adata.var_names]
    n_genes = len(pos) + len(neg)
    if n_genes == 0:
        return np.zeros(adata.n_obs, dtype=float)

    # Out-of-place float division: an in-place `/=` fails on integer count data.
    return (_summed_expression(pos) - _summed_expression(neg)) / n_genes

def get_cell_mask(adata, gene_set, n_cells=50):
    """Return a boolean mask marking the ``n_cells`` highest-scoring cells.

    Scores come from ``get_score_vectorised(adata, gene_set)``.

    Parameters
    ----------
    adata
        AnnData-like object passed through to ``get_score_vectorised``.
    gene_set
        Mapping with ``"positive"`` / ``"negative"`` gene lists.
    n_cells
        Number of top-scoring cells to select. Values above ``adata.n_obs``
        select every cell; values of zero or below select none.

    Returns
    -------
    numpy.ndarray
        Boolean array of length ``adata.n_obs``.
    """
    score = get_score_vectorised(adata, gene_set)
    mask = np.zeros(adata.n_obs, dtype=bool)

    # Clamp the request: argpartition raises when kth is out of bounds, and a
    # slice of [-0:] would silently select every cell instead of none.
    n_top = min(int(n_cells), adata.n_obs)
    if n_top <= 0:
        return mask

    idx = np.argpartition(score, -n_top)[-n_top:]
    mask[idx] = True
    return mask



In [None]:
# Marker definition for double-positive (CD4+CD8+) T cells.
gene_set = {
    "DP": {
        "positive": ["CD4", "CD8A"],
        "negative": []
    }
}

# Label written for the selected seed cells. It is spelled out here because the
# code previously read `cell`, a variable that only exists as a leftover of the
# density-plot loop, so the result depended on which cells had been run before.
# The value is the one that loop leaves behind when run in notebook order.
DP_SEED_LABEL = "Double Positive CD4+CD8+ T Cell"

# For each seed size, mark the top-scoring mouse cells with the DP label and
# every other cell as "Uncertain"; stored in obs["seed_<size>"].
for cell_n in (100, 500, 1000, 2000):
    seed = np.full(DP_T_cells_mouse.shape[0], "Uncertain", dtype=object)
    cell_mask = get_cell_mask(DP_T_cells_mouse, gene_set["DP"], n_cells=cell_n)
    seed[cell_mask] = DP_SEED_LABEL
    DP_T_cells_mouse.obs[f"seed_{cell_n}"] = seed

In [None]:
# One dot plot of the DP marker genes per seed size, grouped by the matching
# obs["seed_<size>"] column and drawn from the 'log_norm' layer.
for seed_size in (100, 500, 1000, 2000):
    sc.pl.dotplot(
        DP_T_cells_mouse,
        var_names=gene_set["DP"]["positive"],
        groupby=f"seed_{seed_size}",
        standard_scale='var',
        layer='log_norm',
    )

In [None]:
DP_T_cells_mouse = DP_T_cells_mouse[DP_T_cells_mouse.obs["seed_2000"] != "Uncertain"].copy()

In [None]:
DP_T_cells_mouse.write_zarr('DP_T_cells_mouse.zarr')
DP_T_cells_mouse = ad.read_zarr('DP_T_cells_mouse.zarr')

In [None]:
DP_T_cells_mouse.write_zarr('DP_T_cells_mouse.zarr')
DP_T_cells_mouse = ad.read_zarr('DP_T_cells_mouse.zarr')

In [None]:
DP_T_cells = ad.read_zarr('DP_T_cells.zarr')

In [None]:
DP_T_cells_mouse.var_names = DP_T_cells_mouse.var_names.str.upper()
common_genes = DP_T_cells_mouse.var_names.intersection(DP_T_cells.var_names)

In [None]:
DP_T_cells_mouse.var_names = DP_T_cells_mouse.var_names.str.upper()
common_genes = DP_T_cells_mouse.var_names.intersection(DP_T_cells.var_names)
DP_T_cells = DP_T_cells[:, common_genes].copy()
DP_T_cells_mouse = DP_T_cells_mouse[:, common_genes].copy()

In [None]:
# Restrict both species to the genes present in both objects (same gene order).
shared_genes = DP_T_cells.var_names.intersection(DP_T_cells_mouse.var_names)

human = DP_T_cells[:, shared_genes]
mouse = DP_T_cells_mouse[:, shared_genes]

# Per-gene mean over all cells of each species, flattened to a 1-D vector.
# NOTE(review): this averages .X - confirm .X holds the intended values
# (a 'log_norm' layer is used elsewhere in this notebook).
human_meta = np.asarray(human.X.mean(axis=0)).flatten()
mouse_meta = np.asarray(mouse.X.mean(axis=0)).flatten()





In [None]:
import numpy as np
import pandas as pd
import scipy.sparse as sp

def corr_to_meta(X, meta):
    """Pearson correlation of every row of ``X`` with the vector ``meta``.

    Parameters
    ----------
    X
        2-D matrix of shape (n_rows, n_features); dense array, ``np.matrix`` or
        scipy sparse (sparse input is densified).
    meta
        1-D reference vector of length n_features.

    Returns
    -------
    numpy.ndarray
        1-D float array of length n_rows with values in [-1, 1]. Rows with zero
        variance (or a constant ``meta``) yield NaN, because the correlation is
        undefined there.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D or its number of columns differs from ``len(meta)``.
    """
    if sp.issparse(X):
        X = X.toarray()
    X = np.asarray(X, dtype=float)
    meta = np.asarray(meta, dtype=float).ravel()
    if X.ndim != 2 or X.shape[1] != meta.shape[0]:
        raise ValueError(
            f"X must be 2-D with {meta.shape[0]} columns, got shape {X.shape}"
        )

    meta_centered = meta - meta.mean()
    X_centered = X - X.mean(axis=1, keepdims=True)

    # Normalising by the product of the centred norms gives the exact Pearson r.
    # Combining a population std (ddof=0) with an (n - 1) divisor would inflate
    # every value by n / (n - 1).
    denom = np.sqrt((X_centered ** 2).sum(axis=1)) * np.sqrt((meta_centered ** 2).sum())
    with np.errstate(divide="ignore", invalid="ignore"):
        corr = (X_centered @ meta_centered) / denom
    corr[denom == 0] = np.nan
    return corr


# Correlate each mouse cell's expression profile with the human mean profile.
mouse_to_human_corr = corr_to_meta(mouse.X, human_meta)

# Long-form table (one row per mouse cell).
df = pd.DataFrame(
    dict(correlation=mouse_to_human_corr, cells=DP_T_cells_mouse.obs_names)
)

# Index the values by cell name so the assignment to .obs aligns on obs_names.
corr_by_cell = pd.Series(df['correlation'].values, index=df['cells'].values)
DP_T_cells_mouse.obs['correlation_to_human_meta'] = corr_by_cell

In [None]:
# Work on copies so the per-species objects keep their embeddings.
DP_T_cells_tmp = DP_T_cells.copy()
DP_T_cells_mouse_tmp = DP_T_cells_mouse.copy()

# Drop all obsm / varm entries from the copies before concatenating.
DP_T_cells_tmp.obsm.clear()
DP_T_cells_mouse_tmp.obsm.clear()
DP_T_cells_tmp.varm.clear()
DP_T_cells_mouse_tmp.varm.clear()

# Stack human and mouse cells; obs['species'] records the origin of each cell.
# NOTE(review): obs names are not made unique here - verify that human and mouse
# cell names cannot collide.
DP_T_cells_total = ad.concat(
    [DP_T_cells_tmp, DP_T_cells_mouse_tmp],
    label='species',
    keys=['human', 'mouse']
)


In [None]:
# Use the 'log_norm' layer as the expression matrix, then PCA -> BBKNN neighbour
# graph with species as the batch key -> UMAP on that graph.
DP_T_cells_total.X = DP_T_cells_total.layers['log_norm'].copy()
sc.pp.pca(DP_T_cells_total, svd_solver='arpack')
sc.external.pp.bbknn(DP_T_cells_total, batch_key='species')
sc.tl.umap(DP_T_cells_total)

In [None]:
# `df` holds one correlation per mouse cell, keyed by mouse obs name; the Series
# is aligned to the combined object on the obs index.
# NOTE(review): cells without an entry in `df` (presumably all human cells) should
# end up as NaN - confirm, and check obs names are unique across species.
DP_T_cells_total.obs['correlation_to_human_meta'] = pd.Series(df['correlation'].values, index=df['cells'].values)

In [None]:
# Joint UMAP coloured by species; show=False keeps the figure open so it can be
# written to disk with plt.savefig right after.
sc.pl.umap(
    DP_T_cells_total, 
    color = 'species',
    size = 150,
    frameon = False,
    add_outline = True,
    na_color = 'white',
    show = False,
)
plt.savefig('/mnt/kkf2/Cell/AG-Saur/KKF2/Daniele/pdac_atlas_figures/figure5/umap_DP_T_cells_species.png', dpi=300)

In [None]:
# Joint UMAP coloured by each cell's correlation to the human mean profile, on a
# fixed diverging scale from -1 to 1; cells without a value are drawn in white.
sc.pl.umap(
    DP_T_cells_total, 
    color = 'correlation_to_human_meta',
    cmap = 'coolwarm',
    vmin = -1,
    vmax = 1,    
    frameon = False,
    add_outline = True,
    na_color = 'white',
    size=150,
    show = False,
)   
plt.savefig('/mnt/kkf2/Cell/AG-Saur/KKF2/Daniele/pdac_atlas_figures/figure5/umap_DP_T_cells_correlation_to_human_meta.png', dpi=300) 
# Unused sc.pl.umap options kept for reference (overlay of graph edges):
#edges = True,
#edges_color = 'black',
#edges_width = 0.1,