In [None]:
import liana as li
import scanpy as sc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import anndata as ad

In [None]:
# Load the dataset from an .h5ad file located in the current working directory.
adata = sc.read_h5ad('radiotherapy.h5ad')

In [None]:
adata

In [None]:
li.mt.show_methods()

In [None]:
from liana.method import singlecellsignalr, connectome, cellphonedb, natmi, logfc, cellchat, geometric_mean

In [None]:
# Overwrite `.X` with a copy of the `log_norm` layer so that `.X` and the layer
# passed to LIANA below hold the same values.
adata.X = adata.layers['log_norm'].copy()

# RT (radiotherapy-treated samples)

In [None]:
# Subset to radiotherapy-treated cells. `.copy()` makes `radio` a standalone
# AnnData rather than a view of `adata`: the rank aggregation below stores its
# result in `radio.uns`, and writing to a view makes anndata convert it to a
# copy implicitly (with an ImplicitModificationWarning).
radio = adata[adata.obs.Radiotherapy_Status == 'Radiotherapy'].copy()

In [None]:
# Run LIANA's rank aggregation on the RT subset. Cells are grouped by the
# `Level_4` annotation and expression is read from the `log_norm` layer
# (`use_raw=False`). The result table is read back from
# `radio.uns['liana_res']` in a later cell.
li.mt.rank_aggregate(radio, 
                     groupby='Level_4', use_raw=False, layer='log_norm',
                     resource_name='consensus',
                     expr_prop=0.1,
                     verbose=True)

In [None]:
radio

In [None]:
# Pull the LIANA result table for the RT subset out of `.uns`.
df = radio.uns['liana_res']

In [None]:
df.head()

In [None]:
# Drop interactions whose *source* is one of the three endocrine cell types.
df_subset = df[~(df.source.isin(['Alpha Cell', 'Beta Cell', 'Other Endocrine']))]

In [None]:
# Also drop interactions whose *target* is one of the three endocrine cell types.
# NOTE(review): `df_subset` is not used by any later cell visible in this
# notebook — confirm whether it was meant to be saved or analysed instead of `df`.
df_subset = df_subset[~(df_subset.target.isin(['Alpha Cell', 'Beta Cell', 'Other Endocrine']))]

In [None]:
# Save the RT result table. `index=False` is not passed, so the DataFrame index
# is written as the first CSV column.
# NOTE(review): this writes the unfiltered `df`, not `df_subset` — confirm intended.
df.to_csv('cellcell_comm_radio_all_resources.csv')

In [None]:
# Show the first 100 rows whose source label contains 'Malignant' and whose
# target label contains 'Endo'.
df[df.source.str.contains('Malignant') & (df.target.str.contains('Endo'))].head(100)

# NO-RT (samples without radiotherapy)

In [None]:
# Subset to every cell whose status is not 'Radiotherapy'. `.copy()` makes
# `non_radio` a standalone AnnData rather than a view of `adata`: the rank
# aggregation below stores its result in `non_radio.uns`, and writing to a view
# makes anndata convert it to a copy implicitly (with an
# ImplicitModificationWarning).
non_radio = adata[adata.obs.Radiotherapy_Status != 'Radiotherapy'].copy()

In [None]:
# Check which status values ended up in the non-RT subset (it is defined by
# `!= 'Radiotherapy'`, so any other value is included).
non_radio.obs.Radiotherapy_Status.value_counts()

In [None]:
# Run the same LIANA rank aggregation on the non-RT subset, with the same
# arguments as the RT run. The result table is read back from
# `non_radio.uns['liana_res']` in the next cell.
li.mt.rank_aggregate(non_radio, 
                     groupby='Level_4', use_raw=False, layer='log_norm',
                     resource_name='consensus',
                     expr_prop=0.1,
                     verbose=True)

In [None]:
# Pull the LIANA result table for the non-RT subset out of `.uns`.
df_non_radio = non_radio.uns['liana_res']

In [None]:
# Save the non-RT result table. `index=False` is not passed, so the DataFrame
# index is written as the first CSV column.
df_non_radio.to_csv('cellcell_comm_non_radio_all_resources.csv')

In [None]:
df_non_radio.head()

# Reload saved LIANA results and compare RT vs. non-RT

In [None]:
# Reload the result tables saved above. They were written with `to_csv`'s
# default `index=True`, so the first CSV column is the old DataFrame index.
# `index_col=0` restores it as the index instead of leaving a stray
# `Unnamed: 0` data column in the tables that are later stored in `.uns`.
df_non_radio = pd.read_csv('cellcell_comm_non_radio_all_resources.csv', index_col=0)
df_radio = pd.read_csv('cellcell_comm_radio_all_resources.csv', index_col=0)

In [None]:
pwd

In [None]:
# Rebuild both subsets so this section can run without repeating the LIANA
# runs above. Explicit copies are taken because the next cell writes into
# `.uns`; writing to an AnnData view would trigger an implicit view-to-copy
# conversion with a warning.
radio = adata[adata.obs.Radiotherapy_Status == 'Radiotherapy'].copy()
non_radio = adata[adata.obs.Radiotherapy_Status != 'Radiotherapy'].copy()

In [None]:
# Attach copies of the reloaded tables under the `liana_res` key, so the
# DataFrames held in `.uns` are independent of `df_radio` / `df_non_radio`.
radio.uns['liana_res'] = df_radio.copy()
non_radio.uns['liana_res'] = df_non_radio.copy()

In [None]:
# Columns that identify one interaction (cell pair + ligand/receptor pair).
key_cols = ["source", "target", "ligand_complex", "receptor_complex"]
# Identifier columns plus the per-condition metrics that get compared.
keep_cols = key_cols + [
    "magnitude_rank", "specificity_rank", "lr_probs", "lr_logfc",
    "cellphone_pvals", "cellchat_pvals",
]


def _tag_metrics(table, tag):
    """Keep `keep_cols` and append `_<tag>` to every non-identifier column name."""
    renamed = {col: f"{col}_{tag}" for col in keep_cols if col not in key_cols}
    return table[keep_cols].rename(columns=renamed)


r = _tag_metrics(df_radio, "rt")
n = _tag_metrics(df_non_radio, "nor")

# Inner join: only interactions scored in BOTH conditions are compared.
cmp = r.merge(n, on=key_cols, how="inner")

# Deltas are oriented so that a positive value means "stronger in RT".
# For the two rank columns that is non-RT minus RT.
cmp["d_mag"] = cmp["magnitude_rank_nor"] - cmp["magnitude_rank_rt"]
cmp["d_spec"] = cmp["specificity_rank_nor"] - cmp["specificity_rank_rt"]
cmp["d_prob"] = cmp["lr_probs_rt"] - cmp["lr_probs_nor"]
cmp["d_logfc"] = cmp["lr_logfc_rt"] - cmp["lr_logfc_nor"]

# An interaction counts as significant in a condition when either p-value
# column is below 0.05.
cmp["sig_rt"] = (cmp["cellphone_pvals_rt"] < 0.05) | (cmp["cellchat_pvals_rt"] < 0.05)
cmp["sig_nor"] = (cmp["cellphone_pvals_nor"] < 0.05) | (cmp["cellchat_pvals_nor"] < 0.05)

# Keep rows significant in at least one condition that also pass at least one
# of the three delta thresholds.
_significant = cmp["sig_rt"] | cmp["sig_nor"]
_shifted = (cmp["d_mag"] > 0.1) | (cmp["d_prob"] > 0.05) | (cmp["d_logfc"] > 0.5)
focus = cmp[_significant & _shifted]

In [None]:
# Show every column when displaying DataFrames (no column truncation).
pd.set_option('display.max_columns', None)

In [None]:
focus

In [None]:
# Work on an explicit copy: `focus` is the result of `DataFrame.query`, and
# adding columns to such a result can raise pandas' SettingWithCopyWarning.
focus = focus.copy()

# Smallest of the two p-value columns, per condition.
focus["p_rt"]  = focus[["cellphone_pvals_rt","cellchat_pvals_rt"]].min(axis=1)
focus["p_nor"] = focus[["cellphone_pvals_nor","cellchat_pvals_nor"]].min(axis=1)

# Z-score each delta so the three can be combined on a common scale.
for c in ["d_mag","d_prob","d_logfc"]:
    mu = focus[c].mean()
    sd = focus[c].std(ddof=0)
    # Fall back to 1.0 when the spread is zero or undefined. A plain
    # `sd or 1.0` misses the undefined case because NaN is truthy.
    if not np.isfinite(sd) or sd == 0:
        sd = 1.0
    focus[c+"_z"] = (focus[c] - mu) / sd

# Weighted sum of the z-scored deltas (weights 1.0 / 0.7 / 0.5).
focus["delta_score"] = (1.0*focus["d_mag_z"] + 0.7*focus["d_prob_z"] + 0.5*focus["d_logfc_z"])

# Keep interactions with p_rt below 0.05 and a positive combined score.
meaningful = focus.query("(p_rt < 0.05) and (delta_score > 0)")
top_global = meaningful.sort_values(["delta_score","d_mag","d_prob"], ascending=False).head(100)

In [None]:
top_global.head(10)

In [None]:
adata.obs.Level_3.value_counts()

In [None]:
def _level4_labels(obs_column, pattern):
    """Return the unique `Level_4` labels of cells whose `obs_column` value contains `pattern`.

    Works on `adata.obs` directly, so no AnnData view is created just to read
    labels. `na=False` treats missing annotations as non-matching instead of
    producing a mask that cannot be used for indexing.
    """
    hits = adata.obs[obs_column].str.contains(pattern, na=False)
    return adata.obs.loc[hits, "Level_4"].unique().tolist()


# Level_4 labels belonging to each broad compartment.
tumor = _level4_labels("Level_2", "Mal")
cafs = _level4_labels("Level_2", "Stromal")
endo = _level4_labels("Level_2", "Endoth")
myeloid = _level4_labels("Level_2", "Myelo")
t_cells = _level4_labels("Level_3", "T Cell")

# Named source→target axes. Each value is a predicate that takes an interaction
# table and returns a boolean row mask.
axes = {
  "Tumor/Endo":  lambda d: (d.source.isin(tumor)) & (d.target.isin(endo)),
  "Tumor/CAF→Endo":  lambda d: (d.source.isin(tumor+cafs)) & (d.target.isin(endo)),
  "Tumor/CAF↔Tumor": lambda d: (d.source.isin(tumor+cafs)) & (d.target.isin(tumor+cafs)),
  # Targets are the T-cell labels. `lymphoid` is not defined anywhere in the
  # visible notebook, so using it here would raise NameError when called.
  "Myeloid/Endo→T":  lambda d: (d.source.isin(myeloid+endo)) & (d.target.isin(t_cells)),
  # Disabled: needs a `lymphoid` label list, which is not defined in this notebook.
  # "Tumor/Lymp":  lambda d: (d.source.isin(tumor)) & (d.target.isin(lymphoid)),
  "Tumor/CAF":  lambda d: (d.source.isin(tumor)) & (d.target.isin(cafs)),
  "Tumor/T":  lambda d: (d.source.isin(tumor)) & (d.target.isin(t_cells)),
}

In [None]:
def top_by_axis(df, mask, k=30):
    """Return the `k` highest-ranked interactions of `df` selected by `mask`.

    Parameters
    ----------
    df : DataFrame of scored interactions.
    mask : callable taking `df` and returning a boolean row mask.
    k : maximum number of rows to return.

    Rows are ranked by `delta_score`, then `d_mag`, then `d_prob`, all
    descending, and only the reporting columns are returned.
    """
    ranking_keys = ["delta_score", "d_mag", "d_prob"]
    report_cols = [
        "source", "target", "ligand_complex", "receptor_complex",
        "delta_score", "d_mag", "d_prob", "d_logfc", "p_rt", "p_nor",
        "magnitude_rank_rt", "magnitude_rank_nor", "lr_probs_rt", "lr_probs_nor",
    ]
    selected = df[mask(df)]
    ranked = selected.sort_values(ranking_keys, ascending=[False, False, False]).copy()
    return ranked[report_cols].head(k)

# Ranked interaction tables per axis, taken from `meaningful`. `k` is set to
# 1000 here instead of the default 30.
top_tumor_endo   = top_by_axis(meaningful, axes["Tumor/Endo"], 1000)
top_tumor_caf_endo   = top_by_axis(meaningful, axes["Tumor/CAF→Endo"], 1000)
top_tumor_tumor  = top_by_axis(meaningful, axes["Tumor/CAF↔Tumor"], 1000)
# Disabled: the "Myeloid/Endo→T" axis refers to `lymphoid`, which is not
# defined in the visible notebook.
# top_myeloid_t    = top_by_axis(meaningful, axes["Myeloid/Endo→T"], 1000)
top_tumor_t    = top_by_axis(meaningful, axes["Tumor/T"], 1000)
top_tumor_caf    = top_by_axis(meaningful, axes["Tumor/CAF"], 1000)

In [None]:
top_tumor_endo.shape

In [None]:
top_tumor_endo.head(20)

In [None]:
# Tumour → endothelial: rank, drop repeated interactions, keep 5 per source.
order = ["delta_score", "d_mag", "d_prob"]
interaction_key = ["source", "target", "ligand_complex", "receptor_complex"]

sorted_df = top_tumor_endo.sort_values(order, ascending=False)

# True for the first (best-ranked) occurrence of each interaction.
mask = ~sorted_df.duplicated(subset=interaction_key, keep="first")

unique_interactions = sorted_df[mask]
best_per_source = unique_interactions.groupby("source", group_keys=False).head(5)
topN_per_source_endo = best_per_source.sort_values("source")

In [None]:
topN_per_source_endo

In [None]:
top_tumor_t

In [None]:
# Tumour → T cell: rank, drop repeated interactions, keep 5 per source.
order = ["delta_score", "d_mag", "d_prob"]
sorted_df = top_tumor_t.sort_values(by=order, ascending=[False] * len(order))

# Keep only the first (best-ranked) row of each interaction.
is_repeat = sorted_df.duplicated(
    subset=["source", "target", "ligand_complex", "receptor_complex"],
    keep="first",
)
mask = ~is_repeat

topN_per_source_t = (
    sorted_df[mask]
    .groupby("source", group_keys=False)
    .head(5)
    .sort_values("source")
)

In [None]:
# Plotting table for the tumour → endothelial chord diagram.
# Built straight from `topN_per_source_endo` and deliberately NOT staged in a
# variable called `df`: earlier cells use `df` for the full RT LIANA result
# table (and save it to CSV), so rebinding that name here would make
# re-running them operate on the wrong table.
plot_df_endo = topN_per_source_endo.loc[:, [
    "source","target","ligand_complex","receptor_complex",
    "delta_score","lr_probs_rt","magnitude_rank_rt"
]].copy()

# Convert the RT magnitude rank into a "higher is stronger" value.
plot_df_endo["strength_rt"] = 1.0 - plot_df_endo["magnitude_rank_rt"]

# Clip delta scores symmetrically at the 95th percentile of their absolute value.
d = plot_df_endo["delta_score"]
lim = np.nanpercentile(np.abs(d), 95)
plot_df_endo["delta_clipped"] = d.clip(-lim, lim)

# Stored under a single key; later cells overwrite `selected_int` with other tables.
radio.uns["selected_int"] = plot_df_endo

In [None]:
plot_df_endo

In [None]:
# Plotting table for the tumour → T-cell chord diagram.
chord_cols = [
    "source", "target", "ligand_complex", "receptor_complex",
    "delta_score", "lr_probs_rt", "magnitude_rank_rt",
]
plot_df_t = topN_per_source_t[chord_cols].copy()

# "Higher is stronger" version of the RT magnitude rank.
plot_df_t["strength_rt"] = 1.0 - plot_df_t["magnitude_rank_rt"]

# Symmetric clip at the 95th percentile of |delta_score|.
d = plot_df_t["delta_score"]
lim = np.nanpercentile(d.abs(), 95)
plot_df_t["delta_clipped"] = d.clip(lower=-lim, upper=lim)

radio.uns["selected_int"] = plot_df_t

In [None]:
# Tumour → CAF: rank, drop repeated interactions, keep 5 per source, then
# build the chord-diagram table.
order = ["delta_score", "d_mag", "d_prob"]
sorted_df = top_tumor_caf.sort_values(order, ascending=False)

# True for the first (best-ranked) occurrence of each interaction.
mask = ~sorted_df.duplicated(
    subset=["source", "target", "ligand_complex", "receptor_complex"],
    keep="first",
)

unique_interactions = sorted_df[mask]
topN_per_source_caf = (
    unique_interactions
    .groupby("source", group_keys=False)
    .head(5)
    .sort_values("source")
)

chord_cols = [
    "source", "target", "ligand_complex", "receptor_complex",
    "delta_score", "lr_probs_rt", "magnitude_rank_rt",
]
plot_df_caf = topN_per_source_caf[chord_cols].copy()

# "Higher is stronger" version of the RT magnitude rank.
plot_df_caf["strength_rt"] = 1.0 - plot_df_caf["magnitude_rank_rt"]

# Symmetric clip at the 95th percentile of |delta_score|.
d = plot_df_caf["delta_score"]
lim = np.nanpercentile(d.abs(), 95)
plot_df_caf["delta_clipped"] = d.clip(lower=-lim, upper=lim)

radio.uns["selected_int"] = plot_df_caf

In [None]:
plot_df_caf

In [None]:
from matplotlib.colors import to_hex
from pycirclize import Circos

In [None]:
import yaml

# Load the colour-scheme configuration from the working directory.
# `safe_load` only builds plain Python objects from the YAML.
with open("config_color_scheme.yml", "r") as f:
    scheme = yaml.safe_load(f)

In [None]:
# Palette for the `Level_4` annotation; passed to `plot_chord` as `celltype_colors`.
celltype_color = scheme['palettes']['Level_4']

# Split in two: endothelial (Endo) cells and T cells

In [None]:
# List the distinct `Dataset` values within each `Is_Core` group, one per row.
adata.obs.groupby('Is_Core')['Dataset'].unique().explode()

In [None]:
from matplotlib import cm
from matplotlib.colors import to_hex
from pycirclize import Circos
from pycirclize.parser import Matrix
from matplotlib.lines import Line2D

def plot_chord(
    plot_df: pd.DataFrame,
    *,
    value_col="delta_score",
    source_col="source",
    target_col="target",
    ligand_col="ligand_complex",
    receptor_col="receptor_complex",
    celltype_colors=None,       # dict like {"Tumor":"#...", "Endo":"#...", ...}; auto-made if None
    cmap_name="tab20",
    small_gap=1,
    big_gap=3,
    r_outer0=94,
    r_outer1=100,
    label_size=8,
    group_label_size=8,
    link_alpha=0.35,
    link_lw=0.4,
    figsize=(20, 20),
    dpi=300,
    show_gene_labels=False,
    name_sectors=True,
    outpath="radio_circle.png", # set to None to skip saving
):
    """
    Draw a chord diagram of ligand -> receptor links, grouped by cell type.

    Every (gene, cell type) pair becomes one sector. Sectors are ordered by
    cell type and then gene, and coloured by cell type. Links run from the
    ligand sector in the source cell type to the receptor sector in the target
    cell type and take the colour of the source cell type.

    Parameters
    ----------
    plot_df : DataFrame
        One row per interaction. Must contain the columns named by
        `ligand_col`, `receptor_col`, `value_col`, `source_col`, `target_col`.
    value_col : str
        Column used as link weight.
    source_col, target_col, ligand_col, receptor_col : str
        Names of the respective columns in `plot_df`.
    celltype_colors : dict or None
        Mapping cell type -> colour. Cell types missing from the mapping are
        drawn in grey (and left out of the legend). If None, colours are taken
        from the `cmap_name` colormap.
    cmap_name : str
        Colormap used when `celltype_colors` is None.
    small_gap, big_gap : float
        Spacing between neighbouring sectors of the same / of different cell types.
    r_outer0, r_outer1 : float
        Inner and outer radius of the sector ring.
    label_size, group_label_size : float
        Font sizes of gene labels and cell-type labels.
    link_alpha, link_lw : float
        Transparency and edge width of the links.
    figsize, dpi
        Passed to `circos.plotfig`; `dpi` is also used when saving.
    show_gene_labels : bool
        Draw the gene name on every sector.
    name_sectors : bool
        Draw one cell-type label per group of sectors.
    outpath : str or None
        Where to save the figure; None skips saving. Missing parent
        directories are created.

    Returns
    -------
    fig, circos, edges, nodes
        The matplotlib figure, the pycirclize `Circos` object, the edge table
        (including the invisible padding self-links) and the node table.
    """
    from pathlib import Path  # local import: only needed for saving

    fallback_color = "#999999"  # used for cell types absent from the palette

    # Unique node ids per (group, gene)
    edges = plot_df[[ligand_col, receptor_col, value_col, source_col, target_col]].copy()
    edges.columns = ["from_gene", "to_gene", "value", "source_cell", "target_cell"]

    # Cast to str before concatenating so categorical columns work as well.
    edges["from_id"] = edges["from_gene"].astype(str) + " | " + edges["source_cell"].astype(str)
    edges["to_id"]   = edges["to_gene"].astype(str)   + " | " + edges["target_cell"].astype(str)

    # Nodes table = one row per (group, gene)
    lig = edges[["from_id","from_gene","source_cell"]].drop_duplicates() \
            .rename(columns={"from_id":"node","from_gene":"gene","source_cell":"group"})
    rec = edges[["to_id","to_gene","target_cell"]].drop_duplicates() \
            .rename(columns={"to_id":"node","to_gene":"gene","target_cell":"group"})
    nodes = pd.concat([lig, rec], ignore_index=True)

    # Order and spacing: big gap wherever the next sector belongs to another group
    nodes = nodes.sort_values(["group","gene"]).reset_index(drop=True)
    order = nodes["node"].tolist()
    space = [(big_gap if g1!=g2 else small_gap)
             for g1,g2 in zip(nodes["group"], nodes["group"].shift(-1))]
    if space: space[-1] = big_gap  # gap that closes the circle

    # Colors
    cell_types = nodes["group"].unique().tolist()
    if celltype_colors is None:
        # `plt.get_cmap` instead of `matplotlib.cm.get_cmap`, which recent
        # matplotlib releases no longer provide.
        cmap = plt.get_cmap(cmap_name, max(20, len(cell_types)))
        celltype_colors = {ct: to_hex(cmap(i % cmap.N)) for i, ct in enumerate(cell_types)}
    # Same grey fallback as the links, so a cell type missing from the palette
    # does not abort the plot with a KeyError.
    node_color = {
        node: celltype_colors.get(group, fallback_color)
        for node, group in zip(nodes["node"], nodes["group"])
    }

    # Sender cell type of every ligand node, used to colour links
    sender_of_node = dict(zip(lig["node"], lig["group"]))

    # Per-node padding: a self-link of fixed weight on every node, added to the
    # node's own total so each sector gets some width beyond its real links.
    pad_per_node = 2  # try 0.5–2.0
    pad = (
        nodes.assign(value=pad_per_node)
             .rename(columns={"node":"from_id","gene":"from_gene","group":"source_cell"})
    )
    pad["to_id"] = pad["from_id"]
    pad["to_gene"] = pad["from_gene"]
    pad["target_cell"] = pad["source_cell"]

    edges = pd.concat([edges, pad[edges.columns]], ignore_index=True)

    def link_kws_handler(from_label, to_label):
        """Hide padding self-links; colour real links by the sender cell type."""
        if from_label == to_label:
            return dict(alpha=0, lw=0)  # invisible padding link
        ct = sender_of_node.get(from_label)
        return dict(fc=celltype_colors.get(ct, fallback_color), alpha=link_alpha, zorder=0)

    # Build matrix with the unique node ids
    matrix = Matrix.parse_fromto_table(
        edges[["from_id","to_id","value"]].rename(columns={"from_id":"from","to_id":"to"})
    )

    # Create circos using node ids, colored by group.
    # The built-in sector labels (full "gene | cell type" ids) are hidden after
    # rendering, see below.
    circos = Circos.chord_diagram(
        matrix,
        order=order,
        space=space,
        r_lim=(r_outer0, r_outer1),
        cmap=node_color,
        label_kws=None,
        link_kws=dict(direction=1, ec="black", lw=link_lw),
        link_kws_handler=link_kws_handler,
    )

    # (A) Optional gene-only label on every sector
    if show_gene_labels:
        label_r = (r_outer0 + r_outer1) / 2
        for s in circos.sectors:
            gene_only = s.name.split(" | ")[0]
            s.text(gene_only, r=label_r + 3, size=label_size, orientation='vertical')

    # (B) ONE label per cell type (group), anchored on the group's middle sector
    if name_sectors:
        name2sector = {s.name: s for s in circos.sectors}
        for ct, gdf in nodes.groupby("group", sort=False):
            mid_node = gdf["node"].iloc[len(gdf) // 2]
            sector = name2sector[mid_node]
            sector.text(ct, r=r_outer1 + 10, size=group_label_size, orientation="horizontal", )

    fig = circos.plotfig(figsize=figsize, dpi=dpi)

    # Legend: sources first, then targets, each cell type listed once, using
    # the caller-supplied column names. Only types present in the palette.
    seen_types = plot_df[source_col].tolist() + plot_df[target_col].tolist()
    legend_labels = [lab for lab in dict.fromkeys(seen_types) if lab in celltype_colors]

    handles = [
        Line2D([0], [0], marker='o', linestyle='None', markersize=10,
               markerfacecolor=celltype_colors[lab], markeredgecolor='none')
        for lab in legend_labels
    ]

    # add some bottom margin so the legend fits under the chord
    fig.subplots_adjust(bottom=0.13)

    # place legend centered below the axes, spanning multiple columns
    fig.legend(
        handles, legend_labels,
        loc="lower center",
        ncol=6,                # tweak column count to control wrapping
        frameon=False,
        handletextpad=0.6,
        columnspacing=1.2,
        borderaxespad=0.5,
        bbox_to_anchor=(0.5, -0.1)  # y can be slightly negative if you need more room
    )

    # Hide any rendered text that still carries a full "gene | cell type" id.
    ax = fig.axes[0]
    for t in ax.texts:
        if " | " in t.get_text():
            t.set_visible(False)

    # Save BEFORE showing: some backends release the figure on `show()`, which
    # would leave the saved file empty.
    if outpath:
        Path(outpath).parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(outpath, dpi=dpi, bbox_inches="tight")
    plt.show()

    return fig, circos, edges, nodes

In [None]:
pwd

In [None]:
plot_df_endo

In [None]:
plot_df_t

In [None]:
# Tumour → endothelial chord diagram with gene labels and cell-type labels.
fig, circos, edges, nodes = plot_chord(plot_df_endo, celltype_colors=celltype_color,big_gap=1.33, small_gap=0.20, outpath='Radio_CCC/tumour_endo.png', show_gene_labels=True)

In [None]:
# Tumour → T-cell chord diagram with gene labels and cell-type labels.
fig, circos, edges, nodes = plot_chord(plot_df_t, celltype_colors=celltype_color,big_gap=1.33, small_gap=0.25, outpath='Radio_CCC/tumour_t.png', show_gene_labels=True)

In [None]:
# Tumour → CAF chord diagram with gene labels and cell-type labels.
fig, circos, edges, nodes = plot_chord(plot_df_caf, celltype_colors=celltype_color,big_gap=1.33, small_gap=0.25, outpath='Radio_CCC/tumour_caf.png', show_gene_labels=True)

In [None]:
# Tumour → endothelial, unlabelled variant: no gene or cell-type labels, and a
# wider sector ring (radius 83–97).
fig, circos, edges, nodes = plot_chord(plot_df_endo, celltype_colors=celltype_color,big_gap=1, small_gap=0.20, outpath='Radio_CCC/tumour_endo_no_label.png', 
                                       show_gene_labels=False, name_sectors=False, r_outer0=83, r_outer1=97)

In [None]:
# Tumour → T cell, unlabelled variant: no gene or cell-type labels, and a
# wider sector ring (radius 83–97).
fig, circos, edges, nodes = plot_chord(plot_df_t, celltype_colors=celltype_color,big_gap=1, small_gap=0.20, outpath='Radio_CCC/tumour_t_no_label.png', 
                                       show_gene_labels=False, name_sectors=False, r_outer0=83, r_outer1=97)

In [None]:
# Tumour → CAF, unlabelled variant: no gene or cell-type labels, and a wider
# sector ring (radius 83–97).
fig, circos, edges, nodes = plot_chord(plot_df_caf, celltype_colors=celltype_color,big_gap=1, small_gap=0.20, outpath='Radio_CCC/tumour_caf_no_label.png', 
                                       show_gene_labels=False, name_sectors=False, r_outer0=83, r_outer1=97)

In [None]:
# Tumour → endothelial, "no box" variant: thin sector ring (radius 83–85),
# large gene labels, no cell-type labels.
fig, circos, edges, nodes = plot_chord(plot_df_endo, celltype_colors=celltype_color,big_gap=2, small_gap=0.33, outpath='Radio_CCC/tumour_endo_no_box.png', 
                                       show_gene_labels=True, name_sectors=False, r_outer0=83, r_outer1=85, label_size=20)

In [None]:
# Tumour → T cell, "no box" variant: thin sector ring (radius 83–85), large
# gene labels, no cell-type labels.
fig, circos, edges, nodes = plot_chord(plot_df_t, celltype_colors=celltype_color,big_gap=2, small_gap=0.33, outpath='Radio_CCC/tumour_t_no_box.png', 
                                       show_gene_labels=True, name_sectors=False, r_outer0=83, r_outer1=85, label_size=20)

In [None]:
# Tumour → CAF, "no box" variant: thin sector ring (radius 83–85), large gene
# labels, no cell-type labels.
fig, circos, edges, nodes = plot_chord(plot_df_caf, celltype_colors=celltype_color,big_gap=2, small_gap=0.33, outpath='Radio_CCC/tumour_caf_no_box.png', 
                                       show_gene_labels=True, name_sectors=False, r_outer0=83, r_outer1=85, label_size=20)

In [None]:
# Every cell type that appears as source or target in any of the three chord
# tables, each listed once (first-seen order). Sources of the T-cell and CAF
# tables are included too, since each table keeps its own top hits per source
# and may therefore contain sources the endothelial table lacks.
all_cells = list(dict.fromkeys(
    cell
    for table in (plot_df_endo, plot_df_t, plot_df_caf)
    for column in ("source", "target")
    for cell in table[column].tolist()
))

In [None]:
all_cells

In [None]:
# Restrict the Level_4 palette to the cell types that appear in the chord
# tables, preserving the palette's own ordering.
new_dict = {
    cell_type: colour
    for cell_type, colour in celltype_color.items()
    if cell_type in all_cells
}

In [None]:
# Hand-written cell type -> hex colour palette.
# NOTE(review): the `plot_chord` calls visible in this notebook pass
# `celltype_color` (the YAML palette), not this `celltype_colors` dict —
# confirm which palette is meant to be used.
celltype_colors = {
    # --- unchanged (malignant) ---
    "Malignant Cell - Pit Like": "#a6cee3",
    "Malignant Cell - Acinar-like": "#b2df8a",
    "Malignant Cell - Epithelial": "#fb9a99",
    "Malignant Cell - Hypoxia": "#fdbf6f",
    "Malignant Cell - EMT": "#cab2d6",
    "Malignant Cell - Highly Proliferative": "#e31a1c",
    "Malignant Cell - Mesenchymal": "#fed683",
    "Malignant Cell - Highly Invasive": "#6a3d9a",
    "Malignant Cell - Senescence": "#1f78b4",
    "Malignant Cell - Apoptotic": "#b15928",

    # --- adjusted non-malignant (deduped) ---
    "Adipocyte": "#ffff99",  # unchanged (unique)
    "CD4+ Memory T Cell": "#9ecae1",
    "CD4+ Naive T Cell": "#c6dbef",
    "CD4+ Th1 Cell": "#6baed6",
    "CD4+ Th17 Cell": "#3182bd",
    "CD4+ Th2 Cell": "#8da0cb",
    "CD4+ Th22 Cell": "#1f7a8c",
    "CD8+ Effector T Cell": "#74add1",
    "CD8+ Terminal Effector T Cell": "#7fb3d5",
    "CD8+ Tissue-Resident Memory T Cell": "#3b8bc2",
    "CD8+ Exhausted T Cell": "#33a02c",  # unchanged (already unique)
    "Double Positive CD4+CD8+ T Cell": "#2b8cbe",
    "γδ T Cell (Vδ1)": "#4c78a8",
    "iCAF": "#a1d99b",
    "Lymphatic Endothelial Cell": "#17BECF",  # unchanged (unique)
    "myCAF": "#74c476",
    "Schwann Cell": "#843c39",               # unchanged (unique)
    "T-reg": "#5b9bd5",
    "Tumor-Associated Endothelial Cell": "#9467BD",  # unchanged (unique)
    "Vascular Endothelial Cell": "#E6A700",          # unchanged (unique)
}

In [None]:
# Tumour → endothelial, compact 10x10 figure with gene labels and no
# cell-type labels.
# NOTE(review): passes `celltype_color` (YAML palette) rather than the
# `celltype_colors` dict defined in the preceding cell — confirm intended.
fig, circos, edges, nodes = plot_chord(plot_df_endo, celltype_colors=celltype_color,big_gap=1.33, small_gap=0.50, 
                                       outpath='Radio_CCC/tumour_endo_legend_bottom.png', 
                                       show_gene_labels=True, name_sectors=False, r_outer0=83, r_outer1=85.5, label_size=12, figsize=(10,10))

In [None]:
# Tumour → T cell, compact 10x10 figure with gene labels and no cell-type labels.
# NOTE(review): passes `celltype_color` (YAML palette) rather than the
# hand-written `celltype_colors` dict defined earlier — confirm intended.
fig, circos, edges, nodes = plot_chord(plot_df_t, celltype_colors=celltype_color,big_gap=1.33, small_gap=0.50, 
                                       outpath='Radio_CCC/tumour_t_legend_bottom.png', 
                                       show_gene_labels=True, name_sectors=False, r_outer0=83, r_outer1=85.5, label_size=12, figsize=(10,10))

In [None]:
# Tumour → CAF, compact 10x10 figure with gene labels and no cell-type labels.
# NOTE(review): passes `celltype_color` (YAML palette) rather than the
# hand-written `celltype_colors` dict defined earlier — confirm intended.
fig, circos, edges, nodes = plot_chord(plot_df_caf, celltype_colors=celltype_color,big_gap=1.33, small_gap=0.50, 
                                       outpath='Radio_CCC/tumour_caf_legend_bottom.png', 
                                       show_gene_labels=True, name_sectors=False, r_outer0=83, r_outer1=85.5, label_size=12, figsize=(10,10))