In [None]:
from sctoolbox.utils.jupyter import bgcolor, _compare_version

# File name of this notebook; passed to _compare_version at the end of the cell.
nb_name = "0C_ligand_receptor_differences.ipynb"

# Change the background of the selected input cells.
bgcolor(
    "PowderBlue",
    select=[2, 4, 6, 8, 10, 13, 15, 17],
)

_compare_version(nb_name)

# 0C - Receptor-Ligand Differences Analysis
<hr style="border:2px solid black"> </hr>

## 1 - Description

**Requires an anndata object with precomputed clusters and multiple timepoints or conditions. Clusters can be generated with the clustering notebook (`rna_analysis/notebooks/04_clustering.ipynb`).**

This notebook extends the receptor-ligand analysis to identify and visualize differences in cell-cell communication interactions across:
 1. Different timepoints
 2. Different experimental conditions

The differences are visualized as network graphs.


____

## 2 - Setup

In [None]:
import sctoolbox.tools.receptor_ligand as rl
import sctoolbox.utils as utils
import pandas as pd
import matplotlib.pyplot as plt
from sctoolbox import settings

# Read the sctoolbox settings from config.yaml, using the "0C" key.
settings.settings_from_config(
    "config.yaml",
    key="0C",
)

____

## 3 - Load adata

<h1><center>⬐ Fill in input data here ⬎</center></h1>

In [None]:
# Path of the .h5ad file that is loaded in the following cell.
anndata_file = "adata_condition_rna.h5ad"

In [None]:
# Load the AnnData object from the file configured above.
adata = utils.adata.load_h5ad(anndata_file)

# Preview the object together with its obs and var tables: at most 5 rows,
# no limit on the number of columns.
# The canonical option names are spelled out on purpose. The dotted shorthand
# ("display.max.rows") is only resolved through pandas' regex matching of
# option names and could become ambiguous if similarly named options appear.
with pd.option_context("display.max_rows", 5, "display.max_columns", None):
    display(adata)
    display(adata.obs)
    display(adata.var)

____

## 4 - Download receptor-ligand database

Supply a receptor-ligand database. This can be done in two ways:

1. Provide the name of a recognized [LIANA resource](https://liana-py.readthedocs.io/en/latest/notebooks/prior_knowledge.html#Ligand-Receptor-Interactions).

2. Provide a path or link to a table that consists of at least two columns - one for the receptors and one for the ligands. Each row of this table must correspond to a unique interaction, e.g. `http://tcm.zju.edu.cn/celltalkdb/download/processed_data/human_lr_pair.txt`

The database will be stored in the adata object (`adata.uns['receptor-ligand']['database']`).

<h1><center>⬐ Fill in input data here ⬎</center></h1>

In [None]:
# Database source: the name of a LIANA resource, or a path/link to a table.
db_path = "consensus"

# Names of the table columns that hold the receptors and the ligands.
receptor_column = "receptor"
ligand_column = "ligand"

In [None]:
# Fetch the receptor-ligand database for adata, using the inputs from the
# previous cell. inplace=True and overwrite=False are passed explicitly.
rl.download_db(
    adata=adata,
    db_path=db_path,
    receptor_column=receptor_column,
    ligand_column=ligand_column,
    overwrite=False,
    inplace=True,
)

____

## 5 - Difference Analysis

### 5.1 `calculate_condition_differences` Function
This function compares receptor-ligand interactions between different experimental conditions. It:
- Analyzes differences in cell-cell communication across condition dimensions
- Supports hierarchical comparison across multiple dimensions
- Identifies which receptor-ligand interactions are stronger or weaker between conditions
- Quantifies differences using quantile rank scoring

<h1><center>⬐ Fill in input data here ⬎</center></h1>

In [None]:
# Columns for the hierarchical comparison; the hierarchy follows the list order.
condition_columns = ["condition", "timepoint"]

# Name of the cluster column in adata.obs.
cluster_col = "celltype"

# Normalize cell counts: scales expression values to correct for different
# cluster sizes, using this value as the reference cell count.
# If None, the max cluster size will be used.
normalize = None

# Column in adata.var that holds gene symbols/ids. Set to None to use the index.
# NOTE(review): gene_col is not passed to any call visible in this notebook -
# confirm whether it should be forwarded.
gene_col = None

In [None]:
# Optional filtering of clusters or conditions.
# Keep condition_filters as None, or define it like so:
#
# condition_filters = {
#     "condition": ["control"],       # Compare these treatments
#     "timepoint": ["tp1", "tp3"],    # At these timepoints
# }
#
# (The example is kept as a real comment instead of a bare string literal,
# which would be an expression statement evaluated on every run.)
condition_filters = None

# Optional filters to focus the analysis on specific clusters or genes.
selected_clusters = None  # Define list to select specific clusters, e.g., ["cluster1", "clusterN"]
selected_genes = None  # Define list to select specific genes, e.g., ["gene1", "geneN"]


In [None]:
# Expression and interaction filtering.
# Both values must be plain numbers: a trailing comma after the value would
# silently turn them into one-element tuples, e.g. (15,).
min_perc = 15  # Minimum percentage of cells expressing each gene
interaction_perc = 90  # Focus on top 90% of interactions

In [None]:
# Calculate differences between conditions.
# All inputs come from the "Fill in input data" cells above.
diff_results = rl.calculate_condition_differences(
    adata=adata,
    condition_columns=condition_columns,
    cluster_column=cluster_col,  # the input cell defines `cluster_col`; `cluster_column` does not exist
    condition_filters=condition_filters,
    cluster_filter=selected_clusters,
    gene_filter=selected_genes,
    normalize=normalize,
    min_perc=min_perc,
    interaction_perc=interaction_perc,
    inplace=False  # Return results instead of storing in adata
)

<h1><center>⬐ Fill in input data here ⬎</center></h1>

In [None]:
# Selection of interactions
n_top = 100  # Show top 100 differential interactions
split_by_direction = True  # Create separate plots for upregulated/downregulated
hub_threshold = 4  # Define hubs as nodes with 4+ connections

# Figure appearance and output
figsize = (24, 18)
dpi = 300
save_prefix = "condition_comparison_networks"
show = True

In [None]:
# Draw the condition differences computed above as multigrid networks.
comparision_figures = rl.plot_all_condition_differences(
    diff_results=diff_results,
    # which interactions to draw
    n_top=n_top,
    split_by_direction=split_by_direction,
    hub_threshold=hub_threshold,
    # figure appearance and output
    figsize=figsize,
    dpi=dpi,
    save_prefix=save_prefix,
    show=show,
)

____

### 5.2 `calculate_condition_differences_over_time` Function

This function analyzes how receptor-ligand interactions between cell types change across multiple timepoints for a specific condition. It:
- Takes timepoint-labeled data for a specific experimental condition
- Compares receptor-ligand interactions either sequentially between timepoints or against a baseline timepoint
- Calculates quantile rank differences to identify which interactions become stronger or weaker over time


<h1><center>⬐ Fill in input data here ⬎</center></h1>

In [None]:
# Columns used by the time-course comparison
timepoint_column = "timepoint"  # Column containing timepoints
condition_column = "condition"  # Column containing conditions
cluster_col = "clustering"  # Name of the cluster column in adata.obs.

# The condition to be analyzed
condition_value = "treatment"

# Order of timepoints
timepoint_order = ["tp1", "tp2", "tp3"]


In [None]:
# Optional baseline, e.g. "tp1": all timepoints are compared to it.
# Without a reference timepoint (None), the timepoints are considered in
# their sequential order.
reference_timepoint = None


In [3]:
# Optional filters to focus the analysis on specific clusters or genes.

# Define a list to select specific clusters, e.g., ["cluster1", "clusterN"]
selected_clusters = None

# Define a list to select specific genes, e.g., ["gene1", "geneN"]
selected_genes = None

In [None]:
# Expression and interaction filtering.
# Both values must be plain numbers: a trailing comma after the value would
# silently turn them into one-element tuples, e.g. (15,).
min_perc = 15  # Minimum percentage of cells expressing each gene
interaction_perc = 90  # Focus on top 90% of interactions

In [None]:
# Calculate how the interactions of the chosen condition change over the timepoints.
# `normalize` is the value set in section 5.1.
temp_diff_results = rl.calculate_condition_differences_over_time(
    adata=adata,
    timepoint_column=timepoint_column,
    condition_column=condition_column,
    condition_value=condition_value,
    cluster_column=cluster_col,  # the input cell defines `cluster_col`; `cluster_column` does not exist
    timepoint_order=timepoint_order,
    reference_timepoint=reference_timepoint,
    min_perc=min_perc,
    interaction_perc=interaction_perc,
    cluster_filter=selected_clusters,
    gene_filter=selected_genes,
    normalize=normalize,
    save=f"{condition_value}_timepoint_differences"
)

<h1><center>⬐ Fill in input data here ⬎</center></h1>

In [None]:
# Selection of interactions
n_top = 100  # Show top 100 differential interactions
split_by_direction = True  # Create separate plots for upregulated/downregulated
hub_threshold = 4  # Define hubs as nodes with 4+ connections

# Figure appearance and output; the prefix starts with the analyzed condition.
figsize = (24, 18)
dpi = 300
save_prefix = f"{condition_value}_comparison_networks"
show = True

In [None]:
# Draw the time-course differences computed above as multigrid networks.
temp_comparision_figures = rl.plot_all_condition_differences(
    diff_results=temp_diff_results,
    # which interactions to draw
    n_top=n_top,
    split_by_direction=split_by_direction,
    hub_threshold=hub_threshold,
    # figure appearance and output
    figsize=figsize,
    dpi=dpi,
    save_prefix=save_prefix,
    show=show,
)

____

### 5.3 `track_clusters_or_genes` Function

This function lets you focus on and visualize the evolution of specific receptor-ligand interactions of interest across timepoints. It:

- Filters the differential analysis results to focus only on interactions involving specific genes and/or cell clusters
- Creates targeted network visualizations showing how selected interactions change over the time course

<h1><center>⬐ Fill in input data here ⬎</center></h1>

In [None]:

# Track only interactions involving specified clusters or genes
selected_clusters = ['END', 'aSM']  # Define list to select specific clusters
selected_genes = ['AC114498.1', 'AC136616.3']  # Define list to select specific genes

# No trailing commas after the values below: a trailing comma would wrap the
# list in a tuple and turn the integer into (1,).
timepoint_order = ["tp1", "tp2", "tp3"]
min_interactions = 1  # Minimum number of interactions required to generate a visualization

In [None]:
# Selection of interactions
n_top = 100  # Show top 100 differential interactions
split_by_direction = True  # Create separate plots for upregulated/downregulated
hub_threshold = 4  # Define hubs as nodes with 4+ connections

# Figure appearance and output
figsize = (24, 18)
dpi = 300
# NOTE: save_prefix is reassigned in the following cell before it is used.
save_prefix = f"{condition_value}_comparison_networks"
show = True

In [None]:
# Derive the output prefix from the tracked clusters and genes.
# An empty or None selection falls back to a generic label.
if selected_clusters:
    clusters_suffix = "_".join(selected_clusters)
else:
    clusters_suffix = "all_clusters"

if selected_genes:
    genes_suffix = "_".join(selected_genes)
else:
    genes_suffix = "all_genes"

save_prefix = f"track_{clusters_suffix}_{genes_suffix}"

In [None]:
# Create network graphs restricted to the selected clusters/genes.
# NOTE(review): this passes `diff_results` from section 5.1; if the tracking is
# meant to follow the time course, `temp_diff_results` from section 5.2 may be
# the intended input - confirm.
# NOTE(review): `hub_threshold` and `show` are set in the input cell but are
# not forwarded here - confirm whether the function accepts them.
specific_figures = rl.track_clusters_or_genes(
    diff_results=diff_results,
    # what to track
    clusters=selected_clusters,
    genes=selected_genes,
    timepoint_order=timepoint_order,
    min_interactions=min_interactions,
    n_top=n_top,
    split_by_direction=split_by_direction,
    # figure appearance and output
    figsize=figsize,
    dpi=dpi,
    save_prefix=save_prefix,
)

____