In [None]:
### Import Libraries.

import os
import anndata
import scanpy as sc
import scEntropy.scEntropy as scEntropy
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
from scipy.stats import entropy

In [None]:
### Load Data.

# Work from the project folder: the input file below and the figures saved by
# later cells are all given as relative paths and resolve against it.
project_dir = "/folder/"
os.chdir(project_dir)

h5ad_file = "adata.h5ad"
adata = anndata.read_h5ad(h5ad_file)

# Use the stored 'log1p_normalized' layer as the active expression matrix.
# NOTE(review): this assigns the layer object itself (no copy is made here).
log1p_layer = adata.layers['log1p_normalized']
adata.X = log1p_layer

In [None]:
### Function: Compute Single-cell Entropy.

def compute_scEntropy(adata):
    """Compute per-cell scEntropy scores from ``adata.X``.

    The expression matrix is densified when it exposes ``toarray`` and wrapped
    in a DataFrame labelled with ``adata.obs_names`` (rows) and
    ``adata.var_names`` (columns). The transpose of that frame is handed to
    ``scEntropy.scEntropy`` with ``option='RCSA'``.

    Parameters
    ----------
    adata : object exposing ``X``, ``obs_names`` and ``var_names``.

    Returns
    -------
    Whatever ``scEntropy.scEntropy`` returns, unchanged.
    """
    if hasattr(adata.X, "toarray"):
        expression = adata.X.toarray()
    else:
        expression = adata.X

    cells_by_genes = pd.DataFrame(expression, index=adata.obs_names, columns=adata.var_names)

    # scEntropy is given the transposed frame (var_names on the rows).
    genes_by_cells = cells_by_genes.T
    return scEntropy.scEntropy(genes_by_cells, option='RCSA')

In [None]:
### Function: Visualization of Single-cell Entropy.

def plot_scEntropy_boxplot_by_cluster(adata, entropy_scores, cluster_key = 'Cluster_Column',
                                      save_path = 'scEntropy_boxplot_by_cluster.png'):
    """Draw one scEntropy boxplot per cluster, save it and show it.

    Parameters
    ----------
    adata : object exposing ``obs`` (a DataFrame holding ``cluster_key``).
    entropy_scores : list, 1-D/column array, Series or DataFrame
        One score per cell, in the same order as ``adata.obs``. For a
        DataFrame the first column is used (same convention as
        ``make_scEntropy_stat_df``); labels are ignored, values are taken
        positionally.
    cluster_key : str
        Column of ``adata.obs`` holding the cluster label of each cell.
    save_path : str or None
        Where the figure is written (800 dpi). ``None`` skips saving.

    Returns
    -------
    None
    """
    # Reduce any supported score container to a flat array of values.
    if isinstance(entropy_scores, pd.DataFrame):
        score_values = entropy_scores.iloc[:, 0].to_numpy()
    else:
        score_values = np.asarray(entropy_scores).ravel()

    cluster_col = adata.obs[cluster_key]
    df_plot = pd.DataFrame({
        'scEntropy': score_values,
        'Cluster': cluster_col.astype(str).values
    })

    # isinstance on the dtype replaces the deprecated is_categorical_dtype.
    if isinstance(cluster_col.dtype, pd.CategoricalDtype):
        # The plotted labels are strings, so the category order must be
        # stringified too or non-string categories would match nothing.
        cluster_order = [str(category) for category in cluster_col.cat.categories]
    else:
        cluster_order = sorted(df_plot['Cluster'].unique())

    fig, ax = plt.subplots(figsize = (6, 6))
    sns.boxplot(
        data = df_plot, x = 'Cluster', y = 'scEntropy',
        palette = 'tab10', order = cluster_order, ax = ax
    )
    plt.setp(ax.get_xticklabels(), rotation = 45, ha = 'right')
    ax.set_xlabel('Cluster')
    ax.set_ylabel('scEntropy')
    ax.set_title('scEntropy Distribution by Cluster')
    fig.tight_layout()
    if save_path is not None:
        fig.savefig(save_path, dpi = 800)
    plt.show()

In [None]:
### Function: Prepare scEntropy Statistics DF.

def make_scEntropy_stat_df(adata, entropy_scores, cluster_key = 'Cluster_Column', sample_key = None, group_key = None):
    """Build a long-format table with one row per cell for statistics.

    Parameters
    ----------
    adata : object exposing ``obs`` (DataFrame) and ``obs_names``.
    entropy_scores : list, array, Series or DataFrame
        One score per cell. A DataFrame contributes its first column. A
        Series/DataFrame whose index holds exactly the cell names (each once)
        is re-ordered to match ``adata.obs_names``; any other input is paired
        with the cells by position.
    cluster_key : str
        ``adata.obs`` column copied (as strings) into ``'Cluster'``.
    sample_key, group_key : str, optional
        When given, the matching ``adata.obs`` columns are copied (as strings)
        into ``'Sample'`` / ``'Group'``.

    Returns
    -------
    pandas.DataFrame
        Columns ``cell``, ``scEntropy``, ``Cluster`` and, if requested,
        ``Sample`` and ``Group``.

    Raises
    ------
    ValueError
        If the number of scores differs from the number of cells.
    """
    obs_names = pd.Index(adata.obs_names)

    if isinstance(entropy_scores, pd.DataFrame):
        entropy_vector = entropy_scores.iloc[:, 0]
    elif isinstance(entropy_scores, pd.Series):
        entropy_vector = entropy_scores
    else:
        # ravel() lets an (n, 1) column array through as well as flat input.
        entropy_vector = pd.Series(np.asarray(entropy_scores).ravel(), index = obs_names)

    if len(entropy_vector) != len(obs_names):
        raise ValueError(
            f"entropy_scores has {len(entropy_vector)} values but adata has {len(obs_names)} cells"
        )

    # Scores labelled with the cell names but stored in another order would be
    # silently mis-paired by a positional copy, so align them on the labels.
    labelled_by_cell = (
        entropy_vector.index.is_unique
        and obs_names.is_unique
        and bool(entropy_vector.index.isin(obs_names).all())
    )
    if labelled_by_cell and not entropy_vector.index.equals(obs_names):
        entropy_vector = entropy_vector.reindex(obs_names)

    df_stat = pd.DataFrame({
        'cell': obs_names,
        'scEntropy': entropy_vector.values,
        'Cluster': adata.obs[cluster_key].astype(str).values
    })

    if sample_key:
        df_stat['Sample'] = adata.obs[sample_key].astype(str).values
    if group_key:
        df_stat['Group'] = adata.obs[group_key].astype(str).values

    return df_stat

In [None]:
### Function: Gene-level Entropy.

def compute_gene_entropy(adata, n_bins = 10, pseudocount = 1e-9):
    """Shannon entropy of each gene's binned expression distribution.

    Each column of ``adata.X`` is histogrammed into ``n_bins`` equal-width
    bins spanning that gene's own min..max. The bin frequencies get
    ``pseudocount`` added, are renormalised, and passed to
    ``scipy.stats.entropy`` (natural log).

    Parameters
    ----------
    adata : object exposing ``X`` (dense, or sparse with ``toarray``) and
        ``var_names``.
    n_bins : int
        Number of histogram bins per gene.
    pseudocount : float
        Added to every bin frequency before renormalising.

    Returns
    -------
    pandas.DataFrame
        Columns ``gene`` and ``entropy``, sorted by ``entropy`` descending.
        Entropy is NaN when there are no cells to histogram.
    """
    X = adata.X.toarray() if hasattr(adata.X, "toarray") else np.asarray(adata.X)
    gene_names = adata.var_names

    gene_entropy = []
    for i in range(len(gene_names)):
        gene_values = X[:, i]
        if gene_values.size == 0:
            # Nothing to bin: avoid the 0/0 normalisation below.
            gene_entropy.append(np.nan)
            continue
        # np.histogram closes the last bin on the right, so the maximum value
        # lands in bin n_bins-1. Digitizing against the n_bins+1 edges would
        # instead push it into an extra bin of its own.
        counts, _ = np.histogram(gene_values, bins = n_bins)
        probs = (counts / counts.sum()) + pseudocount
        probs /= probs.sum()
        gene_entropy.append(entropy(probs))

    return pd.DataFrame({'gene': gene_names, 'entropy': gene_entropy}).sort_values('entropy', ascending = False)

In [None]:
### Compute scEntropy and Plot.

# Score the cells once; `entropy_scores` is reused by the cells below.
entropy_scores = compute_scEntropy(adata)

# Per-cluster overview figure (also written to disk by the helper).
plot_scEntropy_boxplot_by_cluster(
    adata,
    entropy_scores,
    cluster_key = 'Cluster_Column',
)

In [None]:
### Prepare scEntropy Statistics DF.

# One row per cell: score, cluster label and the sample it came from.
# No group_key is passed, so the table has no 'Group' column.
df_entropy = make_scEntropy_stat_df(
    adata, entropy_scores, cluster_key = 'Cluster_Column', sample_key = 'Sample_ID'
)

In [None]:
### Statistics.

# Baseline cluster for the treatment contrast. This must be one of the labels
# in df_entropy['Cluster'] -- not the name of the obs column they came from.
reference_cluster = 'Cluster_1'

cluster_levels = sorted(df_entropy['Cluster'].unique())
if reference_cluster not in cluster_levels:
    raise ValueError(
        f"Reference cluster {reference_cluster!r} is not a level of 'Cluster'; "
        f"available levels: {cluster_levels}"
    )

# Mixed model: cluster as a fixed effect (each level compared with the
# reference), cells grouped by the sample they came from.
formula = f"scEntropy ~ C(Cluster, Treatment(reference = {reference_cluster!r}))"
model = smf.mixedlm(formula, data = df_entropy, groups = df_entropy["Sample"])
result = model.fit()
print(result.summary())

In [None]:
### Boxplot of Selected Clusters.

clusters_of_interest = ['Cluster_1', 'Cluster_2']
# NOTE(review): these look like template placeholders -- replace them with
# valid matplotlib colours (one per cluster, same order) before running.
colors = ['#Color_1', '#Color_2']

# Seeded generator so the jittered points land in the same place on every run.
jitter_rng = np.random.default_rng(0)

# Reduce the scores to a flat array (first column if a DataFrame), matching
# what make_scEntropy_stat_df accepts.
if isinstance(entropy_scores, pd.DataFrame):
    score_values = entropy_scores.iloc[:, 0].to_numpy()
else:
    score_values = np.asarray(entropy_scores).ravel()

df_plot = pd.DataFrame({
    'scEntropy': score_values,
    'Cluster': adata.obs['Cluster_Column'].astype(str).values
}).loc[lambda frame: frame['Cluster'].isin(clusters_of_interest)]

sns.set(style = "white")

fig, ax = plt.subplots(figsize = (3, 3))

# Faint boxes without outlier markers; the individual cells are drawn on top.
sns.boxplot(
    data = df_plot, x = 'Cluster', y = 'scEntropy',
    order = clusters_of_interest, palette = colors,
    width = 0.6, fliersize = 0, linewidth = 0,
    boxprops = dict(alpha = 0.3),
    whiskerprops = dict(linewidth = 1.5, alpha = 0.5),
    capprops = dict(linewidth = 1.5, alpha = 0.5),
    medianprops = dict(linewidth = 1.5, alpha = 0.5),
    ax = ax
)

# Overlay every cell as a near-transparent dot, jittered around the box's
# x position (box i sits at x = i).
for i, cluster in enumerate(clusters_of_interest):
    y = df_plot.loc[df_plot['Cluster'] == cluster, 'scEntropy']
    x = jitter_rng.normal(i, 0.08, size = len(y))
    ax.scatter(x, y, color = colors[i], alpha = 0.02, s = 6, edgecolor = 'none')

for spine in ['top', 'right']:
    ax.spines[spine].set_visible(False)
for spine in ['bottom', 'left']:
    ax.spines[spine].set_linewidth(1.5)
    ax.spines[spine].set_color("#404040")

ax.set_xlabel('Basic_Clusters', fontsize = 8, fontweight = 'bold', color = '#404040')
ax.set_ylabel('scEntropy', fontsize = 8, fontweight = 'bold', color = '#404040')
plt.tight_layout()
fig.savefig("boxplot_scentropy.png", dpi = 800, bbox_inches = 'tight')
plt.show()
