In [None]:
import warnings
warnings.filterwarnings("ignore")
import ast
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scanpy as sc


from itertools import combinations
from statsmodels.stats.multitest import multipletests
from scipy.stats import ranksums


In [None]:
# Location of the AnnData file to analyse. This is a placeholder: it must be
# set before the cell is run. (A bare `data_path =` with only a comment on the
# right-hand side is a SyntaxError, so an explicit sentinel + check is used.)
data_path = None  # insert data path here
if data_path is None:
    raise ValueError(
        "`data_path` is not set: assign the path of the AnnData file before running this cell."
    )

# Load the dataset, then normalise it in place with scanpy's total-count
# normalisation using target_sum=1e3.
adata = sc.read(data_path)
sc.pp.normalize_total(adata, target_sum=1e3)

# Gene-set table; the first CSV column is used as the DataFrame index.
df_cytos = pd.read_csv('.././data/cytokine_genelist.csv', index_col=[0])

# Each 'genes' cell is parsed with ast.literal_eval, i.e. it is expected to
# hold a Python literal (presumably a list of gene names) stored as text.
# NOTE(review): 7 and 8 are index *labels* (`.loc`), assumed to be the
# IFN-alpha / IFN-beta rows of the CSV — confirm against the file.
hallmark_ifna = ast.literal_eval(df_cytos.loc[7, 'genes'])
hallmark_ifnb = ast.literal_eval(df_cytos.loc[8, 'genes'])



In [None]:
# All-vs-all comparison of hallmark gene-set scores between conditions.
#
# For every hallmark gene set and every cell type, each cell gets a score equal
# to the sum of its expression values over the genes of the set. The score
# distributions of every pair of `stim_treat_time` conditions are then compared
# with a two-sided Wilcoxon rank-sum test, and all p-values are Bonferroni
# corrected together at the end.

# Define hallmark gene sets and names
hallmark_list = [hallmark_ifna, hallmark_ifnb]
hallmark_names = ['hallmark_ifna', 'hallmark_ifnb']

# Cell types to test, in reporting order.
CELL_TYPES = ['B', 'CD4T', 'CD8T', 'Monocyte', 'DC', 'Eosinophil', 'NK',
              'Neutrophil', 'pDC', 'Plasmablast']

# Column order of the result table. Passing it explicitly to the DataFrame
# constructor keeps the table well-formed even when no comparison was made.
RESULT_COLUMNS = ['hallmark', 'CellType', 'Condition1', 'Condition2',
                  'Statistic', 'p-value', 'mean1', 'mean2']

# Store results
all_vs_all_results = []

# Loop over hallmark sets
for hallmark_genes, hallmark_name in zip(hallmark_list, hallmark_names):

    # Genes of this set that are present in the dataset (invariant for the
    # whole hallmark, so computed once here instead of once per pair).
    gene_mask = adata.var.index.isin(hallmark_genes)

    for ct in CELL_TYPES:
        is_ct = adata.obs.CellType == ct

        # Conditions in which this cell type occurs. Sorted so the pair order
        # (and therefore which group is Condition1 and the sign of the
        # statistic) is reproducible across kernel restarts; a plain `set`
        # iterates in hash order. Missing labels are dropped: they can never
        # match an `==` comparison, so they would only yield empty groups.
        conditions = sorted(
            adata.obs.loc[is_ct, 'stim_treat_time'].dropna().unique(),
            key=str,
        )

        # Per-cell summed score for each condition, computed once and reused
        # for every pair the condition takes part in.
        scores_by_condition = {}
        for cond in conditions:
            in_group = (adata.obs.stim_treat_time == cond) & is_ct
            scores = adata[in_group, gene_mask].to_df().sum(axis=1)
            # Skip if empty
            if not scores.empty:
                scores_by_condition[cond] = scores

        # Compare all pairs of conditions (dict order == sorted order)
        for cond1, cond2 in combinations(scores_by_condition, 2):
            scores1 = scores_by_condition[cond1]
            scores2 = scores_by_condition[cond2]

            # Perform Wilcoxon rank-sum test
            stat, pval = ranksums(scores1, scores2, alternative='two-sided')

            # Store result
            all_vs_all_results.append({
                'hallmark': hallmark_name,
                'CellType': ct,
                'Condition1': cond1,
                'Condition2': cond2,
                'Statistic': stat,
                'p-value': pval,
                'mean1': scores1.mean(),
                'mean2': scores2.mean()
            })

# Convert to DataFrame
all_vs_all_df = pd.DataFrame(all_vs_all_results, columns=RESULT_COLUMNS)

# Bonferroni correction across every test performed (all hallmarks, cell types
# and condition pairs together). Guarded so an empty result table gets an
# empty `p_adj` column instead of failing.
if all_vs_all_df.empty:
    all_vs_all_df['p_adj'] = pd.Series(dtype=float)
else:
    all_vs_all_df['p_adj'] = multipletests(all_vs_all_df['p-value'], method='bonferroni')[1]
