In [1]:
import os
import warnings
warnings.filterwarnings('ignore')

import scanpy as sc
import anndata as ad
import pandas as pd
import seaborn as sns
import decoupler as dc

In [2]:
# Directory that receives this notebook's output file; it is created up front
# so the CSV export at the end does not fail on a missing path.
out_dir = '../../results/05_fibroblast/17_fib1_vs_fib2'
os.makedirs(out_dir, exist_ok=True)  # exist_ok=True: no error if the directory is already there

In [3]:
# Load the AnnData object stored in fibroblast.h5ad by the 01_clustering step.
# NOTE(review): presumably this is the clustered, log-normalised fibroblast
# subset carrying a "sub_clusters" annotation — confirm against the upstream notebook.
adata = sc.read_h5ad('../../results/05_fibroblast/01_clustering/fibroblast.h5ad')

In [4]:
# Overwrite the stored log1p base with None before running the differential
# expression test below.
# NOTE(review): this looks like the usual workaround for scanpy raising
# KeyError: 'base' on objects reloaded from .h5ad — confirm it is still needed
# with the installed scanpy version. The line itself fails with a KeyError when
# adata.uns has no 'log1p' entry.
adata.uns['log1p']['base'] = None

In [5]:
# Rank genes for the "Fib1" group against "Fib2" as the reference, using the
# labels in "sub_clusters". The result is read back from
# adata.uns['rank_genes_groups'] by the following cells.
# NOTE(review): no `method=` is passed, so scanpy's default statistical test is
# used — confirm which test that is for the installed version and consider
# stating it explicitly so the analysis is reproducible across upgrades.
sc.tl.rank_genes_groups(adata, groupby="sub_clusters", groups=["Fib1"], reference="Fib2")



In [6]:
# Ranked gene names as a table with one column per tested group (only "Fib1"
# here), used for a quick look at the top hits.
df = pd.DataFrame(adata.uns['rank_genes_groups']['names'])

In [7]:
# Preview the first rows of the ranked gene-name table.
df.head()

Unnamed: 0,Fib1
0,WISP2
1,SCARA5
2,CLU
3,DCN
4,FBLN1


In [8]:
# Flatten the rank_genes_groups results into one record per (cluster, gene),
# carrying the gene name, log fold change, raw p-value and adjusted p-value.
rank_results = adata.uns['rank_genes_groups']
results = []
for cluster in rank_results['names'].dtype.names:
    # Per-cluster fields; they are paired up by position below.
    cluster_marker_genes = rank_results['names'][cluster]
    logfoldchanges = rank_results['logfoldchanges'][cluster]
    pvals = rank_results['pvals'][cluster]
    pvals_adj = rank_results['pvals_adj'][cluster]

    # Append every gene of this cluster in one go instead of one append per gene.
    results.extend(
        {
            'gene': gene,
            'log fold change': logfc,
            'p-value': pval,
            'adjusted p-value': pval_adj,
            'cluster': cluster,
        }
        for gene, logfc, pval, pval_adj in zip(cluster_marker_genes, logfoldchanges, pvals, pvals_adj)
    )

In [9]:
# Convert the collected records into a long-format DataFrame: one row per
# (gene, cluster) record, with columns named after the record keys.
markers_df = pd.DataFrame(results)

In [10]:
# Keep only the rows whose adjusted p-value is strictly below 0.01.
markers_df = markers_df.loc[markers_df['adjusted p-value'].lt(0.01)]

In [11]:
# Display the filtered marker table (pandas abbreviates long frames in the output).
markers_df

Unnamed: 0,gene,log fold change,p-value,adjusted p-value,cluster
0,WISP2,6.691909,0.0,0.0,Fib1
1,SCARA5,6.163282,0.0,0.0,Fib1
2,CLU,4.849216,0.0,0.0,Fib1
3,DCN,1.980738,0.0,0.0,Fib1
4,FBLN1,3.146318,0.0,0.0,Fib1
...,...,...,...,...,...
15270,F2RL2,-3.725028,0.0,0.0,Fib1
15271,COL11A1,-6.184157,0.0,0.0,Fib1
15272,TNN,-7.056352,0.0,0.0,Fib1
15273,EDNRA,-6.086195,0.0,0.0,Fib1


In [13]:
# Save the filtered marker table as CSV inside out_dir; index=False keeps the
# DataFrame row index out of the file.
markers_df.to_csv(f'{out_dir}/fib1_vs_fib2.csv', index=False)