In [110]:
import scanpy as sc
import numpy as np

def find_differentially_expressed_genes(adata, pval_threshold=0.0001, logfc_threshold=0.5,
                                        groupby='cell_type', use_adjusted_pvals=False):
    """
    Find differentially expressed genes for each group (by default, each cell type).

    Runs ``sc.tl.rank_genes_groups`` with a t-test on ``adata`` and, for every
    group, keeps the genes whose p-value is below ``pval_threshold`` and whose
    absolute log fold change is above ``logfc_threshold``. Genes whose log fold
    change is NaN never satisfy the comparison and are therefore excluded.

    Parameters:
    adata (AnnData): Annotated data matrix. Modified in place: the ranking
        results are written to ``adata.uns['rank_genes_groups']``.
    pval_threshold (float): p-value threshold to consider a gene as differentially expressed.
    logfc_threshold (float): Log fold change threshold to consider a gene as differentially expressed.
    groupby (str): Column of ``adata.obs`` that defines the groups.
    use_adjusted_pvals (bool): If True, filter on the multiple-testing-adjusted
        p-values (``pvals_adj``) instead of the raw ones (``pvals``). The default
        keeps the raw p-values, which are not corrected for multiple testing.

    Returns:
    dict: A dictionary with group labels as keys (in order of first appearance in
        ``adata.obs[groupby]``) and lists of differentially expressed genes as values.

    Raises:
    KeyError: If ``groupby`` is not a column of ``adata.obs``.
    """
    if groupby not in adata.obs:
        raise KeyError(f"adata.obs has no column {groupby!r} to group cells by")

    # Group labels in order of first appearance. Missing labels are dropped:
    # they do not name a group and cannot be looked up in the results.
    group_labels = adata.obs[groupby].dropna().unique()

    # Rank genes for every group at once; results land in adata.uns.
    sc.tl.rank_genes_groups(adata, groupby=groupby, method='t-test')
    ranked = adata.uns['rank_genes_groups']
    pval_key = 'pvals_adj' if use_adjusted_pvals else 'pvals'

    differential_expression_results = {}
    for label in group_labels:
        # The results are record arrays with one field per group, and scanpy
        # names each field with the label's string form. Indexing with a
        # non-string label (e.g. an int) would select a *row* instead of the
        # group's column, so always look the field up by str(label).
        field = str(label)
        gene_names = np.asarray(ranked['names'][field])
        pvals = np.asarray(ranked[pval_key][field], dtype=float)
        logfc = np.asarray(ranked['logfoldchanges'][field], dtype=float)

        # NaN entries compare as False; silence the warning some numpy
        # versions emit for such comparisons.
        with np.errstate(invalid='ignore'):
            keep = (pvals < pval_threshold) & (np.abs(logfc) > logfc_threshold)

        differential_expression_results[label] = gene_names[keep].tolist()

    return differential_expression_results

# Example usage
# adata = sc.read_h5ad('your_data_file.h5ad')
# diff_expr_genes = find_differentially_expressed_genes(adata)
# print(diff_expr_genes)


In [111]:
from copy import deepcopy
from interpretable_ssl.datasets.immune import *

# Instantiate the immune dataset wrapper; later cells copy its `adata` attribute
# before modifying it, so `ds` itself is left untouched.
ds = ImmuneDataset()

In [113]:
# Work on a private copy so the dataset's own AnnData is never modified.
adata = deepcopy(ds.adata)

# Densify X, shift every entry up by 1e-10, then floor at 1e-10 so that no
# entry is zero or negative before scaling.
adata.X = np.clip(adata.X.toarray() + 1e-10, a_min=1e-10, a_max=None)

# Round-trip the dense matrix through a throwaway AnnData and take its X back.
# NOTE(review): a dense array appears to come back dense, so this likely does
# not re-sparsify anything -- confirm whether the round trip is needed.
adata.X = sc.AnnData(adata.X).X

# Scale with scanpy (values capped at 10), then call DE genes on the scaled matrix.
# NOTE(review): the recorded output of this cell shows warnings raised at
# scanpy's log2 fold-change computation -- presumably because scaled values can
# be negative. Genes whose log fold change ends up NaN never pass the |logFC|
# filter; confirm that running the test on scaled data is intended.
sc.pp.scale(adata, max_value=10)
res1 = find_differentially_expressed_genes(adata)

# Number of DE genes per cell type (scaled data).
[len(genes) for genes in res1.values()]

  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'logfoldchanges'] = np.log2(


[95,
 444,
 556,
 1336,
 772,
 1646,
 1518,
 947,
 190,
 998,
 1168,
 41,
 626,
 1248,
 896,
 1586]

In [114]:
# Fresh private copy of the dataset's AnnData (this rebinds `adata`).
adata = deepcopy(ds.adata)

# Densify X, shift every entry up by 1e-10, then floor at 1e-10 so that no
# entry is zero or negative.
adata.X = np.clip(adata.X.toarray() + 1e-10, a_min=1e-10, a_max=None)

# Round-trip the dense matrix through a throwaway AnnData and take its X back.
# NOTE(review): a dense array appears to come back dense, so this likely does
# not re-sparsify anything -- confirm whether the round trip is needed.
adata.X = sc.AnnData(adata.X).X

# Call DE genes on the unscaled matrix (no sc.pp.scale in this cell).
res = find_differentially_expressed_genes(adata)

# Number of DE genes per cell type (unscaled data).
[len(genes) for genes in res.values()]

[2228,
 3294,
 2050,
 2514,
 1716,
 2828,
 2682,
 1774,
 2198,
 1786,
 2446,
 3182,
 1242,
 2602,
 1837,
 2887]

In [115]:
# For each cell type, count the genes present in `res1` but absent from `res`.
# NOTE(review): despite the variable name this is a set *difference* size, not
# an intersection -- confirm whether `&` was intended instead of `-`.
intersection = {
    cell: len(set(res1[cell]).difference(res[cell]))
    for cell in res
}

In [116]:
# Display the per-cell-type counts stored in `intersection`.
intersection

{'CD16+ Monocytes': 26,
 'CD4+ T cells': 13,
 'CD14+ Monocytes': 94,
 'NKT cells': 28,
 'HSPCs': 6,
 'CD8+ T cells': 15,
 'Erythrocytes': 12,
 'CD10+ B cells': 6,
 'Plasmacytoid dendritic cells': 13,
 'Monocyte progenitors': 5,
 'CD20+ B cells': 23,
 'Monocyte-derived dendritic cells': 14,
 'Plasma cells': 6,
 'Erythroid progenitors': 6,
 'Megakaryocyte progenitors': 6,
 'NK cells': 28}

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(
  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(
  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(
  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(
  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(
  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(
  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = um.true_divide(
  mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
  return _methods._mean(a, 

{'CD16+ Monocytes': ['PQLC2'],
 'CD4+ T cells': ['PQLC2'],
 'CD14+ Monocytes': ['PQLC2'],
 'NKT cells': ['PQLC2'],
 'HSPCs': ['PQLC2'],
 'CD8+ T cells': ['PQLC2'],
 'Erythrocytes': ['PQLC2'],
 'CD10+ B cells': ['PQLC2'],
 'Plasmacytoid dendritic cells': ['PQLC2'],
 'Monocyte progenitors': ['PQLC2'],
 'CD20+ B cells': ['PQLC2'],
 'Monocyte-derived dendritic cells': ['PQLC2'],
 'Plasma cells': ['PQLC2'],
 'Erythroid progenitors': ['PQLC2'],
 'Megakaryocyte progenitors': ['PQLC2'],
 'NK cells': ['PQLC2']}