In [16]:
import scanpy as sc
import os
import pandas as pd
import decoupler as dc

In [17]:
# Resolve the input directory to an absolute path and derive the locations of
# the two GSE165897 tables (UMI counts and cell info) inside it.
data_dir = os.path.abspath('../../../data/sc-data')
counts_fn, meta_fn = (
    os.path.join(data_dir, table_name)
    for table_name in ('GSE165897_UMIcounts_HGSOC.tsv', 'GSE165897_cellInfo_HGSOC.tsv')
)

In [18]:
# Read the tab-separated UMI count table and transpose it so that its
# observation axis lines up with the rows of the cell-info table.
adata = sc.read_text(counts_fn, delimiter='\t').T

# Cell-info table; its first column becomes the index used to match cells.
meta = pd.read_table(meta_fn, delimiter='\t', index_col=0)

# Align the metadata to the count matrix by cell identifier rather than
# trusting that both files list the cells in the same order: a positional
# assignment would silently attach annotations to the wrong cells if the
# orders ever differed.
unannotated = adata.obs_names.difference(meta.index)
if len(unannotated) > 0:
    raise ValueError(
        f'{len(unannotated)} cells in {counts_fn} have no row in {meta_fn}, '
        f'e.g. {list(unannotated[:5])}'
    )
adata.obs = meta.loc[adata.obs_names]

# Register the count matrix under the 'counts' layer. No explicit copy is made
# here, so use `adata.X.copy()` instead if `adata.X` is ever transformed in
# place later on.
adata.layers['counts'] = adata.X

In [19]:
# Boolean mask selecting the cells whose `treatment_phase` is 'treatment-naive'.
keep = adata.obs['treatment_phase'] == 'treatment-naive'
# Slicing an AnnData returns a view; copy it so that downstream steps work on an
# independent object instead of triggering an implicit view-to-actual
# conversion (and its warning) the first time the subset is modified.
adata = adata[keep, :].copy()

In [24]:
# Display the current `adata` object; its repr is the cell output.
adata

View of AnnData object with n_obs × n_vars = 21761 × 32847
    obs: 'sample', 'patient_id', 'treatment_phase', 'anatomical_location', 'cell_type', 'cell_subtype', 'nCount_RNA', 'nFeature_RNA', 'percent.mt'
    layers: 'counts'

In [5]:
# List the distinct values of the `cell_subtype` annotation in `adata.obs`.
adata.obs['cell_subtype'].unique()

array(['EOC_C11', 'EOC_C12', 'EOC_C5', 'EOC_C4', 'EOC_C9', 'EOC_C2',
       'EOC_C1', 'EOC_C6', 'EOC_C8', 'EOC_C7', 'EOC_C10', 'EOC_C3',
       'CAF-3', 'Mesothelial', 'CAF-1', 'CAF-2', 'Endothelial', 'T-cells',
       'Plasma-cells', 'NK', 'DC-2', 'B-cells', 'Macrophages', 'DC-1',
       'pDC', 'Mast-cells', 'ILC'], dtype=object)

In [6]:
# Aggregate the 'counts' layer into pseudobulk profiles, grouping cells by
# `patient_id` (sample) and `cell_subtype` (group).
padata = dc.get_pseudobulk(
    adata,
    sample_col='patient_id',
    groups_col='cell_subtype',
    layer='counts',
    min_prop=0.2,
    min_smpls=3,
)

# Scale every profile to a total of 1e4, then apply log1p (both modify `padata`).
sc.pp.normalize_total(padata, target_sum=1e4)
sc.pp.log1p(padata)

# Show which cell subtypes are present in the pseudobulk object.
padata.obs['cell_subtype'].unique()

  psbulk = AnnData(psbulk, obs=new_obs, var=new_var)
  view_to_actual(adata)


array(['B-cells', 'CAF-1', 'CAF-2', 'CAF-3', 'DC-1', 'DC-2', 'EOC_C1',
       'EOC_C11', 'EOC_C12', 'EOC_C2', 'EOC_C4', 'EOC_C5', 'EOC_C6',
       'EOC_C7', 'EOC_C8', 'Macrophages', 'Mesothelial', 'NK',
       'Plasma-cells', 'T-cells', 'pDC'], dtype=object)

In [7]:
# Count the distinct cell subtypes present in the pseudobulk object.
len(pd.unique(padata.obs['cell_subtype']))

21

In [8]:
# Build a per-subtype gene table (`net`): contrast each cell subtype against
# the rest with a t-test on the pseudobulk profiles and collect the genes
# returned by `dc.get_top_targets`.
cell_types = padata.obs.cell_subtype.unique()

# Genes are kept only if their logFC is strictly greater than this value.
# NOTE(review): -10 removes only genes with logFC <= -10; confirm that such a
# permissive threshold is intended.
cutoff = -10

signatures = []  # one DataFrame of selected genes per cell subtype
for cell_type in cell_types:
    logFCs, pvals = dc.get_contrast(
        padata,
        None,
        condition_col='cell_subtype',
        condition=cell_type,
        reference='rest',
        method='t-test',
    )
    sign = dc.get_top_targets(logFCs, pvals, cell_type + '.vs.rest', sign_thr=0.05, lFCs_thr=0.5)
    # Name the index before turning it into a column, so the 'target' column
    # exists whether or not the returned index already carries a name.
    sign = sign.rename_axis('target').reset_index()
    sign['cell_subtype'] = cell_type
    sign = sign[sign['logFCs'] > cutoff]
    signatures.append(sign)

# Stack the per-subtype tables into one frame with a fresh 0..n-1 index.
net = pd.concat(signatures, ignore_index=True)

# Drop genes whose symbol starts with 'MT-'; `na=False` treats a missing
# symbol as "does not start with 'MT-'" so that such a row is kept.
net = net[~net['target'].str.startswith('MT-', na=False)]

  self.stats[group_name, 'names'] = self.var_names[global_indices]
  self.stats[group_name, 'scores'] = scores[global_indices]
  self.stats[group_name, 'pvals'] = pvals[global_indices]
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'names'] = self.var_names[global_indices]
  self.stats[group_name, 'scores'] = scores[global_indices]
  self.stats[group_name, 'pvals'] = pvals[global_indices]
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'names'] = self.var_names[global_indices]
  self.stats[group_name, 'scores'] = scores[global_indices]
  self.stats[group_name, 'pvals'] = pvals[global_indices]
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'names'] = self.var_names[global_indices]
  self.stats[group

In [14]:
# Report the (rows, columns) dimensions of the `net` table.
net.shape

(45547, 5)

(45426, 5)