In [1]:
import anndata
import pandas as pd
import xarray as xr
from ALLCools.mcds import MCDS

In [2]:
# Read the integration-group table, keep only the rows whose batch is 'snmC',
# and reduce them to the 'IntegrationGroup' column (a Series keyed by the CSV's first column).
integration_table = pd.read_csv('snmC_snm3C_integration_groups.csv', index_col=0)
is_snmc_batch = integration_table['batch'] == 'snmC'
integration_groups = integration_table.loc[is_snmc_batch, 'IntegrationGroup']

# Show how many rows fall into each group.
integration_groups.value_counts()

Exc      7702
DG       6183
Glia     1720
Inh      1015
Other     365
Name: IntegrationGroup, dtype: int64

In [3]:
# Open every .mcds store matching the glob as a single MCDS dataset.
gene_frac_mcds_glob = '../../../dmg/gene_frac/*.mcds'
gene_ds = MCDS.open(gene_frac_mcds_glob)

In [4]:
# Intra-modality clustering results (L1, 100kb step-by-step run), indexed by the CSV's first column.
# NOTE(review): `cell_meta` is not referenced by any other cell visible here — confirm it is still needed.
l1_clustering_csv = '../../../step_by_step/100kb/L1.ClusteringResults.csv.gz'
cell_meta = pd.read_csv(l1_clustering_csv, index_col=0)

In [5]:
# Collapse the integration groups into the coarser groups used for export:
# 'Exc' and 'Inh' are pooled as 'Neuron'; every other label maps to itself.
#
# This cell overwrites `integration_groups` in place, and `Series.map` turns
# any value missing from the dict into NaN. 'Neuron' therefore maps to itself
# so that re-running the cell is a no-op instead of silently converting the
# already-merged 'Neuron' labels to NaN (which a later groupby would drop).
integration_groups = integration_groups.map({
    'Exc': 'Neuron',
    'Inh': 'Neuron',
    'Neuron': 'Neuron',
    'DG': 'DG',
    'Glia': 'Glia',
    'Other': 'Other',
})

# `mc_type` coordinate value selected from `gene_ds` for each exported group.
# 'Other' intentionally has no entry; the export loop skips that group.
group_mc_types = {
    'Neuron': 'CHN',
    'Glia': 'CGN',
    'DG': 'CGN'
}

In [13]:
# Annotated metadata table; supplies the 'MajorType' and 'SubType' columns
# copied into each AnnData's obs below.
meta_anno = pd.read_csv('/home/hanliu/project/allcools_doc/data/Brain/snmC-seq2/HIP.Annotated.CellMetadata.csv.gz', index_col=0)

# Write one h5ad file per integration group, holding that group's
# 'gene_da_frac' values for the group's mc_type.
for group, cells in integration_groups.groupby(integration_groups):
    # The 'Other' group is not exported.
    if group == 'Other':
        continue

    group_mc_type = group_mc_types[group]

    # Restrict the dataset to this group's cells and mc_type, then pull the
    # values out as a plain array.
    # NOTE(review): assumes the resulting array is laid out (cell, gene) so it
    # lines up with the obs/var indexes below — confirm against the MCDS dims.
    group_ds = gene_ds.sel(cell=cells.index, mc_type=group_mc_type)
    gene_fracs = group_ds['gene_da_frac'].values

    # obs / var start as column-less frames that only carry the row labels.
    obs_frame = pd.DataFrame([], index=cells.index)
    var_frame = pd.DataFrame([], index=gene_ds.get_index('gene'))
    adata = anndata.AnnData(X=gene_fracs, obs=obs_frame, var=var_frame)

    # Attach the annotation columns, looked up by this group's index labels.
    for anno_col in ('MajorType', 'SubType'):
        adata.obs[anno_col] = meta_anno.loc[cells.index, anno_col]

    adata.write_h5ad(f'snmC.{group}.gene_{group_mc_type}_fracs.h5ad')

... storing 'MajorType' as categorical
... storing 'SubType' as categorical
... storing 'MajorType' as categorical
... storing 'SubType' as categorical
... storing 'MajorType' as categorical
... storing 'SubType' as categorical


In [7]:
# Display how many entries carry each label in `integration_groups`
# (sanity check of the group sizes after relabelling).
integration_groups.value_counts()

Neuron    8717
DG        6183
Glia      1720
Other      365
Name: IntegrationGroup, dtype: int64

In [15]:
# Display the AnnData object left in `adata` by the export loop, i.e. the one
# built in the last iteration that was not skipped.
adata

AnnData object with n_obs × n_vars = 8717 × 40331
    obs: 'MajorType', 'SubType'