In [1]:
import anndata
import pandas as pd
import xarray as xr
from ALLCools.mcds import MCDS

In [2]:
# Read the snmC/snm3C integration-group table (first column as the index)
# and keep only the 'IntegrationGroup' labels of rows whose batch is 'snm3C'.
integration_groups = (
    pd.read_csv('snmC_snm3C_integration_groups.csv', index_col=0)
    .loc[lambda table: table['batch'] == 'snm3C', 'IntegrationGroup']
)
# Display the number of rows per integration group.
integration_groups.value_counts()

Glia     3974
DG       3624
Exc      2450
Inh       647
Other     279
Name: IntegrationGroup, dtype: int64

In [3]:
# Open every *.mcds store matched by the glob under gene_frac/ as a single
# MCDS object; it is later indexed by `cell` and `mc_type` to pull the
# 'gene_da_frac' variable.
# NOTE(review): presumably opened lazily — confirm against ALLCools MCDS.open.
gene_ds = MCDS.open('../../snm3C_clustering/gene_frac/*.mcds')

In [4]:
# Intra-modality (snm3C-only) clustering results, indexed by the first CSV
# column; the 'L1' column is read from this table further down.
cell_meta = pd.read_csv('../../snm3C_clustering/L1.ClusteringResults.csv.gz', index_col=0)

In [5]:
# Collapse the integration groups: 'Exc' and 'Inh' are merged into a single
# 'Neuron' group, every other label is kept as-is.
#
# Series.map turns any label that is missing from the dict into NaN, and this
# cell overwrites `integration_groups` in place. Without the identity
# 'Neuron' entry, running the cell a second time would therefore map every
# already-merged 'Neuron' label to NaN and silently drop those cells from the
# groupby in the export loop. The identity entry makes the cell safe to
# re-run while leaving the first-run result unchanged.
integration_groups = integration_groups.map({
    'Exc': 'Neuron',
    'Inh': 'Neuron',
    'Neuron': 'Neuron',  # identity entry: keeps the remap idempotent
    'DG': 'DG',
    'Glia': 'Glia',
    'Other': 'Other',
})

# `mc_type` coordinate of gene_ds to select for each merged group.
# 'Other' has no entry on purpose: the export loop skips that group.
group_mc_types = {
    'Neuron': 'CHN',
    'Glia': 'CGN',
    'DG': 'CGN'
}

In [6]:
# Write one .h5ad file per integration group (except 'Other'), holding the
# gene-level 'gene_da_frac' values for that group's cells.
gene_index = gene_ds.get_index('gene')  # same for every group, so looked up once
for group, cells in integration_groups.groupby(integration_groups):
    if group == 'Other':
        # No mc_type is configured for 'Other'; nothing is exported for it.
        continue

    group_mc_type = group_mc_types[group]
    cell_ids = cells.index

    # Select this group's cells and its mc_type, then materialise the values.
    # assumes the resulting array is laid out as (cell, gene) — TODO confirm
    group_ds = gene_ds.sel(cell=cell_ids, mc_type=group_mc_type)
    gene_fracs = group_ds['gene_da_frac'].values

    cell_obs = pd.DataFrame([], index=cell_ids)
    gene_var = pd.DataFrame([], index=gene_index)
    adata = anndata.AnnData(X=gene_fracs, obs=cell_obs, var=gene_var)

    # Both annotation columns are filled from the same 'L1' clustering column.
    # NOTE(review): SubType mirrors MajorType here — confirm this is intended.
    l1_labels = cell_meta.loc[cell_ids, 'L1']
    adata.obs['MajorType'] = l1_labels
    adata.obs['SubType'] = l1_labels

    adata.write_h5ad(f'snm3C.{group}.gene_{group_mc_type}_fracs.h5ad')

... storing 'MajorType' as categorical
... storing 'SubType' as categorical
... storing 'MajorType' as categorical
... storing 'SubType' as categorical
... storing 'MajorType' as categorical
... storing 'SubType' as categorical


In [7]:
# Sanity check: per-group counts after 'Exc' and 'Inh' were merged into 'Neuron'.
integration_groups.value_counts()

Glia      3974
DG        3624
Neuron    3097
Other      279
Name: IntegrationGroup, dtype: int64