In [None]:
import pandas as pd
from wmb import *

import pathlib

In [None]:
# Load the two annotation tables that the rest of the notebook filters and
# samples cell IDs from.
# NOTE(review): `cemba` is presumably provided by the `from wmb import *`
# star-import above — confirm.
# NOTE(review): mc_annot is used below via ['L1_annot'], .sel(cell=...) and
# .get_index('cell'); m3c_annot is only used via .index. Their concrete types
# are not visible here — confirm against wmb.
mc_annot = cemba.get_mc_annot()
m3c_annot = cemba.get_m3c_mapping_metric()

In [None]:
def save_index(cells, file_name, downsample=100000, random_state=0):
    """Write cell IDs to ``file_name``, one per line, optionally downsampled.

    Parameters
    ----------
    cells
        Cell IDs; anything ``pd.Series`` accepts (list, ``pd.Index``, ...).
    file_name
        Destination path. An existing file is overwritten.
    downsample
        Maximum number of IDs to write. When ``cells`` holds more than this,
        a random subset of exactly ``downsample`` IDs is drawn without
        replacement. ``None`` disables downsampling and writes every ID.
    random_state
        Seed forwarded to ``Series.sample`` so the subset is reproducible.
    """
    cells = pd.Series(cells)
    # Guard against None so callers can opt out of downsampling explicitly
    # instead of passing an arbitrarily large cap.
    if downsample is not None and cells.size > downsample:
        cells = cells.sample(downsample, random_state=random_state)
    # No index column and no header: the output is a bare list of IDs.
    cells.to_csv(file_name, index=False, header=False)

## L1 - Neuron and Non-Neuron

In [None]:
# Write the mC and m3C cell-ID lists for the L1 run into ../L1/ALL.
# Both lists are capped at 3,000,000 IDs with a fixed seed.
pathlib.Path('../L1/ALL').mkdir(parents=True, exist_ok=True)

for cell_ids, out_path in (
        (mc_annot.get_index('cell'), '../L1/ALL/mc_cells.txt'),
        (m3c_annot.index, '../L1/ALL/m3c_cells.txt'),
):
    save_index(cell_ids,
               file_name=out_path,
               downsample=3000000,
               random_state=0)

In [None]:
# Render the Snakefile template found in the current directory into
# ../L1/Snakefile by filling in its REPLACE_* placeholders.
cwd = pathlib.Path().absolute()

# (placeholder, value) pairs, applied in this order.
# NOTE(review): the L2 cells of this notebook also substitute REPLACE_MC_TYPE;
# it is not substituted here — confirm the L1 workflow does not need it.
l1_substitutions = [
    ('REPLACE_TEMPLATE_DIR', str(cwd)),
    ('REPLACE_DATASET', 'CEMBA_3C'),
    ('REPLACE_CLUSTER_COL', 'L1'),
    ('REPLACE_INTEGRATION_GROUP_KEY', 'L2'),
    ('REPLACE_INTEGRATION_PLOT_KEY', 'L1'),
    # The list is inserted as its Python repr.
    ('REPLACE_CATEGORICAL_KEY', str(['L1_annot', 'L2', 'DissectionRegion'])),
]

with open('Snakefile') as template_f, open('../L1/Snakefile', 'w') as out_f:
    rendered = template_f.read()
    for placeholder, value in l1_substitutions:
        rendered = rendered.replace(placeholder, value)
    out_f.write(rendered)

## L2 - Neuron and Non-Neuron

In [None]:
# Per-cell integration-group assignments read from ../L1/ALL.
# NOTE(review): presumably these files are outputs of the L1 Snakemake run
# and hold an index column plus exactly one value column — confirm.
#
# squeeze('columns') converts that single column into a Series. Unlike a bare
# .squeeze(), it never collapses a one-row file down to a scalar, which would
# break the `.index` / boolean-mask usage in the cells below.
mc_groups = pd.read_csv('../L1/ALL/mc_integration_group.csv.gz',
                        index_col=0).squeeze('columns')
m3c_groups = pd.read_csv('../L1/ALL/m3c_integration_group.csv.gz',
                         index_col=0).squeeze('columns')

In [None]:
# Partition mC cells by their L1 annotation into three classes:
#   non-neurons : ODC / OPC / ASC / MGC
#   gc_cb       : CB / CBX / DG (kept apart from the other neurons)
#   neurons     : every remaining label
non_neuron_labels = ['ODC', 'OPC', 'ASC', 'MGC']
gc_cb_labels = ['CB', 'CBX', 'DG']

l1_labels = mc_annot['L1_annot']
is_neuron = ~l1_labels.isin(non_neuron_labels + gc_cb_labels)
is_non_neuron = l1_labels.isin(non_neuron_labels)
is_gc_cb = l1_labels.isin(gc_cb_labels)

# Cell IDs belonging to each class.
mc_neurons = mc_annot.sel(cell=is_neuron).get_index('cell').to_list()
mc_non_neurons = mc_annot.sel(cell=is_non_neuron).get_index('cell').to_list()
gc_cb = mc_annot.sel(cell=is_gc_cb).get_index('cell').to_list()

# Integration groups that contain at least one cell of each class. A group
# holding cells from more than one class appears in more than one list.
in_gc_cb = mc_groups.index.isin(gc_cb)
in_non_neuron = mc_groups.index.isin(mc_non_neurons)
in_neuron = mc_groups.index.isin(mc_neurons)

gc_cb_inte = mc_groups.loc[in_gc_cb].unique().tolist()
non_neuron_inte = mc_groups.loc[in_non_neuron].unique().tolist()
neuron_inte = mc_groups.loc[in_neuron].unique().tolist()

In [None]:
# L2 inputs for the neuronal integration groups: one directory of cell lists
# per group under ../L2/Neuron, plus a rendered Snakefile next to them.
l2_neuron_dir = pathlib.Path('../L2/Neuron')
# Create the base directory up front so the Snakefile below can be written
# even when every group is skipped by the size filter.
l2_neuron_dir.mkdir(parents=True, exist_ok=True)

for group_id in neuron_inte:
    mc_cells = mc_groups[mc_groups == group_id].index
    m3c_cells = m3c_groups[m3c_groups == group_id].index
    group_name = f'InteGroup{group_id}'

    # Groups with <= 150 cells in either the mC or the m3C set are not
    # written; their name is printed so the skip is visible.
    if mc_cells.size <= 150 or m3c_cells.size <= 150:
        print(group_name)
        continue

    out_dir = l2_neuron_dir / group_name
    out_dir.mkdir(exist_ok=True, parents=True)
    save_index(mc_cells,
               out_dir / 'mc_cells.txt',
               downsample=1000000,
               random_state=0)
    save_index(m3c_cells,
               out_dir / 'm3c_cells.txt',
               downsample=1000000,
               random_state=0)

# Render the Snakefile template from the current directory.
cwd = pathlib.Path().absolute()

# (placeholder, value) pairs, applied in this order.
# NOTE(review): the L1 Snakefile in this notebook uses 'CEMBA_3C' for
# REPLACE_DATASET; confirm 'AIBS_SMART' is intended here.
l2_neuron_substitutions = [
    ('REPLACE_MC_TYPE', 'CHN'),
    ('REPLACE_TEMPLATE_DIR', str(cwd)),
    ('REPLACE_DATASET', 'AIBS_SMART'),
    ('REPLACE_CLUSTER_COL', 'L2'),
    ('REPLACE_INTEGRATION_GROUP_KEY', 'L3'),
    ('REPLACE_INTEGRATION_PLOT_KEY', 'L2'),
    # The list is inserted as its Python repr.
    ('REPLACE_CATEGORICAL_KEY', str(['L2', 'L3', 'DissectionRegion'])),
]

with open('Snakefile') as template_f, \
        open(l2_neuron_dir / 'Snakefile', 'w') as out_f:
    rendered = template_f.read()
    for placeholder, value in l2_neuron_substitutions:
        rendered = rendered.replace(placeholder, value)
    out_f.write(rendered)

In [None]:
# L2 inputs for the non-neuronal integration groups: one directory of cell
# lists per group under ../L2/NonNeuron, plus a rendered Snakefile.
l2_non_neuron_dir = pathlib.Path('../L2/NonNeuron')
# Create the base directory up front so the Snakefile below can be written
# even when every group is skipped by the size filter.
l2_non_neuron_dir.mkdir(parents=True, exist_ok=True)

for group_id in non_neuron_inte:
    mc_cells = mc_groups[mc_groups == group_id].index
    m3c_cells = m3c_groups[m3c_groups == group_id].index
    group_name = f'InteGroup{group_id}'

    # Groups with <= 150 cells in either the mC or the m3C set are not
    # written; their name is printed so the skip is visible.
    if mc_cells.size <= 150 or m3c_cells.size <= 150:
        print(group_name)
        continue

    out_dir = l2_non_neuron_dir / group_name
    out_dir.mkdir(exist_ok=True, parents=True)
    save_index(mc_cells,
               out_dir / 'mc_cells.txt',
               downsample=1000000,
               random_state=0)
    save_index(m3c_cells,
               out_dir / 'm3c_cells.txt',
               downsample=1000000,
               random_state=0)

# Render the Snakefile template from the current directory.
cwd = pathlib.Path().absolute()

# (placeholder, value) pairs, applied in this order.
# NOTE(review): the L1 Snakefile in this notebook uses 'CEMBA_3C' for
# REPLACE_DATASET; confirm 'AIBS_SMART' is intended here.
l2_non_neuron_substitutions = [
    ('REPLACE_MC_TYPE', 'CGN'),
    ('REPLACE_TEMPLATE_DIR', str(cwd)),
    ('REPLACE_DATASET', 'AIBS_SMART'),
    ('REPLACE_CLUSTER_COL', 'L2'),
    ('REPLACE_INTEGRATION_GROUP_KEY', 'L3'),
    ('REPLACE_INTEGRATION_PLOT_KEY', 'L2'),
    # The list is inserted as its Python repr.
    ('REPLACE_CATEGORICAL_KEY', str(['L2', 'L3', 'DissectionRegion'])),
]

with open('Snakefile') as template_f, \
        open(l2_non_neuron_dir / 'Snakefile', 'w') as out_f:
    rendered = template_f.read()
    for placeholder, value in l2_non_neuron_substitutions:
        rendered = rendered.replace(placeholder, value)
    out_f.write(rendered)

In [None]:
# L2 inputs for the gc_cb (CB / CBX / DG) integration groups, CHN variant:
# one directory of cell lists per group under ../L2/gc_cb/ch, plus a rendered
# Snakefile with REPLACE_MC_TYPE set to 'CHN'.
gc_cb_ch_dir = pathlib.Path('../L2/gc_cb/ch')
# Create the base directory up front so the Snakefile below can be written
# even when every group is skipped by the size filter.
gc_cb_ch_dir.mkdir(parents=True, exist_ok=True)

for group_id in gc_cb_inte:
    mc_cells = mc_groups[mc_groups == group_id].index
    m3c_cells = m3c_groups[m3c_groups == group_id].index
    group_name = f'InteGroup{group_id}'

    # Groups with <= 150 cells in either the mC or the m3C set are not
    # written; their name is printed so the skip is visible.
    if mc_cells.size <= 150 or m3c_cells.size <= 150:
        print(group_name)
        continue

    out_dir = gc_cb_ch_dir / group_name
    out_dir.mkdir(exist_ok=True, parents=True)
    save_index(mc_cells,
               out_dir / 'mc_cells.txt',
               downsample=1000000,
               random_state=0)
    save_index(m3c_cells,
               out_dir / 'm3c_cells.txt',
               downsample=1000000,
               random_state=0)

# Render the Snakefile template from the current directory.
cwd = pathlib.Path().absolute()

# (placeholder, value) pairs, applied in this order.
# NOTE(review): the L1 Snakefile in this notebook uses 'CEMBA_3C' for
# REPLACE_DATASET; confirm 'AIBS_SMART' is intended here.
gc_cb_ch_substitutions = [
    ('REPLACE_MC_TYPE', 'CHN'),
    ('REPLACE_TEMPLATE_DIR', str(cwd)),
    ('REPLACE_DATASET', 'AIBS_SMART'),
    ('REPLACE_CLUSTER_COL', 'L2'),
    ('REPLACE_INTEGRATION_GROUP_KEY', 'L3'),
    ('REPLACE_INTEGRATION_PLOT_KEY', 'L2'),
    # The list is inserted as its Python repr.
    ('REPLACE_CATEGORICAL_KEY', str(['L2', 'L3', 'DissectionRegion'])),
]

# Path is built from the base directory, which also removes the stray double
# slash the hand-written path contained.
with open('Snakefile') as template_f, \
        open(gc_cb_ch_dir / 'Snakefile', 'w') as out_f:
    rendered = template_f.read()
    for placeholder, value in gc_cb_ch_substitutions:
        rendered = rendered.replace(placeholder, value)
    out_f.write(rendered)

In [None]:
# L2 inputs for the gc_cb (CB / CBX / DG) integration groups, CGN variant:
# one directory of cell lists per group under ../L2/gc_cb/cg, plus a rendered
# Snakefile with REPLACE_MC_TYPE set to 'CGN'.
gc_cb_cg_dir = pathlib.Path('../L2/gc_cb/cg')
# Create the base directory up front so the Snakefile below can be written
# even when every group is skipped by the size filter.
gc_cb_cg_dir.mkdir(parents=True, exist_ok=True)

for group_id in gc_cb_inte:
    mc_cells = mc_groups[mc_groups == group_id].index
    m3c_cells = m3c_groups[m3c_groups == group_id].index
    group_name = f'InteGroup{group_id}'

    # Groups with <= 150 cells in either the mC or the m3C set are not
    # written; their name is printed so the skip is visible.
    if mc_cells.size <= 150 or m3c_cells.size <= 150:
        print(group_name)
        continue

    out_dir = gc_cb_cg_dir / group_name
    out_dir.mkdir(exist_ok=True, parents=True)
    save_index(mc_cells,
               out_dir / 'mc_cells.txt',
               downsample=1000000,
               random_state=0)
    save_index(m3c_cells,
               out_dir / 'm3c_cells.txt',
               downsample=1000000,
               random_state=0)

# Render the Snakefile template from the current directory.
cwd = pathlib.Path().absolute()

# (placeholder, value) pairs, applied in this order.
# NOTE(review): the L1 Snakefile in this notebook uses 'CEMBA_3C' for
# REPLACE_DATASET; confirm 'AIBS_SMART' is intended here.
gc_cb_cg_substitutions = [
    ('REPLACE_MC_TYPE', 'CGN'),
    ('REPLACE_TEMPLATE_DIR', str(cwd)),
    ('REPLACE_DATASET', 'AIBS_SMART'),
    ('REPLACE_CLUSTER_COL', 'L2'),
    ('REPLACE_INTEGRATION_GROUP_KEY', 'L3'),
    ('REPLACE_INTEGRATION_PLOT_KEY', 'L2'),
    # The list is inserted as its Python repr.
    ('REPLACE_CATEGORICAL_KEY', str(['L2', 'L3', 'DissectionRegion'])),
]

# The output must be named 'Snakefile' like in every other L1/L2 directory;
# this cell previously wrote to 'cSnakefile'.
with open('Snakefile') as template_f, \
        open(gc_cb_cg_dir / 'Snakefile', 'w') as out_f:
    rendered = template_f.read()
    for placeholder, value in gc_cb_cg_substitutions:
        rendered = rendered.replace(placeholder, value)
    out_f.write(rendered)