# Map m3C L1 to mC Cell Types

In [1]:
import joblib
import pandas as pd
from gliderport.preset import notebook_snakemake
from wmb import cemba


def save_index(path, index):
    """Write the entries of *index* to *path*, one entry per line.

    Parameters
    ----------
    path : str or path-like
        Destination file. It is opened in ``"w"`` mode, so any existing
        content is overwritten.
    index : iterable
        Labels to write (for example a pandas Index of cell IDs). Every
        entry is converted with ``str`` so that non-string labels (such as
        integers) do not make ``str.join`` raise ``TypeError``.

    Notes
    -----
    Entries are separated by ``"\\n"``; no trailing newline is written after
    the last entry, and an empty *index* produces an empty file.
    """
    with open(path, "w") as f:
        f.write("\n".join(map(str, index)))

In [2]:
# Load the per-cell metadata tables; the first CSV column becomes the index.
mc_annot = pd.read_csv("mC_META_230814.csv", index_col=0)
# The m3C 'l1' labels are cast to strings right after loading.
m3c_annot = pd.read_csv("m3C_META_230814.csv", index_col=0).astype({"l1": str})

In [3]:
# Load the integration-group mapping produced under Round1/. Each key is a
# group id and each value is a dict with a 'ref' list (used below to select mC
# 'l1' labels) and a 'query' list (used below to select m3C 'l1' labels).
# NOTE(review): joblib.load unpickles the file; only load files you trust.
integraion_group = joblib.load("Round1/m3C-L4Region.mc-L4Region.integration_groups.dict")

In [4]:
# Display the loaded mapping for visual inspection.
integraion_group

{-1: {'ref': ['c55', 'c65', 'c69', 'c71', 'c73', 'c78'],
  'query': ['24', '37']},
 0: {'ref': ['c19', 'c29', 'c3', 'c33', 'c35', 'c66'],
  'query': ['29', '7', '13', '44']},
 1: {'ref': ['c12', 'c20', 'c36', 'c38', 'c4'],
  'query': ['35', '23', '15', '17']},
 2: {'ref': ['c11', 'c16', 'c61', 'c64', 'c77'],
  'query': ['20', '40', '8', '3']},
 3: {'ref': ['c0', 'c23', 'c48', 'c68', 'c74', 'c75'], 'query': ['45', '1']},
 4: {'ref': ['c17', 'c46'], 'query': ['12', '21', '5', '36']},
 5: {'ref': ['c10', 'c45', 'c50', 'c63'], 'query': ['41', '19']},
 6: {'ref': ['c2', 'c53', 'nan'], 'query': ['4', '0']},
 7: {'ref': ['c34', 'c7', 'c8'], 'query': ['9', '16']},
 8: {'ref': ['c21', 'c39', 'c9'], 'query': ['18', '38']},
 9: {'ref': ['c15', 'c31', 'c52', 'c67'], 'query': ['6']},
 10: {'ref': ['c1', 'c13', 'c5'], 'query': ['2']},
 11: {'ref': ['c18', 'c47', 'c51'], 'query': ['25']},
 12: {'ref': ['c14', 'c22'], 'query': ['11']},
 13: {'ref': ['c24', 'c25'], 'query': ['10']},
 14: {'ref': ['c26'

In [4]:
# Integration group ids, i.e. the keys of the mapping in dictionary order.
# They are used as-is (no 'inte_' prefix is added).
inte_groups = list(integraion_group)

In [5]:
# Set up the Round2 run with one entry per integration group, using the
# notebooks in template/ and the given default CPU / memory settings.
# NOTE(review): presumably this creates the Round2/<group>/ directories that
# the cell-ID files are written into later — confirm against gliderport.
notebook_snakemake(
    work_dir="Round2/",
    notebook_dir="template/",
    groups=inte_groups,
    default_cpu=8,
    default_mem_gb=10,
    redo_prepare=False,
)

## Add Cell IDs

In [14]:
# Minimum number of cells a group must have on BOTH the mC and the m3C side
# for its cell ID lists to be written; smaller groups are only printed.
n_min_cells = 50

In [15]:
# For every integration group, collect the mC cells whose 'l1' label is in the
# group's 'ref' list and the m3C cells whose 'l1' label is in its 'query'
# list, then write both ID lists into the group's Round2 directory.
# NOTE(review): only m3c_annot['l1'] is cast to str at load time, while one
# group lists 'nan' as a ref label — mC cells with a missing 'l1' would not
# match that string. Confirm this exclusion is intended.
for group_name in inte_groups:
    members = integraion_group[group_name]

    ref_mask = mc_annot['l1'].isin(members['ref'])
    query_mask = m3c_annot['l1'].isin(members['query'])
    mc_cells = mc_annot.index[ref_mask]
    m3c_cells = m3c_annot.index[query_mask]

    # Groups that are too small on either side are reported and skipped.
    if min(len(mc_cells), len(m3c_cells)) < n_min_cells:
        print(group_name)
        continue

    save_index(f"Round2/{group_name}/mc_cells.txt", mc_cells)
    save_index(f"Round2/{group_name}/m3c_cells.txt", m3c_cells)
