In [None]:
import yaml
import pathlib
import anndata
import pandas as pd

In [None]:
# Absolute path of the directory that receives one sub-directory per group;
# it is created on first run and reused afterwards.
cluster_dir = pathlib.Path('../L4/').absolute()
cluster_dir.mkdir(exist_ok=True)

In [None]:
def prepare_group(group, cells, leiden_resolution=0.3, target_accuracy=0.92,
                  min_cluster_size=30, n_jobs=10):
    """Set up the working directory of one clustering group.

    Creates ``cluster_dir / group`` and its ``config`` sub-directory, writes
    the group's cell ids to ``select_cells.txt`` and copies every
    ``config/*.yaml`` template (relative to the current working directory)
    into the group's ``config`` directory with per-group adjustments.

    Parameters
    ----------
    group : str
        Group name; used as the directory name under ``cluster_dir``.
    cells
        Table whose ``index`` holds the cell ids of this group. Only the
        index is used; its values are joined with newlines, so they must be
        strings.
    leiden_resolution, target_accuracy, min_cluster_size, n_jobs
        Values written into the copy of ``06.yaml`` (the clustering step).
        The defaults are the values that used to be hard-coded here.

    Returns
    -------
    None
    """
    group_dir = cluster_dir / group
    config_dir = group_dir / 'config'
    # parents=True also creates group_dir (and cluster_dir if it is missing).
    config_dir.mkdir(parents=True, exist_ok=True)

    # subset cells
    with open(group_dir / 'select_cells.txt', 'w') as f:
        f.write('\n'.join(cells.index))

    # key parameters in clustering step
    clustering_params = {
        'leiden_resolution': leiden_resolution,
        'target_accuracy': target_accuracy,
        'min_cluster_size': min_cluster_size,
        'n_jobs': n_jobs,
    }

    for path in pathlib.Path('config/').glob('*.yaml'):
        # Parse the template before the destination is opened for writing, so
        # an unreadable or malformed template never leaves a truncated copy.
        with open(path) as f:
            config = yaml.safe_load(f)
        if config is None:
            # An empty YAML document loads as None; treat it as no settings.
            config = {}

        # adjust the default parameters for all the groups
        if 'select_cells' in config:
            config['select_cells'] = 'select_cells.txt'
        if path.name == '06.yaml':
            config.update(clustering_params)

        with open(config_dir / path.name, 'w') as outf:
            yaml.safe_dump(config, outf)

In [None]:
# Groups with fewer cells than this are not clustered further. Fewer than 60
# cells cannot be split into two clusters of 30 (the min_cluster_size written
# to 06.yaml); the cutoff actually applied is 65.
MIN_GROUP_CELLS = 65

# Sorted so that the printed running index is reproducible between runs;
# plain glob order depends on the file system.
adata_paths = sorted(
    pathlib.Path('../L3/').glob('c*/adata.with_coords.h5ad'))

i = 0
for adata_path in adata_paths:
    adata = anndata.read_h5ad(adata_path)
    n_cluster = adata.obs['L1'].unique().size
    if n_cluster == 1:
        # Only one L1 label in this dataset: nothing to prepare.
        continue

    # observed=True: for a categorical 'L1' only labels that actually occur
    # are iterated (unused categories would be dropped by the size filter
    # below anyway), independent of the pandas version's default.
    for group, cells in adata.obs.groupby('L1', observed=True):
        # Group directory name = parent directory name + L1 label.
        group = adata_path.parent.name + group
        if cells.shape[0] < MIN_GROUP_CELLS:
            continue
        print(i, group, cells.shape[0])
        i += 1
        prepare_group(group, cells)

In [None]:
# Copy the Snakefile template into ../L4/, replacing the
# REPLACE_TEMPLATE_DIR placeholder with the absolute path of the current
# working directory.
# The template is read before the destination is opened for writing, so a
# missing or unreadable template cannot leave an empty ../L4/Snakefile behind.
with open('Snakefile') as f:
    snakefile = f.read()

snakefile = snakefile.replace('REPLACE_TEMPLATE_DIR',
                              str(pathlib.Path().absolute()))

with open('../L4/Snakefile', 'w') as outf:
    outf.write(snakefile)

## Collect HTML

In [None]:
import pathlib
import subprocess
import anndata

In [None]:
# Create the local output directory for the HTML reports; exist_ok keeps the
# cell silent and idempotent when the notebook is re-run.
pathlib.Path('L4').mkdir(exist_ok=True)

In [None]:
# Render the clustering and plotting notebooks of every group directory under
# ../L4/ to HTML in out_dir, skipping reports that already exist.
out_dir = './L4'
count = 0  # number of group directories visited
for group_dir in pathlib.Path('../L4/').glob('c*/'):
    count += 1
    for notebook_name in ('06-Clustering.ipynb', '07-Plot.ipynb'):
        path = group_dir / notebook_name
        # Prefix with the group name so reports of different groups do not
        # collide in the shared output directory.
        output_name = f'{path.parent.name}-{path.name}'
        # The skip check expects the report at '<output_name>.html'.
        if pathlib.Path(f'{out_dir}/{output_name}.html').exists():
            continue
        # Argument list instead of a shell string: paths containing spaces or
        # shell metacharacters are passed through unchanged.
        subprocess.run(
            ['jupyter', 'nbconvert', '--to', 'html', '--no-input',
             '--output', output_name, '--output-dir', out_dir, str(path)],
            check=True,
        )

In [None]:
import pathlib

In [None]:
# Names of the group directories under ../L4/ whose config/06.yaml is
# rewritten with different clustering parameters by the loop over `sub_dirs`.
# NOTE(review): presumably hand-picked after inspecting the first clustering
# round — the selection criterion is not recorded here; confirm.
sub_dirs = [
    'c30c1c0', 'c0c0c14', 'c0c0c16', 'c0c2c15', 'c1c0c0', 'c1c1c0', 'c1c1c1',
    'c1c1c2', 'c1c1c3', 'c1c2c3', 'c1c3c0', 'c1c4c3', 'c1c5c1', 'c1c6c0',
    'c1c6c1', 'c2c3c3', 'c2c4c1', 'c2c5c0', 'c3c2c0', 'c3c3c1', 'c3c3c2',
    'c4c0c2', 'c4c1c0', 'c4c1c1', 'c4c2c4', 'c4c6c0', 'c4c9c0', 'c4c9c1',
    'c6c2c1', 'c6c3c1', 'c6c4c2', 'c6c6c0', 'c6c8c0', 'c7c1c5', 'c7c2c0',
    'c8c2c1', 'c8c7c2', 'c8c8c0', 'c9c1c0', 'c10c1c2', 'c11c4c0', 'c13c0c0',
    'c14c1c1', 'c16c0c0', 'c16c0c1', 'c17c1c0', 'c17c2c1', 'c25c2c0', 'c26c0c1'
]

In [None]:
# Rewrite the clustering config of each listed group in place with a lower
# Leiden resolution and target accuracy.
for sub_dir in sub_dirs:
    config_path = f'../L4/{sub_dir}/config/06.yaml'

    with open(config_path) as f:
        config = yaml.safe_load(f)
    config['leiden_resolution'] = 0.2
    config['target_accuracy'] = 0.9

    with open(config_path, 'w') as f:
        yaml.safe_dump(config, f)

In [None]:
from wmb import brain, cemba

In [None]:
# Fetch the m3C mapping-metric table via the wmb `cemba` helper.
# NOTE(review): presumably a pandas DataFrame with one row per cell — confirm
# against the wmb package.
df = cemba.get_m3c_mapping_metric()

In [None]:
# Count the entries whose 'DissectionRegion' value is missing.
df['DissectionRegion'].isna().sum()