In [None]:
import yaml
import pathlib
import anndata
import pandas as pd

In [None]:
# Absolute path of the L3 directory; the per-group working directories are
# created beneath it.
cluster_dir = pathlib.Path('../L3/').absolute()
cluster_dir.mkdir(exist_ok=True)

In [None]:
def prepare_group(group, cells, leiden_resolution=0.3, target_accuracy=0.9,
                  min_cluster_size=30):
    """Create the working directory and config files for one group.

    Creates ``cluster_dir / group`` with a ``config`` sub-directory, writes the
    group's cell ids to ``select_cells.txt`` and copies every ``*.yaml`` file
    from the local ``config/`` template directory, adjusting a few parameters
    on the way.

    Parameters
    ----------
    group
        Name of the group; used as the directory name under ``cluster_dir``.
    cells
        Object whose ``.index`` holds the ids of the cells in this group
        (one id is written per line).
    leiden_resolution, target_accuracy, min_cluster_size
        Values written into ``06.yaml`` (the clustering step config).

    Returns
    -------
    None
    """
    group_dir = cluster_dir / group
    group_dir.mkdir(exist_ok=True)
    config_dir = group_dir / 'config'
    config_dir.mkdir(exist_ok=True)

    # subset cells; cast to str so non-string index values can be joined too
    with open(group_dir / 'select_cells.txt', 'w') as f:
        f.write('\n'.join(map(str, cells.index)))

    for path in pathlib.Path('config/').glob('*.yaml'):
        # Parse the template before opening the destination, so a failure to
        # read or parse it does not leave a truncated config file behind.
        with open(path) as f:
            config = yaml.safe_load(f)
        if config is None:
            # an empty YAML document loads as None; treat it as no settings
            config = {}

        # adjust the default parameters for all the groups
        if 'select_cells' in config:
            config['select_cells'] = 'select_cells.txt'
        if path.name == '06.yaml':
            # key parameter in clustering step
            config['leiden_resolution'] = leiden_resolution
            config['target_accuracy'] = target_accuracy
            config['min_cluster_size'] = min_cluster_size

        with open(config_dir / path.name, 'w') as outf:
            yaml.safe_dump(config, outf)
    return

In [None]:
# L2 result files, one per directory matching ../L2/c*/
adata_paths = pathlib.Path('../L2/').glob('c*/adata.with_coords.h5ad')

for adata_path in adata_paths:
    parent_name = adata_path.parent.name
    adata = anndata.read_h5ad(adata_path)

    # A parent with a single L1 label is only reported, not prepared.
    n_cluster = len(adata.obs['L1'].unique())
    if n_cluster == 1:
        print(parent_name, adata.shape[0])
        continue

    for group, cells in adata.obs.groupby('L1'):
        # group directory name = parent directory name + L1 label
        group = parent_name + group
        n_cells = cells.shape[0]
        if n_cells >= 60:
            prepare_group(group, cells)
        else:
            # < 60 means it won't be split to two 30; report and skip
            print(group, n_cells)

In [None]:
# Copy the Snakefile template into ../L3/, replacing the
# REPLACE_TEMPLATE_DIR placeholder with the absolute path of the current
# working directory.
# The template is read before the destination is opened for writing, so a
# missing template does not leave an empty ../L3/Snakefile behind.
with open('Snakefile') as f:
    snakefile = f.read()

snakefile = snakefile.replace('REPLACE_TEMPLATE_DIR',
                              str(pathlib.Path().absolute()))

with open('../L3/Snakefile', 'w') as outf:
    outf.write(snakefile)

## Collect HTML

In [None]:
import pathlib
import subprocess

In [None]:
# Create the local output directory for the HTML files; exist_ok keeps the
# cell re-runnable when the directory is already there.
pathlib.Path('L3').mkdir(exist_ok=True)

In [None]:
out_dir = './L3'
# Notebooks of each group directory that are exported to HTML.
notebook_names = ['06-Clustering.ipynb', '07-Plot.ipynb']

for group_dir in pathlib.Path('../L3/').glob('c*/'):
    for notebook_name in notebook_names:
        path = group_dir / notebook_name
        # Prefix with the group directory name so files from different
        # groups do not collide inside out_dir.
        output_name = f'{path.parent.name}-{path.name}'

        # The export is looked up as "<output_name>.html"; skip notebooks
        # that were already converted.
        if pathlib.Path(f'{out_dir}/{output_name}.html').exists():
            continue

        # Pass the command as an argument list (no shell) so paths with
        # spaces or shell metacharacters are handed over unchanged.
        subprocess.run(
            ['jupyter', 'nbconvert', '--to', 'html', '--no-input',
             '--output', output_name, '--output-dir', out_dir, str(path)],
            check=True,
        )