In [1]:
import scanpy as sc
import pandas as pd
import numpy as np
from pathlib import Path
from anndata import AnnData
import newick

In [2]:
# Directory the simulation files are read from, and directory the .h5ad files are written to.
root = Path("output_f")
output_dir = Path("adatas")
# Create the output directory from Python (no-op if it already exists) instead of
# shelling out, so the cell also works where `mkdir -p` is unavailable.
output_dir.mkdir(parents=True, exist_ok=True)

In [3]:
def create_anndata(p_a: float, step_size: float) -> AnnData:
    """Assemble the files of one simulation run into a single ``AnnData``.

    All inputs are read from ``root``. ``p_a`` and ``step_size`` are
    interpolated into the filenames exactly as passed in, so ``1`` and ``1.0``
    resolve to different files.

    Parameters
    ----------
    p_a, step_size
        Simulation parameters identifying the run. They select the input files
        and are stored (converted to ``float``) in ``adata.uns['metadata']``.

    Returns
    -------
    AnnData
        ``X`` is the transposed counts table, ``obs`` the cell metadata,
        ``obsm['barcodes']`` the integer character matrix (``'-'`` entries
        recoded as ``-1``), ``uns['tree']`` the newick string of the first
        tree in the tree file, and ``uns['metadata']`` the two parameters.
    """
    suffix = f"{p_a}_{step_size}"
    adata = AnnData(
        X=pd.read_csv(root / f"counts_tedsim_{suffix}.csv").T.values,
        obs=pd.read_csv(root / f"cell_meta_tedsim_{suffix}.csv", index_col=0),
        obsm={"barcodes": pd.read_csv(root / f"character_matrix_{suffix}.txt", sep=" ").values},
    )

    bcs = adata.obsm['barcodes']
    # np.integer matches every integer width; the builtin `int` only matches
    # the platform default integer type.
    if not np.issubdtype(bcs.dtype, np.integer):
        # '-' marks a missing character in the text file; recode it as -1 so
        # the matrix can be stored as integers.
        bcs[bcs == '-'] = -1
        bcs = bcs.astype(int)
        adata.obsm['barcodes'] = bcs

    adata.obs['cluster'] = adata.obs['cluster'].astype("category")
    adata.obs['depth'] = adata.obs['depth'].astype(int)
    adata.obs['parent'] = adata.obs['parent'].astype("category")

    with open(root / f'tree_gt_bin_tedsim_{suffix}.newick', 'r') as fin:
        tree = newick.load(fin)[0].newick

    p_a, step_size = float(p_a), float(step_size)
    adata.uns['tree'] = tree
    adata.uns['metadata'] = {'p_a': p_a, 'step_size': step_size}

    # Fraction of barcode entries equal to -1. The denominator is the size of
    # the barcode matrix itself, not of the expression matrix (adata.shape),
    # whose column count is unrelated to the number of barcode characters.
    barcodes = adata.obsm['barcodes']
    dropout_fraction = (barcodes == -1).sum() / barcodes.size
    print(p_a, step_size, "dropout%:", dropout_fraction)

    return adata

In [4]:
# Parameter grid: every p_a is combined with every step_size.
# The last entry is deliberately the integer 1 (not 1.0): the values are
# interpolated verbatim into the input and output filenames.
p_as = [
    0.2,
    0.4,
    0.6,
    0.8,
    1,
]
step_sizes = [
    0.2,
    0.4,
    0.6,
    0.8,
    1,
]

In [5]:
# Build one AnnData per (p_a, step_size) combination and save it as .h5ad.
for p_a in p_as:
    for step_size in step_sizes:
        h5ad_path = output_dir / f"adata_pa{p_a}_ss{step_size}.h5ad"
        adata = create_anndata(p_a, step_size)
        adata.write_h5ad(h5ad_path)



0.2 0.2 dropout%: 0.0
0.2 0.4 dropout%: 0.0
0.2 0.6 dropout%: 0.0
0.2 0.8 dropout%: 0.0
0.2 1.0 dropout%: 0.0
0.4 0.2 dropout%: 0.0
0.4 0.4 dropout%: 0.0
0.4 0.6 dropout%: 0.0
0.4 0.8 dropout%: 0.0
0.4 1.0 dropout%: 0.0
0.6 0.2 dropout%: 0.0
0.6 0.4 dropout%: 0.0
0.6 0.6 dropout%: 0.0
0.6 0.8 dropout%: 0.0
0.6 1.0 dropout%: 0.0
0.8 0.2 dropout%: 0.0
0.8 0.4 dropout%: 0.0
0.8 0.6 dropout%: 0.0
0.8 0.8 dropout%: 0.0
0.8 1.0 dropout%: 0.0
1.0 0.2 dropout%: 0.0
1.0 0.4 dropout%: 0.0
1.0 0.6 dropout%: 0.0
1.0 0.8 dropout%: 0.0
1.0 1.0 dropout%: 0.0
