## Import

In [None]:
from wmb import brain, cemba, mm10
import seaborn as sns
import anndata
import dask
import numpy as np
from ALLCools.clustering import *
from ALLCools.mcds import MCDS
from ALLCools.plot import *

import pandas as pd
import matplotlib.pyplot as plt
from ALLCools.clustering import cluster_enriched_features, log_scale

In [None]:
# Run parameters for this notebook.
mc_type = "CHN"              # methylation context handed to mcds.get_adata
var_dim = "geneslop2k-vm23"  # MCDS feature dimension used to open the dataset
# NOTE(review): the two settings below are not referenced in the visible
# cells — confirm whether they are still needed.
downsample = 1000
chrom_to_remove = [
    "chrX",
    "chrY",
    "chrM",
    "chrL",
]

## Select cells

In [None]:
# Load the selected cell IDs: the file has no header row and its first column
# becomes the index, which is the only part kept.
cell_table = pd.read_csv('mc_cells.txt', header=None, index_col=0)
cells = cell_table.index
cells.name = 'cell'
# Show how many cells were loaded.
cells.size

## Get adata with shared features

In [None]:
# Open the CEMBA snmC MCDS on the chosen feature dimension, passing the
# selected cells as the observations to use.
mcds = MCDS.open(
    cemba.CEMBA_SNMC_MCDS_PATH,
    use_obs=cells,
    var_dim=var_dim,
)
mcds

In [None]:
# Re-key the MERFISH features from gene names to mm10 gene IDs.
merfish = anndata.read_h5ad('./adata/ori_merfish_input.h5ad')
merfish_genes_ids = [mm10.gene_name_to_id(name) for name in merfish.var_names]

# Sanity check: look each name up a second time and print the names whose
# second lookup does not compare equal to the stored ID. Because the stored
# IDs come from the very same lookup, this only prints when a result is not
# equal to itself (e.g. NaN) or the lookup is not repeatable.
for gene_name, gene_id in zip(merfish.var_names, merfish_genes_ids):
    matches = gene_id == mm10.gene_name_to_id(gene_name)
    if not matches:
        print(gene_name)

merfish.var_names = merfish_genes_ids

In [None]:
# Genes present in both the methylation MCDS and the MERFISH data.
all_mc_genes = mcds.get_index(var_dim).tolist()
merfish_gene_set = set(merfish.var_names)
# Keep the MCDS order (first occurrence of each gene) rather than turning a
# set into an Index: iteration order of a set of strings changes between
# interpreter runs (hash randomization), which made the order of the
# resulting Index irreproducible.
shared_genes = pd.Index(
    [gene for gene in dict.fromkeys(all_mc_genes) if gene in merfish_gene_set],
    name=var_dim,
)

In [None]:
# Display the shared gene index for inspection.
shared_genes

In [None]:
# Keep only the MERFISH features whose ID is in shared_genes.
is_shared = merfish.var_names.isin(shared_genes)
merfish = merfish[:, is_shared]

In [None]:
# Register shared_genes under a feature-selection column on the MCDS, then
# build the AnnData for the configured mC type using that column as the
# feature selection.
shared_col = f'{var_dim}_shared_genes'
mcds.add_feature_selection_column(shared_genes, shared_col)
adata = mcds.get_adata(
    select_hvf=shared_col,
    mc_type=mc_type,
    split_large_chunks=False,
)

## Preprocessing and save

In [None]:
# Log-transform the mC fraction and scale the features; with_mean=True is
# passed through to log_scale (see ALLCools for the exact transform).
log_scale(adata, with_mean=True)

# Flip the sign of the scaled mC values so that they are positively
# correlated with RNA.
# NOTE(review): this rests on mC fraction being anti-correlated with
# expression for this mC type — confirm.
adata.X *= -1

In [None]:
# Display the methylation AnnData after preprocessing.
adata

In [None]:
# Display the MERFISH AnnData after subsetting to the shared genes.
merfish

In [None]:
# Both inputs must hold the same number of features and exactly the same
# feature IDs. Only count and membership are compared; feature order is not.
mc_features = adata.var_names
merfish_features = merfish.var_names
assert len(mc_features) == len(merfish_features)
assert not set(mc_features) ^ set(merfish_features)

In [None]:
# Write both matched inputs to disk, methylation first and MERFISH second.
outputs = {
    './adata/mc_input.h5ad': adata,
    './adata/merfish_input.h5ad': merfish,
}
for h5ad_path, matrix in outputs.items():
    matrix.write_h5ad(h5ad_path)