In [1]:
import os

# Limit each numerical backend to a single thread. These variables are set
# before numpy/scipy are imported below; presumably this is to stop every
# worker process of the pool used later from starting its own thread pool.
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["VECLIB_MAXIMUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
import multiprocessing

# force=True keeps this cell re-runnable: without it, a second execution in the
# same kernel raises "RuntimeError: context has already been set".
multiprocessing.set_start_method('fork', force=True)
import concurrent.futures

import numpy as np
import pandas as pd
import sys
import re
from scipy import stats
import pickle

sys.path.append('/shareb/zliu/analysis/')
sys.path.append('/shareb/zliu/analysis/CHARMtools')
from CHARMtools import Cell3Ddev as Cell3D
from CHARMtools import MultiCell3D

import matplotlib.pyplot as plt
import seaborn as sns
import tqdm

import pybedtools

## Create c3d object

In [2]:
# Per-cell metadata. The file is read without a header row, so the column
# names are assigned explicitly.
metadata = pd.read_csv("./all.metadata.tsv", sep="\t", header=None)
metadata.columns = ["cellname", "celltype", "mc_15", "mc_20", "mc_25"]

# Link table with (at least) the columns "CHARM" and "Closest_DPT".
dpt_link = pd.read_csv("/zliu_ssd/CHARM/CHARM_brain/4_cregene/metacell_allcells/droplet_pairtag/CHARM_mesc_DPT_link.tsv", sep="\t")

# Keep only the second "_"-separated field of each identifier. A plain
# str.split is used on purpose: an id without "_" (or a missing value) fails
# loudly here instead of silently turning into NaN.
dpt_link["CHARM"] = dpt_link["CHARM"].apply(lambda x: x.split("_")[1])
dpt_link["Closest_DPT"] = dpt_link["Closest_DPT"].apply(lambda x: x.split("_")[1])

# Lookup from the first column to the second column of the link table, built
# in one pass instead of a per-row .iloc loop. On duplicate keys the last row
# wins, as before.
# NOTE(review): positional columns 0/1 are presumably "CHARM"/"Closest_DPT" --
# confirm against the file's column order.
dpt_link_dict = dict(zip(dpt_link.iloc[:, 0], dpt_link.iloc[:, 1]))

In [3]:
def _load_cell(cellname, resolution):
    """Build one Cell3D object, attach its fragment tracks, and write it to disk.

    Cells whose name starts with "R" are treated as brain cells and read all
    inputs from the CHARM_brain data directory. Every other cell is treated as
    an mESC cell: structure and ATAC fragments come from the CHARM_mesc data
    directory, while the "ct" fragments are taken from the cell that
    ``dpt_link_dict`` maps this cell name to.

    Parameters
    ----------
    cellname : str
        Cell identifier; used to build every input file name.
    resolution : int or float
        Passed to ``Cell3D`` after conversion with ``int()``. The structure
        file name is fixed to ``.5k.3dg.gz`` regardless of this value.

    Returns
    -------
    The populated ``Cell3D.Cell3D`` instance.

    Raises
    ------
    KeyError
        If a non-brain ``cellname`` has no entry in ``dpt_link_dict``.
    """
    brain_root = "/zliu_ssd/CHARM/CHARM_brain"

    # Only the input locations differ between the two cell types; resolve them
    # first so the construction/annotation sequence below is written once.
    if cellname.startswith("R"):
        data_root = f"{brain_root}/data"
        ct_path = f"{data_root}/fragments/ct_frags/{cellname}.ct.frag.bed.gz"
    else:
        data_root = "/zliu_ssd/CHARM/CHARM_mesc/data"
        dpt_cellname = dpt_link_dict[cellname]
        ct_path = f"{brain_root}/4_cregene/metacell_allcells/droplet_pairtag/mesc_dpt_cells/{dpt_cellname}.ct.frag.bed.gz"

    cell = Cell3D.Cell3D(
        cellname = cellname,
        resolution = int(resolution),
        tdg_path = f"{data_root}/tdg/{cellname}.5k.3dg.gz",
    )
    cell.add_bed_data(path=f"{data_root}/fragments/atac_frags/{cellname}.atac.frag.bed.gz", column_name='atac',type="all")
    cell.add_bed_data(path=ct_path, column_name='ct',type="all")

    cell.add_chrom_length(chrom_length_path = "/share/Data/public/ref_genome/mouse_ref/M23/raw_data/chr.dip.len")
    cell.calc_expected(n_diag=401)
    # Replace the tdg attribute with the result of get_data(if_dense=True)
    # before the object is written out and returned.
    cell.tdg = cell.get_data(if_dense=True)

    cell.to_disk(on_disk_path = "/zliu_ssd/CHARM/CHARM_brain/4_cregene/metacell_allcells/c3d/")
    return cell

# Size of the worker pool and the resolution handed to every _load_cell call.
N_WORKERS = 250
RESOLUTION = 5000

# Load every cell listed in `metadata` in parallel. The fork context is passed
# explicitly to the executor.
fork_context = multiprocessing.get_context("fork")
with concurrent.futures.ProcessPoolExecutor(N_WORKERS, mp_context=fork_context) as executor:
    cell_names = metadata["cellname"]
    cells = list(
        tqdm.tqdm(
            # One integer resolution per cell name (previously a float array
            # that _load_cell had to cast back to int).
            executor.map(_load_cell, cell_names, [RESOLUTION] * len(cell_names)),
            total=len(cell_names),
        )
    )

# Wrap all loaded cells in one MultiCell3D container and pickle it to the
# working directory.
brain = MultiCell3D.MultiCell3D(cells)

with open("brain.pkl", "wb") as f:
    pickle.dump(brain, f)


100%|██████████| 3456/3456 [51:25<00:00,  1.12it/s]  
