In [1]:
import os
import os.path as osp
import numpy as np
import pandas as pd
import scipy
import torch
from sklearn import metrics
import pickle
import matplotlib.pyplot as plt
from tqdm import tqdm
import itertools
import argparse
import warnings

warnings.filterwarnings('ignore')
%matplotlib inline

In [6]:
# Root of the preprocessed inputs. The default is the original cluster path;
# set the SCATAC_INPUT_DIR environment variable to run the notebook elsewhere
# without editing this cell.
input_dir = os.environ.get('SCATAC_INPUT_DIR', '/data/leslie/suny4/processed_input')
atac_dir = osp.join(input_dir, 'atac')
# Chromosome number; formatted as 'chr{}' when indexing the tile dictionaries.
chrom = 1
# Prefix shared by the '<ct>_tile_*.p' and '<ct>_metacell_mask.csv' input files.
ct = 'mycGCB_am_gfp_myc_gcb_thelp_sample'

In [5]:
def get_metacell_profile(tile_dict, nbrs):
    """Left-multiply every per-chromosome matrix by the membership matrix.

    Parameters
    ----------
    tile_dict : dict
        Maps a chromosome key to a 2-D (sparse or dense) matrix. Presumably
        rows are cells and columns are genomic tiles -- confirm against the
        pickled inputs.
    nbrs : array-like
        Dense 2-D matrix whose column count equals the row count of every
        matrix in ``tile_dict`` (presumably a metacells x cells mask).

    Returns
    -------
    dict
        Same keys, in the same order, as ``tile_dict``; each value is the
        matrix product ``csr_matrix(nbrs) @ tile_dict[key]``.
    """
    # The membership matrix is the same for every chromosome, so convert it
    # to CSR once instead of once per loop iteration.
    membership = scipy.sparse.csr_matrix(nbrs)
    # `@` is an explicit matrix product; `*` on scipy sparse *matrices* means
    # the same thing but is element-wise for sparse *arrays* and ndarrays.
    return {chrom: membership @ tiles for chrom, tiles in tile_dict.items()}


def cpu_jaccard_vstripe(x):
    """Pairwise Jaccard similarity between the rows of a 2-D tensor.

    Every entry of ``x`` is binarised: values strictly greater than zero
    count as "present", everything else as "absent". For rows ``i`` and
    ``j`` the result is ``|present_i & present_j| / |present_i | present_j|``.

    Parameters
    ----------
    x : torch.Tensor
        2-D tensor of shape ``(n_rows, n_features)``.

    Returns
    -------
    torch.Tensor
        ``(n_rows, n_rows)`` tensor in the default floating dtype. Pairs
        whose union is empty (both rows all-absent) yield 0, because the
        numerator is 0 and the denominator is floored at ``eps``.
    """
    size = x.shape[1]
    eps = 1e-8

    # Boolean cast instead of torch.where against freshly allocated constant
    # tensors: same values and dtype, fewer temporaries, and the result stays
    # on the device of `x`.
    present = (x > 0.0).to(torch.get_default_dtype())
    absent = 1.0 - present

    # Intersection size: features present in both rows.
    num = torch.mm(present, present.t())
    # Union size: all features minus those absent from both rows.
    denom = size - torch.mm(absent, absent.t())

    # Floor the denominator so an empty union gives 0 rather than NaN.
    return num / denom.clamp(min=eps)


def cpu_batch_corcoeff_vstripe(x):
    """Pairwise Jaccard similarity between the columns of ``x``.

    Despite the name, no correlation coefficient is computed here: ``x`` is
    transposed and handed to ``cpu_jaccard_vstripe``, and any NaN entries in
    the returned matrix are then overwritten with 0 in place.

    Parameters
    ----------
    x : torch.Tensor
        2-D tensor; similarity is computed between its columns.

    Returns
    -------
    torch.Tensor
        Square similarity matrix with one row/column per column of ``x``.
    """
    similarity = cpu_jaccard_vstripe(x.t())
    # Zero out NaNs in place (equivalent to the `c != c` self-inequality trick).
    similarity.masked_fill_(torch.isnan(similarity), 0)
    return similarity


def calc_jaccard(scatac, chrom, start, window=700, pool=20):
    """Pooled column-by-column Jaccard map for a window of one chromosome.

    Columns ``[start * pool, (start + window) * pool)`` of
    ``scatac['chr<chrom>']`` are densified, their pairwise similarity is
    computed with ``cpu_batch_corcoeff_vstripe``, and the resulting matrix is
    averaged over non-overlapping ``pool x pool`` blocks.

    Parameters
    ----------
    scatac : dict
        Maps ``'chr<N>'`` to a sparse matrix supporting column slicing and
        ``.toarray()``.
    chrom : int or str
        Chromosome identifier, inserted into ``'chr{}'``.
    start : int
        Index of the first pooled bin of the window.
    window : int, optional
        Number of pooled bins in the window (default 700, the value that was
        previously hard-coded).
    pool : int, optional
        Number of consecutive columns averaged into one bin (default 20, the
        value that was previously hard-coded). Presumably 20 x 500 bp tiles
        = 10 kb bins, judging by the input file names -- confirm.

    Returns
    -------
    torch.Tensor
        ``(window, window)`` tensor when the window lies fully inside the
        chromosome matrix.

    Notes
    -----
    The intermediate similarity matrix is dense with side ``window * pool``.
    If the slice is cut short at the end of the matrix and its width is not a
    multiple of ``pool``, the reshape below raises.
    """
    first_col = start * pool
    last_col = (start + window) * pool
    tiles = torch.tensor(scatac['chr{}'.format(chrom)][:, first_col:last_col].toarray())
    fine = cpu_batch_corcoeff_vstripe(tiles)

    n_row_bins = fine.shape[0] // pool
    n_col_bins = fine.shape[1] // pool
    # Average groups of `pool` consecutive rows, then groups of `pool`
    # consecutive columns.
    row_pooled = fine.reshape(n_row_bins, pool, -1).mean(dim=1)
    return row_pooled.reshape(-1, n_col_bins, pool).mean(dim=2)


def load_atac_data(atac_dir, chrom, ct):
    """Load the pseudo-bulk track and metacell-aggregated tiles for a sample.

    Reads three files from ``atac_dir``:
    ``<ct>_tile_pbulk_50bp_dict.p``, ``<ct>_tile_500bp_dict.p`` and
    ``<ct>_metacell_mask.csv``.

    Parameters
    ----------
    atac_dir : str
        Directory containing the input files.
    chrom : int or str
        Chromosome identifier, inserted into ``'chr{}'`` to select the
        pseudo-bulk entry.
    ct : str
        Sample prefix of the file names.

    Returns
    -------
    tuple
        ``(atac, scatac)``: the ``'chr<chrom>'`` entry of the pseudo-bulk
        pickle, and the full per-chromosome tile dictionary after
        ``get_metacell_profile`` has been applied with the mask from the CSV.
    """
    pbulk_path = osp.join(atac_dir, '{}_tile_pbulk_50bp_dict.p'.format(ct))
    tiles_path = osp.join(atac_dir, '{}_tile_500bp_dict.p'.format(ct))
    mask_path = osp.join(atac_dir, '{}_metacell_mask.csv'.format(ct))

    # NOTE(security): pickle.load can execute arbitrary code; only point this
    # at files you trust.
    # Context managers close the file handles, which the bare open() calls
    # previously leaked.
    with open(pbulk_path, 'rb') as fh:
        atac = pickle.load(fh)['chr{}'.format(chrom)]
    with open(tiles_path, 'rb') as fh:
        scatac = pickle.load(fh)

    # First CSV column is the index; the remaining values form the mask matrix.
    metacell_mask = pd.read_csv(mask_path, index_col=0).values
    return atac, get_metacell_profile(scatac, metacell_mask)

In [15]:
# Manual re-load of the inputs that load_atac_data reads, kept in separate
# variables so the intermediate objects can be inspected.
# NOTE(security): pickle.load can execute arbitrary code; only load trusted files.
with open(osp.join(atac_dir, '{}_tile_500bp_dict.p'.format(ct)), 'rb') as fh:
    # `with` closes the handle that a bare open() would leave dangling.
    scatacfile = pickle.load(fh)
scatac_test = scatacfile['chr{}'.format(chrom)]
metacell_test = pd.read_csv(osp.join(atac_dir, '{}_metacell_mask.csv'.format(ct)), index_col=0).values

In [16]:
# Aggregate the selected chromosome's tiles with the metacell mask.
# The right-hand side reads from `scatacfile` rather than from `scatac_test`,
# so re-running this cell does not multiply an already-aggregated matrix.
metacell_tiledict = scipy.sparse.csr_matrix(metacell_test) @ scatacfile['chr{}'.format(chrom)]
# Both names are kept bound to the aggregated matrix, as before; note that
# `scatac_test` therefore no longer refers to the raw per-chromosome matrix.
scatac_test = metacell_tiledict

In [17]:
# Show the repr (shape, dtype, stored-element count) of the aggregated matrix.
scatac_test

<27x390944 sparse matrix of type '<class 'numpy.float64'>'
	with 1607534 stored elements in Compressed Sparse Row format>

In [8]:
# Load the pseudo-bulk track and the metacell-aggregated tiles for the
# configured sample and chromosome.
atac, scatac = load_atac_data(atac_dir=atac_dir, chrom=chrom, ct=ct)
# Pooled Jaccard map for the window beginning at pooled-bin index 12345
# (an arbitrary example position -- adjust as needed).
jaccard = calc_jaccard(scatac=scatac, chrom=chrom, start=12345)

In [9]:
# Display the pooled Jaccard matrix returned by calc_jaccard.
jaccard

tensor([[0.0390, 0.0105, 0.0112,  ..., 0.0331, 0.0285, 0.0212],
        [0.0105, 0.0150, 0.0071,  ..., 0.0142, 0.0148, 0.0145],
        [0.0112, 0.0071, 0.0402,  ..., 0.0196, 0.0162, 0.0154],
        ...,
        [0.0331, 0.0142, 0.0196,  ..., 0.1175, 0.0482, 0.0469],
        [0.0285, 0.0148, 0.0162,  ..., 0.0482, 0.0989, 0.0451],
        [0.0212, 0.0145, 0.0154,  ..., 0.0469, 0.0451, 0.0840]])