In [1]:
import numpy as np
from scipy import signal
import cooler
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
def gaussian_2d(x, y, mu1=0, mu2=0, sigma1=1, sigma2=1):
    """Evaluate an unnormalised, axis-aligned 2D Gaussian-shaped bump.

    Computes ``exp(-((x - mu1)**2 / sigma1**2 + (y - mu2)**2 / sigma2**2))``.
    There is no 1/2 factor in the exponent and no normalising constant, so
    the value at ``(mu1, mu2)`` is exactly 1.

    Parameters
    ----------
    x, y : scalar or array-like
        Coordinates at which to evaluate; NumPy broadcasting applies.
    mu1, mu2 : scalar, optional
        Centre of the bump along ``x`` and ``y`` respectively.
    sigma1, sigma2 : scalar, optional
        Width parameters along ``x`` and ``y`` respectively.

    Returns
    -------
    float or numpy.ndarray
        The function value(s), with the broadcast shape of the inputs.
    """
    x_term = (x - mu1) ** 2 / sigma1 ** 2
    y_term = (y - mu2) ** 2 / sigma2 ** 2
    return np.exp(-(x_term + y_term))
def plot_matrix(mat,title,cmap,vmin,vmax,show):
    """Draw a matrix as a 4x4-inch heat map with no axis ticks and display it.

    Parameters
    ----------
    mat : array-like
        2D data passed to ``imshow``.
    title : str
        Title placed above the image.
    cmap : str or Colormap
        Colormap forwarded to ``imshow``.
    vmin, vmax : scalar or None
        Colour-scale limits forwarded to ``imshow``.
    show : bool
        When truthy a colorbar is added. Note that this flag does NOT control
        whether the figure is displayed: ``plt.show()`` is always called.
    """
    fig, ax = plt.subplots(figsize=(4, 4))
    image = ax.imshow(mat, interpolation='nearest', cmap=cmap, vmin=vmin, vmax=vmax)
    if show:
        fig.colorbar(image, ax=ax)
    ax.set_title(title)
    # Hide the tick marks on both axes.
    ax.set_xticks([])
    ax.set_yticks([])
    plt.show()

In [3]:
def gaussian_2d_imputation(contacts, l2, d0):
    """Impute a contact matrix by summing truncated 2D Gaussians around contacts.

    Every entry equal to 1 is treated as an observed contact and is kept as 1
    in the output. Every other entry ``(i, j)`` receives

        sum_k exp(-d_k**2 / (2 * l2**2)) / (2 * pi * l2**2)

    where the sum runs over the observed contacts whose Euclidean distance
    ``d_k`` (in matrix-index units) to ``(i, j)`` is at most ``d0``.

    Instead of measuring the distance from every cell to every contact, the
    truncated kernel is built once and added onto the neighbourhood of each
    contact. This gives the same values (up to floating-point summation
    order) at a cost proportional to ``n_contacts * d0**2``.

    Parameters
    ----------
    contacts : array-like, 2D
        Contact matrix. Only entries that compare equal to 1 count as
        contacts; any other value (0, NaN, counts greater than 1, ...) is
        treated as "no contact" and is overwritten by the imputed value.
        NOTE(review): raw count matrices may contain values > 1 -- confirm
        whether callers are expected to binarise the matrix first.
    l2 : scalar
        Standard deviation of the Gaussian, in matrix-index units.
    d0 : scalar
        Cut-off radius; contacts farther away than this do not contribute.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``contacts``. The input is not
        modified.
    """
    contacts = np.asarray(contacts)
    n_rows, n_cols = contacts.shape

    is_contact = contacts == 1
    contact_rows, contact_cols = np.nonzero(is_contact)

    # Offsets larger than the matrix itself can never land inside it, so the
    # kernel half-width is capped to keep the kernel small for a large d0.
    radius = int(min(np.floor(abs(d0)), max(max(n_rows, n_cols) - 1, 0)))
    offsets = np.arange(-radius, radius + 1)
    sq_dist = offsets[:, None] ** 2 + offsets[None, :] ** 2
    # Truncated Gaussian: zero outside the disc of radius d0.
    kernel = np.where(sq_dist <= d0 ** 2, np.exp(-sq_dist / (2 * l2 ** 2)), 0.0)

    gaussian_sum = np.zeros(contacts.shape, dtype=float)
    for ci, cj in zip(contact_rows, contact_cols):
        # Clip the kernel window to the matrix borders.
        r0, r1 = max(ci - radius, 0), min(ci + radius + 1, n_rows)
        c0, c1 = max(cj - radius, 0), min(cj + radius + 1, n_cols)
        gaussian_sum[r0:r1, c0:c1] += kernel[
            r0 - ci + radius:r1 - ci + radius,
            c0 - cj + radius:c1 - cj + radius,
        ]

    imputed_contacts = gaussian_sum / (2 * np.pi * l2 ** 2)
    # Observed contacts keep the value 1 regardless of their neighbourhood.
    imputed_contacts[is_contact] = 1
    return imputed_contacts

In [4]:
# Output directory into which the imputed matrices are saved as .npy files.
imputed_matrix_dir = "/shareb/mliu/evaluate_impute/data/imputed_data/scl"

In [28]:
# Simulated dataset to impute; the name is interpolated into the input and
# output file paths below.
target = "Cell2020"
print(target)
downsample_matrix_dir = f"/shareb/mliu/evaluate_impute/data/simulation_hic/{target}/hic/matrix"
for group in range(1, 5):
    print(group)
    # Load this group's stack of matrices and keep only the first 100 of them.
    downsample_matrix = np.load(f"{downsample_matrix_dir}/{target}_sample_matrix{group}.npy")[0:100, :]
    imputed_matrices = np.array(
        [gaussian_2d_imputation(mat, l2=2, d0=15) for mat in downsample_matrix]
    )
    # Overwrite the main diagonal of every imputed matrix with NaN (in place;
    # iterating the 3D array yields views into it).
    for mat in imputed_matrices:
        np.fill_diagonal(mat, np.nan)

    print(imputed_matrices.shape)
    # Persist the imputed stack for this group.
    save_path = f"{imputed_matrix_dir}/scl_imputed_{target}_group{group}.npy"
    print(save_path)
    np.save(save_path, imputed_matrices)

Cell2020
1
(100, 243, 243)
/shareb/mliu/evaluate_impute/data/imputed_data/scl/scl_imputed_Cell2020_group1.npy
2
(100, 243, 243)
/shareb/mliu/evaluate_impute/data/imputed_data/scl/scl_imputed_Cell2020_group2.npy
3
(100, 243, 243)
/shareb/mliu/evaluate_impute/data/imputed_data/scl/scl_imputed_Cell2020_group3.npy
4
(100, 243, 243)
/shareb/mliu/evaluate_impute/data/imputed_data/scl/scl_imputed_Cell2020_group4.npy


In [5]:
## real data

In [8]:
# Chromosome names chr1 .. chr22.
chr_list = [f"chr{number}" for number in range(1, 23)]
sc_matrix_dir = "/shareb/mliu/evaluate_impute/data/real_sc_hic/Ramani/cooler"
# NOTE(review): the loop variable `chr` shadows the Python builtin chr() for
# the rest of the kernel session; the name is kept here so that any later cell
# relying on it keeps working.
for chr in chr_list:
    print(chr)
    imputed_matrices_list = []
    for cell in range(620):
        print(f"cell{cell}")
        clr = cooler.Cooler(f"{sc_matrix_dir}/cell{cell}.cool")
        # Unbalanced matrix for this chromosome.
        # NOTE(review): gaussian_2d_imputation only treats entries equal to 1
        # as contacts -- confirm that is intended for these matrices.
        matrix_chr = clr.matrix(balance=False).fetch(chr)
        matrix_chr_imputed = gaussian_2d_imputation(matrix_chr, l2=2, d0=15)
        imputed_matrices_list.append(matrix_chr_imputed)
    # Stack the per-cell results into one array and save one file per chromosome.
    imputed_matrices_array = np.array(imputed_matrices_list)
    print(imputed_matrices_array.shape)
    save_path = f"{imputed_matrix_dir}/scl_imputed_Ramani_{chr}.npy"
    np.save(save_path, imputed_matrices_array)

chr1
cell0
cell1
cell2
cell3
cell4
cell5
cell6
cell7
cell8
cell9
cell10
cell11
cell12
cell13
cell14
cell15
cell16
cell17
cell18
cell19
cell20
cell21
cell22
cell23
cell24
cell25
cell26
cell27
cell28
cell29
cell30
cell31
cell32
cell33
cell34
cell35
cell36
cell37
cell38
cell39
cell40
cell41
cell42
cell43
cell44
cell45
cell46
cell47
cell48
cell49
cell50
cell51
cell52
cell53
cell54
cell55
cell56
cell57
cell58
cell59
cell60
cell61
cell62
cell63
cell64
cell65
cell66
cell67
cell68
cell69
cell70
cell71
cell72
cell73
cell74
cell75
cell76
cell77
cell78
cell79
cell80
cell81
cell82
cell83
cell84
cell85
cell86
cell87
cell88
cell89
cell90
cell91
cell92
cell93
cell94
cell95
cell96
cell97
cell98
cell99
cell100
cell101
cell102
cell103
cell104
cell105
cell106
cell107
cell108
cell109
cell110
cell111
cell112
cell113
cell114
cell115
cell116
cell117
cell118
cell119
cell120
cell121
cell122
cell123
cell124
cell125
cell126
cell127
cell128
cell129
cell130
cell131
cell132
cell133
cell134
cell135
cell136
cell137
c