In [4]:
import sys
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn import linear_model
from sklearn.linear_model import LogisticRegression as logreg
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score, mean_squared_error
from scipy.stats import spearmanr, pearsonr
import seaborn as sns
from sklearn.decomposition import PCA
# import umap
# import hdbscan
import sklearn.cluster as cluster
from sklearn.metrics import adjusted_rand_score, adjusted_mutual_info_score
import matplotlib.pyplot as plt
# %matplotlib notebook
from matplotlib.ticker import MaxNLocator
# import shap
import random
import itertools
import warnings
from tqdm import tqdm
warnings.filterwarnings('ignore')

# Assay names; their order defines the 1-based index used by assay_index_choice.
assays = ['H3K36me3', 'H3K27me3', 'H3K27ac', 'H3K4me1', 'H3K4me3', 'H3K9me3']
assay_index_choice = 5 # 1-based

RESOLUTION = 25
window_size = 401
# The central 2*operative_half_window_size + 1 positions of each window are used as features.
operative_half_window_size = 84
# If True, H3K27ac features are concatenated with those of assays[assay_index_choice-1].
pairwise_flag = True
# Scales the first column of the feature slice taken in the training cell:
# 0 keeps both assays' feature blocks, 1 skips the first (H3K27ac) block.
pairwise_multiplier = 0 if pairwise_flag else 1

In [None]:
# Column layout of the training CSV: one signal column per window position
# (pos_-<half> ... pos_<half>), followed by six metadata columns.
half_window = window_size // 2
colnames = ["pos_" + str(x) for x in range(-half_window, half_window + 1)]
colnames = colnames + ["log10p1(TPM)", "cell_type", "chrom", "position", "strand", "assay_index"]
df = pd.read_csv('../../Data/Training_Data.T13.sorted.csv', sep=",", names=colnames, low_memory=False)

# Sort so that the rows of one (chrom, position, cell_type) locus are adjacent
# and ordered by assay_index, then drop exact duplicate rows.
df_sorted = df.sort_values(['chrom', 'position', 'cell_type', 'assay_index'], ascending=True)
df_sorted_unique = df_sorted.drop_duplicates()

# Rows are selected positionally with a stride of len(assays). This assumes
# every locus contributes exactly len(assays) rows, one per assay, in the same
# order as the `assays` list -- NOTE(review): confirm against the CSV.
# The H3K27ac offset is derived from `assays` instead of a hard-coded 3-1.
H3K27ac_row_offset = assays.index('H3K27ac')
df_H3K27ac = df_sorted_unique.iloc[range(H3K27ac_row_offset, len(df_sorted_unique), len(assays)), :]
df_other_assay = df_sorted_unique.iloc[range(assay_index_choice-1, len(df_sorted_unique), len(assays)), :]

# Non-fatal sanity check of the stride assumption. print() is used because
# warnings are globally silenced by warnings.filterwarnings('ignore').
for _label, _frame in (('H3K27ac', df_H3K27ac), (assays[assay_index_choice-1], df_other_assay)):
    if _frame['assay_index'].nunique(dropna=False) > 1:
        print("WARNING: rows selected for " + _label + " contain more than one assay_index value; "
              "the positional stride selection is misaligned")

# Pair the two assays on the shared locus keys; overlapping non-key columns get
# an assay-name suffix.
df_merged = pd.merge(df_H3K27ac, df_other_assay, how='inner', on=['cell_type', 'chrom', 'position', 'strand'], suffixes=['_H3K27ac', '_'+assays[assay_index_choice-1]])

In [None]:
# Display the merged frame: H3K27ac rows joined with the second assay's rows on
# cell_type, chrom, position and strand.
df_merged

In [None]:
# Split df_merged into expression bins using quintile edges of log10(TPM + 1).
TPMs = np.loadtxt('../../Data/Gene_Expression_Data/TPMs', usecols=(0,))
log10p1_TPMs = [np.log10(x+1) for x in list(TPMs)]

# Bin edges: the distinct 0/20/40/60/80th percentiles, padded with large
# sentinels on both sides so every value falls into some (lower, upper] bin.
quantiles = [-1000000] + list(np.unique(np.quantile(log10p1_TPMs, np.arange(0, 1, 0.2)))) + [+1000000]
print(quantiles)

df_merged_quantiles = {}
# np.unique collapses tied percentiles, so the number of edges depends on the
# data. Iterate over every adjacent pair of edges instead of a fixed range(1, 6),
# which dropped the top bin when all five percentiles are distinct and raised
# IndexError when more than one percentile was tied.
for i in range(1, len(quantiles)):
    df_merged_quantiles[i] = df_merged.loc[(df_merged["log10p1(TPM)_H3K27ac"] > quantiles[i-1]) & (df_merged["log10p1(TPM)_H3K27ac"] <= quantiles[i])]
    print(i, len(df_merged_quantiles[i]))

In [None]:
# Hard-coded TPM values for CXCR4 and TGFBR1, 13 entries each; the last entry
# is the one reported as HEK293T in the printout below.
CXCR4_TPMs = [0.00, 0.86, 0.72, 12.94, 0.32, 0.25, 3.98, 2.36, 616.72, 0.88, 17.22, 23.13, 11.9742]
TGFBR1_TPMs = [4.40, 5.61, 21.74, 25.04, 15.35, 0.91, 0.99, 2.29, 4.45, 36.01, 6.71, 46.59, 0.724783]

# Same log10(TPM + 1) transform as the "log10p1(TPM)" training target.
CXCR4_log10p1_TPMs = list(map(lambda tpm: np.log10(tpm + 1), CXCR4_TPMs))
TGFBR1_log10p1_TPMs = list(map(lambda tpm: np.log10(tpm + 1), TGFBR1_TPMs))

print(
    "CXCR4 HEK293T log10p1 TPM =",
    round(CXCR4_log10p1_TPMs[-1], 3),
    "\nTGFBR1 HEK293T log10p1 TPM =",
    round(TGFBR1_log10p1_TPMs[-1], 3),
)

In [None]:
# Fit and evaluate a linear model on the central-window features of H3K27ac and
# the second assay. The loop variable is only printed: rows are taken from
# df_merged and are not filtered by TPM quantile in this cell.
for quantile in [0]: # [1,2,3,4,5]:

    print("TPM quantile", quantile)

    # Number of central positions kept per assay.
    n_central_bins = 2 * operative_half_window_size + 1

    # Positional centres of the two signal blocks inside df_merged. The second
    # formula skips the first block plus the columns that sit between the two
    # blocks -- NOTE(review): relies on the column order produced by the merge.
    middle_position_first_feature = window_size // 2
    middle_position_second_feature = -1 + ( (window_size + 6) + (window_size + 6 + window_size + 1) )//2

    first_assay_cols = np.arange(middle_position_first_feature - operative_half_window_size,
                                 middle_position_first_feature + operative_half_window_size + 1)
    second_assay_cols = np.arange(middle_position_second_feature - operative_half_window_size,
                                  middle_position_second_feature + operative_half_window_size + 1)
    # Four columns right after the first signal block; the first of them is
    # used as the regression target below and 'chrom' must be among them.
    trailing_cols = np.arange(window_size, window_size + 4)

    df_two_assays = df_merged.iloc[:, np.concatenate([first_assay_cols, second_assay_cols, trailing_cols])]

    df_subsampled = df_two_assays

    # Train on chr1-chr13 except chr2 (where CXCR4 lies) and chr9 (where TGFBR1 lies);
    # test on chr14-chr22.
    training_chromosomes = ["chr" + str(c) for c in range(1, 14, 1) if c not in (2, 9)]
    testing_chromosomes = ["chr" + str(c) for c in range(14, 23, 1)]

    # Feature slice bounds; the column right after the features is the target.
    feature_start = pairwise_multiplier * n_central_bins
    feature_stop = 2 * n_central_bins

    df_training = df_subsampled.loc[df_subsampled['chrom'].isin(training_chromosomes)]
    xTrain = df_training.iloc[:, feature_start:feature_stop]
    yTrain = df_training.iloc[:, feature_stop]

    df_testing = df_subsampled.loc[df_subsampled['chrom'].isin(testing_chromosomes)]
    xTest = df_testing.iloc[:, feature_start:feature_stop]
    yTest = df_testing.iloc[:, feature_stop]

    if len(xTrain) < 10:
        print("len(xTrain) = "+str(len(xTrain)))

    # The transform is fitted but NOT applied (the transform calls are disabled),
    # so the feature matrices keep their original width.
    quadratic_transform = PolynomialFeatures(degree=1, interaction_only=True).fit(xTrain)
    # xTrain = quadratic_transform.transform(xTrain)
    # xTest = quadratic_transform.transform(xTest)

    print(xTrain.shape, xTest.shape, yTrain.shape, yTest.shape)

    # ElasticNet with l1_ratio=0.0, i.e. only the L2 part of the penalty is active.
    l1_alpha = 0.01
    model = linear_model.ElasticNet(alpha=l1_alpha, l1_ratio=0.0, max_iter=1000)

#     model = RandomForestRegressor(verbose=0, n_estimators=10, n_jobs=8)

    xTrain, yTrain, xTest, yTest = (np.asarray(part) for part in (xTrain, yTrain, xTest, yTest))

    model.fit(xTrain, yTrain)

    # Train-set statistics
    yTrain_Pred = model.predict(xTrain)
    mse_Train = mean_squared_error(yTrain, yTrain_Pred)
    pc_Train, _ = pearsonr(yTrain, yTrain_Pred)
    sc_Train, _ = spearmanr(yTrain, yTrain_Pred)

    # Test-set statistics
    yTest_Pred = model.predict(xTest)
    mse_Test = mean_squared_error(yTest, yTest_Pred)
    pc_Test, _ = pearsonr(yTest, yTest_Pred)
    sc_Test, _ = spearmanr(yTest, yTest_Pred)

    print("Train: MSE = ", round(mse_Train, 3), "Pearson =", round(pc_Train, 3), "Spearman =", round(sc_Train, 3))
    print("Test: MSE = ", round(mse_Test, 3), "Pearson =", round(pc_Test, 3), "Spearman =", round(sc_Test, 3))

    # One coefficient per input feature, in feature-column order.
    plt.plot(model.coef_)


In [None]:
# Interpret feature effect sizes from chrmt linear model
from tensorflow.keras.models import load_model

# NOTE(review): this rebinds the notebook-global `model`, which the training
# cell above also assigns (the ElasticNet fit). Any later cell that reads
# `model` gets the Keras model loaded here when cells run top to bottom.
chrmt_model_path = "../../Models/odd_no_batchnorm_not_zero.hdf5"
model = load_model(chrmt_model_path, compile=False)
# model = load_model("../../Models/R7.T13.replicate.1_0_transcriptome_201_4_64_linear_mse_1.hdf5", compile=False)

In [None]:
from matplotlib.pyplot import figure
plt.rcParams["figure.figsize"] = (30, 15)

# model.layers[2].get_weights()
fig, axs = plt.subplots(2, 3)

# First weight array of layer 2, fetched once and drawn in every panel.
# Each panel zooms into a 60-wide window around x = 201*j + 100 and is titled
# with assays[j] -- presumably the weights are six concatenated 201-long
# per-assay blocks; TODO confirm against the model definition.
layer_weights = model.layers[2].get_weights()[0]

for j, ax in enumerate(axs.flat):
    ax.plot(layer_weights, 'o-')
    # Solid lines at multiples of 201 (block boundaries).
    for boundary in range(0, 201 * 7, 201):
        ax.axvline(x=boundary)
    # Dashed lines at the midpoint of each 201-wide block.
    for centre in range(100, 100 + 201 * 6, 201):
        ax.axvline(x=centre, linestyle="dashed", color="lightblue")
    ax.set_xlim(201*j + 70, 201*(j) + 130)
    ax.set_ylim(-0.05, 0.05)
    ax.set_title(assays[j])


In [None]:
    # Run perform_ise (from the project module chrmt_inference) once per
    # cell-type index -13..-1, on data vectors built by generate_data_vectors.
    # NOTE(review): the module directory is a hard-coded absolute path on one
    # user's machine; the imports below fail elsewhere.
    sys.path.insert(1, '/home/sbatra/.chrmt/Code/deepENCODE/')
    from chrmt_inference import generate_data_vectors
    from chrmt_inference import ise_pairwise_input
    from chrmt_inference import perform_ise

    # use above ridge regression model
    # NOTE(review): `model` is also rebound by the cell that calls load_model;
    # when cells run top to bottom, `model` here is that Keras model, not the
    # ElasticNet fit. Confirm which model is intended.
    trained_model = model

    # Generate data vectors for CXCR4 and TGFBR1
    for cell_type_choice in range(-13, 0):
        # Path prefix handed to generate_data_vectors; presumably the prefix of
        # the linear_model.<gene>.CT_<k>.npy files loaded in later cells -- TODO confirm.
        path_to_save = "../../Data/" + "linear_model"
        xInference, yInference, gene_list, CHROM, TSS, STRAND = generate_data_vectors(cell_type_choice, 2 * operative_half_window_size + 1, path_to_save)

#         for e in xInference:
#             xInference[e] = np.expm1(xInference[e])
        
        peak_width_choices = [6, 12]
        inserted_lnp1_minuslog10_p_value_choices = [1.5]  # corresponds to 0.0003

        # Single iteration (offset 0 only).
        for MNase_offset in range(0, 0 + 1):

            # One row of axes per (peak width, inserted value) combination, two columns.
            fig, axs = plt.subplots(len(peak_width_choices) * len(inserted_lnp1_minuslog10_p_value_choices), 2)

            fig, _ = perform_ise(fig, axs,
                        2 * operative_half_window_size + 1,
                        "../../Data/p300_epigenome_editing_dataset.tsv", ise_pairwise_input,
                        peak_width_choices,
                        inserted_lnp1_minuslog10_p_value_choices,
                        MNase_offset,
                        xInference, yInference,
                        gene_list, CHROM, TSS, STRAND,                
                        trained_model)

            # NOTE(review): figures are never closed, so 13 figures stay open
            # after this loop.
            fig.show()

In [None]:
# Load the saved TGFBR1 arrays for the 13 cell-type indices -13..-1.
TGFBR1_features = {}
TGFBR1_TPM = {}
for i in range(-13, 0):
    # expm1 is applied to the stored features (presumably undoing a log1p
    # transform -- TODO confirm); 10**y - 1 undoes a log10(TPM + 1) transform.
    TGFBR1_features[i] = np.expm1(np.load("../../Data/linear_model.TGFBR1.CT_"+str(i)+".npy"))
    TGFBR1_TPM[i] = (np.power(10, np.load("../../Data/linear_model.TGFBR1.CT_"+str(i)+".TPM.npy")) - 1)

# Figure size and x-axis positions do not change between figures, so they are
# set once instead of on every inner-loop iteration.
plt.rcParams["figure.figsize"] = (25, 15)
positions = np.arange(-operative_half_window_size, operative_half_window_size + 1)

# One figure per assay channel: every cell type's signal minus the signal of
# cell-type index -1 (the reference the original commented-out title labelled HEK293T).
for ai in range(len(assays)):
    for i in range(-13, 0):
        plt.plot(positions, TGFBR1_features[i][0, :, ai] - TGFBR1_features[-1][0, :, ai], label=assays[ai]+" and TPM = "+str(round(TGFBR1_TPM[i][0][0], 2)))

    # Channel 4 (H3K4me3 in `assays`) gets a wider y-range than the others.
    if(ai == 4):
        plt.ylim(-100, 200)
    else:
        plt.ylim(-5, 50)
    plt.legend(fontsize=20)
    plt.show()
    plt.close()

# Raw (non-differenced) signal for channel 2 (H3K27ac in `assays`).
for ai in [2]:
    for i in range(-13, 0):
        plt.plot(positions, TGFBR1_features[i][0, :, ai], label=assays[ai]+" and TPM = "+str(round(TGFBR1_TPM[i][0][0], 2)))

    plt.legend(fontsize=20)
    plt.ylim(-15, 50)
    plt.show()
    # Fixed: the original `plt.close` lacked parentheses, so the figure was
    # never closed and the function object became the cell's output.
    plt.close()
    
    
# for i in range(6):
#     plt.plot(TGFBR1_T13[0, :, i], label=assays[i])
# plt.show()
# plt.legend("upper")
# plt.close()


In [None]:
# Load the saved CXCR4 arrays for the 13 cell-type indices -13..-1.
CXCR4_features = {}
CXCR4_TPM = {}
for i in range(-13, 0):
    # expm1 is applied to the stored features (presumably undoing a log1p
    # transform -- TODO confirm); 10**y - 1 undoes a log10(TPM + 1) transform.
    CXCR4_features[i] = np.expm1(np.load("../../Data/linear_model.CXCR4.CT_"+str(i)+".npy"))
    CXCR4_TPM[i] = (np.power(10, np.load("../../Data/linear_model.CXCR4.CT_"+str(i)+".TPM.npy")) - 1)

# Figure size and x-axis positions do not change between figures, so they are
# set once instead of on every inner-loop iteration.
plt.rcParams["figure.figsize"] = (25, 15)
positions = np.arange(-operative_half_window_size, operative_half_window_size + 1)

# One figure per assay channel: every cell type's signal minus the signal of
# cell-type index -1 (the reference the original commented-out title labelled HEK293T).
for ai in range(len(assays)):
    for i in range(-13, 0):
        plt.plot(positions, CXCR4_features[i][0, :, ai] - CXCR4_features[-1][0, :, ai], label=assays[ai]+" and TPM = "+str(round(CXCR4_TPM[i][0][0], 2)))

    # Channel 4 (H3K4me3 in `assays`) gets a wider y-range than the others.
    if(ai == 4):
        plt.ylim(-100, 200)
    else:
        plt.ylim(-15, 50)

    plt.legend(fontsize=20)
    plt.show()
    plt.close()

# Raw (non-differenced) signal for channel 2 (H3K27ac in `assays`).
for ai in [2]:
    for i in range(-13, 0):
        plt.plot(positions, CXCR4_features[i][0, :, ai], label=assays[ai]+" and TPM = "+str(round(CXCR4_TPM[i][0][0], 2)))

    plt.legend(fontsize=20)
    plt.ylim(-15, 50)
    plt.show()
    # Fixed: the original `plt.close` lacked parentheses, so the figure was
    # never closed.
    plt.close()
# for i in range(6):
#     plt.plot(CXCR4_T13[0, :, i], label=assays[i])
# plt.show()
# plt.legend("upper")
# plt.close()


In [None]:
# Cell-type indices for which TGFBR1 TPM values were loaded.
# The original referenced `TGFBR1_log10p1_TPM`, a name no cell in this notebook
# defines (NameError on a fresh kernel); `TGFBR1_TPM` is the dict that is built.
TGFBR1_TPM.keys()

In [None]:
# Compute, for every transcript in the predictions log, the Spearman correlation
# between its true and predicted values across all of its rows in the file.

# f_predictions = open('../../Logs/' + "R6.replicate.1_0_transcriptome_221_4_64_linear_mse_1" + '.testing_metrics.tsv', 'r')
yTrue = {}
yPred = {}
# The context manager closes the file even if a row fails to parse.
with open('../../Logs/' + "test" + '.testing_metrics.tsv', 'r') as f_predictions:
    for line in f_predictions:
        if not line.strip():
            continue  # tolerate blank lines (e.g. a trailing newline)
        vec = line.rstrip("\n").split("\t")
        # Tab-separated columns used here: 4 = transcript id, 5 = true value, 6 = predicted value.
        transcript = vec[4]
        yTrue.setdefault(transcript, []).append(float(vec[5]))
        yPred.setdefault(transcript, []).append(float(vec[6]))

spearman_dict = {}
# Sentinels; nothing in this cell updates them.
CXCR4_spearman = -100
TGFBR1_spearman = -100
for transcript in yTrue:
    # Only the correlation coefficient is kept; the p-value is discarded.
    sc, _ = spearmanr(yTrue[transcript], yPred[transcript])
    spearman_dict[transcript] = sc






In [None]:
# Spot-check: true values recorded for one transcript in the predictions log.
yTrue["ENST00000374994.8"]

In [None]:
# Transcripts whose per-transcript Spearman correlation exceeds 0.7.
high_spearman_transcripts = [name for name, rho in spearman_dict.items() if rho > 0.7]

# Of those, report the ones whose true values span more than 1 unit:
# transcript, Spearman, span (max - min) and median of the true values.
for t in high_spearman_transcripts:
    true_values = yTrue[t]
    value_span = np.max(true_values) - np.min(true_values)
    if value_span > 1:
        print(t, spearman_dict[t], value_span, np.median(true_values))

In [None]:
# Build per-transcript lookup tables (chromosome, strand, TSS) from the headered
# TPM table, so that a transcript name can later be mapped to its genomic
# location.
chrom_dict = {}
strand_dict = {}
TSS_dict = {}
counter = 0
# The context manager closes the file; the original never closed f_TSS.
with open("../../Data/Gene_Expression_Data/T01.tsv.TPM.headered", 'r') as f_TSS:
    for counter, line in enumerate(f_TSS, start=1):
        if(counter == 1):
            continue  # skip the header row
        vec = line.rstrip("\n").split("\t")
        chrom = vec[0]
        strand = vec[3]
        transcript = vec[4]
        # The TSS is column 1 on the "+" strand and column 2 otherwise.
        if(strand == "+"):
            TSS = int(vec[1])
        else:
            TSS = int(vec[2])

        chrom_dict[transcript] = chrom
        strand_dict[transcript] = strand
        TSS_dict[transcript] = TSS
    
    

In [7]:
from chrmt_generator import TranscriptomePredictor

# high_spearman_transcripts = [x for x in spearman_dict.keys() if spearman_dict[x] > 0.7]
# CRISPRability_transcripts = list(df_genes_2["transcript"])

# Headerless TSV; columns used below: 0 = chrom, 1 = TSS, 2 = strand, 4 = transcript id.
df_genes_2 = pd.read_csv("../../Data/ridge.2_genes.tsv", sep='\t', header=None)

# For every listed gene and every cell-type index -13..-1, build the model
# input/target for that locus and save both as .npy files.
for gene_row in range(len(df_genes_2)):
    # if(np.max(yTrue[t]) - np.min(yTrue[t]) > 1):

    chrom = df_genes_2.iloc[gene_row, 0]
    TSS = int(df_genes_2.iloc[gene_row, 1])
    strand = df_genes_2.iloc[gene_row, 2]

    t = df_genes_2.iloc[gene_row, 4]

    for cell_type_choice in tqdm(range(-13, 0)):
        # NOTE(review): a new generator is constructed per cell type; the
        # captured output shows the data being reloaded on every iteration.
        prediction_generator = TranscriptomePredictor(window_size,
                       1,
                       shuffle=False,
                       mode='inference',
                       masking_probability=0.0,
                       chrom=chrom,
                       start=TSS,
                       strand=strand,
                       cell_type_index=cell_type_choice)

        # Only item 0 is needed. The original wrapped this in
        # `for i in range(1)`, which shadowed the outer gene index `i`.
        X, Y = prediction_generator[0]
        print(X.shape, Y.shape)

        # NOTE(review): the "." right after the directory makes these hidden
        # dotfiles (".<transcript>.CT_<k>.npy"); kept byte-for-byte because
        # other code may read these exact paths.
        np.save("../../Data/saved_npy_arrays/" + "." + t + ".CT_" + str(cell_type_choice) + ".npy", X)
        np.save("../../Data/saved_npy_arrays/" + "." +  t + ".CT_" + str(cell_type_choice) + ".TPM.npy", Y)

  0%|          | 0/13 [00:00<?, ?it/s]Loading TSS data
T01 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A02.chr2.npy
T01 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A03.chr2.npy
T01 A04 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A04.chr2.npy
T01 A05 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A05.chr2.npy
T01 A06 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A06.chr2.npy
T01 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A07.chr2.npy
T01 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr2.-.npy
T02 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A02.chr2.npy
T02 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A03.chr2.npy
T

T11 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T11A07.chr2.npy
T11 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T11A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr2.-.npy
T12 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A02.chr2.npy
T12 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A03.chr2.npy
T12 A04 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A04.chr2.npy
T12 A05 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A05.chr2.npy
T12 A06 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A06.chr2.npy
T12 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A07.chr2.npy
T12 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Da

(1, 401, 7) (1, 1)


Loading TSS data
T01 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A02.chr2.npy
T01 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A03.chr2.npy
T01 A04 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A04.chr2.npy
T01 A05 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A05.chr2.npy
T01 A06 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A06.chr2.npy
T01 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A07.chr2.npy
T01 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr2.-.npy
T02 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A02.chr2.npy
T02 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A03.chr2.npy
T02 A04 chr2
Loading Epigenome data ../

T11 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T11A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr2.-.npy
T12 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A02.chr2.npy
T12 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A03.chr2.npy
T12 A04 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A04.chr2.npy
T12 A05 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A05.chr2.npy
T12 A06 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A06.chr2.npy
T12 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A07.chr2.npy
T12 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_g

(1, 401, 7) (1, 1)


Loading TSS data
T01 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A02.chr2.npy
T01 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A03.chr2.npy
T01 A04 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A04.chr2.npy
T01 A05 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A05.chr2.npy
T01 A06 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A06.chr2.npy
T01 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A07.chr2.npy
T01 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr2.-.npy
T02 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A02.chr2.npy
T02 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A03.chr2.npy
T02 A04 chr2
Loading Epigenome data ../

T11 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T11A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr2.-.npy
T12 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A02.chr2.npy
T12 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A03.chr2.npy
T12 A04 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A04.chr2.npy
T12 A05 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A05.chr2.npy
T12 A06 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A06.chr2.npy
T12 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A07.chr2.npy
T12 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_g

(1, 401, 7) (1, 1)


Loading TSS data
T01 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A02.chr2.npy
T01 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A03.chr2.npy
T01 A04 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A04.chr2.npy
T01 A05 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A05.chr2.npy
T01 A06 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A06.chr2.npy
T01 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A07.chr2.npy
T01 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr2.-.npy
T02 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A02.chr2.npy
T02 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A03.chr2.npy
T02 A04 chr2
Loading Epigenome data ../

T11 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T11A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr2.-.npy
T12 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A02.chr2.npy
T12 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A03.chr2.npy
T12 A04 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A04.chr2.npy
T12 A05 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A05.chr2.npy
T12 A06 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A06.chr2.npy
T12 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A07.chr2.npy
T12 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_g

(1, 401, 7) (1, 1)


Loading TSS data
T01 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A02.chr2.npy
T01 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A03.chr2.npy
T01 A04 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A04.chr2.npy
T01 A05 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A05.chr2.npy
T01 A06 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A06.chr2.npy
T01 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A07.chr2.npy
T01 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr2.-.npy
T02 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A02.chr2.npy
T02 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A03.chr2.npy
T02 A04 chr2
Loading Epigenome data ../

T11 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T11A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr2.-.npy
T12 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A02.chr2.npy
T12 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A03.chr2.npy
T12 A04 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A04.chr2.npy
T12 A05 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A05.chr2.npy
T12 A06 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A06.chr2.npy
T12 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A07.chr2.npy
T12 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_g

(1, 401, 7) (1, 1)


Loading TSS data
T01 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A02.chr2.npy
T01 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A03.chr2.npy
T01 A04 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A04.chr2.npy
T01 A05 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A05.chr2.npy
T01 A06 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A06.chr2.npy
T01 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A07.chr2.npy
T01 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr2.-.npy
T02 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A02.chr2.npy
T02 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A03.chr2.npy
T02 A04 chr2
Loading Epigenome data ../

T11 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T11A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr2.-.npy
T12 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A02.chr2.npy
T12 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A03.chr2.npy
T12 A04 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A04.chr2.npy
T12 A05 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A05.chr2.npy
T12 A06 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A06.chr2.npy
T12 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A07.chr2.npy
T12 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_g

(1, 401, 7) (1, 1)


Loading TSS data
T01 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A02.chr2.npy
T01 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A03.chr2.npy
T01 A04 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A04.chr2.npy
T01 A05 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A05.chr2.npy
T01 A06 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A06.chr2.npy
T01 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A07.chr2.npy
T01 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr2.-.npy
T02 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A02.chr2.npy
T02 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A03.chr2.npy
T02 A04 chr2
Loading Epigenome data ../

T11 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T11A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr2.-.npy
T12 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A02.chr2.npy
T12 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A03.chr2.npy
T12 A04 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A04.chr2.npy
T12 A05 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A05.chr2.npy
T12 A06 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A06.chr2.npy
T12 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A07.chr2.npy
T12 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_g

(1, 401, 7) (1, 1)


Loading TSS data
T01 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A02.chr2.npy
T01 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A03.chr2.npy
T01 A04 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A04.chr2.npy
T01 A05 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A05.chr2.npy
T01 A06 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A06.chr2.npy
T01 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A07.chr2.npy
T01 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr2.-.npy
T02 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A02.chr2.npy
T02 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A03.chr2.npy
T02 A04 chr2
Loading Epigenome data ../

T11 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T11A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr2.-.npy
T12 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A02.chr2.npy
T12 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A03.chr2.npy
T12 A04 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A04.chr2.npy
T12 A05 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A05.chr2.npy
T12 A06 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A06.chr2.npy
T12 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A07.chr2.npy
T12 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_g

(1, 401, 7) (1, 1)


Loading TSS data
T01 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A02.chr2.npy
T01 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A03.chr2.npy
T01 A04 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A04.chr2.npy
T01 A05 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A05.chr2.npy
T01 A06 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A06.chr2.npy
T01 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A07.chr2.npy
T01 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr2.-.npy
T02 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A02.chr2.npy
T02 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A03.chr2.npy
T02 A04 chr2
Loading Epigenome data ../

T11 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T11A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr2.-.npy
T12 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A02.chr2.npy
T12 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A03.chr2.npy
T12 A04 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A04.chr2.npy
T12 A05 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A05.chr2.npy
T12 A06 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A06.chr2.npy
T12 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A07.chr2.npy
T12 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_g

(1, 401, 7) (1, 1)


Loading TSS data
T01 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A02.chr2.npy
T01 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A03.chr2.npy
T01 A04 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A04.chr2.npy
T01 A05 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A05.chr2.npy
T01 A06 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A06.chr2.npy
T01 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A07.chr2.npy
T01 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr2.-.npy
T02 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A02.chr2.npy
T02 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A03.chr2.npy
T02 A04 chr2
Loading Epigenome data ../

T11 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T11A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr2.-.npy
T12 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A02.chr2.npy
T12 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A03.chr2.npy
T12 A04 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A04.chr2.npy
T12 A05 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A05.chr2.npy
T12 A06 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A06.chr2.npy
T12 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A07.chr2.npy
T12 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_g

(1, 401, 7) (1, 1)


Loading TSS data
T01 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A02.chr2.npy
T01 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A03.chr2.npy
T01 A04 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A04.chr2.npy
T01 A05 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A05.chr2.npy
T01 A06 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A06.chr2.npy
T01 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A07.chr2.npy
T01 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr2.-.npy
T02 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A02.chr2.npy
T02 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A03.chr2.npy
T02 A04 chr2
Loading Epigenome data ../

T11 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T11A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr2.-.npy
T12 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A02.chr2.npy
T12 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A03.chr2.npy
T12 A04 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A04.chr2.npy
T12 A05 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A05.chr2.npy
T12 A06 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A06.chr2.npy
T12 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A07.chr2.npy
T12 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_g

(1, 401, 7) (1, 1)


Loading TSS data
T01 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A02.chr2.npy
T01 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A03.chr2.npy
T01 A04 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A04.chr2.npy
T01 A05 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A05.chr2.npy
T01 A06 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A06.chr2.npy
T01 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A07.chr2.npy
T01 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr2.-.npy
T02 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A02.chr2.npy
T02 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A03.chr2.npy
T02 A04 chr2
Loading Epigenome data ../

T11 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T11A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr2.-.npy
T12 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A02.chr2.npy
T12 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A03.chr2.npy
T12 A04 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A04.chr2.npy
T12 A05 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A05.chr2.npy
T12 A06 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A06.chr2.npy
T12 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A07.chr2.npy
T12 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_g

(1, 401, 7) (1, 1)


Loading TSS data
T01 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A02.chr2.npy
T01 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A03.chr2.npy
T01 A04 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A04.chr2.npy
T01 A05 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A05.chr2.npy
T01 A06 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A06.chr2.npy
T01 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A07.chr2.npy
T01 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr2.-.npy
T02 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A02.chr2.npy
T02 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A03.chr2.npy
T02 A04 chr2
Loading Epigenome data ../

T11 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T11A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr2.-.npy
T12 A02 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A02.chr2.npy
T12 A03 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A03.chr2.npy
T12 A04 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A04.chr2.npy
T12 A05 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A05.chr2.npy
T12 A06 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A06.chr2.npy
T12 A07 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A07.chr2.npy
T12 A10 chr2
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A10.chr2.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr2.+.npy ../../Data/Gene_Expression_Data/25bp_g

(1, 401, 7) (1, 1)


  0%|          | 0/13 [00:00<?, ?it/s]Loading TSS data
T01 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A02.chr9.npy
T01 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A03.chr9.npy
T01 A04 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A04.chr9.npy
T01 A05 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A05.chr9.npy
T01 A06 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A06.chr9.npy
T01 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A07.chr9.npy
T01 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr9.-.npy
T02 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A02.chr9.npy
T02 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A03.chr9.npy
T

T11 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T11A07.chr9.npy
T11 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T11A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr9.-.npy
T12 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A02.chr9.npy
T12 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A03.chr9.npy
T12 A04 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A04.chr9.npy
T12 A05 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A05.chr9.npy
T12 A06 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A06.chr9.npy
T12 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A07.chr9.npy
T12 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Da

(1, 401, 7) (1, 1)


Loading TSS data
T01 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A02.chr9.npy
T01 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A03.chr9.npy
T01 A04 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A04.chr9.npy
T01 A05 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A05.chr9.npy
T01 A06 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A06.chr9.npy
T01 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A07.chr9.npy
T01 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr9.-.npy
T02 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A02.chr9.npy
T02 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A03.chr9.npy
T02 A04 chr9
Loading Epigenome data ../

Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr9.-.npy
T12 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A02.chr9.npy
T12 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A03.chr9.npy
T12 A04 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A04.chr9.npy
T12 A05 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A05.chr9.npy
T12 A06 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A06.chr9.npy
T12 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A07.chr9.npy
T12 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr9.-.npy
T13 A02 chr9
Loading Epigenome data ../../Data/

(1, 401, 7) (1, 1)


Loading TSS data
T01 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A02.chr9.npy
T01 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A03.chr9.npy
T01 A04 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A04.chr9.npy
T01 A05 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A05.chr9.npy
T01 A06 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A06.chr9.npy
T01 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A07.chr9.npy
T01 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr9.-.npy
T02 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A02.chr9.npy
T02 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A03.chr9.npy
T02 A04 chr9
Loading Epigenome data ../

Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr9.-.npy
T12 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A02.chr9.npy
T12 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A03.chr9.npy
T12 A04 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A04.chr9.npy
T12 A05 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A05.chr9.npy
T12 A06 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A06.chr9.npy
T12 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A07.chr9.npy
T12 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr9.-.npy
T13 A02 chr9
Loading Epigenome data ../../Data/

(1, 401, 7) (1, 1)


Loading TSS data
T01 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A02.chr9.npy
T01 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A03.chr9.npy
T01 A04 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A04.chr9.npy
T01 A05 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A05.chr9.npy
T01 A06 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A06.chr9.npy
T01 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A07.chr9.npy
T01 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr9.-.npy
T02 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A02.chr9.npy
T02 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A03.chr9.npy
T02 A04 chr9
Loading Epigenome data ../

Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr9.-.npy
T12 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A02.chr9.npy
T12 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A03.chr9.npy
T12 A04 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A04.chr9.npy
T12 A05 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A05.chr9.npy
T12 A06 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A06.chr9.npy
T12 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A07.chr9.npy
T12 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr9.-.npy
T13 A02 chr9
Loading Epigenome data ../../Data/

(1, 401, 7) (1, 1)


Loading TSS data
T01 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A02.chr9.npy
T01 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A03.chr9.npy
T01 A04 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A04.chr9.npy
T01 A05 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A05.chr9.npy
T01 A06 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A06.chr9.npy
T01 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A07.chr9.npy
T01 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr9.-.npy
T02 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A02.chr9.npy
T02 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A03.chr9.npy
T02 A04 chr9
Loading Epigenome data ../

T12 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A02.chr9.npy
T12 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A03.chr9.npy
T12 A04 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A04.chr9.npy
T12 A05 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A05.chr9.npy
T12 A06 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A06.chr9.npy
T12 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A07.chr9.npy
T12 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr9.-.npy
T13 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T13A02.chr9.npy
T13 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T13A03.chr9.npy
T13 A04 chr9
Loading Epigenome data ../../Data/Transform

(1, 401, 7) (1, 1)


Loading TSS data
T01 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A02.chr9.npy
T01 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A03.chr9.npy
T01 A04 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A04.chr9.npy
T01 A05 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A05.chr9.npy
T01 A06 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A06.chr9.npy
T01 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A07.chr9.npy
T01 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr9.-.npy
T02 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A02.chr9.npy
T02 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A03.chr9.npy
T02 A04 chr9
Loading Epigenome data ../

Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr9.-.npy
T12 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A02.chr9.npy
T12 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A03.chr9.npy
T12 A04 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A04.chr9.npy
T12 A05 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A05.chr9.npy
T12 A06 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A06.chr9.npy
T12 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A07.chr9.npy
T12 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr9.-.npy
T13 A02 chr9
Loading Epigenome data ../../Data/

(1, 401, 7) (1, 1)


Loading TSS data
T01 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A02.chr9.npy
T01 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A03.chr9.npy
T01 A04 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A04.chr9.npy
T01 A05 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A05.chr9.npy
T01 A06 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A06.chr9.npy
T01 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A07.chr9.npy
T01 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr9.-.npy
T02 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A02.chr9.npy
T02 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A03.chr9.npy
T02 A04 chr9
Loading Epigenome data ../

Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr9.-.npy
T12 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A02.chr9.npy
T12 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A03.chr9.npy
T12 A04 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A04.chr9.npy
T12 A05 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A05.chr9.npy
T12 A06 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A06.chr9.npy
T12 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A07.chr9.npy
T12 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr9.-.npy
T13 A02 chr9
Loading Epigenome data ../../Data/

(1, 401, 7) (1, 1)


Loading TSS data
T01 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A02.chr9.npy
T01 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A03.chr9.npy
T01 A04 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A04.chr9.npy
T01 A05 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A05.chr9.npy
T01 A06 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A06.chr9.npy
T01 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A07.chr9.npy
T01 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr9.-.npy
T02 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A02.chr9.npy
T02 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A03.chr9.npy
T02 A04 chr9
Loading Epigenome data ../

Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr9.-.npy
T12 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A02.chr9.npy
T12 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A03.chr9.npy
T12 A04 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A04.chr9.npy
T12 A05 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A05.chr9.npy
T12 A06 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A06.chr9.npy
T12 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A07.chr9.npy
T12 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr9.-.npy
T13 A02 chr9
Loading Epigenome data ../../Data/

(1, 401, 7) (1, 1)


Loading TSS data
T01 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A02.chr9.npy
T01 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A03.chr9.npy
T01 A04 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A04.chr9.npy
T01 A05 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A05.chr9.npy
T01 A06 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A06.chr9.npy
T01 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A07.chr9.npy
T01 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr9.-.npy
T02 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A02.chr9.npy
T02 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A03.chr9.npy
T02 A04 chr9
Loading Epigenome data ../

Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr9.-.npy
T12 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A02.chr9.npy
T12 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A03.chr9.npy
T12 A04 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A04.chr9.npy
T12 A05 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A05.chr9.npy
T12 A06 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A06.chr9.npy
T12 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A07.chr9.npy
T12 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr9.-.npy
T13 A02 chr9
Loading Epigenome data ../../Data/

(1, 401, 7) (1, 1)


Loading TSS data
T01 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A02.chr9.npy
T01 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A03.chr9.npy
T01 A04 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A04.chr9.npy
T01 A05 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A05.chr9.npy
T01 A06 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A06.chr9.npy
T01 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A07.chr9.npy
T01 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr9.-.npy
T02 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A02.chr9.npy
T02 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A03.chr9.npy
T02 A04 chr9
Loading Epigenome data ../

T12 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A02.chr9.npy
T12 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A03.chr9.npy
T12 A04 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A04.chr9.npy
T12 A05 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A05.chr9.npy
T12 A06 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A06.chr9.npy
T12 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A07.chr9.npy
T12 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr9.-.npy
T13 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T13A02.chr9.npy
T13 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T13A03.chr9.npy
T13 A04 chr9
Loading Epigenome data ../../Data/Transform

(1, 401, 7) (1, 1)


Loading TSS data
T01 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A02.chr9.npy
T01 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A03.chr9.npy
T01 A04 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A04.chr9.npy
T01 A05 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A05.chr9.npy
T01 A06 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A06.chr9.npy
T01 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A07.chr9.npy
T01 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr9.-.npy
T02 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A02.chr9.npy
T02 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A03.chr9.npy
T02 A04 chr9
Loading Epigenome data ../

Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr9.-.npy
T12 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A02.chr9.npy
T12 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A03.chr9.npy
T12 A04 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A04.chr9.npy
T12 A05 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A05.chr9.npy
T12 A06 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A06.chr9.npy
T12 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A07.chr9.npy
T12 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr9.-.npy
T13 A02 chr9
Loading Epigenome data ../../Data/

(1, 401, 7) (1, 1)


Loading TSS data
T01 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A02.chr9.npy
T01 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A03.chr9.npy
T01 A04 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A04.chr9.npy
T01 A05 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A05.chr9.npy
T01 A06 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A06.chr9.npy
T01 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A07.chr9.npy
T01 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr9.-.npy
T02 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A02.chr9.npy
T02 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A03.chr9.npy
T02 A04 chr9
Loading Epigenome data ../

Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr9.-.npy
T12 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A02.chr9.npy
T12 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A03.chr9.npy
T12 A04 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A04.chr9.npy
T12 A05 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A05.chr9.npy
T12 A06 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A06.chr9.npy
T12 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A07.chr9.npy
T12 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr9.-.npy
T13 A02 chr9
Loading Epigenome data ../../Data/

(1, 401, 7) (1, 1)


Loading TSS data
T01 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A02.chr9.npy
T01 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A03.chr9.npy
T01 A04 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A04.chr9.npy
T01 A05 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A05.chr9.npy
T01 A06 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A06.chr9.npy
T01 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A07.chr9.npy
T01 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T01A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T01_TPM.chr9.-.npy
T02 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A02.chr9.npy
T02 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T02A03.chr9.npy
T02 A04 chr9
Loading Epigenome data ../

Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T11_TPM.chr9.-.npy
T12 A02 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A02.chr9.npy
T12 A03 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A03.chr9.npy
T12 A04 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A04.chr9.npy
T12 A05 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A05.chr9.npy
T12 A06 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A06.chr9.npy
T12 A07 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A07.chr9.npy
T12 A10 chr9
Loading Epigenome data ../../Data/Transformed_25bp_Data/T12A10.chr9.npy
Loading Transcriptome data ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr9.+.npy ../../Data/Gene_Expression_Data/25bp_genome_wide_TPM_npy/T12_TPM.chr9.-.npy
T13 A02 chr9
Loading Epigenome data ../../Data/

(1, 401, 7) (1, 1)





In [None]:
# Visualize the genes listed in df_genes_2 (iterated by the loop below).
# NOTE(review): these two imports sit mid-notebook; consider moving them to the
# import cell at the top so a fresh "Restart & Run All" shows every dependency up front.
import matplotlib 
import os.path

# Default figure size (width, height) for figures created after this point.
plt.rcParams["figure.figsize"] = (30, 10)

# Disabled earlier gene selection, kept for reference only: CXCR4 and TGFBR1 plus
# every transcript whose spearman_dict value exceeds 0.7, skipping (other than
# those two) any transcript whose yTrue range is <= 1.
# high_spearman_transcripts = ["CXCR4", "TGFBR1"] + [x for x in spearman_dict.keys() if spearman_dict[x] > 0.7]
# for t in high_spearman_transcripts:
#     if( (t != "CXCR4") and (t != "TGFBR1") ):
#         if(np.max(yTrue[t]) - np.min(yTrue[t]) <= 1):
#             continue

for i in range(len(df_genes_2)): # ["ENST00000250003", "ENST00000361779", "ENST00000259915"]:

    t = df_genes_2.iloc[i, 4]
    
    print(str(df_genes_2.iloc[i, :]))
    features = {}
    TPM = {}
    fname_flag = True

    for i in list(range(-13, 0)):

        if(t == "CXCR4"):
            features[i] = np.load("../../Data/saved_npy_arrays/linear_model.CXCR4.CT_"+str(i)+".npy")
            features[i] = np.expm1(features[i])
            TPM[i] = (np.power(10, np.load("../../Data/saved_npy_arrays/linear_model.CXCR4.CT_"+str(i)+".TPM.npy")) - 1)

        elif(t == "TGFBR1"):
            features[i] = np.load("../../Data/saved_npy_arrays/linear_model.TGFBR1.CT_"+str(i)+".npy")
            features[i] = np.expm1(features[i])
            TPM[i] = (np.power(10, np.load("../../Data/saved_npy_arrays/linear_model.TGFBR1.CT_"+str(i)+".TPM.npy")) - 1)

        else:
            fname = "../../Data/saved_npy_arrays/." + t + ".CT_"+str(i)+".npy"
            if(os.path.isfile(fname) == False):
                fname_flag = False
                continue
            features[i] = np.load(fname)
            features[i] = np.expm1(features[i])
            TPM[i] = (np.power(10, np.load("../../Data/saved_npy_arrays/." + t + ".CT_"+str(i)+".TPM.npy")) - 1)    

    if(fname_flag == False):
        continue

    ylimit = {0:10, 1:10, 2:100, 3:10, 4:300, 5:10}    



    cmap = plt.cm.get_cmap('spring')
    # cmap.set_over('green')
    norm = matplotlib.colors.Normalize(vmin=0.0, vmax=np.log10( (10+np.max([TPM[x] for x in TPM.keys()])) ) )
    for ai in [0,1,2,3,4,5]:
        for i in list(range(-13, 0)):

            if( (t == "CXCR4") or (t == "TGFBR1") ):
                plt.plot(np.arange(-operative_half_window_size, operative_half_window_size + 1), features[i][0, :, ai], linewidth=2, color=cmap(norm(np.log10(TPM[i][0][0]) )), label=assays[ai]+" and TPM = "+str(round(TPM[i][0][0], 2)))  #"_CT"+str(i+14))            
            else:
                plt.plot(np.arange(-(window_size//2), (window_size//2) + 1), features[i][0, :, ai], linewidth=2, color=cmap(norm(np.log10(TPM[i][0][0]) )), label=assays[ai]+" and TPM = "+str(round(TPM[i][0][0], 2)))  #"_CT"+str(i+14))
                
            plt.ylim(-5, ylimit[ai])
            plt.xlabel("Bin w.r.t. TSS", fontsize=30)
            plt.ylabel("-log10(p-value)", fontsize=30)
            plt.xticks(fontsize=20, rotation=0)
            plt.yticks(fontsize=20, rotation=0)

        plt.title(t, fontsize=40)
        plt.legend(fontsize=20)
        plt.show()
        plt.close()
        
    if(t == "CXCR4"):
        t = "ENST00000241393.3"
    elif(t == "TGFBR1"):
        t = "ENST00000374994.8"

#     plt.plot(yTrue[t][:-1], yPred[t][:-1], 'o', markersize=20, color="purple")
#     plt.plot(yTrue[t][-1], yPred[t][-1], 'o', markersize=20, color="green")
    plt.xlim(-1, 3)
    plt.ylim(-1, 3)
#     plt.title("Linear Model Spearman = " + str(round(spearman_dict[t], 3)), fontsize=40)
    plt.xlabel("True log10p1(TPM)", fontsize=30)
    plt.ylabel("Predicted log10p1(TPM)", fontsize=30)
    plt.xticks(fontsize=20, rotation=0)
    plt.yticks(fontsize=20, rotation=0)
    plt.show()
    plt.close()
    
    

In [None]:
# Now we setup analysis of each gene to find positions for gRNAs
# First load the epigenetic data of the gene, then given a model, plot the model predictions
# We then use CRISPOR to find the gRNAs

# NOTE(review): absolute, user-specific path - this cell only runs on the original machine.
sys.path.insert(1, '/home/sbatra/.chrmt/Code/deepENCODE/')
from chrmt_inference import perturb_x

# NOTE(review): `load_model` is not imported in this cell or in the notebook's first import
# cell - confirm it is imported somewhere above, otherwise this raises NameError on a fresh kernel.
trained_model = load_model("../../Models/even_not_zero.hdf5")
# trained_model = load_model("../../Models/even_no_batchnorm_not_zero.hdf5")

start_index = 401 // 2 - 20
end_index = 401 // 2 + 20

for t in ["ENST00000241393.3"]:

    epigenetic_features = np.load("../../Data/saved_npy_arrays/." + t + ".CT_"+str(-1)+".npy")
    TPM = (np.power(10, np.load("../../Data/saved_npy_arrays/." + t + ".CT_"+str(-1)+".TPM.npy")) - 1)[0][0]    
    # print(epigenetic_features.shape, TPM)

    # Central 201 bins of the saved window; the model is fed every channel except the last.
    central_features = epigenetic_features[:, 100:300 + 1, :]

    # The unperturbed prediction does not depend on the perturbation parameters, so it is
    # computed once per transcript instead of once per (width, value, bin) combination.
    # Assumes perturb_x returns a modified copy and leaves its input untouched - TODO confirm.
    predicted_native_TPM = np.power(10, trained_model.predict(central_features[:, :, :-1])[0][0]) -1

    for inserted_peak_width in [6,7,8]:
        for inserted_lnp1_minuslog10_p_value in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.5]: # , 2.0, 2.5, 3.0, 3.5, 4.0]:

            # (bin, predicted TPM after perturbation / predicted native TPM) for each bin.
            l_fold_change = []
            for bin_wrt_tss in range(-80, 80):
                perturbed_epigenetic_features = perturb_x(central_features, bin_wrt_tss, inserted_peak_width, inserted_lnp1_minuslog10_p_value, 0)
                predicted_log10p1_TPM = trained_model.predict(perturbed_epigenetic_features[:, :, :-1])
                predicted_TPM = (np.power(10, predicted_log10p1_TPM) - 1)[0][0]
                l_fold_change.append((bin_wrt_tss, predicted_TPM / predicted_native_TPM))

    #             plt.imshow(epigenetic_features[:, 100:301, 2])
    #             plt.show()
    #             plt.imshow(perturbed_epigenetic_features[:, :, 2])
    #             plt.show()

            # One curve per (peak width, inserted value) pair, all on the same axes.
            plt.plot([x[0] for x in l_fold_change], [x[1] for x in l_fold_change], 'o-',
                     label=str(inserted_peak_width)+"_"+str(inserted_lnp1_minuslog10_p_value))
            plt.xlim(-10, 10)

    plt.legend()

    plt.show()
    plt.close()

    # Plot channel 6 of the unperturbed features over the same central window.
    for a in range(6, 7):
        plt.plot(range(-100, 100 + 1), epigenetic_features[0, 100:300 + 1, a])
        plt.xlim(-10, 10)
        plt.show()
        plt.close()

In [None]:
# Display the df_genes_2 rows for a hand-picked set of transcripts.
transcripts_of_interest = [
    "ENST00000296498.3",
    "ENST00000322002.4",
    "ENST00000369887.3",
    "ENST00000380392.3",
    "ENST00000258787.11",
    "ENST00000221972.7",
]
df_genes_2.loc[df_genes_2["transcript"].isin(transcripts_of_interest)]


In [None]:
# Compute scatter plots of true vs. predicted values for CXCR4 and TGFBR1, for both linear and non-linear models

In [None]:
        
        
# Analyze results from the various modeling choices
# The results file has no header row, so the column names are supplied here.
inference_sweep_columns = ["run_name", "window_size", "model_type", "loss_type", "mle_lambda", "CXCR4_spearman", "TGFBR1_spearman"]
df_inference_sweep = pd.read_csv(
    "/home/sbatra/.chrmt/Results/inference_sweep.results.txt",
    sep="\t",
    names=inference_sweep_columns,
)
# Keep only the first 320 rows of the file.
df_inference_sweep = df_inference_sweep.head(320)
df_inference_sweep

In [None]:
from matplotlib.backends.backend_pdf import PdfPages

# One PDF page per (run, model type, loss type, MLE lambda) combination, each showing the
# CXCR4 and TGFBR1 Spearman correlations as a function of the input window size.
rename_run = {"R6.replicate.1":"Train on all cell types", "R7.T13.replicate.1":"Train only on HEK293T"}
sweep_combinations = itertools.product(
    list(rename_run.keys()),
    ["linear", "maxpool"],
    ["mse", "mle"],
    [0.01, 0.1, 1, 10, 100],
)

plt.rcParams["figure.figsize"] = (10, 10)

with PdfPages("../../Results/inference_sweep.pdf") as pdf:

    for run_name_choice, model_type_choice, loss_type_choice, mle_lambda_choice in sweep_combinations:

        matches_combination = (
            (df_inference_sweep.model_type == model_type_choice)
            & (df_inference_sweep.loss_type == loss_type_choice)
            & (df_inference_sweep.mle_lambda == mle_lambda_choice)
            & (df_inference_sweep.run_name == run_name_choice)
        )
        df_subset = df_inference_sweep.loc[matches_combination]

        window_sizes = list(df_subset["window_size"])
        CXCR4_spearmans = list(map(float, df_subset["CXCR4_spearman"]))
        TGFBR1_spearmans = list(map(float, df_subset["TGFBR1_spearman"]))

        # The rows are still selected by the numeric lambda; only the title shows "NA" for mse.
        mle_lambda_label = "NA" if loss_type_choice == "mse" else mle_lambda_choice

        for spearmans, gene_label, line_color in (
            (CXCR4_spearmans, "CXCR4", "darkgreen"),
            (TGFBR1_spearmans, "TGFBR1", "darkblue"),
        ):
            plt.plot(window_sizes, spearmans, 'o-', label=gene_label, color=line_color, markersize=5)

        plt.xlim(150, 310)
        plt.ylim(-0.25, +0.75)
        plt.xlabel("Input window size", fontsize=10)
        plt.ylabel("Spearman correlation with Alan's mean fold change")
        plt.legend(fontsize=15)
        title_lines = [
            "Run: " + str(rename_run[run_name_choice]),
            "Model: " + str(model_type_choice),
            "Loss: " + str(loss_type_choice),
            "MLE lambda: " + str(mle_lambda_label),
        ]
        plt.title("\n".join(title_lines), fontsize=20)
        pdf.savefig()
        plt.close()

In [None]:
# Here we process the results from a ridge regression model (without batchnorm) 
def compute_features_list(f_predictions):
    """Summarise true vs. predicted values per transcript from a predictions file.

    Parameters
    ----------
    f_predictions : iterable of str with a ``close()`` method
        Open handle over tab-separated lines. Field 4 is used as the transcript
        identifier, field 5 as the true value and field 6 as the predicted value.
        Blank lines are skipped. The handle is always closed before returning,
        including when a line fails to parse.

    Returns
    -------
    dict
        ``transcript -> [mean(true), max(true) - min(true), mean(pred),
        max(pred) - min(pred), spearman]``, where ``spearman`` is the Spearman
        correlation between the transcript's true and predicted values, with
        NaN replaced by 0 via ``np.nan_to_num``.

    Raises
    ------
    IndexError
        If a non-blank line has fewer than seven tab-separated fields.
    ValueError
        If field 5 or 6 cannot be parsed as a float.
    """
    yTrue = {}
    yPred = {}
    try:
        for line in f_predictions:
            record = line.rstrip("\n")
            if(not record.strip()):
                # A trailing blank line should not abort the whole summary.
                continue
            vec = record.split("\t")
            transcript = vec[4]
            yTrue.setdefault(transcript, []).append(float(vec[5]))
            yPred.setdefault(transcript, []).append(float(vec[6]))
    finally:
        # This function owns the handle (callers pass a bare open(...)), so release it
        # even when parsing fails.
        f_predictions.close()

    features_dict = {}
    for transcript, yT in yTrue.items():
        yP = yPred[transcript]
        sc, _ = spearmanr(yT, yP)

        features_dict[transcript] = [np.mean(yT), np.max(yT) - np.min(yT), np.mean(yP), np.max(yP) - np.min(yP), np.nan_to_num(sc)]

    return features_dict

# Now we create a dataframe from these results where each row is a gene 
# and the columns are various properties of the true and predicted expression across the cell types 
# for the trained ridge regression model
# df_genes = 

# Per-transcript summaries for each of the four testing-metrics logs. The handles are opened
# here and handed to compute_features_list, which closes them itself.
(odd_not_zero_features,
 odd_zero_features,
 even_not_zero_features,
 even_zero_features) = (
    compute_features_list(open(testing_metrics_path, 'r'))
    for testing_metrics_path in (
        "../../Logs/odd_no_batchnorm_not_zero.testing_metrics.tsv",
        "../../Logs/odd_no_batchnorm_zero.testing_metrics.tsv",
        "../../Logs/even_no_batchnorm_not_zero.testing_metrics.tsv",
        "../../Logs/even_no_batchnorm_zero.testing_metrics.tsv",
    )
)


In [None]:
# One row per transcript: the "not_zero" summary followed by the "zero" summary.
gene_summary_columns = [
    "transcript",
    "mean_log10p1_TPM_True", "max_minus_min_log10p1_TPM_True",
    "mean_log10p1_TPM_Predicted", "max_minus_min_log10p1_TPM_Predicted", "spearman",
    "mean_log10p1_TPM_True_zero", "max_minus_min_log10p1_TPM_True_zero",
    "mean_log10p1_TPM_Predicted_zero", "max_minus_min_log10p1_TPM_Predicted_zero", "spearman_zero",
]

# Rows are keyed on the "not_zero" dictionaries; each transcript must also be present
# in the matching "zero" dictionary.
l_odd = []
for t, not_zero_summary in odd_not_zero_features.items():
    l_odd.append([t, *not_zero_summary, *odd_zero_features[t]])

l_even = []
for t, not_zero_summary in even_not_zero_features.items():
    l_even.append([t, *not_zero_summary, *even_zero_features[t]])

df_genes = pd.DataFrame(l_odd + l_even, columns=gene_summary_columns)

df_genes


In [None]:
# Build a lookup from the mapping file: field 0 is the key matched against
# df_genes["transcript"], field 2 is the gene name.
# The file is read inside a `with` block so the handle is always released.
gene_name = {}
with open("../../Data/ENSEMBL_to_transcript.tsv", 'r') as f_ENSEMBL:
    for line in f_ENSEMBL:
        vec = line.rstrip("\n").split("\t")
        if(len(vec) < 3):
            # Skip blank or truncated lines rather than failing on vec[2].
            continue
        gene_name[vec[0]] = vec[2]

# Keep only transcripts that have a gene name and append it as the last column.
# The transcript is looked up by column name instead of by position, and the index is
# reset so the exported TSV is numbered 0..n-1 as before.
df_genes_2 = (
    df_genes.loc[df_genes["transcript"].isin(set(gene_name))]
    .assign(gene_name=lambda frame: frame["transcript"].map(gene_name))
    .reset_index(drop=True)
)
df_genes_2.to_csv("../../Data/ridge.tsv", sep='\t')


In [None]:
# Per-transcript change in Spearman between the "zero" and "not_zero" summaries.
df_genes_2["spearman_delta"] = df_genes_2["spearman_zero"].sub(df_genes_2["spearman"])

# Compute percentile thresholds for each quantity: a [lower, upper] pair per column.
(mean_TPM_True_thresholds,
 max_minus_min_TPM_True_thresholds,
 spearman_thresholds,
 spearman_zero_thresholds) = [
    list(np.percentile(df_genes_2[column_name], percentile_pair))
    for column_name, percentile_pair in [
        ("mean_log10p1_TPM_True", [25, 75]),
        ("max_minus_min_log10p1_TPM_True", [50, 100]),
        ("spearman", [25, 90]),
        ("spearman_zero", [25, 90]),
    ]
]

print(mean_TPM_True_thresholds, max_minus_min_TPM_True_thresholds, spearman_thresholds, spearman_zero_thresholds)

In [None]:
# Medium CRISPRability: every listed column must lie strictly between its two bounds.
# The bounds for the first three columns come from the *_thresholds lists computed from
# df_genes_2 percentiles; spearman_delta must lie strictly inside (-0.1, 0).
medium_bounds = [
    ("mean_log10p1_TPM_True", mean_TPM_True_thresholds[0], mean_TPM_True_thresholds[1]),
    ("max_minus_min_log10p1_TPM_True", max_minus_min_TPM_True_thresholds[0], max_minus_min_TPM_True_thresholds[1]),
    ("spearman", spearman_thresholds[0], spearman_thresholds[1]),
    ("spearman_delta", -0.1, 0),
]

is_medium = pd.Series(True, index=df_genes_2.index)
for bounded_column, lower_bound, upper_bound in medium_bounds:
    is_medium = is_medium & df_genes_2[bounded_column].gt(lower_bound) & df_genes_2[bounded_column].lt(upper_bound)

df_genes_medium_CRISPRability = df_genes_2.loc[is_medium]

In [None]:
# Write the three CRISPRability gene tables as tab-separated files.
# NOTE(review): only df_genes_medium_CRISPRability is defined in the surrounding cells;
# df_genes_high_CRISPRability and df_genes_low_CRISPRability are presumably left over
# from cells that were edited or removed - confirm this runs on a fresh kernel.
df_genes_high_CRISPRability.to_csv("../../Data/High_CRISPRability.tsv", sep='\t')
df_genes_medium_CRISPRability.to_csv("../../Data/Medium_CRISPRability.tsv", sep='\t')
df_genes_low_CRISPRability.to_csv("../../Data/Low_CRISPRability.tsv", sep='\t')

In [None]:
# Analyze how things would be if we looked at N gRNAs, each with 2 replicates; for both CXCR4 and TGFBR1:
# The dataset is tab-separated, which is read_table's default delimiter.
df_replicates = pd.read_table("../../Data/p300_epigenome_editing_dataset.tsv")
df_replicates

In [None]:
# Measured fold changes of two measurement series, selected by column name only.
# The original additionally sliced column position 6 before selecting the same column by
# name, which tied the lookup to the column order for no benefit.
fold_change_column = "Measured fold change"
rep1 = list(df_replicates.loc[df_replicates["Measurement ID"] == "EXP2-IR1-S2", fold_change_column])
rep2 = list(df_replicates.loc[df_replicates["Measurement ID"] == "EXP2-IR2-S1", fold_change_column])

# Positions (within each series) of the measurements that are compared.
# NOTE(review): hand-picked indices; this assumes both series list their gRNAs in the same order - confirm.
compared_positions = [1, 2, 3, 7]

spearmanr([rep1[i] for i in compared_positions], [rep2[i] for i in compared_positions])


In [None]:
# Scratch check: np.r_ concatenates its index arguments into a 1-D array.
np.r_[1,3,5]

In [None]:
# Perhaps include only these genes with unique transcripts

In [None]:
from chrmt_generator import TranscriptomePredictor

# Save, for each of the 13 cell-type indices, the generator's first (X, Y) item for one
# hard-coded locus under the transcript's name.
t = "ENST00000259915"

for cell_type_choice in range(-13, 0):
    generator_options = dict(
        shuffle=False,
        mode='inference',
        masking_probability=0.0,
        chrom="chr6", #chrom_dict[t],
        start=31170682, # int(TSS_dict[t]),
        strand="-", # strand_dict[t],
        cell_type=cell_type_choice,
    )
    prediction_generator = TranscriptomePredictor(window_size, 1, **generator_options)

    # Only the first item is requested.
    for i in range(1):
        X, Y = prediction_generator[i]
        print(X.shape, Y.shape)

    output_prefix = "../../Data/saved_npy_arrays/" + "." + t + ".CT_" + str(cell_type_choice)
    np.save(output_prefix + ".npy", X)
    np.save(output_prefix + ".TPM.npy", Y)

In [None]:
# The goal is to perform in-silico perturbation using a model trained to predict bulk gene expression from 
# epigenetic features, on CRISPRa data
#
# Everything below, including the helper function definitions, sits inside the two loops
# opened here, so the helpers are re-defined on every (peak width, MNase offset) iteration.

cell_type_choice = 13 # corresponds to HEK293T
assay_index = assay_index_choice

# Inserted value is in ln(-log10(p) + 1) units: expm1(1.5) is about 3.48, i.e. p is about 0.0003.
inserted_pvalue_choice = 1.5 # corresponds to 0.0003
# range(6, 7, 2) yields the single peak width 6.
for peak_width_choice in range(6, 7, 2):
    
    # MNase_offset is read as a global inside ise(); offsets run from -15 to 14 bins.
    for MNase_offset in range(-15, 15):
#         print("peak_width = ", peak_width_choice)
        # Filled per gene further down with (actual TPM, predicted TPM) per cell type.
        TPM = {}

        # Perform inference for epigenome editing data
        def ise(cell_type_choice_input, 
                gene,
                gene_strand,
                gRNA_strand_dict,
                trained_model,
                assay_index_value,
                inserted_lnp1_minuslog10p_value = 3,
                peak_width = 2,
                pairwise_features=False):

            # Load epigenetic features for all assays
            X = np.load("../Data/" + gene + ".T" + '%02d' % cell_type_choice_input + ".npy")

            # obtain the middle portion of this
            X = X[:, (window_size//2)-operative_half_window_size:(window_size//2)+operative_half_window_size+1, :]

            # Perform inference by introducing p-value changes with a peak width
            yPred = []
            center = operative_half_window_size
            positions = range(center - center, center + center + 1)
            for pos in positions:
                X_modified = np.copy(X)

                # some positions won't be used in the plot and hence we don't perturb them
                if(pos not in gRNA_strand_dict):
                    gRNA_strand = "plus" # Change 1
                else:
                    gRNA_strand = gRNA_strand_dict[pos]

                ise_start_position = pos - (peak_width // 2)
                ise_end_position = pos + (peak_width // 2 + 1) 

                for p in range(ise_start_position, ise_end_position):
                    if( (p>=0) and (p < max(positions)) ):
                        if(X_modified[:, p, 2] > 10): # Remember this is ln( -log10(p-value) + 1)
                            # If H3K27me3 peak exists, then p300 doesn't work
                            print("H3K27me3 exists!")
                            pass
                        else:
                            if (pairwise_features):
                                # Modify the H3K27ac peak with the shape being derived from MNase (-1) 
                                if(gRNA_strand == "plus"):
                                    X_modified[:, p, 3] = X_modified[:, p, 3] + (X_modified[:, min(p+MNase_offset, 2*operative_half_window_size), -1] * inserted_lnp1_minuslog10p_value)
#                                     print("Delta = ", gene, peak_width_choice, gRNA_strand, pos, p, (X_modified[:, min(p+MNase_offset, 2*operative_half_window_size), -1]), file=sys.stderr)
                                else: #(gRNA_strand == "minus"):
                                    X_modified[:, p, 3] = X_modified[:, p, 3] + (X_modified[:, min(p-MNase_offset, 2*operative_half_window_size), -1] * inserted_lnp1_minuslog10p_value)
#                                     print("Delta = ", gene, peak_width_choice, gRNA_strand, pos, p, (X_modified[:, min(p-MNase_offset, 2*operative_half_window_size), -1]), file=sys.stderr)
#                                 else:
#                                     pass # Change 2
                            else:
                                # Modify the assay itself
                                X_modified[:, p, assay_index_value] += inserted_lnp1_minuslog10p_value

                # Prepare input
                if (pairwise_features):
                    X_modified = np.concatenate([X_modified[:, :, 3], X_modified[:, :, assay_index_value]], axis=1)
                else:                 
                    X_modified = X_modified[:, :, assay_index_value]

                yPred_value = trained_model.predict(quadratic_transform.transform(X_modified))
                yy = yPred_value[0]
                yPred.append(yy)

            # Prepare input for predicting native expression
            X_modified_2 = np.copy(X)
            if(pairwise_features):
                X_modified_2 = np.concatenate([X[:, :, 3], X[:, :, assay_index_value]], axis=1) 
            else:
                X_modified_2 = X[:, :, assay_index_value]

            # Instead of scaling, divide by yPred
            yPred_value = trained_model.predict(quadratic_transform.transform(X_modified_2))[0] + 0.00000001 # to avoid divby0

        #             print("Predicted TPM for ", gene, " = ", yPred_TPM)
        #             print((np.power(10, yPred) -1), (np.power(10, yPred_value) -1))

            yPred = (np.power(10, yPred) -1) / (np.power(10, yPred_value) -1)
            yPred_TPM = (np.power(10, yPred_value) -1)
            return yPred, yPred_TPM


        def p_value_mapping(inserted_lnp1_minuslog10p_value):
            """Convert a ln(-log10(p) + 1) value back to -log10(p), rounded to 4 decimals.

            The result is used in the figure title as the "-log10(p_value)" of the
            inserted peak. The original also computed the raw p-value but never used it.
            """
            minuslog10p_value = np.expm1(inserted_lnp1_minuslog10p_value)
            return round(minuslog10p_value, 4)


        def convert_to_2D(idx, nrows, ncols):
            """Map a flat index to (row, column) in a row-major grid with ncols columns.

            nrows is accepted for call-site symmetry but does not affect the result.
            """
            row, col = divmod(idx, ncols)
            return row, col


        # Now that we have the helper functions declared, we perform the in silico perturbation
        # This block runs once per (peak_width_choice, MNase_offset) iteration: it reloads the
        # editing dataset, runs ise() for CXCR4 and TGFBR1, prints the Spearman correlation with
        # the measured fold changes to stderr, and builds a two-panel figure per gene that is
        # closed without being shown.

        # Load p300 epigenome editing data
        df_p300_epigenome_editing = pd.read_csv("../Data/p300_epigenome_editing_dataset.tsv", sep="\t")

        # Per-gene lookup tables filled from the dataset rows; a later row for the same gene
        # overwrites the earlier entry.
        # NOTE(review): the columns are read by position (13, 3, 2, 0); their names are not
        # visible here - confirm against the file header.
        TSS = {}
        STRANDS = {}
        CHROMS = {}
        GENES = {}
        gRNA_STRANDS = {}
        for index in range(len(df_p300_epigenome_editing)):
            tss = df_p300_epigenome_editing.iloc[index, 13]
            gene_strand = df_p300_epigenome_editing.iloc[index, 3]
            chrom = df_p300_epigenome_editing.iloc[index, 2]
            gene = df_p300_epigenome_editing.iloc[index, 0]

            TSS[gene] = int(tss)

            if(gene_strand == "plus"):
                STRANDS[gene] = "+"
            elif(gene_strand == "minus"):
                STRANDS[gene] = "-"
            else:
                print("something wrong with strand!")

            CHROMS[gene] = chrom
            GENES[gene] = 1                                                    

        # All genes present in the dataset; this is replaced by a fixed two-gene list further down.
        GENES_LIST = set(list(GENES.keys()))    

        df_GENES_values = {}
        df_GENES_means = {}
        gene_strand_dict = {}
        for gene in GENES_LIST:
            # Boolean-mask selection; the column assignments below are made on this selection
            # (pandas may emit SettingWithCopyWarning, which this notebook silences globally).
            df_GENES_values[gene] = df_p300_epigenome_editing[df_p300_epigenome_editing["p300 target gene"] == gene]

            # gRNA position converted from bp to (fractional) bins of RESOLUTION bp.
            df_GENES_values[gene]["Position_wrt_TSS"] = ( (pd.to_numeric(df_GENES_values[gene]["gRNA position  wrt TSS (hg38)"])) / RESOLUTION ) # Change 3

            df_GENES_values[gene]["gRNA_strand"] = df_GENES_values[gene]["gRNA target strand"].apply(lambda s: "red" if(s=="minus") else "blue")
            df_GENES_values[gene]["gRNA_strand_bool"] = df_GENES_values[gene]["gRNA target strand"].apply(lambda s: -1 if(s=="minus") else +1)

            gene_strand_dict[gene] = STRANDS[gene]
        #     print(gene_strand_dict)

            # Mean of each column per gRNA.
            # NOTE(review): this frame contains string columns; recent pandas versions raise on
            # .mean() over non-numeric columns unless numeric_only=True is passed, and the
            # positional lookups on df_means below depend on which columns survive - confirm
            # against the pandas version in use.
            df_GENES_means[gene] = df_GENES_values[gene].groupby('gRNA ID').mean()
            df_GENES_means[gene].index.name = 'gRNA ID'
            df_GENES_means[gene].reset_index(inplace=True)
#             print(gene, df_GENES_means[gene])

        # Perform in-silico epigenesis
        assay_color = ['black', 'red', 'green', 'blue', 'cyan', 'pink', 'brown']
        xticklabels = range(-operative_half_window_size, operative_half_window_size + 1)

        GENES_LIST = ["CXCR4", "TGFBR1"]

        # One row per gene: left panel is the scatter, right panel is the positional profile.
        fig, axes = plt.subplots(nrows=len(GENES_LIST), ncols=2, figsize=(40, 30), sharey=False)
        fig.tight_layout(pad=1, w_pad=20, h_pad=25)

        for idx, gene in enumerate(sorted(GENES_LIST)):
            TPM[gene] = {}

            # With ncols=1 the row index equals idx; idx_y is not used.
            idx_x, idx_y = convert_to_2D(idx, nrows=len(GENES_LIST), ncols=1)
            ax_1 = axes[idx_x, 0]
            ax_2 = axes[idx_x, 1]

            # Same file and central window that ise() loads, with the leading axis squeezed away.
            gene_features = np.squeeze(np.load("../Data/" + gene + ".T" + '%02d' % cell_type_choice + ".npy"), axis=0)
            gene_features = gene_features[(window_size//2)-operative_half_window_size:(window_size//2)+operative_half_window_size+1, :]

            df_values = df_GENES_values[gene]
            df_means = df_GENES_means[gene]
            # print(gene, df_values) 
            # print(gene, df_means)

            inserted_lnp1_minuslog10p_value = inserted_pvalue_choice
            peak_width = peak_width_choice

            # Compute gRNA strands for all positions
            # NOTE(review): columns 9 and 10 of df_means are presumably Position_wrt_TSS and
            # gRNA_strand_bool - confirm. The position is truncated to an integer bin and
            # shifted so that the TSS sits at operative_half_window_size.
            gRNA_strand_dict = {}
            for p_idx in range(len(df_means)):
                position_m = int( (df_means.iloc[p_idx, 9] + operative_half_window_size) )
                gRNA_strand = df_means.iloc[p_idx, 10]
                if(gRNA_strand == +1):
                    gRNA_strand_dict[position_m] = "plus"
                elif(gRNA_strand == -1):
                    gRNA_strand_dict[position_m] = "minus"
                else:
                    print("Something wrong with parsing gRNA strand", file=sys.stderr)

            # Pass assay_index as -1 to make sure we select MNase
            # NOTE(review): the value actually passed is assay_index - 1, not -1, and `model`
            # is not defined anywhere in this cell - confirm both against the defining cell.
            gene_ise, yPred_TPM = ise(cell_type_choice, gene, gene_strand_dict[gene], gRNA_strand_dict, model, 1*assay_index - 1, inserted_lnp1_minuslog10p_value, peak_width, pairwise_flag)

            # Create a scatter plot of the means with the predictions of those positions
            gene_ise_at_means = []
            alan_means = []
            gRNA_strands = []

            # gRNAs whose bin falls outside the predicted window are dropped.
            # NOTE(review): column 1 of df_means is presumably the measured fold change - confirm.
            for p_idx in range(len(df_means)):
                position_m = df_means.iloc[p_idx, 9]
                alan_mean = df_means.iloc[p_idx, 1]
                gRNA_strand = df_means.iloc[p_idx, 10]
                if(position_m + operative_half_window_size < 0):
                    continue
                elif( (int(position_m) + operative_half_window_size) >= len(gene_ise) ):
                    continue
                else:
        #             print(len(gene_ise), position_m, operative_half_window_size)
                    gene_ise_at_means.append(gene_ise[int(position_m) + operative_half_window_size])
                    alan_means.append(alan_mean)
                    gRNA_strands.append(gRNA_strand)

            pc, pp = pearsonr(list(alan_means), gene_ise_at_means)
            sc, sp = spearmanr(list(alan_means), gene_ise_at_means)

            # These stderr lines (gene, Spearman, p-value, offset) have the same four fields
            # that the offset-vs-Spearman cell later reads back from a file.
            print(gene, sc, sp, MNase_offset, file=sys.stderr)

            # Built but not used by the scatter below, which uses a single fixed colour.
            gRNA_strands_colors = []
            for iii in gRNA_strands:
                if(iii == -1):
                    gRNA_strands_colors.append("red")
                else:
                    gRNA_strands_colors.append("blue")

            ax_1.scatter(list(alan_means), gene_ise_at_means, color="#FF1493", s=1000) # ="#FF1493")
            ax_1.set_xlim(0, 1.1 * max(alan_means))
            ax_1.set_ylim(0, 1.1 * max(gene_ise_at_means))
            ax_1.tick_params(axis='both', which='major', labelsize=40)
            ax_1.tick_params(axis='both', which='minor', labelsize=40)
            ax_1.set_xlabel("Mean experimental fold change", size=60)
            ax_1.set_ylabel("Model prediction's fold change", size=45)
        #     if(pp > 0.05):
        #         p_asterisk = "NS"
        #     if(sp > 0.05):
        #         s_asterisk = "NS"
            ax_1.set_title("Correlation between experimental and model predictions fold change\nPearson = "+
                           str(round(pc, 2))+
                           " ("+str(round(pp, 3))+
                           ") Spearman = "+
                           str(round(sc, 2))+
                           " ("+str(round(sp, 3))+
                           ")", size=40)

            # Determine whether we are doing H3K27ac ISE in the background of another assay's features
            # or we have marginal features and are doing that track's ISE
            # NOTE(review): epigenetic_features_2 is only assigned in the pairwise branch but is
            # used unconditionally below, so the non-pairwise path depends on a leftover value
            # or fails with NameError.
            if(pairwise_flag):
                epigenetic_features = gene_features[:, -1] # MNase
                epigenetic_features_2 = gene_features[:, 3] # H3K27ac
                color_for_assay = assay_color[3]
                label_for_assay = assays[3]
            else:
                epigenetic_features = gene_features[:, assay_index-1]
                color_for_assay = assay_color[assay_index-1]
                label_for_assay = assays[assay_index-1]

            # Scale the model predictions     
            # Predictions are rescaled so their median matches the median measured fold change,
            # then halved and shifted up by 1.0 (display only).
            scaling_ratio = np.median(df_means['Measured fold change'])/np.median(gene_ise - 0.0)
            scaled_model_predictions = 0.5 * (scaling_ratio * (gene_ise - 0.0)) + 1.0

            # Scale the epigenetic features
            # Each track is rescaled so its maximum matches the maximum measured fold change
            # (the first track is additionally shifted up by 0.5).
            epigenetic_features_scaling_ratio = max(df_means['Measured fold change'])/max(epigenetic_features - 0.0)
            scaled_epigenetic_features = (epigenetic_features_scaling_ratio * (epigenetic_features - 0.0)) + 0.5

            epigenetic_features_scaling_ratio_2 = max(df_means['Measured fold change'])/max(epigenetic_features_2 - 0.0)
            scaled_epigenetic_features_2 = (epigenetic_features_scaling_ratio_2 * (epigenetic_features_2 - 0.0)) + 0.0 

            ax_2.plot(xticklabels, scaled_model_predictions, 'o-', color="#4daf4a", linewidth=5, markersize=2, label="(Scaled) Model Predictions " + label_for_assay)
            ax_2.plot(xticklabels, scaled_epigenetic_features, 'o-', color="#8470FF", linewidth=5, markersize=1, label="(Scaled) Epigenetic Features MNase") # + label_for_assay)
            ax_2.plot(xticklabels, scaled_epigenetic_features_2, 'o-', color="darkblue", linewidth=5, markersize=1, label="(Scaled) Epigenetic Features H3K27ac") # + label_for_assay)

            ax_2.bar(df_means['Position_wrt_TSS'], 0.0 + (df_means['Measured fold change']), color="#f781bf", bottom=0, width=2, label="Experimental mean from qPCR")

            gRNA_strand_groups = df_values.groupby("gRNA target strand")

            # Marker colour depends on both the gene strand and the gRNA strand; the legend
            # describes blue as "rightward" and red as "leftward".
            color_dict = {}
            color_dict["+"] = {"plus":"blue", "minus":"red"}
            color_dict["-"] = {"plus":"red", "minus":"blue"}
            direction = {"blue":"rightward", "red":"leftward"}
            color_index = 0
            for name, group in gRNA_strand_groups:
                ax_2.plot(group['Position_wrt_TSS'], 0.0 + (group['Measured fold change']), 'o', color=color_dict[STRANDS[gene]][name], label="qPCR facing "+direction[color_dict[STRANDS[gene]][name]], markersize=15)
                color_index += 1

            ax_2.set_xlim(-operative_half_window_size-10, operative_half_window_size+10)
            ax_2.set_ylim(-1, 1.0 + max(df_means['Measured fold change'])*1.5)
            # Relabel the existing bin-valued ticks in bp (bin * RESOLUTION).
            x_vals = ax_2.get_xticks()
            ax_2.set_xticklabels(['{:3.0f}'.format(x * RESOLUTION) for x in x_vals])
            ax_2.yaxis.set_major_locator(MaxNLocator(integer=True))
            ax_2.tick_params(axis='both', which='major', labelsize=35)
            ax_2.tick_params(axis='both', which='minor', labelsize=35)
            ax_2.set_xlabel("Peak Position (in bp) w.r.t TSS", size=50)
            ax_2.set_ylabel("Gene expression fold change", size=50)
            ax_2.set_title(gene+" with H3K27ac + "+assays[assay_index-1]+"\nincreasing " + str(peak_width * RESOLUTION) + "bp peaks by -log10(p_value)="+str(p_value_mapping(inserted_lnp1_minuslog10p_value)), size=40) #, y=1.1)

            ax_2.legend(loc='upper center', prop={'size': 30}, ncol=2)

            # Now also keep storing the actual TPM vs predicted TPM for each gene, for each cell type
            actual_TPM = np.load("../Data/" + gene + ".T" + '%02d' % cell_type_choice + ".TPM.npy")
        #             print(gene, cell_type_choice, (np.power(10, actual_TPM[0][0]) -1), yPred_TPM)
            TPM[gene][cell_type_choice] = ((np.power(10, actual_TPM[0][0]) -1), yPred_TPM)

        # The figure is discarded without being shown or saved; only the stderr output and the
        # TPM dictionary survive each iteration.
#         plt.show()
        plt.close()




In [None]:
# Plot spearman as a function of MNase offset
plt.rcParams["figure.figsize"] = (20, 10)
colnames = ["gene", "sc", "sp", "offset"] 
df_offset = pd.read_csv('../Data/offset_Spearman.plus_plus', sep=" ", names=colnames, low_memory=False)
df_offset_sorted = df_offset.sort_values(['gene', 'offset'], ascending=[1, 1])

# Each gene's curve is a positional slice of the sorted table.
# NOTE(review): the slices are hard-coded (rows 1..28 and rows 30 onward), so rows 0 and 29
# are never plotted - confirm this matches the number of offsets per gene in the file.
offset_curves = [
    (slice(1, 29), "#4daf4a", "CXCR4"),
    (slice(30, None), "#8470FF", "TGFBR1"),
]
for row_slice, curve_color, curve_label in offset_curves:
    curve_rows = df_offset_sorted.iloc[row_slice]
    plt.plot(curve_rows["offset"], curve_rows["sc"],  'o-', color=curve_color, linewidth=5, markersize=10, label=curve_label)

plt.xlabel("Offset / 25bp ", fontsize=25)
plt.ylabel("Spearman of linear model's predictions with Alan's data", fontsize=20)
plt.xlim(-16, 16)
plt.ylim(-0.1, 1)
plt.xticks(fontsize=15, rotation=0)
plt.yticks(fontsize=15, rotation=0)
plt.title("(strand-specific) Offset to extract MNase-seq for perturbation", fontsize=35)
plt.legend(loc='upper right', prop={'size': 30}, ncol=2)
plt.show()
plt.close()

In [None]:
# We analyze the HER2 gene dataset here:
# Scatter of average log2 fold change against gRNA distance to the TSS for the two HER2 gRNA sets.
colnames = ["chrom", "gRNA_start", "gRNA_end", "gRNA_ID", "average_log2(fold_change)", "d_TSS"] 
df_HER2_DHS = pd.read_csv('../Data/HER2.DHS.hg38.tsv', sep="\t", names=colnames, low_memory=False)
df_HER2_sat = pd.read_csv('../Data/HER2.sat.hg38.tsv', sep="\t", names=colnames, low_memory=False)
plt.rcParams["figure.figsize"] = (20, 10)

# "sat" is drawn first so that the "DHS" points sit on top of it.
for her2_frame, marker_color, set_name in (
    (df_HER2_sat, "skyblue", "sat"),
    (df_HER2_DHS, "red", "DHS"),
):
    plt.plot(her2_frame['d_TSS'], her2_frame['average_log2(fold_change)'], 'o', color=marker_color, label=set_name+" ("+str(len(her2_frame))+" gRNAs)")

plt.xlabel("distance of gRNA to TSS in bp", fontsize=25)
plt.ylabel("average(log2(fold-change)) between high and low expression", fontsize=20)
plt.xlim(-2100, 2100)
plt.ylim(-4, 4)
plt.xticks(fontsize=15, rotation=0)
plt.yticks(fontsize=15, rotation=0)
plt.title("HER2 NBT17", fontsize=35)
plt.legend(loc='upper center', prop={'size': 30}, ncol=2, markerscale=2)
plt.show()



In [None]:
# Perform perturbation analysis in HER2
# Tab-separated HER2 editing dataset (tab is read_table's default delimiter).
df_HER2_p300_epigenome_editing = pd.read_table("../Data/HER2.p300_epigenome_editing_dataset.tsv")

def her2_ise(position, cell_type_choice, gene_features, gRNA_strand, trained_model, assay_index_value, inserted_lnp1_minuslog10p_value, peak_width):
    """Predict the effect of an in-silico H3K27ac perturbation around one gRNA position.

    Reads the notebook globals ``window_size``, ``operative_half_window_size``,
    ``quadratic_transform`` and ``MNase_offset`` (the loop variable of the calling cell).

    Parameters
    ----------
    position
        Bin offset of the gRNA; ``operative_half_window_size`` is added to obtain
        the index inside the central window.
    cell_type_choice
        Not used by this function; kept so existing calls keep working.
    gene_features
        Array indexed as [sample, window position, channel]. Per the inline comments,
        channel 2 is treated as H3K27me3, channel 3 as H3K27ac and the last channel
        as MNase -- TODO confirm against how the .npy files were written.
    gRNA_strand
        "plus" or "minus"; any other value leaves the features unmodified.
    trained_model
        Object with a ``predict`` method, applied to ``quadratic_transform.transform(...)``.
    assay_index_value
        Channel concatenated after channel 3 to build the model input.
    inserted_lnp1_minuslog10p_value
        Multiplier applied to the MNase value that is added to channel 3.
    peak_width
        Width (in bins) of the perturbed region around the gRNA position.

    Returns
    -------
    The perturbed/native ratio of ``10**prediction - 1``, divided once more by the
    native ``10**prediction - 1``.
    """
    last_index = 2 * operative_half_window_size  # last valid index of the central window
    centre = window_size // 2

    # Keep only the central 2*operative_half_window_size + 1 positions
    X = gene_features[:, centre - operative_half_window_size:centre + operative_half_window_size + 1, :]

    pos = position + operative_half_window_size
    X_modified = np.copy(X)

    ise_start_position = max(0, pos - (peak_width // 2))
    ise_end_position = min(pos + (peak_width // 2 + 1), last_index)

    for p in range(ise_start_position, ise_end_position):
        # NOTE(review): only bins strictly left of the gRNA position are perturbed
        # (this was `p < max(positions)` with a single position) -- confirm intent.
        if not (p < pos):
            continue

        if(X_modified[:, p, 2] > 10): # Remember this is ln( -log10(p-value) + 1)
            # If H3K27me3 peak exists, then p300 doesn't work
            print("H3K27me3 exists!")
            continue

        # The MNase value is read at an offset whose sign depends on the gRNA strand
        if(gRNA_strand == "plus"):
            source = p + MNase_offset
        elif(gRNA_strand == "minus"):
            source = p - MNase_offset
        else:
            continue

        # Clamp on BOTH sides: a negative index would silently wrap around and read
        # the MNase signal from the opposite end of the window.
        source = min(max(source, 0), last_index)
        X_modified[:, p, 3] = X_modified[:, p, 3] + (X_modified[:, source, -1] * inserted_lnp1_minuslog10p_value)

    # Model input: channel 3 followed by the chosen companion assay
    perturbed_input = np.concatenate([X_modified[:, :, 3], X_modified[:, :, assay_index_value]], axis=1)
    yPred = [trained_model.predict(quadratic_transform.transform(perturbed_input))[0]]

    # Same input built from the unmodified features gives the native prediction
    native_input = np.concatenate([X[:, :, 3], X[:, :, assay_index_value]], axis=1)
    yPred_value = trained_model.predict(quadratic_transform.transform(native_input))[0] + 0.00000001 # to avoid divby0

    yPred_TPM = (np.power(10, yPred_value) -1)
    yPred = (np.power(10, yPred) -1) / yPred_TPM

    assert(len(yPred) == 1)

    # NOTE(review): yPred is already a ratio to the native value, and it is divided by
    # the native value a second time here. Behaviour kept as-is; correlations for a
    # single gene are unaffected, but the number is not a plain fold change.
    return yPred[0]/yPred_TPM


# Sweep over MNase offsets: for every offset, run her2_ise on each HER2 gRNA of the
# p300 dataset, correlate predictions with the measured fold changes and draw a scatter plot.
cell_type_choice = 13 # corresponds to HEK293T
assay_index = assay_index_choice

inserted_pvalue_choice = 1.5 # corresponds to 0.0003
peak_width_choice = 6

# MNase_offset is read as a global inside her2_ise, so this loop variable is an implicit
# argument of every her2_ise call below. range(-15,15) stops at 14.
for MNase_offset in range(-15,15):

    # Perform in-silico epigenesis
    assay_color = ['black', 'red', 'green', 'blue', 'cyan', 'pink', 'brown']
    xticklabels = range(-operative_half_window_size, operative_half_window_size + 1)

    # Listing HER2 twice yields a 2x2 axes grid, so `axes` can be indexed with two subscripts.
    GENES_LIST = ["HER2", "HER2"] # ["CXCR4", "TGFBR1"]

    fig, axes = plt.subplots(nrows=len(GENES_LIST), ncols=2, figsize=(40, 30), sharey=False)
    fig.tight_layout(pad=1, w_pad=20, h_pad=25)

    # This loop only resets TPM[gene] and picks axes; ax_1/ax_2 keep the values of the
    # last iteration. TPM and convert_to_2D are defined elsewhere in the notebook.
    for idx, gene in enumerate(sorted(GENES_LIST)):
        TPM[gene] = {}

        idx_x, idx_y = convert_to_2D(idx, nrows=len(GENES_LIST), ncols=1)
        ax_1 = axes[idx_x, 0]
        ax_2 = axes[idx_x, 1]
        
    # Per-gene bookkeeping filled while scanning the dataset rows
    TSS = {}
    STRANDS = {}
    CHROMS = {}
    GENES = {}
    gRNA_STRANDS = {}
    gRNA_strands_colors = []
        
    measured_fold_change_list = []
    predicted_fold_change_list = []
    
    # Columns are read by position (0, 2, 3, 7, 8, 13, 15); their names are not visible
    # here -- the variable names below are the only indication of their meaning.
    for index in range(len(df_HER2_p300_epigenome_editing)):
        tss = df_HER2_p300_epigenome_editing.iloc[index, 13]
        gene_strand = df_HER2_p300_epigenome_editing.iloc[index, 3]
        chrom = df_HER2_p300_epigenome_editing.iloc[index, 2]
        gene = df_HER2_p300_epigenome_editing.iloc[index, 0]

        # `gene` is overwritten before this check, so after the loop it holds the gene
        # of the LAST row, whether or not that row was HER2.
        if(gene != "HER2"):
            continue
        
        TSS[gene] = int(tss)

        if(gene_strand == "plus"):
            STRANDS[gene] = "+"
        elif(gene_strand == "minus"):
            STRANDS[gene] = "-"
        else:
            print("something wrong with strand!")

        CHROMS[gene] = chrom
        GENES[gene] = 1                                                    

        
        
        inserted_lnp1_minuslog10p_value = inserted_pvalue_choice
        peak_width = peak_width_choice

        gRNA_strand = df_HER2_p300_epigenome_editing.iloc[index, 8]
        # Convert the position to bins of RESOLUTION
        position = pd.to_numeric(df_HER2_p300_epigenome_editing.iloc[index, 15]) // RESOLUTION
                
        # NOTE(review): the same .npy file is re-loaded for every row; it could be loaded once.
        gene_features = np.load("../Data/" + gene + ".T" + '%02d' % cell_type_choice + ".npy")

        # NOTE(review): `model` is whatever the notebook last bound to that name; other
        # cells rebind `model` to classifiers, so this depends on execution order.
        predicted_fold_change = her2_ise(position, cell_type_choice, gene_features, gRNA_strand, model, assay_index - 1, inserted_lnp1_minuslog10p_value, peak_width)
        predicted_fold_change_list.append(predicted_fold_change)
        
        measured_fold_change = df_HER2_p300_epigenome_editing.iloc[index, 7]
        measured_fold_change_list.append(measured_fold_change)
        
    measured_fold_change_list = np.asarray(measured_fold_change_list)    
    predicted_fold_change_list = np.asarray(predicted_fold_change_list)    
        
    # Create a scatter plot of the means with the predictions of those positions
    pc, pp = pearsonr(measured_fold_change_list, predicted_fold_change_list)
    sc, sp = spearmanr(measured_fold_change_list, predicted_fold_change_list)

    # Progress line on stderr: gene, Spearman coefficient, its p-value, current offset
    print(gene, sc, sp, MNase_offset, file=sys.stderr)

    gRNA_strands_colors.append("blue")

    ax_1.scatter(measured_fold_change_list, predicted_fold_change_list, color="#FF1493", s=100) # ="#FF1493")
    ax_1.set_xlim(-4, 1.1 * max(measured_fold_change_list))
    ax_1.set_ylim(-0.1, 1.1 * max(predicted_fold_change_list))
    ax_1.tick_params(axis='both', which='major', labelsize=40)
    ax_1.tick_params(axis='both', which='minor', labelsize=40)
    ax_1.set_xlabel("Measured experimental log2 fold change", size=60)
    ax_1.set_ylabel("Model prediction's fold change", size=45)
#     if(pp > 0.05):
#         p_asterisk = "NS"
#     if(sp > 0.05):
#         s_asterisk = "NS"
    ax_1.set_title("Correlation between experimental and model predictions fold change\nPearson = "+
                   str(round(pc, 2))+
                   " ("+str(round(pp, 3))+
                   ") Spearman = "+
                   str(round(sc, 2))+
                   " ("+str(round(sp, 3))+
                   ")", size=40)

    # Determine whether we are doing H3K27ac ISE in the background of another assay's features
    # or we have marginal features and are doing that track's ISE
    # NOTE(review): her2_ise indexes gene_features with three subscripts, so `[:, -1]` and
    # `[:, 3]` select along the window-position axis, not the channel axis the trailing
    # comments suggest. Everything from here to the scaled_* variables only feeds the
    # commented-out ax_2 plotting below.
    epigenetic_features = gene_features[:, -1] # MNase
    epigenetic_features_2 = gene_features[:, 3] # H3K27ac
    color_for_assay = assay_color[3]
    label_for_assay = assays[3]

    # Scale the model predictions     
    scaling_ratio = np.median(measured_fold_change_list)/np.median(predicted_fold_change_list)
    scaled_model_predictions = 0.5 * (scaling_ratio * (predicted_fold_change_list))

    # Scale the epigenetic features
    epigenetic_features_scaling_ratio = max(measured_fold_change_list)/max(epigenetic_features)
    scaled_epigenetic_features = (epigenetic_features_scaling_ratio * (epigenetic_features))

    epigenetic_features_scaling_ratio_2 = max(measured_fold_change_list)/max(epigenetic_features_2)
    scaled_epigenetic_features_2 = (epigenetic_features_scaling_ratio_2 * (epigenetic_features_2))

#     ax_2.plot(xticklabels, scaled_model_predictions, 'o-', color="#4daf4a", linewidth=5, markersize=2, label="(Scaled) Model Predictions " + label_for_assay)
#     ax_2.plot(xticklabels, scaled_epigenetic_features, 'o-', color="#8470FF", linewidth=5, markersize=1, label="(Scaled) Epigenetic Features MNase") # + label_for_assay)
#     ax_2.plot(xticklabels, scaled_epigenetic_features_2, 'o-', color="darkblue", linewidth=5, markersize=1, label="(Scaled) Epigenetic Features H3K27ac") # + label_for_assay)

#     ax_2.bar(positions_TSS, (measured_fold_change_list), color="#f781bf", bottom=0, width=2, label="Experimental mean from qPCR")

#     color_dict = {}
#     color_dict["+"] = {"plus":"blue", "minus":"red"}
#     color_dict["-"] = {"plus":"red", "minus":"blue"}
#     direction = {"blue":"rightward", "red":"leftward"}
#     color_index = 0
#     for name, group in gRNA_strand_groups:
#         ax_2.plot(group['Position_wrt_TSS'], 0.0 + (group['Measured fold change']), 'o', color=color_dict[STRANDS[gene]][name], label="qPCR facing "+direction[color_dict[STRANDS[gene]][name]], markersize=15)
#         color_index += 1

#     ax_2.set_xlim(-operative_half_window_size-10, operative_half_window_size+10)
#     ax_2.set_ylim(-1, 1.0 + max(measured_fold_change_list)*1.5)
#     x_vals = ax_2.get_xticks()
#     ax_2.set_xticklabels(['{:3.0f}'.format(x * RESOLUTION) for x in x_vals])
#     ax_2.yaxis.set_major_locator(MaxNLocator(integer=True))
#     ax_2.tick_params(axis='both', which='major', labelsize=35)
#     ax_2.tick_params(axis='both', which='minor', labelsize=35)
#     ax_2.set_xlabel("Peak Position (in bp) w.r.t TSS", size=50)
#     ax_2.set_ylabel("Gene expression fold change", size=50)
#     ax_2.set_title(gene+" with H3K27ac + "+assays[assay_index-1]+"\nincreasing " + str(peak_width * RESOLUTION) + "bp peaks by -log10(p_value)="+str(p_value_mapping(inserted_lnp1_minuslog10p_value)), size=40) #, y=1.1)

#     ax_2.legend(loc='upper center', prop={'size': 30}, ncol=2)

#     # Now also keep storing the actual TPM vs predicted TPM for each gene, for each cell type
#     actual_TPM = np.load("../Data/" + gene + ".T" + '%02d' % cell_type_choice + ".TPM.npy")
# #             print(gene, cell_type_choice, (np.power(10, actual_TPM[0][0]) -1), yPred_TPM)
#     TPM[gene][cell_type_choice] = ((np.power(10, actual_TPM[0][0]) -1), yPred_TPM)

    # One figure per offset; close it so figures do not accumulate over the sweep
    plt.show()
    plt.close()



In [None]:
# Analyze predicted TPMs across cell types for a given gene.
# TPM[gene] maps a cell type to a pair whose first entry is plotted as "True TPM"
# and second as "Predicted TPM".
plt.rcParams["figure.figsize"] = (10, 10)
for gene in TPM.keys():
    gene_TPM = np.asarray([(v[0], v[1]) for v in TPM[gene].values()])

    # A gene whose dict is empty (e.g. freshly reset with TPM[gene] = {}) gives a 1-D
    # empty array, and a correlation needs at least two points: skip instead of crashing.
    if gene_TPM.ndim != 2 or gene_TPM.shape[0] < 2:
        print("Skipping", gene, "- fewer than two (true, predicted) TPM pairs", file=sys.stderr)
        continue

    pc, pp = pearsonr(gene_TPM[:, 0], gene_TPM[:, 1])
    sc, sp = spearmanr(gene_TPM[:, 0], gene_TPM[:, 1])

    plt.plot(gene_TPM[:, 0], gene_TPM[:, 1], 'o', markersize=10)
    plt.xlabel("True TPM", fontsize=20)
    plt.ylabel("Predicted TPM", fontsize=20)
    plt.xlim(-1, 50)
    plt.ylim(-1, 5)
    plt.title(gene+ ": Pearson = "+str(round(pc, 2))+" Spearman = "+str(round(sc, 2)), fontsize=25)
    plt.show()
    plt.close()

In [None]:
# Side-by-side second entries (index 1) of the CXCR4 and TGFBR1 TPM records, paired in dict order
[(cxcr4_entry[1], tgfbr1_entry[1])
 for cxcr4_entry, tgfbr1_entry in zip(TPM["CXCR4"].values(), TPM["TGFBR1"].values())]

In [None]:
# Analysis of MNase-seq and H3K27ac in K562
import pyBigWig
# Chromosome whose full-length signal is extracted from both tracks
chrom = "chr9"

K562_H3K27ac = pyBigWig.open("../Data/MNase_H3K27ac/K562.H3K27ac.ENCFF469JMR.bigWig")
K562_MNase = pyBigWig.open("../Data/MNase_H3K27ac/K562.MNase.hg38.sorted.chromosomes.non_overlapping.ENCFF000VNN.bigWig")

# Each track reports its own chromosome length; missing values (NaN) become 0
h3k27ac_chrom_length = K562_H3K27ac.chroms(chrom)
K562_H3K27ac_values = np.nan_to_num(K562_H3K27ac.values(chrom, 0, h3k27ac_chrom_length, numpy=True))

mnase_chrom_length = K562_MNase.chroms(chrom)
K562_MNase_values = np.nan_to_num(K562_MNase.values(chrom, 0, mnase_chrom_length, numpy=True))

print(K562_H3K27ac_values.shape, K562_MNase_values.shape)

In [None]:
# Per-base scatter of K562 MNase-seq signal against H3K27ac signal on `chrom`.
# The title previously reused `gene`, `pc` and `sc` left over from an unrelated cell;
# the correlations are now computed on the two arrays that are actually plotted.
# These run over a whole chromosome, so this cell can be slow and memory hungry.
k562_pc, k562_pp = pearsonr(K562_MNase_values, K562_H3K27ac_values)
k562_sc, k562_sp = spearmanr(K562_MNase_values, K562_H3K27ac_values)

plt.rcParams["figure.figsize"] = (10, 10)
plt.plot(K562_MNase_values, K562_H3K27ac_values, 'o', markersize=0.1)
plt.xlabel("MNase-seq __", fontsize=20)
plt.ylabel("H3K27ac -log10(p-values)", fontsize=20)
plt.xlim(-1, 20)
plt.ylim(-1, 200)
plt.title("K562 " + chrom + ": Pearson = "+str(round(k562_pc, 2))+" Spearman = "+str(round(k562_sc, 2)), fontsize=25)
plt.show()
plt.close()

In [None]:
# For each MNase level, count the positions at or above that level whose H3K27ac
# signal also reaches each threshold. Printed as: level, threshold, (count,)
for mnase_level in [0, 1, 2, 3, 4, 5, 10, 20]:
    # The selection depends only on the MNase level, so compute it once per level
    h3k27ac_at_level = K562_H3K27ac_values[np.nonzero(K562_MNase_values >= mnase_level)]
    for H3K27ac_threshold in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20]:
        hits = np.nonzero(h3k27ac_at_level >= H3K27ac_threshold)[0]
        print(mnase_level, H3K27ac_threshold, hits.shape )

In [None]:
# TODO: Update with CRISPOR.tsv for both DHS and sat (why does sat CRISPOR have so many missing gRNAs?)
# Pairwise comparison of the three MAGeCK log-fold-change columns, one row of panels
# per screen (top: DHS, bottom: sat), with the Spearman coefficient in each title.
df_HER2_DHS = pd.read_csv("../Data/HER2_MAGeCK/HER2.DHS.hg19.MAGeCK.fold_change", sep="\t")
df_HER2_sat = pd.read_csv("../Data/HER2_MAGeCK/HER2.sat.hg19.MAGeCK.fold_change", sep="\t")

fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(30, 15), sharey=False)

# Axis caption for each LFC column
lfc_axis_labels = {
    'H_L_LFC': "High vs Low Log Fold Change",
    'H_U_LFC': "High vs Unsorted Log Fold Change",
    'U_L_LFC': "Unsorted vs Low Log Fold Change",
}
# (x column, y column) for the three panels of a row, left to right
lfc_column_pairs = [('H_L_LFC', 'U_L_LFC'), ('H_L_LFC', 'H_U_LFC'), ('H_U_LFC', 'U_L_LFC')]

for row_index, (screen_name, screen_df) in enumerate([("DHS", df_HER2_DHS), ("sat", df_HER2_sat)]):
    for col_index, (x_column, y_column) in enumerate(lfc_column_pairs):
        panel = axes[row_index, col_index]
        panel.plot(screen_df[x_column], screen_df[y_column], 'o')
        panel.set_xlim(-5, 5)
        panel.set_ylim(-5, 5)
        panel.set_xlabel(lfc_axis_labels[x_column], size=20)
        panel.set_ylabel(lfc_axis_labels[y_column], size=20)
        panel_spearman = spearmanr(screen_df[x_column], screen_df[y_column])[0]
        panel.set_title(screen_name + ": Spearman = " + str(round( panel_spearman, 3) ), size=30)

plt.show()

In [None]:
# Analyze gRNA scores for HER2 in juxtaposition with MAGeCK LFC and FDR

def parse_df(file_name, tss=37844347):
    """Load a headerless MAGeCK + CRISPOR table and append strand and TSS-distance columns.

    Parameters
    ----------
    file_name : str
        Path of a tab-separated file whose columns follow the order of ``column_names`` below.
    tss : int, optional
        Coordinate subtracted from ``start`` to obtain the distance to the TSS.
        Defaults to 37844347, the value this notebook uses throughout.

    Returns
    -------
    pandas.DataFrame
        The parsed table followed by three appended columns, in this order
        (later cells address them by position):
        ``strand`` ("+" when ``CRISPOR_pos_strand`` ends in "w", otherwise "-"),
        ``d_TSS`` (``start - tss``) and ``log10p1_d_TSS`` (``log10(|start - tss| + 1)``).
    """
    column_names = ["gRNA", "Protospacer_PAM", "CRISPOR_search_window", "CRISPOR_pos_strand",
                    "CRISPOR_Protospacer_PAM", "mitSpecScore", "cfdSpecScore", "offtargetCount",
                    "targetGenomeGeneLocus", "Doench16Score", "Moreno-Mateos-Score", "Out-Of-Frame-Score", "Lindel-Score", "GrafEtAlStatus",
                    "chromosome", "start", "end", "gRNA_2", "Protospacer", "PAM",
                    "H_L_LFC", "H_U_LFC", "U_L_LFC",
                    "H_L_FDR", "H_U_FDR", "U_L_FDR"]
    df_DHS = pd.read_csv(file_name, sep="\t", index_col=False, names=column_names)

    # The last character of the CRISPOR position/strand id decides the strand
    df_DHS["strand"] = ["+" if pos_strand[-1] == "w" else "-" for pos_strand in df_DHS["CRISPOR_pos_strand"]]

    # TODO: Refine taking into account strand information
    df_DHS["d_TSS"] = df_DHS["start"] - tss
    df_DHS["log10p1_d_TSS"] = np.log10(df_DHS["d_TSS"].abs() + 1)

    return df_DHS

# The saturation ("sat") screen is used as the training table and the DHS screen as the testing table.
df_training = parse_df("../Data/HER2_MAGeCK/HER2.sat.hg19.MAGeCK.CRISPOR.tsv")
df_testing = parse_df("../Data/HER2_MAGeCK/HER2.DHS.hg19.MAGeCK.CRISPOR.tsv")


In [None]:
# Testing-set gRNAs whose high-vs-low FDR is below 0.05
df_testing.loc[df_testing['H_L_FDR'] < 0.05]

In [None]:
# FDR of every training gRNA along the genome, in a +/- 50 kb window around the
# coordinate 37844347 (the value used as TSS elsewhere in this notebook), marked in grey.
tss_coordinate = 37844347
window_half_width = 50000

plt.rcParams["figure.figsize"] = (10, 10)
plt.plot(df_training['start'], df_training['H_L_FDR'], 'o')
plt.axvline(x=tss_coordinate, color='grey')
plt.ylabel("FDR q-value")
plt.xlim(tss_coordinate - window_half_width, tss_coordinate + window_half_width)
plt.ylim(-1.1, 1.1)
plt.show()

In [None]:
# Table of gRNAs common to both screens: each row holds one full MAGeCK + CRISPOR
# record twice, first with suffix _1 and then with suffix _2.
paired_record_columns = ["gRNA", "Protospacer_PAM", "CRISPOR_search_window", "CRISPOR_pos_strand",
                         "CRISPOR_Protospacer_PAM", "mitSpecScore", "cfdSpecScore", "offtargetCount",
                         "targetGenomeGeneLocus", "Doench16Score", "Moreno-Mateos-Score", "Out-Of-Frame-Score", "Lindel-Score", "GrafEtAlStatus",
                         "chromosome", "start", "end", "gRNA_2", "Protospacer", "PAM",
                         "H_L_LFC", "H_U_LFC", "U_L_LFC",
                         "H_L_FDR", "H_U_FDR", "U_L_FDR"]
# Produces exactly the previous hand-written names ("gRNA_1", ..., "gRNA_2_1", ...,
# then "gRNA_2", ..., "gRNA_2_2", ...).
df_common = pd.read_csv("../Data/HER2_MAGeCK/T1", sep="\t", index_col=False,
                        names=[name + "_1" for name in paired_record_columns] +
                              [name + "_2" for name in paired_record_columns])

plt.rcParams["figure.figsize"] = (10, 10)
plt.plot(df_common['H_L_FDR_1'], df_common['H_L_FDR_2'], 'o', markersize=5, color="#FF1493")
# The gRNA count is taken from the table instead of being hard-coded in the title
plt.title("Concordance between " + "{:,}".format(len(df_common)) + " gRNAs common to DHS and Saturation screen", fontsize=20)
plt.xlabel("DHS gRNA FDR q-value", fontsize=20)
plt.ylabel("Saturation gRNA FDR q-value", fontsize=20)
plt.xlim(-0.1, 1.1)
plt.ylim(-0.1, 1.1)
plt.show()
plt.close()



In [None]:
# gRNAs whose high-vs-low FDR exceeds 0.8 in both records of the pair
fdr_high_in_both = (df_common['H_L_FDR_1'] > 0.8) & (df_common['H_L_FDR_2'] > 0.8)
df_common.loc[fdr_high_in_both]

In [None]:
# Find pairs of gRNAs whose start coordinates are less than D apart and where exactly
# one gRNA of the pair is significant (H_L_FDR below FDR_threshold).
# Both constants are read as globals by find_close_gRNAs.
D = 100
FDR_threshold = 0.05

# Earlier one-liner that only compared adjacent rows (kept for reference):
# close_gRNAs = [x for x in [(lambda v: v[0] if ( (v[1][2] - v[1][0] < D) and (v[1][1] == v[1][3]) ) else -1)(x) for x in enumerate(list(zip(df_DHS["start"][0:-1], df_DHS["strand"][0:-1], df_DHS["start"][1:], df_DHS["strand"][1:])))] if x >= 0]

def find_close_gRNAs(df_DHS, require_same_strand=False):
    """Find nearby gRNA pairs in which exactly one gRNA is significant.

    Reads the notebook globals ``D`` (maximum start-coordinate distance, exclusive)
    and ``FDR_threshold``. For every row, only the 100 rows before and after it are
    examined.

    Parameters
    ----------
    df_DHS : pandas.DataFrame
        Needs the columns ``start``, ``strand`` and ``H_L_FDR``.
    require_same_strand : bool, optional
        When True, both gRNAs of a pair must have the same strand. The default
        (False) keeps the previous behaviour, where the strand test compared a
        value with itself and therefore never filtered anything.

    Returns
    -------
    close_gRNAs : numpy.ndarray
        Unique (non-significant row, significant row) pairs as ROW POSITIONS,
        suitable for ``df.iloc``. Shape (0, 2) when nothing is found.
    random_gRNAs : list of tuple
        One random pair of row positions drawn for every match encountered
        (before de-duplication).
    """
    # Positional arrays: the returned indices are consumed with .iloc, so the lookups
    # here must be positional too, whatever the DataFrame index looks like.
    starts = df_DHS["start"].values
    strands = df_DHS["strand"].values
    FDRs = df_DHS["H_L_FDR"].values
    row_count = len(df_DHS)

    close_gRNAs = []
    random_gRNAs = []
    for i in tqdm(range(row_count)):
        for j in range(max(0, i-100), min(row_count, i+100)):
            if(i == j):
                continue
            if(require_same_strand and (strands[i] != strands[j])):
                continue

            is_close = np.abs(starts[i] - starts[j]) < D
            exactly_one_significant = ((min(FDRs[i], FDRs[j]) < FDR_threshold) and
                                       (max(FDRs[i], FDRs[j]) >= FDR_threshold))
            if not (is_close and exactly_one_significant):
                continue

            # Background pair drawn uniformly from all rows
            u, v = random.sample(range(row_count), 2)
            random_gRNAs.append((u, v))

            # Store as (higher FDR, lower FDR), i.e. (non-significant, significant)
            if(FDRs[i] > FDRs[j]):
                close_gRNAs.append((i, j))
            else:
                close_gRNAs.append((j, i))

    # Each pair is met from both sides; keep one copy
    if close_gRNAs:
        close_gRNAs = np.unique(close_gRNAs, axis=0)
    else:
        close_gRNAs = np.empty((0, 2), dtype=int)
    print(D, len(close_gRNAs), len(df_DHS))

    return close_gRNAs, random_gRNAs

# Close (non-significant, significant) pairs plus random background pairs for each table.
# The random pairs come from the unseeded `random` module, so they differ between runs.
close_gRNAs_training, random_gRNAs_training = find_close_gRNAs(df_training)
close_gRNAs_testing, random_gRNAs_testing = find_close_gRNAs(df_testing)


In [None]:
# Train logistic regression for binary classification
from sklearn.metrics import average_precision_score
from sklearn.preprocessing import Normalizer
import random

def create_augmented_data(N, features_class_1):
    """Build N synthetic class-0 rows by degrading randomly chosen class-1 rows.

    Each synthetic row is a copy of a random row of ``features_class_1`` whose
    entry at index 3 is lowered by 30 and floored at 0. The input is not modified.

    Returns an array with one synthetic row per draw (empty when N is 0).
    """
    row_count, feature_count = features_class_1.shape[0], features_class_1.shape[1]

    synthetic_rows = []
    for _ in range(N):
        template_row = features_class_1[random.randint(0, row_count - 1), :]

        degraded_row = np.copy(template_row)
        # The sampled range is a single value, so the column is always 3
        column = random.randint(3, 3)
        degraded_row[column] = max(degraded_row[column] - 30, 0)

        synthetic_rows.append(degraded_row)

    return np.asarray(synthetic_rows)


def create_features_for_classification(df_DHS, close_gRNAs, N_augmented):
    """Assemble a labelled feature matrix from close gRNA pairs.

    The feature columns are positions 5, 6, 7, 9 and 10 of ``df_DHS``. Rows taken
    from the first member of each pair get label 0, rows from the second member
    get label 1, and ``N_augmented`` synthetic rows from ``create_augmented_data``
    are appended with label 0. Duplicate rows inside each class are removed first.

    Returns ``(xTrain, yTrain, fp)`` where ``fp`` is the number of class-1 rows
    divided by the total number of rows.
    """
    score_columns = np.r_[5, 6, 7, 9, 10]

    def unique_rows_for(pair_slot):
        # Feature rows of one side of every pair, de-duplicated
        rows = [list(df_DHS.iloc[pair[pair_slot], score_columns]) for pair in close_gRNAs]
        return np.unique(np.asarray(rows), axis=0)

    features_class_0 = unique_rows_for(0)
    features_class_1 = unique_rows_for(1)

    xTrain_augmented = create_augmented_data(N_augmented, features_class_1)

    class_0_count = features_class_0.shape[0]
    class_1_count = features_class_1.shape[0]
    fp = class_1_count / (class_0_count + class_1_count + N_augmented)

    xTrain = np.concatenate([features_class_0, features_class_1, xTrain_augmented], axis=0)
    labels_per_part = [[0] * class_0_count, [1] * class_1_count, [0] * len(xTrain_augmented)]
    yTrain = np.concatenate(labels_per_part, axis=0)

    return xTrain, yTrain, fp

# Build train/test matrices, normalize, subsample negatives, fit and evaluate.
# NOTE(review): this rebinds the notebook-wide name `model`, which other cells also use.
xTrain, yTrain, fpTrain = create_features_for_classification(df_training, close_gRNAs_training, 10)
xTest, yTest, fpTest = create_features_for_classification(df_testing, close_gRNAs_testing, 1)

normalizer = Normalizer().fit(xTrain)
xTrain_all_normalized = normalizer.transform(xTrain)
xTest_normalized = normalizer.transform(xTest)

# Keep roughly 20% of the negatives and all positives. The rows are taken from the
# NORMALIZED matrix so that training and test data go through the same transform
# (previously the un-normalized rows were used for training).
subsampled_xTrain_negatives = [i for i in range(len(xTrain_all_normalized)) if ((yTrain[i] == 0) and
                                                                                (random.uniform(0, 1) < 0.2))]
xTrain_subsampled_negatives = xTrain_all_normalized[subsampled_xTrain_negatives, :]
yTrain_subsampled_negatives = yTrain[subsampled_xTrain_negatives]

subsampled_xTrain_positives = [i for i in range(len(xTrain_all_normalized)) if ((yTrain[i] == 1) and
                                                                                (random.uniform(0, 1) < 2))]
xTrain_subsampled_positives = xTrain_all_normalized[subsampled_xTrain_positives, :]
yTrain_subsampled_positives = yTrain[subsampled_xTrain_positives]

xTrain_normalized = np.concatenate([xTrain_subsampled_positives, xTrain_subsampled_negatives], axis=0)
yTrain_normalized = np.concatenate([yTrain_subsampled_positives, yTrain_subsampled_negatives], axis=0)

model = logreg(random_state=0).fit(xTrain_normalized, yTrain_normalized)

# Names of the five feature columns (positions 5, 6, 7, 9, 10 of the parsed table),
# spelled out here so this cell does not depend on a dict defined in a later cell.
feature_names = ["mitSpecScore", "cfdSpecScore", "offtargetCount", "Doench16Score", "Moreno-Mateos-Score"]
print(list(zip(feature_names, [round(x, 3) for x in model.coef_[0]])))

yTrain_pred = model.predict(xTrain_normalized)
yTest_pred = model.predict(xTest_normalized)

# Average precision needs continuous scores, not thresholded labels
yTrain_score = model.predict_proba(xTrain_normalized)[:, 1]
yTest_score = model.predict_proba(xTest_normalized)[:, 1]

print("Fraction of positives training:",
      round(len(xTrain_subsampled_positives) / (len(xTrain_subsampled_positives) +
                                                len(xTrain_subsampled_negatives)), 3),
      "\tAUPRC training:", round(average_precision_score(yTrain_normalized, yTrain_score), 3))

print("Fraction of positives testing:", round(fpTest, 3), "\tAUPRC testing:",
      round(average_precision_score(yTest, yTest_score), 3))


In [None]:
# Train logistic regression for binary classification but with differences of scores
from sklearn.metrics import average_precision_score
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier as rfc
import random

def create_augmented_data(N, features_class_1):
    """Generate N synthetic (better, worse) feature pairs in both orders.

    For each draw a random row of ``features_class_1`` is copied and one of its
    first five entries is changed: entries 0, 1 and 3 are lowered by 2 (floor 0),
    entry 2 is raised by 2 (cap 1000) and entry 4 is lowered by 20 (floor 0).
    Two rows are emitted per draw: original followed by changed copy (label 1),
    then changed copy followed by original (label 0).

    Returns ``(rows, labels)`` as arrays with 2*N entries each.
    """
    row_count, feature_count = features_class_1.shape[0], features_class_1.shape[1]

    # How the value at each candidate column is changed
    change_by_column = {
        0: lambda value: max(value - 2, 0),
        1: lambda value: max(value - 2, 0),
        2: lambda value: min(value + 2, 1000),
        3: lambda value: max(value - 2, 0),
        4: lambda value: max(value - 20, 0),
    }

    paired_rows = []
    paired_labels = []
    for _ in range(N):
        original_row = features_class_1[random.randint(0, row_count - 1), :]

        changed_row = np.copy(original_row)
        column = random.randint(0, 4)
        changed_row[column] = change_by_column[column](changed_row[column])

        paired_rows.append(list(original_row) + list(changed_row))
        paired_rows.append(list(changed_row) + list(original_row))
        paired_labels.extend([1, 0])

    return np.asarray(paired_rows), np.asarray(paired_labels)


def create_features_for_classification(df_DHS, close_gRNAs, N_augmented):
    """Assemble a pairwise classification set from close gRNA pairs.

    Every close pair contributes two rows built from feature columns 5, 6, 7, 9
    and 10 of ``df_DHS``: second member followed by first member (label 1) and
    the reverse order (label 0). ``N_augmented`` synthetic draws from
    ``create_augmented_data`` are appended (two rows per draw, labels 1 and 0).

    Returns ``(xTrain, yTrain, fp)``, where ``fp`` is the fraction of rows in
    ``yTrain`` labelled 1.
    """
    score_columns = np.r_[5, 6, 7, 9, 10]
    features_class_0 = np.asarray([list(df_DHS.iloc[x[0], score_columns]) for x in close_gRNAs])
    features_class_1 = np.asarray([list(df_DHS.iloc[x[1], score_columns]) for x in close_gRNAs])

    pair_count = features_class_1.shape[0]
    positive_data_points = [list(features_class_1[i, :]) + list(features_class_0[i, :]) for i in range(pair_count)]
    negative_data_points = [list(features_class_0[i, :]) + list(features_class_1[i, :]) for i in range(pair_count)]

    xTrain_augmented, yTrain_augmented = create_augmented_data(N_augmented, features_class_1)

    print(features_class_0.shape, features_class_1.shape, xTrain_augmented.shape)

    xTrain = np.concatenate([negative_data_points, positive_data_points, xTrain_augmented], axis=0)
    yTrain = np.concatenate([[0]*len(negative_data_points), [1]*len(positive_data_points), yTrain_augmented], axis=0)

    # Fraction of positives measured on the labels actually returned. The previous
    # formula counted only the real positive pairs in the numerator while the
    # denominator included the augmented rows, half of which are positive as well.
    fp = float(np.mean(yTrain)) if len(yTrain) else 0.0

    print("x shape: ", xTrain.shape)

    return xTrain, yTrain, fp

# Fit a shallow random forest on the pairwise features and report AUPRC.
# NOTE(review): this rebinds the notebook-wide name `model`, which other cells also use.
xTrain, yTrain, fpTrain = create_features_for_classification(df_training, close_gRNAs_training, 10000)
xTest, yTest, fpTest = create_features_for_classification(df_testing, close_gRNAs_testing, 1)

# Alternatives tried before (the SVC one would need probability=True for predict_proba):
# model = logreg(random_state=0).fit(xTrain, yTrain)
# model = svm.SVC(kernel='rbf', C=1).fit(xTrain, yTrain)
model = rfc(max_depth=2, random_state=0).fit(xTrain, yTrain)
# print(list(zip(gRNA_scores.keys(), [round(x, 3) for x in model.coef_[0]])))
yTrain_pred = model.predict(xTrain)
yTest_pred = model.predict(xTest)

# Average precision needs continuous scores, not thresholded labels
yTrain_score = model.predict_proba(xTrain)[:, 1]
yTest_score = model.predict_proba(xTest)[:, 1]

print("Fraction of positives training:", round(fpTrain, 3), "\tAUPRC training:", round(average_precision_score(yTrain, yTrain_score), 3))
print("Fraction of positives testing:", round(fpTest, 3), "\tAUPRC testing:", round(average_precision_score(yTest, yTest_score), 3))


In [None]:
%matplotlib inline

# For each gRNA score, compare the two members of every close pair (x: first member,
# y: second member) against randomly drawn pairs, with the y=x diagonal for reference.
df_DHS = df_training
close_gRNAs = close_gRNAs_training
random_gRNAs = random_gRNAs_training

gRNA_score_names = {1:"mitSpecScore", 2:"cfdSpecScore", 3:"offtargetCount", 4:"Doench16Score", 5:"Moreno-Mateos-Score", 6:"Out-Of-Frame-Score", 7:"Lindel-Score", 8:"log10p1_d_TSS"}
# Score name -> column POSITION in df_DHS (used with .iloc)
gRNA_scores = {"mitSpecScore":5, "cfdSpecScore":6, "offtargetCount":7, "Doench16Score":9, "Moreno-Mateos-Score":10, "Out-Of-Frame-Score":11, "Lindel-Score":12, "log10p1_d_TSS":28}

axis_limits = {1:(-25, 125), 2:(-25, 125), 3:(-100, 1000), 4:(-25, 125), 5:(-25, 125), 6:(-25, 125), 7:(-25, 125), 8:(-1, 10)}

for gRNA_score_index in range(1, 9):
    gRNA_score_name = gRNA_score_names[gRNA_score_index]
    score_column = gRNA_scores[gRNA_score_name]
    lower_limit, upper_limit = axis_limits[gRNA_score_index]

    x_1 = [float(df_DHS.iloc[x[0], score_column]) for x in close_gRNAs]
    x_2 = [float(df_DHS.iloc[x[1], score_column]) for x in close_gRNAs]

    random_x_1 = [float(df_DHS.iloc[x[0], score_column]) for x in random_gRNAs]
    random_x_2 = [float(df_DHS.iloc[x[1], score_column]) for x in random_gRNAs]

    plt.rcParams["figure.figsize"] = (10, 10)
    # Each artist carries its own label. Passing a bare list of labels to legend()
    # pairs them with artists in matplotlib's internal order, which attached
    # "close gRNAs" / "random gRNAs" to the wrong point clouds.
    plt.scatter(random_x_1, random_x_2, 10, color="darkgrey", label="random gRNAs")
    plt.scatter(x_1, x_2, 2, color="purple", label="close gRNAs")
    plt.plot([lower_limit, upper_limit], [lower_limit, upper_limit], 'o-', color="grey", label="y=x")
    plt.title("Score: " + gRNA_score_name, fontsize=30)
    plt.xlabel("gRNA with non-significant FC", fontsize=20)
    plt.ylabel("gRNA with significant FC", fontsize=20)
    plt.xlim(lower_limit, upper_limit)
    plt.ylim(lower_limit, upper_limit)
    plt.legend(loc="upper left", fontsize=15)
    plt.show()
    plt.close()