This notebook contains code for creating heatmaps.

In [1]:
import pickle
import matplotlib.pyplot as plt

# %matplotlib inline
# plt.rcParams['figure.figsize'] = (15.0, 8.0)

In [2]:
# heatmap plotting functions

import scipy.cluster.hierarchy as sch
import scipy.spatial.distance as dist
import matplotlib
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt


def RedBlackGreen():
    """
    Builds a green -> (near) black -> red segmented colormap.

    Each channel is given as (x, value_below_x, value_above_x) anchor points:
    green fades from 1.0 at the low end to 0.1 just below the midpoint and is
    0 above it; red is 0 below the midpoint, jumps to 0.1 at the midpoint and
    rises to 1.0 at the high end; blue is always 0.

    Returns a matplotlib LinearSegmentedColormap named 'my_colormap' with
    256 levels.
    """
    red_ramp = ((0.0, 0.0, 0.0),
                (0.5, 0.0, 0.1),
                (1.0, 1.0, 1.0))
    green_ramp = ((0.0, 0.0, 1.0),
                  (0.5, 0.1, 0.0),
                  (1.0, 0.0, 0.0))
    blue_ramp = ((0.0, 0.0, 0.0),
                 (1.0, 0.0, 0.0))

    channels = {'red': red_ramp, 'green': green_ramp, 'blue': blue_ramp}
    return matplotlib.colors.LinearSegmentedColormap('my_colormap', channels, 256)


def vals_norm(x):
    """
    Symmetric colour normalization for plotting.

    Takes the largest absolute extreme of ``x`` (max of ``x.max()`` and
    ``abs(x.min())``) and returns a ``Normalize`` spanning minus half to plus
    half of that value, so the range is centred on zero and values beyond
    half of the extreme fall outside the [vmin, vmax] interval.
    """
    bound = max([x.max(), abs(x.min())])
    # multiply by -1 (rather than unary minus) to mirror the bound
    mirrored = bound * -1
    return mpl.colors.Normalize(mirrored / 2, bound / 2)

def get_order(my_mat):
    """
    Returns the leaf orderings of the first and second dimension after
    hierarchical clustering.

    Rows and columns are clustered independently (Euclidean distances,
    complete linkage); the result is the tuple
    ``(row_leaf_order, column_leaf_order)`` of index arrays.
    """
    def _leaf_order(observations):
        # condensed pairwise distances -> linkage tree -> left-to-right leaves
        pairwise = dist.pdist(observations, 'euclidean')
        tree = sch.linkage(pairwise, method='complete')
        return sch.leaves_list(tree)

    dim1_order = _leaf_order(my_mat)
    dim2_order = _leaf_order(my_mat.T)
    return dim1_order, dim2_order


def reorder_matrix( my_mat, dim1_rnk, dim2_rnk):
    """
    Reorders a matrix by the given indices.

    Rows are picked in the order given by ``dim1_rnk`` first, then the
    columns of that result are picked in the order given by ``dim2_rnk``.
    """
    rows_reordered = my_mat[dim1_rnk, :]
    fully_reordered = rows_reordered[:, dim2_rnk]
    return fully_reordered


def heat_map(in_mat, save_file, title='empty', xy_axis= ['Genes', 'Proteins'], cmap='gray',
             norm=None, dpi=1000):
    """
    Draws a pseudocolor heatmap of a matrix and saves it as a PNG file.

    in_mat    -- matrix-like data; converted with ``np.array`` before plotting
    save_file -- path the PNG is written to
    title     -- figure title
    xy_axis   -- two labels: [x-axis label, y-axis label]
    cmap      -- colormap passed to ``pcolor``
    norm      -- colour normalization; when falsy, ``vals_norm`` of the data is used
    dpi       -- resolution of the saved image

    The figure is not closed after saving.
    """
    values = np.array(in_mat)
    if not norm:
        norm = vals_norm(values)

    fig, ax = plt.subplots()
    mesh = ax.pcolor(values, cmap=cmap, norm=norm)
    ax.set_title(title)
    x_label, y_label = xy_axis[0], xy_axis[1]
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    fig.colorbar(mesh, ax=ax)
    fig.savefig(save_file, format="png", dpi=dpi)


# Heatmap plot

In [3]:
# Data selection
# NOTE: pickle can execute arbitrary code on load; only open trusted files.
# The context manager makes sure the file handle is closed after loading.
with open('../../matrices/pickles/cr_z-score_raw.pkl', 'rb') as pkl_file:
    prot_rna = pickle.load(pkl_file)

# drop rows that contain no non-zero value at all (no catrapid prediction)
selection = prot_rna.any(axis=1)
prot_rna = prot_rna[selection,:]


In [4]:
# plot graphs: heatmap of the filtered matrix with the green/black/red colormap
heat_map(prot_rna, './figures/interactions.png', cmap=RedBlackGreen())

In [4]:
# threshold check: binarise the matrix (1 where the value exceeds 2.5, else 0).
# Note that this rebinds prot_rna to the 0/1 matrix.
prot_rna = (prot_rna > 2.5).astype(int)
c_vals = vals_norm(prot_rna)  # computed but not passed to the plot below
xnorm = mpl.colors.Normalize(0,1)  # fixed 0..1 colour range for the binary data

heat_map(prot_rna, './figures/interactions_gt_2_5.png', norm=xnorm, title='interactions greater than 2.5')

# Reconstruction Plot

In [3]:
# Plots HEATMAP: 1/4 of the points (After reconstruction)


from skfusion import fusion
from math import ceil
import sys
sys.path.append('..')
from cross_validation import *

def k_select(x):
    """
    Returns 5% of ``x`` rounded up, but never less than 2.
    """
    five_percent = ceil(x * 0.05)
    return max((five_percent, 2))

def dfmf_n_reconstruct(relationships, random_state=1):
    """
    Given relationships, fuses them with DFMF and reconstructs the matrix
    of the relation named 'catrapid'.

    relationships -- relations used to build the fusion graph; one of them
                     must be named 'catrapid'
    random_state  -- seed passed to ``fusion.Dfmf``

    Returns the completed (reconstructed) 'catrapid' relation matrix.
    """
    # Build the graph from the argument; the previous version read the
    # module-level name `relations` and ignored its own parameter.
    fusion_graph = fusion.FusionGraph(relationships)

    fuser = fusion.Dfmf(random_state=random_state)
    fuser.fuse(fusion_graph)
    return fuser.complete(fusion_graph['catrapid'])


def cross_validate_heatmaps(relations, savefile, cv_fold=10, th=2.5, random_state=1, dpi=600):
    """
    Saves one reconstruction heatmap per cross-validation fold.

    For every fold produced by ``matrix_cv_setup`` the data of the first
    relation is replaced by the fold matrix, the matrix is reconstructed with
    ``dfmf_n_reconstruct`` and the top-left quarter (first 1/4 of the rows
    and of the columns) of the reconstruction is written to
    ``./figures/reconstruction/<savefile><fold number>.png``.

    relations    -- list of relations; ``relations[0].data`` is the matrix
                    that is cross-validated
    savefile     -- file name prefix of the generated PNG files
    cv_fold      -- number of folds (passed as ``n_fold``)
    th           -- unused; kept so existing calls stay valid
    random_state -- seed forwarded to ``dfmf_n_reconstruct``
    dpi          -- resolution of the saved heatmaps

    Returns nothing; no AUC or deviation scores are computed here. The
    original data of ``relations[0]`` is put back when the function exits.
    """
    target = relations[0]
    orig_matrix = target.data
    # heat_map(orig_matrix[:int(orig_matrix.shape[0]/4),:int(orig_matrix.shape[1]/4)],
    #          './figures/reconstruction/interactions_quorter.png', cmap=RedBlackGreen(), dpi=600)

    folds = matrix_cv_setup(orig_matrix, n_fold=cv_fold, alternative=0, by_row=True)
    try:
        for n, (cv_matrix, _elements) in enumerate(folds, start=1):
            target.data = cv_matrix
            reconst_mat = dfmf_n_reconstruct(relations, random_state=random_state)
            quarter_rows = reconst_mat.shape[0] // 4
            quarter_cols = reconst_mat.shape[1] // 4
            # honour the dpi argument (it used to be hard-coded to 600)
            heat_map(reconst_mat[:quarter_rows, :quarter_cols],
                     './figures/reconstruction/{}{}.png'.format(savefile, n),
                     cmap=RedBlackGreen(), dpi=dpi)
    finally:
        # do not leave the caller's relation pointing at the last fold matrix
        target.data = orig_matrix

# Reload the raw matrix (earlier cells may have rebound prot_rna).
# NOTE: pickle can execute arbitrary code on load; only open trusted files.
with open('../../matrices/pickles/cr_z-score_raw.pkl', 'rb') as pkl_file:
    prot_rna = pickle.load(pkl_file)
# keep only rows that contain at least one non-zero value
selection = prot_rna.any(axis=1)
prot_rna = prot_rna[selection,:]

# object types for the two dimensions of prot_rna; rank is 5% of the size (min 2)
prot = fusion.ObjectType('AAprot', k_select(prot_rna.shape[0]))
genes = fusion.ObjectType('HSgenes', k_select(prot_rna.shape[1]))

with open('../../matrices/pickles/MA_exp.pkl', 'rb') as pkl_file:
    rna_expression = pickle.load(pkl_file)
# turn the expression values into a column and repeat that column five times
# (presumably so the relation has more than one column -- TODO confirm)
rna_expression = rna_expression.reshape(rna_expression.shape[0],1)
rna_expression = np.hstack((rna_expression, rna_expression, rna_expression, rna_expression, rna_expression))
expr = fusion.ObjectType('HSexpression', k_select(rna_expression.shape[1]))


In [None]:
# Cross-validated reconstruction heatmaps using only the 'catrapid' relation
# (prot_rna between the prot and genes object types).
relations = [fusion.Relation(prot_rna, prot, genes, name='catrapid')]
cross_validate_heatmaps(relations, savefile='interactions_quorter_CVreconstruction')


In [4]:
# Same cross-validated reconstruction, now with a second relation
# (rna_expression between the genes and expr object types) added to the list.
relations = [fusion.Relation(prot_rna, prot, genes, name='catrapid'),
             fusion.Relation(rna_expression, genes, expr)]
cross_validate_heatmaps(relations, savefile='interactions_quorter_expr_CVreconstruction')



KeyboardInterrupt: 