# Discriminative Biclustering Algorithm 
Proposed by Odibat & Reddy, 2014 in **Efficient mining of discriminative co-clusters from gene
expression data**

In [6]:
%load_ext pycodestyle_magic

In [33]:
%matplotlib inline
import numpy as np
import math
from matplotlib import pyplot as plt
from sklearn.metrics import consensus_score

In [12]:
# !pip install pycodestyle
# !pip install pycodestyle_magic

### Definition 1 - Coherence Measure H

In [108]:
#%pycodestyle
class CoherenceMeasure(object):
    def __init__(self, data):
        self.data = data
        self.n, self.m = data.shape
        self.xiJ = np.mean(data, axis=1)
        self.xIj = np.mean(data, axis=0)
        self.xIJ = np.mean(data)
        self._H = None

    @property
    def H(self):
        if self._H is None:
            print("Computing coherence measure")
            self._H = self._compute_H()
            print("H value: " + str(self._H))
        return self._H
    
    def _compute_H(self):
        H = 0
        for i in range(self.n):
            for j in range(self.m):
                H += (self.data[i, j] - self.xIj[j] -
                      self.xiJ[i] + self.xIJ)**2
        H *= 1.0/math.fabs(self.m*self.n)
        H = 1 - H
        return H

#### Loading test data for Coherence Measure

In [109]:
import random
data = np.random.random((50, 50))
print(data)

[[  6.88227454e-02   8.69286523e-01   9.39309423e-01 ...,   4.01059206e-01
    8.80684981e-01   2.12975344e-01]
 [  5.88589952e-01   1.82084080e-02   2.77328720e-01 ...,   3.03886824e-01
    7.42037713e-01   5.81847395e-01]
 [  1.77672159e-01   3.55086191e-02   2.57813268e-01 ...,   6.10602703e-04
    5.69565356e-01   7.96284668e-01]
 ..., 
 [  2.68156554e-01   1.04229194e-01   8.51128579e-01 ...,   2.54590950e-01
    9.50749186e-01   5.88278111e-01]
 [  8.86663497e-01   8.82423383e-01   4.03006035e-01 ...,   6.45916512e-01
    7.79249400e-01   3.31922164e-01]
 [  2.99875037e-01   9.01604365e-01   2.63093259e-01 ...,   9.28822904e-01
    3.34648509e-01   8.63327902e-02]]


In [110]:
# Testing Coherence
coherence_measure = CoherenceMeasure(data)
print("H = " + str(coherence_measure.H))

Computing coherence measure
H value: 0.919827674087
H = 0.919827674087


### Definition 2 - Positive and negative correlations

In [166]:
# input: rows x and y and J columns
# output: positive and negative correlations


class PositiveNegativeCorrelation(object):
    def __init__(self, x, y, J):
        self._x = x
        self._y = y
        self._J = J
        self._x_mean = np.mean(x)
        self._y_mean = np.mean(y)
        self._H_pos = None
        self._H_neg = None

    @property
    def H_pos(self):
        if self._H_pos is None:
            # print("Computing H positive...")
            self._H_pos = self._compute_H_pos()
            # print("H positive value: " + str(self._H_pos))
        return self._H_pos

    @property
    def H_neg(self):
        if self._H_neg is None:
            # print("Computing H negative...")
            self._H_neg = self._compute_H_neg()
            # print("H negative value: " + str(self._H_neg))
        return self._H_neg

    def _compute_H_pos(self):
        H_pos = 0
        for j in range(self._J):
            aux = (((self._x[j] - self._x_mean) -
                    (self._y[j] - self._y_mean))/2.0)**2
            H_pos += aux
        H_pos *= 1.0/math.fabs(self._J)
        H_pos = 1 - H_pos
        return H_pos

    def _compute_H_neg(self):
        H_neg = 0
        for j in range(self._J):
            aux = (((self._x[j] - self._x_mean) +
                    (self._y[j] - self._y_mean))/2.0)**2
            H_neg += aux
        H_neg *= 1.0/math.fabs(self._J)
        H_neg = 1 - H_neg
        return H_neg

#### Loading test data for positive and negative correlation

In [167]:
x = np.random.random((5))
y = np.random.random((5))
J = 5
print("Row x " + str(x))
print("Row y " + str(y))
print("J value " + str(J))

Row x [ 0.01056907  0.5596486   0.94952226  0.93760255  0.51601939]
Row y [ 0.58608058  0.35197964  0.54166782  0.65102372  0.85103602]
J value 5


In [168]:
# Testing correlation
positive_negative_correlation = PositiveNegativeCorrelation(x,y,J)
print("H positive " + str(positive_negative_correlation.H_pos))
print()
print("H negative " + str(positive_negative_correlation.H_neg))

H positive 0.963248274688

H negative 0.964474141807


### Definition 3 - Pair-based coherence

In [169]:
#%%pycodestyle

# input: co-cluster X of I rows and J columns
# output: paired-based coherence


class PairBasedCoherence(object):
    def __init__(self, X):
        self._X = X
        self._I, self._J = X.shape
        self._HP = None

    @property
    def HP(self):
        if self._HP is None:
            print("Calculating Pair based coherence..")
            self._HP = self._compute_HP()
            print("Paired based coherence value: " + str(self._HP))
        return self._HP

    def _compute_HP(self):
        HP = 0
        for i in range(self._I):
            for j in range(i+1, self._I):
                x = self._X[i]
                y = self._X[j]
                correlation = PositiveNegativeCorrelation(x, y,self._J)
                H0 = correlation.H_pos + correlation.H_neg
                HP += H0
        HP *= math.fabs(2.0)/(math.fabs(self._I)*(math.fabs(self._I)-1))
        return HP

#### Loading test data for pair-based coherence

In [170]:
data = np.random.random((50, 50))
print(data)

[[ 0.61400046  0.11124706  0.6409931  ...,  0.35719981  0.74372135
   0.28739854]
 [ 0.74982472  0.06211092  0.08651113 ...,  0.3951018   0.58673882
   0.86795709]
 [ 0.60418684  0.04275803  0.55372429 ...,  0.65232087  0.90273129
   0.52181769]
 ..., 
 [ 0.61446937  0.942022    0.67920245 ...,  0.17773549  0.04377659
   0.01637212]
 [ 0.60518174  0.84742634  0.58732348 ...,  0.53257446  0.02864755
   0.51165736]
 [ 0.61770262  0.81762325  0.10826602 ...,  0.08613834  0.24666771
   0.8361224 ]]


In [171]:
pair_based_coherence = PairBasedCoherence(data)
print("H value " + str(pair_based_coherence.HP))

Calculating Pair based coherence..
Paired based coherence value: 1.91810456284
H value 1.91810456284
