# Discriminative Biclustering Algorithm 
Proposed by Odibat & Reddy, 2014 in **Efficient mining of discriminative co-clusters from gene
expression data**

In [23]:
%load_ext pycodestyle_magic

In [31]:
%matplotlib inline
import numpy as np
import math
from matplotlib import pyplot as plt
from sklearn.metrics import consensus_score

In [12]:
# !pip install pycodestyle
# !pip install pycodestyle_magic

### Definition 1 - Coherence Measure H

In [32]:
#%pycodestyle
class CoherenceMeasure(object):
    def __init__(self, data):
        self.data = data
        self.n, self.m = data.shape
        self.xiJ = np.mean(data, axis=1)
        self.xIj = np.mean(data, axis=0)
        self.xIJ = np.mean(data)
        self._H = None

    @property
    def H(self):
        if self._H is None:
            print("Computing coherence measure")
            self._H = self._compute_H()
            print("H value: " + str(self._H))
        return self._H
    
    def _compute_H(self):
        H = 0
        for i in range(self.n):
            for j in range(self.m):
                H += (self.data[i, j] - self.xIj[j] -
                      self.xiJ[i] + self.xIJ)**2
        H *= 1.0/math.fabs(self.m*self.n)
        H = 1 - H
        return H

#### Loading test data for Coherence Measure

In [33]:
import random
data = np.random.random((50, 50))
print(data)

[[ 0.35779111  0.1981447   0.75484529 ...,  0.12677929  0.80098934
   0.63717344]
 [ 0.33662312  0.82382991  0.20018157 ...,  0.05626579  0.05358202
   0.5585882 ]
 [ 0.35426033  0.32118346  0.14860435 ...,  0.68286624  0.32424369
   0.18879278]
 ..., 
 [ 0.22720169  0.17157908  0.34526332 ...,  0.37069878  0.2894646
   0.06601885]
 [ 0.4601971   0.91401153  0.64542208 ...,  0.15425447  0.62457906
   0.81089089]
 [ 0.06262623  0.89912391  0.8359954  ...,  0.70600938  0.30561598
   0.01092593]]


In [34]:
# Testing Coherence
coherence_measure = CoherenceMeasure(data)
print("H = " + str(coherence_measure.H))

Computing coherence measure
H value: 0.92075954071
H = 0.92075954071


### Definition 2 - Positive and negative correlations

In [35]:
# input: rows x and y and J columns
# output: positive and negative correlations


class PositiveNegativeCorrelation(object):
    def __init__(self, x, y, J):
        self._x = x
        self._y = y
        self._J = J
        self._x_mean = np.mean(x)
        self._y_mean = np.mean(y)
        self._H_pos = None
        self._H_neg = None

    @property
    def H_pos(self):
        if self._H_pos is None:
            # print("Computing H positive...")
            self._H_pos = self._compute_H_pos()
            # print("H positive value: " + str(self._H_pos))
        return self._H_pos

    @property
    def H_neg(self):
        if self._H_neg is None:
            # print("Computing H negative...")
            self._H_neg = self._compute_H_neg()
            # print("H negative value: " + str(self._H_neg))
        return self._H_neg

    def _compute_H_pos(self):
        H_pos = 0
        for j in range(self._J):
            aux = (((self._x[j] - self._x_mean) -
                    (self._y[j] - self._y_mean))/2.0)**2
            H_pos += aux
        H_pos *= 1.0/math.fabs(self._J)
        H_pos = 1 - H_pos
        return H_pos

    def _compute_H_neg(self):
        H_neg = 0
        for j in range(self._J):
            aux = (((self._x[j] - self._x_mean) +
                    (self._y[j] - self._y_mean))/2.0)**2
            H_neg += aux
        H_neg *= 1.0/math.fabs(self._J)
        H_neg = 1 - H_neg
        return H_neg

#### Loading test data for positive and negative correlation

In [36]:
x = np.random.random((5))
y = np.random.random((5))
J = 5
print("Row x " + str(x))
print("Row y " + str(y))
print("J value " + str(J))

Row x [ 0.26297461  0.31455552  0.84407633  0.22063837  0.02034884]
Row y [ 0.2444782   0.77392845  0.88506412  0.46420098  0.08163258]
J value 5


In [37]:
# Testing correlation
positive_negative_correlation = PositiveNegativeCorrelation(x,y,J)
print("H positive " + str(positive_negative_correlation.H_pos))
print()
print("H negative " + str(positive_negative_correlation.H_neg))

H positive 0.992382934068

H negative 0.923495174778


### Definition 3 - Pair-based coherence

In [38]:
#%%pycodestyle

# input: co-cluster X of I rows and J columns
# output: paired-based coherence


class PairBasedCoherence(object):
    def __init__(self, X):
        self._X = X
        self._I, self._J = X.shape
        self._HP = None

    @property
    def HP(self):
        if self._HP is None:
            print("Calculating Pair based coherence..")
            self._HP = self._compute_HP()
            print("Paired based coherence value: " + str(self._HP))
        return self._HP

    def _compute_HP(self):
        HP = 0
        for i in range(self._I):
            for j in range(i+1, self._I):
                x = self._X[i]
                y = self._X[j]
                correlation = PositiveNegativeCorrelation(x, y,self._J)
                H0 = correlation.H_pos + correlation.H_neg
                HP += H0
        HP *= math.fabs(2.0)/(math.fabs(self._I)*(math.fabs(self._I)-1))
        return HP

#### Loading test data for pair-based coherence

In [39]:
data = np.random.random((50, 50))
print(data)

[[ 0.15199144  0.63008937  0.90358693 ...,  0.29760182  0.65744572
   0.40240051]
 [ 0.7794862   0.83203971  0.08689127 ...,  0.8315592   0.71293676
   0.02716718]
 [ 0.64971567  0.48254974  0.76397678 ...,  0.69962595  0.83481372
   0.51719683]
 ..., 
 [ 0.49933158  0.87221137  0.64874624 ...,  0.22253926  0.74915835
   0.40616362]
 [ 0.30644487  0.69142799  0.31984516 ...,  0.21081635  0.42060735
   0.21811896]
 [ 0.85687281  0.39885066  0.56610978 ...,  0.39736494  0.49379715
   0.69550967]]


In [40]:
pair_based_coherence = PairBasedCoherence(data)
print("H value " + str(pair_based_coherence.HP))

Calculating Pair based coherence..
Paired based coherence value: 1.91560683438
H value 1.91560683438


### Coherence for a new z in in X

In [41]:
%%latex
Define H for a new term 'z' in X
$$
H_{1}(I,J,X,z) =H_{0}(I,J,X) \cdot \frac{(I-1)}{(I+1)} + \frac{|2|}{(I)(I+1)} \cdot \sum_{x \epsilon X} {h(x,z,J)}
$$

<IPython.core.display.Latex object>

### RAPOOC

This algorithm is proposed to efficiently extract the most coherent and large co-clusters that area arbitrarily positioned in the data matrix.



#### Algorithm 1 RAPOOC (D,k,l,K)
Input: Data matrix D, number of row clusters (k), number of column clusters (l), number of optimized co-clusters (K)

Output: A set of K co-clusters({X})

In [47]:
%%pycodestyle

class Rapooc(object):
    def __init__(self, D, k, l, K):
        self._D = D
        self._k = k
        self._l = l
        self._K = K
        self._rho = np.arange(D.shape[0])
        self._gamma = np.arange(D.shape[1])

    def initialize(self):
        while (i<k or k <l):
            if i<k:
                

50
