# Discriminative Biclustering Algorithm 
Proposed by Odibat & Reddy, 2014 in **Efficient mining of discriminative co-clusters from gene
expression data**

In [23]:
%load_ext pycodestyle_magic

In [3]:
%matplotlib inline
import numpy as np
import math
from matplotlib import pyplot as plt
from sklearn.metrics import consensus_score

In [12]:
# !pip install pycodestyle
# !pip install pycodestyle_magic

### Definition 1 - Coherence Measure H

In [4]:
#%pycodestyle
class CoherenceMeasure(object):
    """Coherence measure H of a data matrix (Definition 1).

    H is one minus the mean squared residue of the matrix, where the residue
    of entry (i, j) is the entry minus its column mean, minus its row mean,
    plus the mean of the whole matrix.

    Parameters
    ----------
    data : array-like of shape (n, m)
        Two-dimensional numeric matrix. Nested lists are accepted and are
        converted with ``np.asarray``.

    Raises
    ------
    ValueError
        If ``data`` is not two-dimensional or contains no entries.
    """

    def __init__(self, data):
        data = np.asarray(data)
        if data.ndim != 2:
            raise ValueError("data must be a two-dimensional matrix")
        if data.size == 0:
            # Fail early: the means below would otherwise silently become NaN.
            raise ValueError("data must contain at least one entry")
        self.data = data
        self.n, self.m = data.shape
        self.xiJ = np.mean(data, axis=1)  # mean of each row
        self.xIj = np.mean(data, axis=0)  # mean of each column
        self.xIJ = np.mean(data)          # mean of the whole matrix
        self._H = None                    # cache filled on first access to H

    @property
    def H(self):
        """Coherence value; computed (and reported) once, then cached."""
        if self._H is None:
            print("Computing coherence measure")
            self._H = self._compute_H()
            print("H value: " + str(self._H))
        return self._H

    def _compute_H(self):
        """Return one minus the mean squared residue of ``self.data``."""
        # Broadcasting builds the full residue matrix in one step instead of
        # visiting every (i, j) pair in a Python loop.
        residue = (self.data
                   - self.xIj[np.newaxis, :]
                   - self.xiJ[:, np.newaxis]
                   + self.xIJ)
        return 1 - np.mean(residue ** 2)

#### Loading test data for Coherence Measure

In [5]:
import random

# Uniform samples from [0, 1) arranged as a 50x50 matrix, printed for inspection.
data = np.random.random(size=(50, 50))
print(data)

[[ 0.04914531  0.9572028   0.94535936 ...,  0.85891268  0.41304448
   0.93116258]
 [ 0.84789067  0.96371668  0.97077777 ...,  0.13808962  0.79932262
   0.97079347]
 [ 0.18173049  0.31646238  0.25384354 ...,  0.41142627  0.56415205
   0.53379538]
 ..., 
 [ 0.29434825  0.62732374  0.76449198 ...,  0.11581151  0.83476087
   0.02184251]
 [ 0.8341904   0.26331017  0.12202982 ...,  0.69667083  0.05969135
   0.20069536]
 [ 0.16279857  0.41871593  0.5883461  ...,  0.35590374  0.84861866
   0.8667552 ]]


In [6]:
# Build the measure for the sample matrix and report its coherence value.
coherence_measure = CoherenceMeasure(data)
h_value = coherence_measure.H
print("H = " + str(h_value))

Computing coherence measure
H value: 0.919446373795
H = 0.919446373795


### Definition 2 - Positive and negative correlations

In [7]:
# input: rows x and y and J columns
# output: positive and negative correlations


class PositiveNegativeCorrelation(object):
    """Positive and negative correlation of two rows (Definition 2).

    Both scores are computed over the first ``J`` columns of the rows. Each
    row is centered on its mean over those same ``J`` columns, so the result
    is consistent when ``J`` is smaller than the row length (for
    ``J == len(x)`` this equals the mean of the whole row).

    Parameters
    ----------
    x, y : array-like
        One-dimensional numeric rows with at least ``J`` entries each.
    J : int
        Number of leading columns to compare.

    Raises
    ------
    ValueError
        If ``J`` is smaller than 1.
    IndexError
        If ``J`` is larger than the length of ``x`` or ``y``.
    """

    def __init__(self, x, y, J):
        x = np.asarray(x)
        y = np.asarray(y)
        if J < 1:
            raise ValueError("J must be a positive number of columns")
        if J > len(x) or J > len(y):
            raise IndexError("J exceeds the number of columns in x or y")
        self._x = x
        self._y = y
        self._J = J
        # Means are taken over the J compared columns only, matching the
        # range of the sums in H_pos / H_neg.
        self._x_mean = np.mean(x[:J])
        self._y_mean = np.mean(y[:J])
        self._H_pos = None  # cache for H_pos
        self._H_neg = None  # cache for H_neg

    @property
    def H_pos(self):
        """Positive correlation score; computed on first access, then cached."""
        if self._H_pos is None:
            self._H_pos = self._compute_H_pos()
        return self._H_pos

    @property
    def H_neg(self):
        """Negative correlation score; computed on first access, then cached."""
        if self._H_neg is None:
            self._H_neg = self._compute_H_neg()
        return self._H_neg

    def _centered(self):
        """Return the first J entries of each row minus that row's mean."""
        return (self._x[:self._J] - self._x_mean,
                self._y[:self._J] - self._y_mean)

    def _compute_H_pos(self):
        """Return 1 - mean(((dx - dy) / 2)^2) over the J columns."""
        dx, dy = self._centered()
        return 1 - np.mean(((dx - dy) / 2.0) ** 2)

    def _compute_H_neg(self):
        """Return 1 - mean(((dx + dy) / 2)^2) over the J columns."""
        dx, dy = self._centered()
        return 1 - np.mean(((dx + dy) / 2.0) ** 2)

#### Loading test data for positive and negative correlation

In [8]:
# Number of columns to compare, followed by two random rows of that length.
J = 5
x = np.random.random(J)
y = np.random.random(J)
print("Row x " + str(x))
print("Row y " + str(y))
print("J value " + str(J))

Row x [ 0.84186975  0.19487128  0.94875558  0.60870315  0.42080347]
Row y [ 0.608898    0.64420469  0.07367384  0.89090051  0.79754973]
J value 5


In [9]:
# Evaluate both correlation scores for rows x and y over J columns.
positive_negative_correlation = PositiveNegativeCorrelation(x, y, J)
print("H positive " + str(positive_negative_correlation.H_pos))
print()
print("H negative " + str(positive_negative_correlation.H_neg))

H positive 0.93782412588

H negative 0.984270082487


### Definition 3 - Pair-based coherence

In [10]:
#%%pycodestyle

# input: co-cluster X of I rows and J columns
# output: pair-based coherence


class PairBasedCoherence(object):
    """Pair-based coherence HP of a co-cluster (Definition 3).

    Every unordered pair of rows contributes ``H_pos + H_neg`` as computed by
    ``PositiveNegativeCorrelation`` over all columns; the contributions are
    summed and scaled by ``2 / (I * (I - 1))``, i.e. averaged over the pairs.

    Parameters
    ----------
    X : array-like of shape (I, J)
        Co-cluster with I rows and J columns. Nested lists are accepted and
        are converted with ``np.asarray``.

    Raises
    ------
    ValueError
        From the constructor if ``X`` is not two-dimensional; from ``HP`` if
        ``X`` has fewer than two rows (no pair exists to score).
    """

    def __init__(self, X):
        X = np.asarray(X)
        if X.ndim != 2:
            raise ValueError("X must be a two-dimensional matrix")
        self._X = X
        self._I, self._J = X.shape
        self._HP = None  # cache filled on first access to HP

    @property
    def HP(self):
        """Pair-based coherence; computed (and reported) once, then cached."""
        if self._HP is None:
            print("Calculating Pair based coherence..")
            self._HP = self._compute_HP()
            print("Paired based coherence value: " + str(self._HP))
        return self._HP

    def _compute_HP(self):
        """Return the average of ``H_pos + H_neg`` over all row pairs."""
        if self._I < 2:
            # The scaling factor divides by I * (I - 1); without this guard a
            # single-row input fails with an unexplained ZeroDivisionError.
            raise ValueError("pair-based coherence needs at least two rows")
        total = 0
        for first in range(self._I - 1):
            for second in range(first + 1, self._I):
                correlation = PositiveNegativeCorrelation(
                    self._X[first], self._X[second], self._J)
                # NOTE(review): the pair term is the SUM of both scores, so HP
                # can exceed 1 — confirm against the paper whether the larger
                # of the two scores is intended instead.
                total += correlation.H_pos + correlation.H_neg
        return total * (2.0 / (self._I * (self._I - 1)))

#### Loading test data for pair-based coherence

In [11]:
# Fresh 50x50 matrix of uniform samples from [0, 1), printed for inspection.
data = np.random.random(size=(50, 50))
print(data)

[[ 0.67242523  0.62890579  0.43041429 ...,  0.79808678  0.92199503
   0.89328871]
 [ 0.16262588  0.4654826   0.92642096 ...,  0.18213544  0.95639     0.24270124]
 [ 0.38567372  0.00384511  0.76428096 ...,  0.13829024  0.14000618
   0.48283193]
 ..., 
 [ 0.04142936  0.45547228  0.46725902 ...,  0.20213516  0.77450168
   0.14050489]
 [ 0.15023154  0.89688193  0.19546222 ...,  0.71165114  0.24031153
   0.09355635]
 [ 0.77956938  0.37543933  0.38609572 ...,  0.37450869  0.57388332
   0.86744984]]


In [12]:
# Score the whole sample matrix as a single co-cluster and report the result.
pair_based_coherence = PairBasedCoherence(data)
hp_value = pair_based_coherence.HP
print("H value " + str(hp_value))

Calculating Pair based coherence..
Paired based coherence value: 1.91838249243
H value 1.91838249243


### Coherence for a new z in X

In [22]:
%%latex
Define H for a new term 'z' in X
$$
H_{1}(I,J,X,z) = H_{0}(I,J,X) \cdot \frac{I-1}{I+1} + \frac{2}{I(I+1)} \cdot \sum_{x \in X} {h(x,z,J)}
$$

<IPython.core.display.Latex object>

### RAPOOC

This algorithm is proposed to efficiently extract the most coherent and large co-clusters that are arbitrarily positioned in the data matrix.



#### Algorithm 1 RAPOOC (D,k,l,K)
Input: Data matrix D, number of row clusters (k), number of column clusters (l), number of optimized co-clusters (K)

Output: A set of K co-clusters({X})

In [25]:
%%pycodestyle
class Rapooc(object):
    def __init__(self,)

UsageError: %%pycodestyle is a cell magic, but the cell body is empty.
