# Discrminitive Biclustering

In [4]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import math

from mpl_toolkits.mplot3d import Axes3D

In [5]:
def three_dimensional_correlation(x, y):
    
    _x = x
    _y = y
    mean_point_x = np.mean(_x, axis=0)
    mean_point_y = np.mean(_y, axis=0)
    _J = len(_x)
    
    acc = 0
    
    for j in range(0,_J):
        
        x_axis_diff = _x[j][0] - mean_point_x[0]
        y_axis_diff = _x[j][1] - mean_point_x[1]
        
        x_distance = math.hypot(x_axis_diff, y_axis_diff)
        
        x_axis_diff = _y[j][0] - mean_point_y[0]
        y_axis_diff = _y[j][1] - mean_point_y[1]
        
        y_distance = math.hypot(x_axis_diff, y_axis_diff)
        
        diff_term = (x_distance - y_distance) / 2.0
        
        diff_term = diff_term ** 2.0
        
        acc += diff_term
        
    return (1-acc/abs(_J))

In [41]:
def three_dimensional_pair_coherence(X):
    
    _I = len(X)
    HP = 0
    
    for i in range(0, len(X)):
    
        for j in range(i+1, len(X)):
            
            if i == j:
                
                break
                
            x = X[i]
            y = X[j]
            correlation = three_dimensional_correlation(x,y)
            HP += correlation
            
    HP *= math.fabs(2.0)/(math.fabs(_I)*(math.fabs(_I)-1.0)) if _I > 1 else 0
    
    return HP

In [42]:
_x = np.array([[0.0,0.0],[0.25,0.25],[0.5,0.5],[0.25,0.25],[0.0,0.0],[-0.25,-0.25]])
test_vectors = np.array([_x] * 25)
test_data = []

for i in range(0,100):
    
    test_data.append(np.random.random((len(_x),2)) * 2 - 1)

final_test = np.concatenate([np.array(test_data),test_vectors])

In [52]:
final_test[len(final_test)-25:]

array([[[ 0.  ,  0.  ],
        [ 0.25,  0.25],
        [ 0.5 ,  0.5 ],
        [ 0.25,  0.25],
        [ 0.  ,  0.  ],
        [-0.25, -0.25]],

       [[ 0.  ,  0.  ],
        [ 0.25,  0.25],
        [ 0.5 ,  0.5 ],
        [ 0.25,  0.25],
        [ 0.  ,  0.  ],
        [-0.25, -0.25]],

       [[ 0.  ,  0.  ],
        [ 0.25,  0.25],
        [ 0.5 ,  0.5 ],
        [ 0.25,  0.25],
        [ 0.  ,  0.  ],
        [-0.25, -0.25]],

       [[ 0.  ,  0.  ],
        [ 0.25,  0.25],
        [ 0.5 ,  0.5 ],
        [ 0.25,  0.25],
        [ 0.  ,  0.  ],
        [-0.25, -0.25]],

       [[ 0.  ,  0.  ],
        [ 0.25,  0.25],
        [ 0.5 ,  0.5 ],
        [ 0.25,  0.25],
        [ 0.  ,  0.  ],
        [-0.25, -0.25]],

       [[ 0.  ,  0.  ],
        [ 0.25,  0.25],
        [ 0.5 ,  0.5 ],
        [ 0.25,  0.25],
        [ 0.  ,  0.  ],
        [-0.25, -0.25]],

       [[ 0.  ,  0.  ],
        [ 0.25,  0.25],
        [ 0.5 ,  0.5 ],
        [ 0.25,  0.25],
        [ 0.  ,  0.  ],
    

In [38]:
len(final_test)

250

In [27]:
avg_trajectory = np.array([np.mean(final_test[:,i], axis = 0) for i in range(final_test.shape[1])])
avg_trajectory

array([[ 0.00595623,  0.02869055],
       [ 0.16032397,  0.15246542],
       [ 0.32841348,  0.30318653],
       [ 0.13409034,  0.15495344],
       [-0.01318911,  0.0206617 ],
       [-0.14092852, -0.14330522]])

In [28]:
def compute_average_trajectory(bicluster):
    
    average_trajectory = np.array([np.mean(bicluster.data[:,i], axis = 0) 
                                   for i in range(bicluster.data.shape[1])])
    
    return average_trajectory

In [67]:
def compute_average_coherence(avg_trajectory, bicluster):
    
    acc = 0.0
    data = bicluster.data
    for i in range(data.shape[0]-1):
        
        acc += three_dimensional_correlation(avg_trajectory, data[i])
        
    return acc/(data.shape[0]*1.0)

In [92]:
def discriminative_score(pos_data, bicluster, neg_data):
    
    avg_trajectory = compute_average_trajectory(bicluster)
    avg_coherence = compute_average_coherence(avg_trajectory, bicluster)
    pos_rate = 1.0 * (len(bicluster.rho)/len(pos_data))
    test_data = neg_data[:,bicluster.gamma]
    neg_instances = 0
    
    for i in range(test_data.shape[1]):
        
        correlation = three_dimensional_correlation(test_data[i], avg_trajectory)
        
        if avg_coherence <= correlation:
            
            neg_instances += 1
        
    neg_rate = 1.0 * (neg_instances / len(neg_data))
    
    return (pos_rate >= neg_rate)

# Test data

In [93]:
class Bicluster(object):
    
    def __init__(self, data, rho, gamma):
        
        self.rho = rho
        self.gamma = gamma
        self.data = data[np.ix_(rho, gamma)]
        

In [94]:
neg_data = []

for i in range(0,100):
    
    neg_data.append(np.random.random((len(_x),2)) * 2 - 1)

neg_data = np.array(neg_data)

In [95]:
rho = np.arange(len(final_test)-10, len(final_test))
gamma = np.array([0,1,2])

In [96]:
bicluster = Bicluster(final_test, rho,gamma)

In [97]:
avg_trajectory = compute_average_trajectory(bicluster)

In [98]:
compute_average_coherence(avg_trajectory, bicluster)

0.9

In [99]:
discriminative_score(final_test, bicluster, neg_data)

True