#Hidden Naive Bayes

In [2]:
import numpy as np  
from collections import defaultdict
import scipy.spatial as ss
from scipy.special import digamma,gamma
from math import log,pi
import numpy.random as nr
import random
import math

In [34]:
def P_c(labels):
    '''
    Compute and return the prior probability of every class in the dataset.

    I/P : target labels for the instances, e.g. [3,2,3,2,2,3,1,1,3]
    O/P : defaultdict mapping label -> relative frequency,
          e.g. 1 -> 2/9, 2 -> 3/9, 3 -> 4/9
          (a label that never occurred reads back as 0.0)
    '''
    n_labels = len(labels)

    priors = defaultdict(lambda : 0.0)

    # First pass: raw occurrence count per label.
    for label in labels:
        priors[label] = priors[label] + 1

    # Second pass: turn each count into a fraction of all instances.
    for label, count in list(priors.items()):
        priors[label] = float(count) / n_labels

    return priors

In [35]:
# Example: priors for nine labels; the recorded result is 1 -> 2/9, 2 -> 3/9, 3 -> 4/9.
P_c([3,2,3,2,2,3,1,1,3])

defaultdict(<function <lambda> at 0x00000000046FF198>, {1: 0.2222222222222222, 2: 0.3333333333333333, 3: 0.4444444444444444})

In [36]:
def P_x_y_z(XYZ_mat,x,y,z,e):
    '''
    Estimate the smoothed joint probability P(X=x,Y=y,Z=z).

    I/P : matrix whose rows hold X, Y, Z in columns 0, 1, 2; the values
          x, y, z to look up; the smoothing constant e
    O/P : (number of rows equal to (x,y,z) + e) / (number of rows + 2*e)
    '''
    # Count the rows whose first three entries match the requested triple.
    hits = sum(1 for row in XYZ_mat
               if row[0] == x and row[1] == y and row[2] == z)

    total_rows = XYZ_mat.shape[0]
    return float(hits + e) / (total_rows + (2 * e))
    

In [37]:
# Example: (4,5,6) occurs in 2 of the 4 rows, so with e=1 the estimate
# is (2+1)/(4+2) = 0.5.
XYZ_mat = np.array([[1,2,3],[4,5,6],[7,8,9],[4,5,6]])
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(XYZ_mat)

P_x_y_z(XYZ_mat,4,5,6,1)

[[1 2 3]
 [4 5 6]
 [7 8 9]
 [4 5 6]]


0.5

In [38]:
def P_x_y_given_z(XYZ_mat,x,y,z,e):
    '''
    Estimate the smoothed conditional probability P(X=x,Y=y|Z=z).

    I/P : matrix whose rows hold X, Y, Z in columns 0, 1, 2; the values
          x, y, z to look up; the smoothing constant e
    O/P : (rows with X=x,Y=y,Z=z + e) / (rows with Z=z + 2*e)
    '''
    # Restrict to the rows that satisfy the condition Z=z ...
    z_rows = [row for row in XYZ_mat if row[2] == z]
    # ... and count how many of those also carry the requested (x, y) pair.
    joint_hits = len([row for row in z_rows if row[0] == x and row[1] == y])

    return float(joint_hits + e) / (len(z_rows) + (2 * e))

In [39]:
# Example: 3 rows have Z=6 and 2 of them are (4,5,6), so with e=1 the
# estimate is (2+1)/(3+2) = 0.6.
XYZ_mat = np.array([[1,2,3],[4,5,6],[7,8,9],[4,5,2],[3,5,6],[4,5,6]])
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(XYZ_mat)

P_x_y_given_z(XYZ_mat,4,5,6,1)

[[1 2 3]
 [4 5 6]
 [7 8 9]
 [4 5 2]
 [3 5 6]
 [4 5 6]]


0.6

In [40]:
def P_x_given_z(XZ_mat,x,z,e):
    '''
    Estimate the smoothed conditional probability P(X=x|Z=z).

    I/P : matrix whose rows hold X, Z in columns 0, 1; the values x, z to
          look up; the smoothing constant e
    O/P : (rows with X=x,Z=z + e) / (rows with Z=z + 2*e)
    '''
    rows_with_z = 0
    rows_with_x_and_z = 0

    for row in XZ_mat:
        # Rows outside the condition Z=z contribute to neither count.
        if row[1] != z:
            continue
        rows_with_z += 1
        if row[0] == x:
            rows_with_x_and_z += 1

    smoothed_hits = float(rows_with_x_and_z + e)
    smoothed_total = rows_with_z + (2 * e)
    return smoothed_hits / smoothed_total

In [41]:
# Example: both rows with Z=2 have X=1, so with e=1 the estimate is
# (2+1)/(2+2) = 0.75.
XZ_mat = np.array([[1,2],[1,2],[5,9],[1,6]])
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(XZ_mat)

P_x_given_z(XZ_mat,1,2,1)

[[1 2]
 [1 2]
 [5 9]
 [1 6]]


0.75

In [42]:
def P_x_given_y_z(XYZ_mat,x,y,z,e):
    '''
    Estimate the smoothed conditional probability P(X=x|Y=y,Z=z).

    I/P : matrix whose rows hold X, Y, Z in columns 0, 1, 2; the values
          x, y, z to look up; the smoothing constant e
    O/P : (rows with X=x,Y=y,Z=z + e) / (rows with Y=y,Z=z + 2*e)
    '''
    # X values of every row that satisfies the condition Y=y and Z=z.
    x_under_condition = [row[0] for row in XYZ_mat
                         if row[1] == y and row[2] == z]
    # How many of those rows also have the requested X value.
    matching = len([value for value in x_under_condition if value == x])

    return float(matching + e) / (len(x_under_condition) + (2 * e))

In [43]:
# Example: 3 rows have Y=2,Z=3 and 2 of them have X=1, so with e=1 the
# estimate is (2+1)/(3+2) = 0.6.
XYZ_mat = np.array([[1,2,3],[1,2,3],[5,2,3],[1,3,2]])
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(XYZ_mat)

P_x_given_y_z(XYZ_mat,1,2,3,1)

[[1 2 3]
 [1 2 3]
 [5 2 3]
 [1 3 2]]


0.6

$I_{p}(X;Y|Z) = \sum_{x,y,z}P(x,y,z)\log\frac{P(x,y|z)}{P(x|z)P(y|z)}$

In [44]:
def np_col_to_list(np_col):
    '''
    Collect the first entry of every row of np_col into a plain list.

    Used to turn a single-column slice such as mat[:,0:1] into a flat list.
    '''
    values = []
    for row in np_col:
        values.append(row[0])
    return values

In [45]:
def cond_mutual_info(XYZ_mat,e):
    '''
    Estimate the conditional mutual information I(X;Y|Z):

        sum over x,y,z of P(x,y,z) * log( P(x,y|z) / (P(x|z) * P(y|z)) )

    I/P : NumPy matrix whose rows hold X, Y, Z in columns 0, 1, 2, and the
          smoothing constant e. Every probability is estimated as
          (count + e) / (total + 2*e), with the sum running over all
          combinations of the distinct values seen in each column.
    O/P : the estimate in nats (natural logarithm); 0 for an empty matrix.
    '''
    n_rows = XYZ_mat.shape[0]

    # A single pass collects every count the estimate needs, so the matrix
    # is not rescanned for each (x, y, z) combination.
    xyz_counts = defaultdict(int)
    xz_counts = defaultdict(int)
    yz_counts = defaultdict(int)
    z_counts = defaultdict(int)
    for row in XYZ_mat:
        x, y, z = row[0], row[1], row[2]
        xyz_counts[(x, y, z)] += 1
        xz_counts[(x, z)] += 1
        yz_counts[(y, z)] += 1
        z_counts[z] += 1

    unique_X = set(XYZ_mat[:, 0])
    unique_Y = set(XYZ_mat[:, 1])
    unique_Z = set(XYZ_mat[:, 2])

    total_info = 0

    for x in unique_X:
        for y in unique_Y:
            for z in unique_Z:
                # .get() keeps unseen combinations out of the count tables.
                n_xyz = xyz_counts.get((x, y, z), 0)
                n_z = z_counts.get(z, 0)

                joint = float(n_xyz + e) / (n_rows + (2 * e))
                if joint == 0:
                    # Only reachable without smoothing (e == 0) for an
                    # unseen combination: by the convention 0*log(0) = 0
                    # the term vanishes, and evaluating it would divide by
                    # zero or take log(0).
                    continue

                xy_given_z = float(n_xyz + e) / (n_z + (2 * e))
                x_given_z = float(xz_counts.get((x, z), 0) + e) / (n_z + (2 * e))
                y_given_z = float(yz_counts.get((y, z), 0) + e) / (n_z + (2 * e))

                log_ratio = math.log(float(xy_given_z) / (x_given_z * y_given_z))
                total_info += joint * log_ratio

    return total_info

In [46]:
# Example: conditional mutual information of a two-row matrix with a very
# small smoothing constant; the recorded result is about 1.48e-05.
x = np.array([[0,1,1],[2,4,2]])
cond_mutual_info(x,0.000001)

1.481549924273279e-05

In [47]:
def delete_all_except_ij(i,j,data_matrix):
    '''
    Return a copy of data_matrix reduced to columns i, j and the last column.

    Only the columns before the last one are candidates for removal, so the
    final column is always kept; the surviving columns stay in their
    original left-to-right order.
    '''
    attr_count = data_matrix.shape[1] - 1
    dropped_cols = [col for col in range(attr_count) if col not in (i, j)]
    return np.delete(data_matrix, dropped_cols, 1)

In [48]:
# Example: keeping columns 1 and 3 also keeps the last column, so the
# result has three columns.
XYZ_mat = np.array([[1,2,3,7,1],[1,2,3,8,1],[5,2,3,4,2],[1,3,2,9,2]])
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(XYZ_mat)

delete_all_except_ij(1,3,XYZ_mat)

[[1 2 3 7 1]
 [1 2 3 8 1]
 [5 2 3 4 2]
 [1 3 2 9 2]]


array([[2, 7, 1],
       [2, 8, 1],
       [2, 4, 2],
       [3, 9, 2]])

In [49]:
def Wij(i,j,data_matrix,e):
    '''
    Weight of attribute j with respect to attribute i.

    I/P : attribute column indices i and j, the data matrix (every column
          except the last is treated as an attribute) and the smoothing
          constant e passed on to cond_mutual_info
    O/P : cond_mutual_info of (column i, column j, last column) divided by
          the sum of cond_mutual_info of (column i, column k, last column)
          over every attribute column k other than i
    '''
    # Numerator: information shared by the requested pair of attributes.
    pair_info = cond_mutual_info(delete_all_except_ij(i, j, data_matrix), e)

    # Denominator: the same quantity accumulated over every partner of i.
    attr_count = data_matrix.shape[1] - 1
    total_info = 0
    for other in range(attr_count):
        if other == i:
            continue
        reduced = delete_all_except_ij(i, other, data_matrix)
        total_info += cond_mutual_info(reduced, e)

    return float(pair_info) / total_info

In [53]:
# Example: weight of attribute 1 with respect to attribute 0 on a small
# 4-attribute matrix; the recorded result is about 0.329.
XYZ_mat = np.array([[1,2,3,7,1],[1,2,3,8,1],[5,2,3,4,2],[1,3,2,9,2]])
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(XYZ_mat)

Wij(0,1,XYZ_mat,0.005)

[[1 2 3 7 1]
 [1 2 3 8 1]
 [5 2 3 4 2]
 [1 3 2 9 2]]


0.3292997271179316

In [66]:
def train_Wij_matrix(data_matrix,e):
    '''
    Get the weights matrix. Its dimension will be n*n where n is the number
    of features (every column of data_matrix except the last).

    I/P : data matrix and the smoothing constant e passed on to Wij
    O/P : n*n NumPy array with Wij(x,y,data_matrix,e) at [x,y] for x != y
          and 0 on the diagonal. Each computed pair is also printed as
          "x y" to show progress.
    '''
    no_of_attr = data_matrix.shape[1]-1

    W_ij_mat = np.zeros((no_of_attr,no_of_attr))

    for x in range(0,no_of_attr):
        for y in range(0,no_of_attr):
            if x!=y:
                W_ij_mat[x,y] = Wij(x,y,data_matrix,e)
                # Progress trace, formatted as one string so the statement
                # parses and prints "x y" the same on Python 2 and Python 3.
                print("%s %s" % (x, y))
    return W_ij_mat

In [67]:
# Load the dataset and build its attribute weight matrix.
# NOTE(review): train_Wij_matrix treats the last column as the class and
# every other column as an attribute - confirm 'iris.data' is laid out that
# way and is in a numeric format np.loadtxt can parse with its defaults.
XYZ_mat = np.loadtxt('iris.data')

train_Wij_matrix(XYZ_mat,0.005)

0 1
0 2
0 3
1 0
1 2
1 3
2 0
2 1
2 3
3 0
3 1
3 2


array([[ 0.        ,  0.32723632,  0.41888009,  0.25388359],
       [ 0.38814902,  0.        ,  0.36821267,  0.24363831],
       [ 0.41839328,  0.31006783,  0.        ,  0.27153888],
       [ 0.3472424 ,  0.28093554,  0.37182206,  0.        ]])