In [95]:
import numpy as np
import scipy

X1 = np.array([-2.4, -2.1, -1.7, -1.6, -1.5, -1.2, -1.1, -0.5, 0.0, 0.0,
               0.1, 0.1, 0.1, 0.2, 0.3, 0.4, .8, 1.0, 1.7, 2.0], dtype='float')
X2 = np.array([0.4, -0.3, -1.6, -1.3, 1.5, 1.9, -2.0, 0.1, 0.4, 2.0, -0.7,
               -0.6, 0.0, -0.5, -0.5, 0.9, 0.2, 0.1, -1.0, 0.4], dtype='float')
C = np.array([0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], dtype='int')
 


$$P(A_1, A_2 | C_k)= P(A_1|C_k)P(A_2|C_k)$$

In [96]:
# Converting the feature space to binary:
A1 = 1.0*(np.absolute(X1) > 1)
A2 = 1.0*(np.absolute(X2) > 1)

# Computing the prior:
prior_C0 = np.count_nonzero(C==0)/len(C)
prior_C1 = np.count_nonzero(C==1)/len(C)

print("\n Prior: ")
print("P(C=1) = ", prior_C1)
print("P(C=0) = ", prior_C0)

# Assuming a Naive Bayes Classifier (features are independents):
C0 = C==0
C1 = C==1

#
p_A1_given_C0 = np.count_nonzero(A1[C0] == 0)/len(C0)
p_A2_given_C0 = np.count_nonzero(A2[C0] == 0)/len(C0)

#
p_A1_given_C1 = np.count_nonzero(A1[C1] == 1)/len(C1)
p_A2_given_C1 = np.count_nonzero(A2[C1] == 1)/len(C1)



print("\n Marginal prob.: ")
print("P(A1|C=1) = ", p_A1_given_C1)
print("P(A2|C=1) = ", p_A2_given_C1)
print("P(A1|C=0) = ", p_A1_given_C0)
print("P(A2|C=0) = ", p_A2_given_C0)


 Prior: 
P(C=1) =  0.5
P(C=0) =  0.5

 Marginal prob.: 
P(A1|C=1) =  0.0
P(A2|C=1) =  0.0
P(A1|C=0) =  0.05
P(A2|C=0) =  0.2


In [97]:

test_point = np.array([.9, .9], dtype='float')
A_test = 1.0 * (np.abs(test_point) > 1)


# P(C0|x) = P(x|C)P(C)/P(x) if independence of the attributes is considered:
# P(C0|x) = P(x1|C)*P(x2|C)*...*P(xD|C)P(C)/P(x)

# P(A1|C0) * P(C0) * P(A2|C1) * P(C0) which is proportional to P(C0|A1, A2)
P_C0_A1A2 = p_A1_given_C0 ** A_test[0] * (1 - p_A1_given_C0)**(1 - A_test[0]) * \
            p_A2_given_C0 ** A_test[1] * (1 - p_A2_given_C0)**(1 - A_test[1]) * \
            prior_C0

P_C1_A1A2 = p_A1_given_C1 ** A_test[0] * (1 - p_A1_given_C1)**(1 - A_test[0]) * \
            p_A2_given_C1 ** A_test[1] * (1 - p_A2_given_C1)**(1 - A_test[1]) * \
            prior_C1


evidence = P_C0_A1A2 + P_C1_A1A2

print(P_C0_A1A2/evidence)
print(P_C1_A1A2/evidence)


0.4318181818181818
0.5681818181818182


In [111]:
data = np.array([4, 5, 5, 6, 12, 14, 15, 15, 16, 17], dtype='float')
test_x = np.array([3, 10, 15], dtype='float')

h = 1.0
p = np.zeros_like(test_x)
for i, x in enumerate(test_x):
    lower_bound = x - h/2
    upper_bound = x + h/2
    
    points_belonging = (data > lower_bound) & (data < upper_bound)
    p[i] = np.count_nonzero(points_belonging)
    
print(p)
print("-------------------=")

h = 3.0
p = np.zeros_like(test_x)
for i, x in enumerate(test_x):
    lower_bound = x - h/2
    upper_bound = x + h/2
    
    points_belonging = (data > lower_bound) & (data < upper_bound)
    p[i] = np.count_nonzero(points_belonging)
    
print(p)
    

[0. 0. 2.]
-------------------=
[1. 0. 4.]
