## Class Exercises. Set 3. Generative Classifiers

 1. In a two-class, two-dimensional classification task the feature vectors are generated by two normal distributions sharing the same covariance matrix: $\begin{bmatrix} 1.1 & 0.3 \\ 0.3 & 1.9 \end{bmatrix}$ and the mean vectors are $\mu_1 = \begin{bmatrix} 0 & 0 \end{bmatrix}^t$ and $\mu_2 = \begin{bmatrix} 3 & 3 \end{bmatrix}^t$, respectively. Classify the vector $\begin{bmatrix} 1 & 2.2 \end{bmatrix}^t$ according to the Bayesian classifier, assuming equal priors

In [13]:
import numpy as np
import scipy

cov_matrix = np.array([[1.1, 0.3], [0.3, 1.9]], dtype='float').transpose()
mean_1 = np.array([0, 0], dtype='float')
mean_2 = np.array([3, 3], dtype='float')

samples = np.array([1, 2.2], dtype='float')

# It is stated that they have the same prior:
prior_1 = prior_2 = .5

likelihood_1 = lambda x : scipy.stats.multivariate_normal(
    mean=mean_1,
    cov=cov_matrix).pdf(x)
likelihood_2 = lambda x : scipy.stats.multivariate_normal(
    mean=mean_2,
    cov=cov_matrix).pdf(x)

for sample in samples:
    den_1 = likelihood_1(sample) * prior_1
    den_2 = likelihood_2(sample) * prior_2
    evidence = den_1 + den_2
    posterior_1 = den_1 / evidence
    posterior_2 = den_2 / evidence
    if posterior_1 > posterior_2:
        idx = 1
    else:
        idx = 2
    print(f"{sample} probably belongs to C_{idx}. P(C_1|X)={posterior_1:.4f} and P(C_2|X)={posterior_2:.4f}")

1.0 probably belongs to C_1. P(C_1|X)=0.8581 and P(C_2|X)=0.1419
2.2 probably belongs to C_2. P(C_1|X)=0.0745 and P(C_2|X)=0.9255


In [37]:
# Import the data for the assigment:
data = np.loadtxt("heightWeightData.csv", delimiter=",", dtype=float)

# Split the data between each atribute:
G = data[:, 0]
H = data[:, 1]
W = data[:, 2]

# Finds which samples are male:
is_male = G == 1

male_data = np.array([H[is_male], W[is_male]])
female_data = np.array([H[1 - is_male ], W[1 - is_male]])

# For the male:
def compute_stats_param(data, idxs):
    means = np.mean(data, axis=1)
    cov_matrix = np.cov(data)
    likelihood = lambda x : scipy.stats.multivariate_normal(
        mean=means,
        cov=cov_matrix).pdf(x)
    prior = np.sum(idxs)/len(idxs)
    return likelihood, prior

likelihood_male, prior_male = compute_stats_param(male_data, is_male)
likelihood_female, prior_female = compute_stats_param(male_data, is_male)

# For the female:

def train_MLE_Gaussian(X):
    mean = np.mean(X, axis=0)
    print(mean)
    cov_matrix = np.cov(X)
    return mean, cov_matrix

def classifier_1(trainning_data, testing_data, label):
    mean_vector_male, cov_matrix_male = train_MLE_Gaussian(trainning_data[:, label])
    mean_vector_female, cov_matrix_female = train_MLE_Gaussian(trainning_data[:, 1 - label])
    
    likelihood_male = lambda x : scipy.stats.multivariate_normal(
        mean=mean_vector_male,
        cov=cov_matrix_male).pdf(x)
    prior_male = np.sum(label)/len(label)
    
    likelihood_female = lambda x : scipy.stats.multivariate_normal(
        mean=mean_vector_female,
        cov=cov_matrix_female).pdf(x)
    prior_female = np.sum(1 - label)/len(1 - label)
    
    
    evidence = prior_male + prior_female
    
    posteriori_male = likelihood_male(testing_data) * prior_male / evidence
    posteriori_female = likelihood_female(testing_data) * prior_female / evidence

    guess_male = (posteriori_male > posteriori_female) * 1
    guess_female = (posteriori_male < posteriori_female) * 2
    
    guesses = guess_male + guess_female
    
    return guesses


n_trainning = 100
n_testing = 110
trainning_data = data[1:, 0:100]
testing_data = data[1:, 100:]
print(is_male[:100].shape)
print(trainning_data[is_male[:100], :])
classifier_1(trainning_data, testing_data, is_male[:100])

(100,)


IndexError: boolean index did not match indexed array along dimension 0; dimension is 209 but corresponding boolean dimension is 100