## Imports

In [1]:
import math
import os
import numpy as np
import clustering

## Datasets

In [2]:
DATA_BASE_PATH = "./data"

FAC_FILE = os.path.join(DATA_BASE_PATH, "mfeat-fac")
FOU_FILE = os.path.join(DATA_BASE_PATH, "mfeat-fou")
KAR_FILE = os.path.join(DATA_BASE_PATH, "mfeat-kar")

fac = np.loadtxt(FAC_FILE, dtype=int)
fou = np.loadtxt(FOU_FILE, dtype=float)
kar = np.loadtxt(KAR_FILE, dtype=float)

## Importando melhor resultado

In [3]:
best_result = clustering.import_best_result("data/melhor_resultado_todas.pickle")
partition, _ = clustering.get_hard_patitions(best_result["membership_degree"])


## Probabilidade à priori das classes

In [4]:
Pw = np.array([len(c)/2000 for c in partition])
Pw, Pw.sum()

(array([0.0465, 0.185 , 0.1655, 0.236 , 0.175 , 0.0255, 0.0025, 0.0955,
        0.0565, 0.012 ]),
 1.0)

## Probabilidade à priori dos exemplos

In [15]:
def calc_p_x_w(xk, d, means, var, cov_matrix):
    coef = math.pow(2*math.pi, d/2) 
    sqrt_det_cov = math.sqrt(np.linalg.det(cov_matrix))
    inv_cov_matrix = np.linalg.inv(cov_matrix)
    diff = xk - means
    exp_exp = np.dot(np.dot(diff.T, inv_cov_matrix), diff)*(-1/2)
    exp_func = math.exp(exp_exp)
    
    return exp_func/(coef * sqrt_det_cov)

# def calc_p_x_w(xk, d, means, var, cov_matrix):
#     coef = math.pow(2*math.pi, d/2) 
#     inv_cov_matrix = np.linalg.inv(cov_matrix)
#     det_inv =  np.linalg.det(inv_cov_matrix)
#     diff = xk - means
#     exp_exp = np.dot(np.dot((-1/2)*(diff.T), inv_cov_matrix), diff)
#     exp_func = np.exp(exp_exp)
    
#     return exp_func/(coef * math.pow(det_inv, 1/2))

def calc_gaussian_bayesian_data(x, partition):
    n,d = x.shape
    qtd_w = len(partition)
    means = np.array([x[idxs].mean(axis=0) for idxs in partition])
#     var = np.array([((x[idxs]-means[i])**2).mean(axis=0) for i, idxs in enumerate(partition)])
    cov_matrix = [np.zeros((d,d)) for _ in range(qtd_w)]
    
    var2 = np.empty((qtd_w, d))
    
    for i in range(qtd_w):
        for j in range(d):
            var2[i,j] = sum([(x[k][j] - means[i,j])**2 for k in partition[i]])/len(partition[i])
            
    for i in range(qtd_w):
        np.fill_diagonal(cov_matrix[i], var2[i])
        print(cov_matrix[i])
    p_x_w = np.empty((qtd_w, n))
    
    for i in range(qtd_w):
        for k in range(n):
            p_x_w[i, k] = calc_p_x_w(x[k], d, means[i], var2[i], cov_matrix[i])
            
        
    
    return p_x_w


## Densidades por dataset

In [16]:
fac_p_x_w = calc_gaussian_bayesian_data(fac, partition)
fou_p_x_w = calc_gaussian_bayesian_data(fou, partition)
kar_p_x_w = calc_gaussian_bayesian_data(kar, partition)

[[5.07043866e+03 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 4.82449948e+03 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 7.84024350e+03 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 ...
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 6.69325934e+00
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  2.59475084e+00 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 1.41345820e+01]]
[[5.19361657e+03 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 6.03412316e+03 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 1.54249429e+04 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 ...
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 6.59997078e+00
  0.00000000e+00 0.00000000e+00]

  r = _umath_linalg.det(a, signature=signature)


[[0.00315313 0.         0.         ... 0.         0.         0.        ]
 [0.         0.02706529 0.         ... 0.         0.         0.        ]
 [0.         0.         0.01082173 ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.01479747 0.         0.        ]
 [0.         0.         0.         ... 0.         0.00299217 0.        ]
 [0.         0.         0.         ... 0.         0.         0.01225577]]
[[0.00498938 0.         0.         ... 0.         0.         0.        ]
 [0.         0.01869944 0.         ... 0.         0.         0.        ]
 [0.         0.         0.00749734 ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.01246139 0.         0.        ]
 [0.         0.         0.         ... 0.         0.00622261 0.        ]
 [0.         0.         0.         ... 0.         0.         0.00885843]]
[[0.00349716 0.         0.         ... 0.         0.         0.        ]
 [0.         0.00927674 0.         ... 

In [14]:
fac_p_x_w[0,0], fou_p_x_w[0,0], kar_p_x_w[0,0]

(0.0, 8.801645857541105e+42, 4.65396983641508e-58)