In [13]:
import math
import collections 
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, mean_squared_error

In [14]:
def pre_prob(y, classes):
    """Estimate the prior probability of each class from its frequency in ``y``.

    Parameters
    ----------
    y : numpy.ndarray
        Array of class labels; ``y.shape[0]`` is used as the sample count.
    classes : sequence
        Class labels, in the order in which the priors are returned.

    Returns
    -------
    numpy.ndarray
        ``pre_probab[i]`` is the fraction of entries of ``y`` equal to
        ``classes[i]``. A label that never occurs in ``y`` gets a prior of 0.
    """
    y_dict = collections.Counter(y)
    n_samples = float(y.shape[0])
    pre_probab = np.ones(len(classes))
    for i, label in enumerate(classes):
        # Look the count up by the label itself, not by its position, so the
        # priors stay correct when the labels are not exactly 0..k-1.
        pre_probab[i] = y_dict[label] / n_samples
    return pre_probab

In [15]:
def mean_var(X, y, classes):
    """Per-class, per-feature mean and unbiased variance of the training data.

    Parameters
    ----------
    X : numpy.ndarray, shape (n_samples, n_features)
        Training samples.
    y : numpy.ndarray, shape (n_samples,)
        Class label of every row of ``X``.
    classes : sequence
        Class labels; row ``i`` of both results describes ``classes[i]``.

    Returns
    -------
    m, v : numpy.ndarray, each of shape (len(classes), n_features)
        ``m[i][j]`` is the mean and ``v[i][j]`` the sample variance
        (population variance scaled by n / (n - 1)) of feature ``j`` over the
        rows labelled ``classes[i]``. Variances that come out as exactly 0
        (constant feature, or a class with a single sample) are replaced by
        ``1e-9 * max(var(X, axis=0))`` so they can be divided by later.
        Rows for labels that do not occur in ``y`` keep the placeholder
        value 1.0 instead of becoming NaN.
    """
    n_features = X.shape[1]

    m = np.ones((len(classes), n_features))
    v = np.ones((len(classes), n_features))

    # The variance floor depends only on X, so compute it once rather than
    # once per (class, feature) pair. Guard the empty case, where .max()
    # would raise.
    eps = 1e-9 * np.var(X, axis=0).max() if X.size else 0.0

    # Index the result rows by position so labels need not be 0..k-1.
    for row, c in enumerate(classes):
        samples = X[y == c]
        count = samples.shape[0]
        if count == 0:
            # No data for this label: leave the placeholders untouched.
            continue

        m[row] = samples.mean(axis=0)

        if count > 1:
            # Bessel's correction turns the population variance into the
            # unbiased sample variance.
            class_var = np.var(samples, axis=0) * (count / (count - 1))
        else:
            # A single sample has no spread; it falls through to the floor.
            class_var = np.zeros(n_features)

        v[row] = np.where(class_var == 0, eps, class_var)

    return m, v  # mean and variance


In [16]:
def prob_feature_class(m, v, x, classes):
    """Likelihood of one sample under each class's independent Gaussians.

    For every class index ``i`` the univariate normal densities with mean
    ``m[i][j]`` and variance ``v[i][j]`` are evaluated at ``x[j]`` and
    multiplied together over all features ``j``.

    Parameters
    ----------
    m, v : numpy.ndarray, shape (len(classes), n_features)
        Per-class feature means and variances.
    x : sequence of length n_features
        The sample to evaluate.
    classes : sequence
        Only its length is used.

    Returns
    -------
    numpy.ndarray of length ``len(classes)``
        The raw (not log-scaled) product of densities for each class.
    """
    n_classes = len(classes)
    feature_count = m.shape[1]
    likelihoods = np.ones(n_classes)

    for cls_idx in range(n_classes):
        running = 1
        for feat_idx in range(feature_count):
            mu = m[cls_idx][feat_idx]
            sigma2 = v[cls_idx][feat_idx]
            # Normalising constant and exponential part of the normal pdf.
            norm_const = 1. / float(math.sqrt(2 * np.pi * sigma2))
            exponent = -0.5 * pow((x[feat_idx] - mu), 2) / float(sigma2)
            running = running * norm_const * np.exp(exponent)
        likelihoods[cls_idx] = running

    return likelihoods


In [17]:
def GNB(X, y, X_test, Y_test, classes, name):
    """Fit a Gaussian naive Bayes model on (X, y) and score it on the test set.

    Parameters
    ----------
    X : numpy.ndarray, shape (n_train, n_features)
        Training samples.
    y : numpy.ndarray, shape (n_train,)
        Training labels.
    X_test : iterable of samples
        Samples to classify, one at a time.
    Y_test : array-like
        True labels of ``X_test``.
    classes : sequence
        Class labels; predictions are drawn from this sequence.
    name : str
        Tag printed in front of the scores.

    Returns
    -------
    float
        Accuracy of the predictions on the test set. The accuracy and the
        mean squared error between true and predicted labels are also
        printed.
    """
    m, v = mean_var(X, y, classes)

    # The priors depend only on the training labels, so compute them once
    # instead of once per test sample.
    pre_probab = pre_prob(y, classes)

    out_predic = []
    for x in X_test:
        pfc = prob_feature_class(m, v, x, classes)

        # Unnormalised posterior. Dividing by the evidence would not change
        # which class wins, and it breaks (0 / 0) when every likelihood
        # underflows to zero, so the division is skipped.
        joint = pfc * pre_probab

        # Map the winning position back to its label, so the prediction is
        # comparable with Y_test even when the labels are not 0..k-1.
        out_predic.append(classes[int(joint.argmax())])

    pred_Y = np.array(out_predic)
    accuracy = accuracy_score(Y_test, pred_Y)
    mse = mean_squared_error(Y_test, pred_Y)

    print("{} - Acuracy: {:.6f}% Loss: {:.6f}".format(name, accuracy*100, mse))
    return accuracy


In [18]:
# One scaler object is shared by all views; fit_transform refits it on every
# call, so each view is min-max scaled to [0, 1] with its own column ranges.
# NOTE(review): the scaler is fitted on all rows, before the cross-validation
# split further down, so test folds influence the scaling -- confirm this is
# intended.
scaler = MinMaxScaler(feature_range=(0, 1))

# Read each feature view and scale it right after loading.
X_train1, X_train2, X_train3 = (
    scaler.fit_transform(np.loadtxt(view_path)[:, :])
    for view_path in (
        "read_data/mfeat-fou",
        "read_data/mfeat-fac",
        "read_data/mfeat-kar",
    )
)

# Class labels, parsed from text and cast to 16-bit integers.
Y_train = np.loadtxt("aux_data/u_crisp.txt").astype(np.int16)[:]

In [19]:

# Merge label 6 into label 5. Work on a copy so the array loaded earlier is
# not modified in place.
Y_train = Y_train.copy()
Y_train[Y_train == 6] = 5

In [20]:
# Number of samples per label; the distinct labels, in ascending order, form
# the class list used by the classifier.
class_dict = collections.Counter(Y_train)
classes = list(class_dict)
classes.sort()
print(class_dict)
print(classes)

Counter({1: 605, 4: 493, 2: 371, 3: 283, 5: 134, 0: 114})
[0, 1, 2, 3, 4, 5]


In [21]:
#GNB(X_train, Y_train, X_train, Y_train, classes)


StratifiedKFold==============================================================

In [22]:
from sklearn.model_selection import StratifiedKFold

In [23]:
# Seed for the fold shuffling, so that Restart Kernel -> Run All reproduces
# the same sequence of folds.
CV_SEED = 42

# A RandomState instance (rather than a plain int) is passed on purpose:
# every later skf.split() call keeps drawing a fresh shuffle, so repeated
# executions still see different folds, while the whole sequence is
# repeatable from the seed.
skf = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    random_state=np.random.RandomState(CV_SEED),
)
skf.get_n_splits(X_train2, Y_train)

10

In [24]:
# Best per-fold score of every execution, written to disk at the end.
list_executions=[]
for exe_id in range(30):
    print("Execution: {}".format(exe_id))

    posterior_probab=[]  # per fold: sum of the three view accuracies
    tracker=[]           # per fold: (view1, view2, view3) accuracies
    for train_idx, test_idx in skf.split(X_train2, Y_train):

        # The same row indices are applied to the labels and to every view.
        y_train, y_test = Y_train[train_idx], Y_train[test_idx]
        x_train1, x_test1 = X_train1[train_idx], X_train1[test_idx]
        x_train2, x_test2 = X_train2[train_idx], X_train2[test_idx]
        x_train3, x_test3 = X_train3[train_idx], X_train3[test_idx]

        # One independent classifier per view.
        view1=GNB(x_train1, y_train, x_test1, y_test, classes, "View1")
        view2=GNB(x_train2, y_train, x_test2, y_test, classes, "View2")
        view3=GNB(x_train3, y_train, x_test3, y_test, classes, "View3")

        tracker.append((view1, view2, view3))
        summ=view1+view2+view3
        posterior_probab.append(summ)

    # Pick the fold whose summed accuracy is highest.
    posterior_probab=np.array(posterior_probab)
    best_fold=np.argmax(posterior_probab)
    best_sum=np.max(posterior_probab)
    v1,v2,v3=tracker[best_fold]

    print("Max sum: {:.6f}".format(best_sum))
    print("Best sum view1: {:.6f} view2: {:.6f} view3: {:.6f}".format(v1,v2,v3))

    list_executions.append(best_sum)

np.savetxt("aux_data/gnb_executions.txt", np.array(list_executions))
    

Execution: 0
View1 - Acuracy: 50.980392% Loss: 2.495098
View2 - Acuracy: 68.137255% Loss: 1.686275
View3 - Acuracy: 70.098039% Loss: 1.642157
View1 - Acuracy: 50.738916% Loss: 2.901478
View2 - Acuracy: 72.906404% Loss: 2.034483
View3 - Acuracy: 73.399015% Loss: 1.546798
View1 - Acuracy: 51.724138% Loss: 2.881773
View2 - Acuracy: 71.921182% Loss: 1.753695
View3 - Acuracy: 66.995074% Loss: 2.083744
View1 - Acuracy: 52.238806% Loss: 2.915423
View2 - Acuracy: 73.631841% Loss: 1.184080
View3 - Acuracy: 72.636816% Loss: 1.497512
View1 - Acuracy: 56.783920% Loss: 2.738693
View2 - Acuracy: 70.351759% Loss: 2.050251
View3 - Acuracy: 66.834171% Loss: 2.381910
View1 - Acuracy: 45.454545% Loss: 3.348485
View2 - Acuracy: 71.717172% Loss: 2.075758
View3 - Acuracy: 68.181818% Loss: 2.464646
View1 - Acuracy: 51.010101% Loss: 2.828283
View2 - Acuracy: 70.202020% Loss: 1.813131
View3 - Acuracy: 68.686869% Loss: 2.545455
View1 - Acuracy: 53.535354% Loss: 3.176768
View2 - Acuracy: 77.272727% Loss: 1.80303

View1 - Acuracy: 50.490196% Loss: 2.823529
View2 - Acuracy: 75.490196% Loss: 1.504902
View3 - Acuracy: 68.627451% Loss: 1.769608
View1 - Acuracy: 51.724138% Loss: 3.137931
View2 - Acuracy: 69.950739% Loss: 1.507389
View3 - Acuracy: 70.935961% Loss: 1.512315
View1 - Acuracy: 53.694581% Loss: 2.517241
View2 - Acuracy: 74.876847% Loss: 1.620690
View3 - Acuracy: 70.443350% Loss: 1.729064
View1 - Acuracy: 52.238806% Loss: 2.820896
View2 - Acuracy: 73.134328% Loss: 1.716418
View3 - Acuracy: 66.666667% Loss: 1.766169
View1 - Acuracy: 52.763819% Loss: 3.020101
View2 - Acuracy: 74.874372% Loss: 1.814070
View3 - Acuracy: 74.874372% Loss: 1.864322
View1 - Acuracy: 52.525253% Loss: 2.818182
View2 - Acuracy: 70.707071% Loss: 1.949495
View3 - Acuracy: 68.686869% Loss: 2.651515
View1 - Acuracy: 47.979798% Loss: 3.015152
View2 - Acuracy: 71.717172% Loss: 1.575758
View3 - Acuracy: 70.202020% Loss: 2.065657
View1 - Acuracy: 51.515152% Loss: 2.838384
View2 - Acuracy: 76.767677% Loss: 1.646465
View3 - Acu

View1 - Acuracy: 54.411765% Loss: 2.063725
View2 - Acuracy: 73.529412% Loss: 1.671569
View3 - Acuracy: 64.705882% Loss: 1.857843
View1 - Acuracy: 51.724138% Loss: 2.581281
View2 - Acuracy: 69.458128% Loss: 2.004926
View3 - Acuracy: 73.399015% Loss: 1.793103
View1 - Acuracy: 54.679803% Loss: 2.669951
View2 - Acuracy: 74.876847% Loss: 1.807882
View3 - Acuracy: 73.399015% Loss: 1.674877
View1 - Acuracy: 48.258706% Loss: 2.970149
View2 - Acuracy: 67.164179% Loss: 1.990050
View3 - Acuracy: 70.646766% Loss: 1.910448
View1 - Acuracy: 53.266332% Loss: 2.874372
View2 - Acuracy: 72.864322% Loss: 1.472362
View3 - Acuracy: 69.849246% Loss: 2.216080
View1 - Acuracy: 47.979798% Loss: 3.575758
View2 - Acuracy: 75.252525% Loss: 1.601010
View3 - Acuracy: 68.686869% Loss: 2.323232
View1 - Acuracy: 51.515152% Loss: 2.934343
View2 - Acuracy: 78.787879% Loss: 1.186869
View3 - Acuracy: 69.696970% Loss: 1.878788
View1 - Acuracy: 54.545455% Loss: 2.515152
View2 - Acuracy: 74.242424% Loss: 1.909091
View3 - Acu

View1 - Acuracy: 55.882353% Loss: 2.357843
View2 - Acuracy: 73.529412% Loss: 1.759804
View3 - Acuracy: 70.098039% Loss: 2.289216
View1 - Acuracy: 48.768473% Loss: 3.103448
View2 - Acuracy: 78.325123% Loss: 1.566502
View3 - Acuracy: 71.428571% Loss: 2.088670
View1 - Acuracy: 51.724138% Loss: 3.034483
View2 - Acuracy: 69.950739% Loss: 1.955665
View3 - Acuracy: 69.458128% Loss: 1.625616
View1 - Acuracy: 51.741294% Loss: 2.955224
View2 - Acuracy: 72.636816% Loss: 1.731343
View3 - Acuracy: 70.149254% Loss: 1.651741
View1 - Acuracy: 47.738693% Loss: 3.010050
View2 - Acuracy: 67.336683% Loss: 2.105528
View3 - Acuracy: 67.336683% Loss: 1.959799
View1 - Acuracy: 53.030303% Loss: 2.585859
View2 - Acuracy: 70.707071% Loss: 1.631313
View3 - Acuracy: 69.696970% Loss: 1.878788
View1 - Acuracy: 54.040404% Loss: 3.131313
View2 - Acuracy: 74.747475% Loss: 1.424242
View3 - Acuracy: 73.232323% Loss: 1.924242
View1 - Acuracy: 58.585859% Loss: 2.479798
View2 - Acuracy: 74.747475% Loss: 1.974747
View3 - Acu

View1 - Acuracy: 49.509804% Loss: 3.088235
View2 - Acuracy: 75.980392% Loss: 1.936275
View3 - Acuracy: 73.529412% Loss: 2.171569
View1 - Acuracy: 48.275862% Loss: 3.147783
View2 - Acuracy: 74.384236% Loss: 1.807882
View3 - Acuracy: 69.458128% Loss: 1.906404
View1 - Acuracy: 52.216749% Loss: 2.669951
View2 - Acuracy: 69.458128% Loss: 1.911330
View3 - Acuracy: 69.950739% Loss: 2.241379
View1 - Acuracy: 50.248756% Loss: 2.940299
View2 - Acuracy: 74.129353% Loss: 1.452736
View3 - Acuracy: 65.174129% Loss: 2.119403
View1 - Acuracy: 53.768844% Loss: 2.899497
View2 - Acuracy: 75.376884% Loss: 1.582915
View3 - Acuracy: 69.346734% Loss: 1.788945
View1 - Acuracy: 50.505051% Loss: 2.525253
View2 - Acuracy: 72.727273% Loss: 1.828283
View3 - Acuracy: 70.202020% Loss: 1.914141
View1 - Acuracy: 57.070707% Loss: 2.661616
View2 - Acuracy: 77.272727% Loss: 1.176768
View3 - Acuracy: 75.252525% Loss: 1.484848
View1 - Acuracy: 51.010101% Loss: 3.146465
View2 - Acuracy: 71.212121% Loss: 1.818182
View3 - Acu