In [20]:
import math
import operator
import collections 
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, mean_squared_error

In [21]:
def find_distance(A, B):
    return np.linalg.norm(A - B)

In [22]:
def get_neighbors(train_data, test_data, k):
    list_dst = []
    for x in range(len(train_data)):
        dst = find_distance(test_data, train_data[x])
        list_dst.append((train_data[x], dst))
    list_dst.sort(key=operator.itemgetter(1))

    neighbors = []
    for x in range(k):
        neighbors.append(list_dst[x][0])

    return np.array(neighbors)


In [23]:
def get_response(neighbors):
    classVotes = {}
    for x in range(len(neighbors)):
        response = neighbors[x][-1]
        if response in classVotes:
            classVotes[response] += 1
        else:
            classVotes[response] = 1
        sortedVotes = sorted(classVotes.items(), key=operator.itemgetter(1), reverse=True)

    return sortedVotes[0][0]


In [24]:
def KNN(X_train, Y_train, X_test, Y_test, name):
    out_predic = []
    k = 3
    idx = 0
    for x in range(len(X_test)):
        neighbors = get_neighbors(X_train, X_test[x], k)

        prediction = get_response(neighbors)
        out_predic.append(prediction)

        per = float((idx * 100) / len(X_test))
        #print ("Class: {} actual: {} --->{:.0f}%".format(repr(prediction), repr(Y_test[x]), per))
        idx += 1

    pred_Y = np.array(out_predic)
    accuracy = accuracy_score(Y_test, pred_Y)
    mse = mean_squared_error(Y_test, pred_Y)

    print("{} - Acuracy: {:.6f}% Loss: {:.6f}".format(name, accuracy*100, mse))
    return accuracy


In [25]:
scaler = MinMaxScaler(feature_range=(0, 1))

X_train1 = np.loadtxt("read_data/mfeat-fou")[:,:]
X_train2 = np.loadtxt("read_data/mfeat-fac")[:,:]
X_train3 = np.loadtxt("read_data/mfeat-kar")[:,:]

X_train1 = scaler.fit_transform(X_train1)
X_train2 = scaler.fit_transform(X_train2)
X_train3 = scaler.fit_transform(X_train3)
Y_train = np.array(np.loadtxt("aux_data/u_crisp.txt"), dtype=np.int16)[:]

In [26]:
X_train1 = np.array([np.hstack((X_train1[i], Y_train[i])) for i in range(Y_train.shape[0])])
X_train2 = np.array([np.hstack((X_train2[i], Y_train[i])) for i in range(Y_train.shape[0])])
X_train3 = np.array([np.hstack((X_train3[i], Y_train[i])) for i in range(Y_train.shape[0])])

In [27]:
class_dict = collections.Counter(Y_train)
classes = sorted(class_dict.keys())
print(class_dict)
print(classes)

Counter({1: 605, 4: 493, 2: 371, 3: 283, 6: 134, 0: 114})
[0, 1, 2, 3, 4, 6]


In [None]:
# KNN(X_train, Y_train, X_test, Y_test, "view1")


StratifiedKFold==============================================================

In [None]:
from sklearn.model_selection import StratifiedKFold

In [None]:
skf = StratifiedKFold(n_splits=10, shuffle = True)
skf.get_n_splits(X_train2, Y_train)

10

In [None]:
list_executions=[]
# Execution
for exe_id in range(30):  
    print("Execution: {}".format(exe_id))
    
    posterior_probab=[]
    tracker=[]
    for train_idx, test_idx in skf.split(X_train2, Y_train):

        x_train1, x_test1 = X_train1[train_idx], X_train1[test_idx]
        x_train2, x_test2 = X_train2[train_idx], X_train2[test_idx]
        x_train3, x_test3 = X_train3[train_idx], X_train3[test_idx]
        y_train, y_test = Y_train[train_idx], Y_train[test_idx]

        view1=KNN(x_train1, y_train, x_test1, y_test, "View1")
        view2=KNN(x_train2, y_train, x_test2, y_test, "View2")
        view3=KNN(x_train3, y_train, x_test3, y_test, "View3")

        summ=view1+view2+view3
        posterior_probab.append(summ)
        tracker.append((view1, view2,view3))

    posterior_probab=np.array(posterior_probab)
    best_sum=np.max(posterior_probab)
    v1,v2,v3=tracker[np.argmax(posterior_probab)]
    
    print("Max sum: {:.6f}".format(best_sum))
    print("Best sum view1: {:.6f} view2: {:.6f} view3: {:.6f}".format(v1,v2,v3))
    
    list_executions.append(best_sum)
    
np.savetxt("aux_data/knn_executions.txt", np.array(list_executions))
    

Execution: 0
View1 - Acuracy: 100.000000% Loss: 0.000000
View2 - Acuracy: 97.058824% Loss: 0.029412
View3 - Acuracy: 100.000000% Loss: 0.000000
View1 - Acuracy: 99.014778% Loss: 0.009852
View2 - Acuracy: 98.522167% Loss: 0.014778
View3 - Acuracy: 100.000000% Loss: 0.000000
View1 - Acuracy: 99.014778% Loss: 0.009852
View2 - Acuracy: 96.551724% Loss: 0.034483
View3 - Acuracy: 100.000000% Loss: 0.000000
View1 - Acuracy: 99.502488% Loss: 0.004975
View2 - Acuracy: 99.004975% Loss: 0.024876
View3 - Acuracy: 100.000000% Loss: 0.000000
View1 - Acuracy: 98.492462% Loss: 0.015075
View2 - Acuracy: 98.492462% Loss: 0.015075
View3 - Acuracy: 100.000000% Loss: 0.000000
View1 - Acuracy: 98.484848% Loss: 0.015152
View2 - Acuracy: 97.979798% Loss: 0.020202
View3 - Acuracy: 99.494949% Loss: 0.005051
View1 - Acuracy: 98.484848% Loss: 0.015152
View2 - Acuracy: 95.454545% Loss: 0.045455
View3 - Acuracy: 100.000000% Loss: 0.000000
View1 - Acuracy: 97.474747% Loss: 0.025253
View2 - Acuracy: 97.979798% Loss: 

View3 - Acuracy: 100.000000% Loss: 0.000000
Max sum: 2.984848
Best sum view1: 0.994949 view2: 0.989899 view3: 1.000000
Execution: 6
View1 - Acuracy: 97.549020% Loss: 0.024510
View2 - Acuracy: 96.078431% Loss: 0.039216
View3 - Acuracy: 100.000000% Loss: 0.000000
View1 - Acuracy: 99.507389% Loss: 0.004926
View2 - Acuracy: 97.044335% Loss: 0.029557
View3 - Acuracy: 100.000000% Loss: 0.000000
View1 - Acuracy: 99.507389% Loss: 0.004926
View2 - Acuracy: 97.044335% Loss: 0.029557
View3 - Acuracy: 100.000000% Loss: 0.000000
View1 - Acuracy: 97.014925% Loss: 0.029851
View2 - Acuracy: 98.009950% Loss: 0.019900
View3 - Acuracy: 100.000000% Loss: 0.000000
View1 - Acuracy: 98.492462% Loss: 0.015075
View2 - Acuracy: 96.984925% Loss: 0.030151
View3 - Acuracy: 100.000000% Loss: 0.000000
View1 - Acuracy: 98.989899% Loss: 0.010101
View2 - Acuracy: 95.959596% Loss: 0.040404
View3 - Acuracy: 100.000000% Loss: 0.000000
View1 - Acuracy: 98.989899% Loss: 0.010101
View2 - Acuracy: 96.464646% Loss: 0.035354
Vi

View3 - Acuracy: 100.000000% Loss: 0.000000
View1 - Acuracy: 97.979798% Loss: 0.020202
View2 - Acuracy: 95.959596% Loss: 0.040404
View3 - Acuracy: 100.000000% Loss: 0.000000
Max sum: 2.989899
Best sum view1: 1.000000 view2: 0.989899 view3: 1.000000
Execution: 12
View1 - Acuracy: 97.549020% Loss: 0.024510
View2 - Acuracy: 97.549020% Loss: 0.024510
View3 - Acuracy: 100.000000% Loss: 0.000000
View1 - Acuracy: 97.536946% Loss: 0.024631
View2 - Acuracy: 96.059113% Loss: 0.039409
View3 - Acuracy: 100.000000% Loss: 0.000000
View1 - Acuracy: 100.000000% Loss: 0.000000
View2 - Acuracy: 96.551724% Loss: 0.034483
View3 - Acuracy: 100.000000% Loss: 0.000000
View1 - Acuracy: 99.004975% Loss: 0.009950
View2 - Acuracy: 96.019900% Loss: 0.039801
View3 - Acuracy: 100.000000% Loss: 0.000000
View1 - Acuracy: 98.492462% Loss: 0.015075
View2 - Acuracy: 97.487437% Loss: 0.025126
View3 - Acuracy: 99.497487% Loss: 0.005025
View1 - Acuracy: 97.979798% Loss: 0.020202
View2 - Acuracy: 97.474747% Loss: 0.055556
V