Z-normalization vs Distance Metric Learning
======

In [None]:
import csv
import sys

import numpy as np
import scipy.linalg as la
import matplotlib.pyplot as plt
import pandas as pd

from svecon.KNNClassifierPerClass import KNNClassifierPerClass

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA

import plotly.offline as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs
# download_plotlyjs('https://cdn.plot.ly/plotly-latest.min.js')
py.init_notebook_mode()

# Base marker style shared by every plotly scatter trace in this notebook;
# the plotting helpers add per-point 'color' and 'size' on top of it.
defaultScatterMarker = dict(
    size=10,
    colorscale='Viridis',
    opacity=0.5,
)

# Seed NumPy's global RNG so the synthetic clusters drawn with
# np.random.multivariate_normal (and scikit-learn splits that fall back to the
# global RNG when no random_state is given) are reproducible on
# Restart & Run All.
SEED = 42
np.random.seed(SEED)

# Always print arrays in full, with 10 fixed decimals.
# threshold=np.nan is rejected by current NumPy ("threshold must be non-NAN");
# sys.maxsize is the documented value for "never summarize".
np.set_printoptions(
    threshold=sys.maxsize,
    formatter={'float': lambda x: "{0:0.10f}".format(x)},
)

from metric_learn import LMNN
from metric_learn import NCA, LFDA, MetricEvolution, NeuralNetworkTransformer

In [None]:
def evaluateKnn(X_train, y_train, X_test, y_test, k=1):
    """Fit a k-NN classifier, print its test accuracy and confusion matrix.

    Parameters
    ----------
    X_train, y_train : training samples and their class labels.
    X_test, y_test : held-out samples and their true labels.
    k : int, number of neighbours handed to ``KNeighborsClassifier``.

    Returns
    -------
    Boolean mask with one entry per test sample, True where the prediction
    differs from ``y_test``.
    """
    knn = KNeighborsClassifier(k)
    knn.fit(X_train, y_train)

    predicted = knn.predict(X_test)
    wrongVec = predicted != y_test
    N = len(y_test)
    n_wrong = np.count_nonzero(wrongVec)
    print('{}% success ({}/{})'.format((1 - n_wrong / N) * 100, N - n_wrong, N))

    # Take the label set from both splits instead of range(len(set(y_test))):
    # the old form assumed labels are exactly 0..K-1 and that every class
    # occurs in the test split, otherwise rows/columns were silently dropped.
    labels = np.unique(np.concatenate((y_train, y_test)))
    print(confusion_matrix(y_test, predicted, labels=labels))

    return wrongVec

In [None]:
def evaluateKnnNormPerClass(X_train, y_train, X_test, y_test, k=1):
    """Same report as ``evaluateKnn`` but using ``KNNClassifierPerClass``.

    Parameters
    ----------
    X_train, y_train : training samples and their class labels.
    X_test, y_test : held-out samples and their true labels.
    k : int, passed positionally to ``KNNClassifierPerClass``
        (presumably the neighbour count -- confirm against svecon).

    Returns
    -------
    Boolean mask with one entry per test sample, True where the prediction
    differs from ``y_test``.
    """
    knn = KNNClassifierPerClass(k)
    knn.fit(X_train, y_train)

    predicted = knn.predict(X_test)
    wrongVec = predicted != y_test
    N = len(y_test)
    n_wrong = np.count_nonzero(wrongVec)
    print('{}% success ({}/{})'.format((1 - n_wrong / N) * 100, N - n_wrong, N))

    # Take the label set from both splits instead of range(len(set(y_test))):
    # the old form assumed labels are exactly 0..K-1 and that every class
    # occurs in the test split, otherwise rows/columns were silently dropped.
    labels = np.unique(np.concatenate((y_train, y_test)))
    print(confusion_matrix(y_test, predicted, labels=labels))

    return wrongVec

In [None]:
def plotScatter(X, y, wrong=None):
    """Show a 2-D plotly scatter of the samples, coloured by class label.

    Parameters
    ----------
    X : samples, one row per point. If it does not have exactly two columns
        it is projected to two PCA components (and 'USING PCA' is printed).
    y : numeric class label per row; used as marker colour, ``y + 1`` is the
        hover text.
    wrong : optional per-row mask. Marker size is ``wrong * 10 + 10``, so
        flagged points are drawn twice as large. Defaults to no flags.
    """
    # Work on plain ndarrays: with a two-column DataFrame ``X.T[0]`` is a
    # label lookup, not the first coordinate row.
    X = np.asarray(X)
    y = np.asarray(y)
    wrong = np.zeros(len(y), dtype=int) if wrong is None else np.asarray(wrong)

    if X.shape[1] == 2:
        coords = X.T
    else:
        pca = PCA(n_components=2)
        pca.fit(X)
        coords = pca.transform(X).T
        print('USING PCA')

    assert coords.shape[0] == 2
    trace = go.Scatter(
        x=coords[0], y=coords[1],
        text=y + 1, mode='markers',
        marker={**defaultScatterMarker, 'color': y, 'size': wrong * 10 + 10},
    )

    # The figure keeps plotly's default layout, as before; the zero-margin
    # Layout that used to be built here was never passed on, so it is dropped.
    fig = go.Figure(data=[trace], layout={})
    py.iplot(fig)

def plotScatterTT(X, y, X_test, y_test, wrong=None):
    """Show train and test samples together in one 2-D plotly scatter.

    Parameters
    ----------
    X, y : training samples and their numeric labels.
    X_test, y_test : test samples and their numeric labels.
    wrong : optional mask with one entry per TEST sample. Marker size is
        ``wrong * 10 + 10``, so flagged test points are drawn twice as large;
        training points always get the base size. Defaults to no flags.

    The stacked data is projected to two PCA components (and 'USING PCA' is
    printed) unless it already has exactly two columns.
    """
    trainLen = len(y)

    # The default mask must match the test set: it is appended after
    # trainLen zeros, so sizing it by the training set (as before) produced a
    # size array that did not line up with the plotted points.
    if wrong is None:
        wrong = np.zeros(len(y_test))

    wrong = np.concatenate((np.zeros(trainLen), wrong))
    X = np.vstack((X, X_test))
    y = np.concatenate((y, y_test))

    if X.shape[1] == 2:
        coords = X.T
    else:
        pca = PCA(n_components=2)
        pca.fit(X)
        coords = pca.transform(X).T
        print('USING PCA')

    assert coords.shape[0] == 2
    trace = go.Scatter(
        x=coords[0], y=coords[1],
        text=y + 1, mode='markers',
        marker={**defaultScatterMarker, 'color': y, 'size': wrong * 10 + 10},
    )

    layout = go.Layout(
        margin=dict(l=0, r=0, b=0, t=0),
    )

    fig = go.Figure(data=[trace], layout=layout)
    py.iplot(fig)

    
def plotScatter3d(X, y, wrong=None):
    """Show a 3-D plotly scatter of the samples, coloured by class label.

    Parameters
    ----------
    X : samples, one row per point, with at least three columns. More than
        three columns are projected to three PCA components (the explained
        variance ratio and the projected shape are printed).
    y : numeric class label per row; used as marker colour, ``y + 1`` is the
        hover text.
    wrong : optional per-row mask. Marker size is ``wrong * 10 + 10``.
        Defaults to no flags.

    Raises
    ------
    ValueError
        If ``X`` has fewer than three columns (previously this surfaced as an
        IndexError when the third coordinate was read).
    """
    # Plain ndarrays so that row indexing of the transposed data is positional
    # even when a DataFrame is passed in.
    X = np.asarray(X)
    y = np.asarray(y)
    wrong = np.zeros(len(y), dtype=int) if wrong is None else np.asarray(wrong)

    n_features = X.shape[1]
    if n_features < 3:
        raise ValueError(
            'plotScatter3d needs at least 3 features, got {}'.format(n_features))

    if n_features == 3:
        coords = X.T
    else:
        pca = PCA(n_components=3)
        pca.fit(X)

        print(pca.explained_variance_ratio_)

        coords = pca.transform(X).T
        print(coords.shape)

    trace = go.Scatter3d(
        x=coords[0], y=coords[1], z=coords[2],
        text=y + 1, mode='markers',
        marker={**defaultScatterMarker, 'color': y, 'size': wrong * 10 + 10},
    )

    # Default plotly layout, as before; the unused zero-margin Layout is gone.
    fig = go.Figure(data=[trace], layout={})
    py.iplot(fig)
    
def plotScatter3dTT(X, y, X_test, y_test, wrong=None):
    """Show train and test samples together in one 3-D plotly scatter.

    Parameters
    ----------
    X, y : training samples and their numeric labels.
    X_test, y_test : test samples and their numeric labels.
    wrong : optional mask with one entry per TEST sample. Marker size is
        ``wrong * 10 + 10``; training points always get the base size.
        Defaults to no flags.

    Training labels are shifted by the number of distinct labels in the
    stacked label vector so train and test points of one class get different
    colours. Data with more than three columns is projected to three PCA
    components.

    Raises
    ------
    ValueError
        If the data has fewer than three columns (previously an IndexError
        when the third coordinate was read).
    """
    trainLen = len(y)

    # The default mask must match the test set: it is appended after
    # trainLen zeros, so sizing it by the training set (as before) produced a
    # size array that did not line up with the plotted points.
    if wrong is None:
        wrong = np.zeros(len(y_test))

    wrong = np.concatenate((np.zeros(trainLen), wrong))
    X = np.vstack((X, X_test))
    y = np.concatenate((y, y_test))

    # y is a fresh array from concatenate, so the caller's labels are untouched.
    y[:trainLen] = y[:trainLen] + len(set(y))

    n_features = X.shape[1]
    if n_features < 3:
        raise ValueError(
            'plotScatter3dTT needs at least 3 features, got {}'.format(n_features))

    if n_features == 3:
        coords = X.T
    else:
        pca = PCA(n_components=3)
        pca.fit(X)

        print(pca.explained_variance_ratio_)

        coords = pca.transform(X).T
        print(coords.shape)

    trace = go.Scatter3d(
        x=coords[0], y=coords[1], z=coords[2],
        text=y + 1, mode='markers',
        marker={**defaultScatterMarker, 'color': y, 'size': wrong * 10 + 10},
    )

    # Default plotly layout, as before; the unused zero-margin Layout is gone.
    fig = go.Figure(data=[trace], layout={})
    py.iplot(fig)

In [None]:
from sklearn.datasets import load_iris

# Load iris and wrap the four measurements in a DataFrame with short column
# names (default RangeIndex, one row per sample).
iris_data = load_iris()

y_iris = iris_data['target']
X_iris = pd.DataFrame(iris_data['data'], columns=['F1', 'F2', 'F3', 'F4'])

# 50/50 split, stratified so both halves keep the class proportions.
X_iris_train, X_iris_test, y_iris_train, y_iris_test = train_test_split(
    X_iris,
    y_iris,
    train_size=0.5,
    stratify=y_iris,
)

X_iris.describe()

Original data
======

In [None]:
# Baseline on the raw iris features: plain k-NN and the per-class variant, k=5.
evaluateKnn(
    X_train=X_iris_train, y_train=y_iris_train,
    X_test=X_iris_test, y_test=y_iris_test,
    k=5,
)
evaluateKnnNormPerClass(
    X_train=X_iris_train.values, y_train=y_iris_train,
    X_test=X_iris_test.values, y_test=y_iris_test,
    k=5,
)
plotScatter(X=X_iris_train, y=y_iris_train)

Normalized data
======

In [None]:
# Z-normalize with statistics of the TRAINING split only, and reuse them for
# the test split so no test information leaks into the scaling.
iris_mean = X_iris_train.mean()
iris_std = X_iris_train.std()
X_iris_train_norm = (X_iris_train - iris_mean) / iris_std
X_iris_test_norm = (X_iris_test - iris_mean) / iris_std

X_iris_train_norm.describe()

In [None]:
# k-NN on the z-normalized features vs. the per-class classifier, which is
# given the raw features; both with k=7.
evaluateKnn(
    X_train=X_iris_train_norm, y_train=y_iris_train,
    X_test=X_iris_test_norm, y_test=y_iris_test,
    k=7,
)
evaluateKnnNormPerClass(
    X_train=X_iris_train.values, y_train=y_iris_train,
    X_test=X_iris_test.values, y_test=y_iris_test,
    k=7,
)
plotScatter(X=X_iris_train_norm, y=y_iris_train)

LMNN
======

In [None]:
# Fit LMNN on the raw (un-normalized) iris training features, then print the
# learned matrix L and the metric reported by the model.
lmnn = LMNN(k=10, learn_rate=1e-5, max_iter=500, verbose=False)
lmnn.fit(X_iris_train.values, y_iris_train)
print("matrix L:", lmnn.L)
print("metric:", lmnn.metric())

In [None]:
# Project both splits once with the fitted LMNN model, then score and plot.
X_iris_train_lmnn = lmnn.transform(X_iris_train)
X_iris_test_lmnn = lmnn.transform(X_iris_test)
evaluateKnn(X_iris_train_lmnn, y_iris_train, X_iris_test_lmnn, y_iris_test, k=7)
plotScatter(X_iris_train_lmnn, y_iris_train)

Normalized data + LMNN
======

In [None]:
# Fit a second LMNN model on the z-normalized iris training features and
# print its learned matrix L.
lmnn_norm = LMNN(k=7, learn_rate=1e-5, max_iter=1000, verbose=False)
lmnn_norm.fit(X_iris_train_norm.values, y_iris_train)
print(lmnn_norm.L)

In [None]:
# Project the normalized splits once with lmnn_norm, then score (k=30) and plot.
X_iris_train_norm_lmnn = lmnn_norm.transform(X_iris_train_norm)
X_iris_test_norm_lmnn = lmnn_norm.transform(X_iris_test_norm)
evaluateKnn(X_iris_train_norm_lmnn, y_iris_train, X_iris_test_norm_lmnn, y_iris_test, k=30)
plotScatter(X_iris_train_norm_lmnn, y_iris_train)

NCA
=====

In [None]:
# Fit NCA with its default settings on the raw iris training features.
nca = NCA()
nca.fit(X_iris_train.values, y_iris_train)

In [None]:
# Project both splits once with the fitted NCA model, then score and plot.
X_iris_train_nca = nca.transform(X_iris_train)
X_iris_test_nca = nca.transform(X_iris_test)
evaluateKnn(X_iris_train_nca, y_iris_train, X_iris_test_nca, y_iris_test, k=7)
plotScatter(X_iris_train_nca, y_iris_train)

Normalized data + NCA
=====

In [None]:
# Fit NCA with its default settings on the z-normalized iris training features.
nca_norm = NCA()
nca_norm.fit(X_iris_train_norm.values, y_iris_train)

In [None]:
# Project the normalized splits once with nca_norm, then score (k=15) and plot.
X_iris_train_norm_nca = nca_norm.transform(X_iris_train_norm)
X_iris_test_norm_nca = nca_norm.transform(X_iris_test_norm)
evaluateKnn(X_iris_train_norm_nca, y_iris_train, X_iris_test_norm_nca, y_iris_test, k=15)
plotScatter(X_iris_train_norm_nca, y_iris_train)

Multivariate normal distributions
====

In [None]:
# Six isotropic Gaussian clusters in 3-D, one at +/-15 on each axis,
# N samples each, drawn in the same order as the class ids 0..c-1.
c = 6
N = 100
mn_cov = [[10, 0, 0], [0, 10, 0], [0, 0, 10]]
mn_means = [
    [0, 15, 0],
    [0, -15, 0],
    [15, 0, 0],
    [-15, 0, 0],
    [0, 0, 15],
    [0, 0, -15],
]
X_mn = np.vstack([
    np.random.multivariate_normal(mean, mn_cov, N) for mean in mn_means
])

# Class id repeated N times per cluster: [0]*N, [1]*N, ...
y_mn = np.repeat(np.arange(c), N)

X_mn_train, X_mn_test, y_mn_train, y_mn_test = train_test_split(
    X_mn, y_mn, train_size=0.7,
)
print(X_mn_train.shape, y_mn_train.shape, X_mn_test.shape, y_mn_test.shape)

In [None]:
# Keep the error mask returned by evaluateKnn: the train/test plot below needs
# it, and on a fresh kernel no earlier cell defines `wrong` (the first
# assignment used to come in a later cell, so Run All failed here).
wrong = evaluateKnn(X_mn_train, y_mn_train, X_mn_test, y_mn_test, k=5)
plotScatter(X_mn, y_mn)
plotScatterTT(X_mn_train, y_mn_train, X_mn_test, y_mn_test, wrong)

In [None]:
# Reduce the 3-D clusters to two principal components (fitted on the training
# split), then run 1-NN in the reduced space and plot the misses.
pca = PCA(n_components=2)
pca.fit(X_mn_train, y_mn_train)
X_mn_train_pca = pca.transform(X_mn_train)
X_mn_test_pca = pca.transform(X_mn_test)
wrong = evaluateKnn(X_mn_train_pca, y_mn_train, X_mn_test_pca, y_mn_test, k=1)
plotScatterTT(X_mn_train_pca, y_mn_train, X_mn_test_pca, y_mn_test, wrong)

In [None]:
# MetricEvolution with a 3-NN fitness and t__n_components=2; both splits are
# transformed once, scored with 1-NN, and the misses are plotted.
nca = MetricEvolution(n_gens=500, fitnesses=[('knn', {'n_neighbors':3})], t__n_components=2)
nca.fit(X_mn_train, y_mn_train)
X_mn_train_evo = nca.transform(X_mn_train)
X_mn_test_evo = nca.transform(X_mn_test)
wrong = evaluateKnn(X_mn_train_evo, y_mn_train, X_mn_test_evo, y_mn_test, k=1)
plotScatterTT(X_mn_train_evo, y_mn_train, X_mn_test_evo, y_mn_test, wrong)

In [None]:
# Same experiment with t__n_components=3; both splits are transformed once,
# scored with 1-NN, and the misses are plotted.
nca = MetricEvolution(n_gens=500, fitnesses=[('knn', {'n_neighbors':3})], t__n_components=3)
nca.fit(X_mn_train, y_mn_train)
X_mn_train_evo = nca.transform(X_mn_train)
X_mn_test_evo = nca.transform(X_mn_test)
wrong = evaluateKnn(X_mn_train_evo, y_mn_train, X_mn_test_evo, y_mn_test, k=1)
plotScatterTT(X_mn_train_evo, y_mn_train, X_mn_test_evo, y_mn_test, wrong)

Normalized data
=====

In [None]:
# Column-wise z-normalization using training-split statistics for both splits.
mn_mean = X_mn_train.mean(axis=0)
mn_std = X_mn_train.std(axis=0)
X_mn_train_norm = (X_mn_train - mn_mean) / mn_std
X_mn_test_norm = (X_mn_test - mn_mean) / mn_std

In [None]:
# 10-NN on the z-normalized clusters, then a scatter of the training split.
evaluateKnn(
    X_train=X_mn_train_norm, y_train=y_mn_train,
    X_test=X_mn_test_norm, y_test=y_mn_test,
    k=10,
)
plotScatter(X=X_mn_train_norm, y=y_mn_train)

In [None]:
# Two principal components of the normalized training split, then 1-NN in the
# reduced space and a plot of the misses.
pca = PCA(n_components=2)
pca.fit(X_mn_train_norm, y_mn_train)
X_mn_train_norm_pca = pca.transform(X_mn_train_norm)
X_mn_test_norm_pca = pca.transform(X_mn_test_norm)
wrong = evaluateKnn(X_mn_train_norm_pca, y_mn_train, X_mn_test_norm_pca, y_mn_test, k=1)
plotScatterTT(X_mn_train_norm_pca, y_mn_train, X_mn_test_norm_pca, y_mn_test, wrong)

LMNN
=====

In [None]:
# Fit LMNN on the raw synthetic clusters (rebinds the notebook-level `lmnn`)
# and print its learned matrix L.
lmnn = LMNN(k=5, learn_rate=1e-7, max_iter=500, verbose=False, )
lmnn.fit(X_mn_train, y_mn_train)
print(lmnn.L)

In [None]:
# Project both splits once with the fitted LMNN model, score with 2-NN, then
# plot the training split alone and train+test with the misses enlarged.
X_mn_train_lmnn = lmnn.transform(X_mn_train)
X_mn_test_lmnn = lmnn.transform(X_mn_test)
wrong = evaluateKnn(X_mn_train_lmnn, y_mn_train, X_mn_test_lmnn, y_mn_test, k=2)
plotScatter(X_mn_train_lmnn, y_mn_train)
plotScatterTT(X_mn_train_lmnn, y_mn_train, X_mn_test_lmnn, y_mn_test, wrong)

Normalized + LMNN
=====

In [None]:
# Fit LMNN on the z-normalized synthetic clusters and print ITS matrix L.
# (The cell used to print lmnn.L, i.e. the model fitted on the raw data.)
lmnn_norm = LMNN(k=7, learn_rate=1e-5, max_iter=1000, verbose=False)
lmnn_norm.fit(X_mn_train_norm, y_mn_train)
print(lmnn_norm.L)

In [None]:
# Project the normalized splits once with lmnn_norm, then score (k=30) and plot.
X_mn_train_norm_lmnn = lmnn_norm.transform(X_mn_train_norm)
X_mn_test_norm_lmnn = lmnn_norm.transform(X_mn_test_norm)
evaluateKnn(X_mn_train_norm_lmnn, y_mn_train, X_mn_test_norm_lmnn, y_mn_test, k=30)
plotScatter(X_mn_train_norm_lmnn, y_mn_train)

NCA
=====

In [None]:
# Fit NCA (max_iter=100) on the raw synthetic clusters; rebinds the
# notebook-level `nca`.
nca = NCA(max_iter=100)
nca.fit(X_mn_train, y_mn_train)

In [None]:
# Project both splits once with the fitted NCA model, then score (k=7) and plot.
X_mn_train_nca = nca.transform(X_mn_train)
X_mn_test_nca = nca.transform(X_mn_test)
evaluateKnn(X_mn_train_nca, y_mn_train, X_mn_test_nca, y_mn_test, k=7)
plotScatter(X_mn_train_nca, y_mn_train)

Normalized + NCA
=====

In [None]:
# Fit NCA with its default settings on the z-normalized synthetic clusters.
nca_norm = NCA()
nca_norm.fit(X_mn_train_norm, y_mn_train)

In [None]:
# Project the normalized splits once with nca_norm, then score (k=15) and plot.
X_mn_train_norm_nca = nca_norm.transform(X_mn_train_norm)
X_mn_test_norm_nca = nca_norm.transform(X_mn_test_norm)
evaluateKnn(X_mn_train_norm_nca, y_mn_train, X_mn_test_norm_nca, y_mn_test, k=15)
plotScatter(X_mn_train_norm_nca, y_mn_train)

Unscaled 2D data
=======

In [None]:
# Two imbalanced 2-D Gaussian classes (50 vs 500 samples) whose second
# feature has a much larger variance than the first.
Ns = [50, 500]
ns_cov = [[2, 0], [0, 100]]
X_ns = np.vstack([
    np.random.multivariate_normal([0, 0], ns_cov, Ns[0]),
    np.random.multivariate_normal([5, -15], ns_cov, Ns[1]),
])

# Class id i repeated Ns[i] times. Built in one call instead of a
# concatenate-in-a-loop whose loop variable overwrote the notebook-global N.
# dtype stays float, matching what appending to an empty float array produced.
y_ns = np.repeat(np.arange(len(Ns), dtype=float), Ns)

X_ns_train, X_ns_test, y_ns_train, y_ns_test = train_test_split(X_ns, y_ns, train_size=0.7)
print(X_ns_train.shape, y_ns_train.shape, X_ns_test.shape, y_ns_test.shape)

In [None]:
# 1-NN on the unscaled 2-D data; misclassified test points are enlarged.
wrong = evaluateKnn(
    X_train=X_ns_train, y_train=y_ns_train,
    X_test=X_ns_test, y_test=y_ns_test,
    k=1,
)
plotScatterTT(
    X=X_ns_train, y=y_ns_train,
    X_test=X_ns_test, y_test=y_ns_test,
    wrong=wrong,
)

Normalized data
=====

In [None]:
# Column-wise z-normalization using training-split statistics for both splits.
ns_mean = X_ns_train.mean(axis=0)
ns_std = X_ns_train.std(axis=0)
X_ns_train_norm = (X_ns_train - ns_mean) / ns_std
X_ns_test_norm = (X_ns_test - ns_mean) / ns_std

In [None]:
# 10-NN on the z-normalized 2-D data; misclassified test points are enlarged.
wrong = evaluateKnn(
    X_train=X_ns_train_norm, y_train=y_ns_train,
    X_test=X_ns_test_norm, y_test=y_ns_test,
    k=10,
)
plotScatterTT(
    X=X_ns_train_norm, y=y_ns_train,
    X_test=X_ns_test_norm, y_test=y_ns_test,
    wrong=wrong,
)

Unscaled data v2
=========

In [None]:
# Ns = [100,100]
# X_ns =                   np.random.multivariate_normal([0,0], [[100000000,0], [0,100]], Ns[0])
# X_ns = np.vstack( [X_ns, np.random.multivariate_normal([0,75], [[100000000,0], [0,100]], Ns[1])] )

# Ns = [100, 100, 100, 100, 100]
# X_ns =                   np.random.multivariate_normal([0,0,0], [[100000,0,0], [0,100,0], [0,0,2]], Ns[0])
# X_ns = np.vstack( [X_ns, np.random.multivariate_normal([0,10,0], [[100000,0,0], [0,100,0], [0,0,2]], Ns[1])] )
# X_ns = np.vstack( [X_ns, np.random.multivariate_normal([0,-10,0], [[100000,0,0], [0,100,0], [0,0,2]], Ns[2])] )
# X_ns = np.vstack( [X_ns, np.random.multivariate_normal([0,0,10], [[100000,0,0], [0,100, 0], [0,0,2]], Ns[3])] )
# X_ns = np.vstack( [X_ns, np.random.multivariate_normal([0,0,-10], [[100000,0,0], [0,100,0], [0,0,2]], Ns[4])] )

# Four 4-D Gaussian classes sharing one diagonal covariance whose variances
# span eight orders of magnitude (1e8, 100, 2, 1).
Ns = [100, 100, 100, 100]
ns_cov = [[100000000, 0, 0, 0], [0, 100, 0, 0], [0, 0, 2, 0], [0, 0, 0, 1]]
ns_means = [
    [10, 0, 0, 2],
    [0, 10, 0, -2],
    # [0, -10, 0, 0],  # disabled class; add a matching entry to Ns to re-enable
    [0, 0, 10, -2],
    [0, 0, -10, 2],
]

# Pair each mean with its own size. The sizes used to be indexed
# Ns[0], Ns[1], Ns[1], Ns[2], which only matched the labels below because
# every entry happens to be 100.
assert len(ns_means) == len(Ns)
X_ns = np.vstack([
    np.random.multivariate_normal(mean, ns_cov, n_samples)
    for mean, n_samples in zip(ns_means, Ns)
])

# Class id i repeated Ns[i] times, built without a loop variable that would
# overwrite the notebook-global N. dtype stays float, matching what appending
# to an empty float array produced.
y_ns = np.repeat(np.arange(len(Ns), dtype=float), Ns)

X_ns_train, X_ns_test, y_ns_train, y_ns_test = train_test_split(X_ns, y_ns, train_size=0.7, stratify=y_ns)
print(X_ns_train.shape, y_ns_train.shape, X_ns_test.shape, y_ns_test.shape)

In [None]:
# 1-NN on the unscaled 4-D data; misclassified test points are enlarged.
wrong = evaluateKnn(
    X_train=X_ns_train, y_train=y_ns_train,
    X_test=X_ns_test, y_test=y_ns_test,
    k=1,
)
plotScatterTT(
    X=X_ns_train, y=y_ns_train,
    X_test=X_ns_test, y_test=y_ns_test,
    wrong=wrong,
)

In [None]:
# Column-wise z-normalization using training-split statistics for both splits.
ns_mean = X_ns_train.mean(axis=0)
ns_std = X_ns_train.std(axis=0)
X_ns_train_norm = (X_ns_train - ns_mean) / ns_std
X_ns_test_norm = (X_ns_test - ns_mean) / ns_std

In [None]:
# Global z-normalization + 1-NN (wrong1) vs. the per-class classifier on the
# raw features (wrong2); only the first result is plotted.
wrong1 = evaluateKnn(
    X_train=X_ns_train_norm, y_train=y_ns_train,
    X_test=X_ns_test_norm, y_test=y_ns_test,
    k=1,
)
wrong2 = evaluateKnnNormPerClass(
    X_train=X_ns_train, y_train=y_ns_train,
    X_test=X_ns_test, y_test=y_ns_test,
    k=1,
)
plotScatterTT(
    X=X_ns_train_norm, y=y_ns_train,
    X_test=X_ns_test_norm, y_test=y_ns_test,
    wrong=wrong1,
)

In [None]:
# Fit LFDA (k=2, dim=2) on the unscaled 4-D data, transform both splits once,
# then score with 7-NN and plot the transformed training split.
nca = LFDA(k=2, dim=2)
nca.fit(X_ns_train, y_ns_train)
X_ns_train_lfda = nca.transform(X_ns_train)
X_ns_test_lfda = nca.transform(X_ns_test)
evaluateKnn(X_ns_train_lfda, y_ns_train, X_ns_test_lfda, y_ns_test, k=7)
plotScatter(X_ns_train_lfda, y_ns_train)

In [None]:
# Evolve a metric with a 1-NN fitness on the unscaled 4-D data and print the
# matrix L of the resulting transformer.
nca = MetricEvolution(n_gens=100, fitnesses=[('knn', {'n_neighbors':1})])
nca.fit(X_ns_train, y_ns_train)
print(nca.transformer().L)

# The bare `nca` expression that stood here was dropped: it was not the last
# line of the cell, so it never displayed anything.
# Transform each split once and reuse it for scoring (7-NN) and plotting.
X_ns_train_evo = nca.transform(X_ns_train)
X_ns_test_evo = nca.transform(X_ns_test)
evaluateKnn(X_ns_train_evo, y_ns_train, X_ns_test_evo, y_ns_test, k=7)
plotScatter(X_ns_train_evo, y_ns_train)

In [None]:
# Evolve a metric with a 7-NN fitness, using a NeuralNetworkTransformer with
# layers (4, 3, 2) and no activation as the transformer being evolved.
nca = MetricEvolution(n_gens=100, fitnesses=[('knn', {'n_neighbors':7})], transformer=NeuralNetworkTransformer(layers=(4,3,2), activation=None))
nca.fit(X_ns_train, y_ns_train)

In [None]:
# Manual 2-NN check in the evolved space: fit on the transformed training
# split and mark the test samples whose prediction is wrong.
knn = KNeighborsClassifier(n_neighbors=2)
knn.fit(nca.transform(X_ns_train), y_ns_train)
wrong = knn.predict(nca.transform(X_ns_test)) != y_ns_test

# The bare `wrong` expression that stood here was dropped: only a cell's last
# expression is displayed, so it never showed anything.
# Displayed output: the first raw test sample next to its transformed image.
X_ns_test[0], nca.transform(X_ns_test[0])

In [None]:
# Transform both splits once with the evolved model, score with 2-NN, then
# plot: training split, test split with misses enlarged, and both together.
X_ns_train_nn = nca.transform(X_ns_train)
X_ns_test_nn = nca.transform(X_ns_test)
wrong = evaluateKnn(X_ns_train_nn, y_ns_train, X_ns_test_nn, y_ns_test, k=2)
plotScatter(X_ns_train_nn, y_ns_train)
plotScatter(X_ns_test_nn, y_ns_test, wrong)
plotScatterTT(X_ns_train_nn, y_ns_train, X_ns_test_nn, y_ns_test, wrong)