In [63]:
%load_ext autoreload
%autoreload 2

import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

from texttable import Texttable
import latextable

# import sys
# sys.path.append('../MMD/')
import ml_lmnn

from sklearn import decomposition

np.random.seed(40)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [89]:
X_iris, y_iris = datasets.load_iris().data, datasets.load_iris().target
X_wine, y_wine = datasets.load_wine().data, datasets.load_wine().target

print(X_iris.shape, X_wine.shape, set(y_iris), set(y_wine))

results = {}

#dimensions to use with rbf-KPCA
rs = [2, 3, 4]

(150, 4) (178, 13) {0, 1, 2} {0, 1, 2}


In [90]:
X_iris_train, X_iris_test, y_iris_train, y_iris_test = train_test_split(X_iris, y_iris, test_size=0.3, random_state=40)
X_wine_train, X_wine_test, y_wine_train, y_wine_test = train_test_split(X_wine, y_wine, test_size=0.3, random_state=40)

NN1_accuracies = (ml_lmnn.score_1NN(X_iris_train, y_iris_train, X_iris_test, y_iris_test),
      ml_lmnn.score_1NN(X_wine_train, y_wine_train, X_wine_test, y_wine_test))

NN1_accuracies_after_lmnn = (
    ml_lmnn.score_1NN_after_lmnn(X_iris_train, y_iris_train, X_iris_test, y_iris_test, k=1),
    ml_lmnn.score_1NN_after_lmnn(X_wine_train, y_wine_train, X_wine_test, y_wine_test, k=1)
    )

dists_original_space = ml_lmnn.mmd_dists(X_iris_train, y_iris_train, X_wine_train, y_wine_train)

dists_after_lmnn = ml_lmnn.mmd_dists_after_lmnn(X_iris_train, y_iris_train, X_wine_train, y_wine_train)

#results on original space
results['os'] = {'1NN_os': NN1_accuracies, '1NN_lmnn': NN1_accuracies_after_lmnn, 
                 'mmd_os': dists_original_space, 'mmd_lmnn': dists_after_lmnn}


#results when using rbf-Kernel with different dimensions
# results['rbf'] = dict()
for r in rs:
    #transform data
    X_iris_rbf = decomposition.KernelPCA(n_components=r, kernel='rbf').fit_transform(X_iris)
    X_wine_rbf = decomposition.KernelPCA(n_components=r, kernel='rbf').fit_transform(X_wine)
    
    X_iris_train, X_iris_test, y_iris_train, y_iris_test = train_test_split(X_iris_rbf, y_iris, test_size=0.3, random_state=40)
    X_wine_train, X_wine_test, y_wine_train, y_wine_test = train_test_split(X_wine_rbf, y_wine, test_size=0.3, random_state=40)
    
    NN1_accuracies = (ml_lmnn.score_1NN(X_iris_train, y_iris_train, X_iris_test, y_iris_test),
      ml_lmnn.score_1NN(X_wine_train, y_wine_train, X_wine_test, y_wine_test))

    NN1_accuracies_after_lmnn = (
        ml_lmnn.score_1NN_after_lmnn(X_iris_train, y_iris_train, X_iris_test, y_iris_test, k=1),
        ml_lmnn.score_1NN_after_lmnn(X_wine_train, y_wine_train, X_wine_test, y_wine_test, k=1)
        )

    dists_original_space = ml_lmnn.mmd_dists(X_iris_train, y_iris_train, X_wine_train, y_wine_train)
    
    dists_after_lmnn = ml_lmnn.mmd_dists_after_lmnn(X_iris_train, y_iris_train, X_wine_train, y_wine_train)
    
    results[r] =  {
        '1NN_os': NN1_accuracies, '1NN_lmnn': NN1_accuracies_after_lmnn, 
        'mmd_os': dists_original_space, 'mmd_lmnn': dists_after_lmnn}


In [95]:
rows = [
        ['Datasets', '1NN os', '1NN after lmnn'],
        ]

dataset_names = [('Iris', 'Wine')]
for r in rs:
    dataset_names.append((f'Iris rbf-KPCA ({r})', f'Wine rbf-KPCA ({r})'))
    

for counter, i in enumerate(results.keys()):
    row1 = [dataset_names[counter][0], str(round(results[i]['1NN_os'][0], 4)), str(round(results[i]['1NN_lmnn'][0], 4))]
    row2 = [dataset_names[counter][1], str(round(results[i]['1NN_os'][1], 4)), str(round(results[i]['1NN_lmnn'][1], 4))]

    rows.append(row1)
    rows.append(row2)

table = Texttable(max_width=0)
table.set_cols_align(["c"] * len(rows[0]))
table.set_deco(Texttable.HEADER | Texttable.VLINES | Texttable.HLINES)
table.set_cols_dtype(["t"]*len(rows[0]))

table.add_rows(rows=rows)
print(table.draw())

    Datasets      | 1NN os | 1NN after lmnn
      Iris        |  1.0   |     0.4667    
------------------+--------+---------------
      Wine        | 0.6481 |     0.3704    
------------------+--------+---------------
Iris rbf-KPCA (2) | 0.8889 |     0.5556    
------------------+--------+---------------
Wine rbf-KPCA (2) | 0.3704 |     0.3704    
------------------+--------+---------------
Iris rbf-KPCA (3) | 0.9111 |     0.3333    
------------------+--------+---------------
Wine rbf-KPCA (3) | 0.3704 |     0.3333    
------------------+--------+---------------
Iris rbf-KPCA (4) | 0.9778 |     0.6889    
------------------+--------+---------------
Wine rbf-KPCA (4) | 0.3704 |     0.3704    


In [97]:
print(latextable.draw_latex(table, caption="1NN accuracy comparison between datasets and their KPCA transformation before and after LMNN projection."))

\begin{table}
	\begin{center}
		\begin{tabular}{c|c|c}
			Datasets & 1NN os & 1NN after lmnn \\
			\hline
			Iris & 1.0 & 0.4667 \\
			\hline
			Wine & 0.6481 & 0.3704 \\
			\hline
			Iris rbf-KPCA (2) & 0.8889 & 0.5556 \\
			\hline
			Wine rbf-KPCA (2) & 0.3704 & 0.3704 \\
			\hline
			Iris rbf-KPCA (3) & 0.9111 & 0.3333 \\
			\hline
			Wine rbf-KPCA (3) & 0.3704 & 0.3333 \\
			\hline
			Iris rbf-KPCA (4) & 0.9778 & 0.6889 \\
			\hline
			Wine rbf-KPCA (4) & 0.3704 & 0.3704 \\
		\end{tabular}
	\end{center}
	\caption{1NN accuracy comparison between datasets and their KPCA transformation before and after LMNN projection.}
\end{table}


In [98]:
rows = [
        ['Datasets', '0-1 os', '0-2 os', '1-2 os', '0-1 lmnn', '0-2 lmnn', '1-2 lmnn'],
        ]

dataset_names = [('Iris', 'Wine')]
for r in rs:
    dataset_names.append((f'Iris rbf-KPCA ({r})', f'Wine rbf-KPCA ({r})'))
    

for counter, i in enumerate(results.keys()):
    row1 = [dataset_names[counter][0],
            results[i]['mmd_os']['iris'][0][0], 
            results[i]['mmd_os']['iris'][0][1],
            results[i]['mmd_os']['iris'][1][1],
            
            results[i]['mmd_lmnn']['iris'][0][0], 
            results[i]['mmd_lmnn']['iris'][0][1],
            results[i]['mmd_lmnn']['iris'][1][1],
        ]
    
    row2 = [dataset_names[counter][1],
            results[i]['mmd_os']['wine'][0][0], 
            results[i]['mmd_os']['wine'][0][1],
            results[i]['mmd_os']['wine'][1][1],
            
            results[i]['mmd_lmnn']['wine'][0][0], 
            results[i]['mmd_lmnn']['wine'][0][1],
            results[i]['mmd_lmnn']['wine'][1][1],
        ]

    rows.append(row1)
    rows.append(row2)

table = Texttable(max_width=0)
table.set_cols_align(["c"] * len(rows[0]))
table.set_deco(Texttable.HEADER | Texttable.VLINES | Texttable.HLINES)
table.set_cols_dtype(["t"]*len(rows[0]))

table.add_rows(rows=rows)
print(table.draw())

    Datasets      |        0-1 os         |        0-2 os        |        1-2 os        |       0-1 lmnn       |       0-2 lmnn       |       1-2 lmnn       
      Iris        |  10.489003348071265   |  22.253143598615935  |  2.333943783068804   |  46.991613121901935  |  95.88935532823876   |   9.027050976659723  
------------------+-----------------------+----------------------+----------------------+----------------------+----------------------+----------------------
      Wine        |  361161.67985665216   |  213240.27073315647  |  19412.325194987352  |  184.36903704034614  |  83.97250098322752   |  26.292380984508327  
------------------+-----------------------+----------------------+----------------------+----------------------+----------------------+----------------------
Iris rbf-KPCA (2) |  1.4540678188333527   |  1.5359100554085456  | 0.42513871539000014  |  83.36610878864359   |  83.34799940703058   |   8.866836843941945  
------------------+-----------------------+---------

In [100]:
print(latextable.draw_latex(table, caption="MMD distance comparison between different labels (0-1) of datasets in original space (os) and after LMNN projection of train-datapoints only"))

\begin{table}
	\begin{center}
		\begin{tabular}{c|c|c|c|c|c|c}
			Datasets & 0-1 os & 0-2 os & 1-2 os & 0-1 lmnn & 0-2 lmnn & 1-2 lmnn \\
			\hline
			Iris & 10.489003348071265 & 22.253143598615935 & 2.333943783068804 & 46.991613121901935 & 95.88935532823876 & 9.027050976659723 \\
			\hline
			Wine & 361161.67985665216 & 213240.27073315647 & 19412.325194987352 & 184.36903704034614 & 83.97250098322752 & 26.292380984508327 \\
			\hline
			Iris rbf-KPCA (2) & 1.4540678188333527 & 1.5359100554085456 & 0.42513871539000014 & 83.36610878864359 & 83.34799940703058 & 8.866836843941945 \\
			\hline
			Wine rbf-KPCA (2) & 0.0014203300885756503 & 0.004227686720609728 & 0.004800002196953085 & 1.4257339729074243 & 4.336984612555827 & 1.1458078063897261 \\
			\hline
			Iris rbf-KPCA (3) & 1.4586554858084195 & 1.5423288502572015 & 0.42529210517914984 & 71.27702220769993 & 7.229123349796222 & 46.00290846756947 \\
			\hline
			Wine rbf-KPCA (3) & 0.0027805382079268966 & 0.00530392819481051 & 0.004816606

In [101]:
results

{'os': {'1NN_os': (1.0, 0.6481481481481481),
  '1NN_lmnn': (0.4666666666666667, 0.37037037037037035),
  'mmd_os': {'iris': {0: [10.489003348071265, 22.253143598615935],
    1: [10.489003348071265, 2.333943783068804],
    2: [22.25314359861592, 2.333943783068804]},
   'wine': {0: [361161.67985665216, 213240.27073315647],
    1: [361161.67985665216, 19412.325194987352],
    2: [213240.27073315647, 19412.325194987236]}},
  'mmd_lmnn': {'iris': {0: [46.991613121901935, 95.88935532823876],
    1: [46.99161312190199, 9.027050976659723],
    2: [95.88935532823871, 9.02705097665978]},
   'wine': {0: [184.36903704034614, 83.97250098322752],
    1: [184.36903704034614, 26.292380984508327],
    2: [83.97250098322752, 26.292380984508213]}}},
 2: {'1NN_os': (0.8888888888888888, 0.37037037037037035),
  '1NN_lmnn': (0.5555555555555556, 0.37037037037037035),
  'mmd_os': {'iris': {0: [1.4540678188333527, 1.5359100554085456],
    1: [1.4540678188333527, 0.42513871539000014],
    2: [1.5359100554085456, 