In [None]:
import array
import random
import numpy as np
import pandas as pd
import glob
import os.path
import sys
from datetime import datetime

from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Imputer, StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support, mean_squared_error

from metric_learn import LMNN, NCA, LFDA, CMAES

from deap import algorithms
from deap import base
from deap import benchmarks
from deap import cma
from deap import creator
from deap import tools

from concurrent.futures import ThreadPoolExecutor 

In [None]:
# Load the dataset from CSV. The commented-out line is an alternative input file;
# swap it with the active read_csv call to run the notebook on that data instead.
# data = pd.read_csv('datasets/breast-cancer-wisconsin.csv', sep=',', skiprows=1, header=0)
# skiprows=1 drops the file's first line; header=0 takes column names from the next line.
data = pd.read_csv('datasets/ionosphere.csv', sep=',', skiprows=1, header=0)

# Labels are the 'class' column (kept as a pandas Series); all remaining columns
# become the feature matrix as a plain NumPy array.
y = data['class']
X = data.drop(['class'], axis=1).values

# Problem size: number of feature columns. Later cells use it as the length of a
# diagonal individual.
N=X.shape[1]

# Cell output: shapes of the feature matrix and the label vector.
X.shape, y.shape

In [None]:
# Hold out 10% of the rows for testing; the fixed random_state keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.10, random_state=47)
# Cell output: shapes of the four resulting pieces.
X_train.shape, y_train.shape, X_test.shape, y_test.shape

In [None]:
# Replace NaN entries with the column mean (strategy='mean', axis=0). The means are
# fitted on the training rows only and then applied unchanged to the test rows.
# NOTE(review): sklearn.preprocessing.Imputer was removed in scikit-learn 0.22 in favour
# of sklearn.impute.SimpleImputer — confirm which scikit-learn version this notebook targets.
imputer = Imputer(missing_values='NaN', strategy='mean', axis=0, verbose=0, copy=False)
X_train = imputer.fit_transform(X_train)
X_test = imputer.transform(X_test)

In [None]:
# Baseline: 1-nearest-neighbour classifier on the untransformed features.
knn = KNeighborsClassifier(n_neighbors=1, n_jobs=-1)
knn.fit(X_train, y_train)
# Cell output: accuracy on the held-out test rows.
knn.score(X_test, y_test)

In [None]:
class ESMetric():
    """Linear metric defined by a transformation matrix ``L``.

    The matrix is built from a flat sequence of coefficients and applied to
    samples as ``X . L^T``.
    """

    def __init__(self, L, N, isFull=False):
        """Build the (N, N) transformation matrix from a flat coefficient sequence.

        L: sequence of coefficients. With ``N`` entries it is placed on the
           diagonal; with ``N**2`` entries it is reshaped row-major to (N, N).
           The diagonal interpretation is checked first, so it wins when N == 1.
        N: number of features (side length of the matrix).
        isFull: accepted for interface compatibility; currently unused.

        Raises ValueError when ``len(L)`` is neither ``N`` nor ``N**2``.
        """
        self.params = {}
        self.N = N
        size = len(L)
        if N == size:
            self.L = np.diag(L)
        elif N**2 == size:
            self.L = np.reshape(L, (N, N))
        else:
            # Previously this raised the undefined name ``Error``, which surfaced
            # as a NameError instead of reporting the size mismatch.
            raise ValueError('Invalid size of N')

    def transform(self, X):
        """Return ``X`` projected by the metric, i.e. ``X . L^T``."""
        return X.dot(self.transformer().T)

    def transformer(self):
        """Return the (N, N) transformation matrix ``L``."""
        return self.L

In [None]:
from __future__ import division, absolute_import
import numpy as np
import scipy
from six.moves import xrange
from sklearn.metrics import pairwise_distances

# from .base_metric import BaseMetricLearner


# class LFDA(BaseMetricLearner):
class Deap():
    """Metric learner that searches for a linear transformation with DEAP's CMA-ES.

    Each candidate (a flat list of matrix coefficients) is scored by the accuracy
    of a k-nearest-neighbours classifier trained and evaluated on a random split
    of the data passed to ``fit``.
    """

    def __init__(self, metric='diagonal'):
        """
         metric: 'diagonal' to learn one coefficient per feature, or 'full' to
                 learn a complete (d, d) matrix.

        Raises ValueError for any other value.
        """
        if metric not in ('diagonal', 'full'):
            raise ValueError('Invalid metric: %r' % metric)

        self.params = {
            'metric': metric,
        }

    def transform(self, X):
        """Return ``X . L^T`` using the matrix stored by ``fit``."""
        return X.dot(self.transformer().T)

    def transformer(self):
        """Return the learned transformation matrix (``self.L``, set by ``fit``)."""
        return self.L

    def knnEvaluationBuilder(self, X, y, N):
        """Build the fitness function used by the evolutionary search.

         X: (n, d) samples
         y: (n,) class labels
         N: number of features; must equal ``X.shape[1]``

        Returns a callable that maps an individual to a one-element list holding
        its kNN accuracy, which is the shape DEAP expects for fitness values.
        """
        assert N == X.shape[1]

        def knnEvaluation(individual):
            es = ESMetric(individual, N)
            # No random_state here: every evaluation draws a fresh 67/33 split, so
            # the fitness of a given individual is noisy by construction.
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
            knn = KNeighborsClassifier(n_neighbors=8, n_jobs=-1, weights='uniform')
            knn.fit(es.transform(X_train), y_train)
            score = knn.score(es.transform(X_test), y_test)

            # Earlier experiments subtracted a penalty from the score, either
            # mean_squared_error(individual, np.ones(N)) or
            # np.sum(np.absolute(individual)). Those statements sat after this
            # return and never ran, so they are omitted.
            return [score]

        return knnEvaluation

    def fit(self, X, Y):
        '''
         X: (n, d) array-like of samples
         Y: (n,) array-like of class labels

        Runs 250 generations of CMA-ES and stores the best individual as the
        transformation matrix ``self.L``. Also sets ``self.N``, ``self.hof`` and
        ``self.logbook``. Returns ``self``.
        '''
        self.N = X.shape[1]

        # Despite the name, weights=(1.0,) makes DEAP maximise this fitness.
        # The name is kept so existing references to creator.FitnessMin still work.
        # NOTE(review): DEAP is expected to warn when these classes are created a
        # second time (e.g. on a repeated fit) — confirm and guard if it is noisy.
        creator.create("FitnessMin", base.Fitness, weights=(1.0,))
        creator.create("Individual", list, fitness=creator.FitnessMin)

        toolbox = base.Toolbox()
        toolbox.register("evaluate", self.knnEvaluationBuilder(X, Y, self.N))

        # The cma module uses the numpy random number generator.
        # NOTE(review): evaluations run on a thread pool and draw unseeded splits,
        # so runs are presumably not bit-for-bit reproducible despite this seed.
        np.random.seed(128)

        # One coefficient per feature for a diagonal metric, d*d for a full one.
        if self.params['metric'] == 'diagonal':
            sizeOfIndividual = self.N
        else:
            sizeOfIndividual = self.N**2

        # The search starts from the all-zeros vector with step size sigma=10.0.
        strategy = cma.Strategy(centroid=[0.0]*sizeOfIndividual, sigma=10.0) # lambda_=20*N
        toolbox.register("generate", strategy.generate, creator.Individual)
        toolbox.register("update", strategy.update)

        self.hof = tools.HallOfFame(1)
        stats = tools.Statistics(lambda ind: ind.fitness.values)
        stats.register("avg", np.mean)
        stats.register("std", np.std)
        stats.register("min", np.min)
        stats.register("max", np.max)

        # Scope the pool to the search so its worker threads are released when the
        # run ends; previously the executor was created and never shut down.
        with ThreadPoolExecutor(max_workers=None) as executor:
            toolbox.register("map", executor.map)
            _, self.logbook = algorithms.eaGenerateUpdate(
                toolbox, ngen=250, stats=stats, halloffame=self.hof)

        if self.params['metric'] == 'diagonal':
            self.L = np.diag(self.hof[0])
        else:
            # Use the instance's own feature count: this used to read the notebook
            # global ``N``, which may be undefined or differ from X.shape[1].
            self.L = np.reshape(self.hof[0], (self.N, self.N))
        return self

    def fit_transform(self, X, Y):
        """Fit on ``(X, Y)`` and return ``X`` projected by the learned matrix."""
        self.fit(X, Y)
        return self.transform(X)

In [None]:
# Re-create the train/test split with the same arguments and random_state as the
# earlier split cell; this replaces the imputed arrays with freshly split ones.
# NOTE(review): appears to duplicate the earlier cell — confirm it is still needed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.10, random_state=47)
# Cell output: shapes of the four resulting pieces.
X_train.shape, y_train.shape, X_test.shape, y_test.shape

In [None]:
# Mean-impute NaN entries again after the re-split: column means are fitted on the
# training rows and reused for the test rows.
# NOTE(review): sklearn.preprocessing.Imputer was removed in scikit-learn 0.22 in favour
# of sklearn.impute.SimpleImputer — confirm which scikit-learn version this notebook targets.
imputer = Imputer(missing_values='NaN', strategy='mean', axis=0, verbose=0, copy=False)
X_train = imputer.fit_transform(X_train)
X_test = imputer.transform(X_test)

In [None]:
# Learn a diagonal metric with the CMAES learner imported from metric_learn
# (this is not the Deap class defined in this notebook), then project both splits.
# The learner is fitted on the training rows only.
evo = CMAES(metric='diagonal', n_gen=25)
X_train_evo = evo.fit_transform(X_train, y_train)
X_test_evo = evo.transform(X_test)

In [None]:
# Cell output: whatever the fitted learner's transformer() returns
# (presumably the learned transformation matrix — confirm against metric_learn).
evo.transformer()

In [None]:
# 4-NN classifier on the features projected by the learned metric.
knn = KNeighborsClassifier(n_neighbors=4, n_jobs=-1, weights='uniform')
knn.fit(X_train_evo, y_train)
# Cell output: (test accuracy, train accuracy, fitness of the best individual).
# NOTE(review): knnEvaluation is defined in the NEXT cell, so on a fresh kernel run
# top-to-bottom this line raises NameError; it only works after out-of-order execution.
# NOTE(review): assumes the CMAES learner exposes a `hof` attribute — confirm.
knn.score(X_test_evo, y_test), knn.score(X_train_evo, y_train), knnEvaluation(evo.hof[0])

In [None]:
def knnEvaluation(ind):
    """Score a candidate metric on the notebook's fixed train/test split.

    ind: flat sequence of matrix coefficients; ESMetric turns it into a
         diagonal (length N) or full (length N**2) transformation.

    Returns a one-element list holding the accuracy of an 8-NN classifier that
    is trained on the projected ``X_train`` and scored on the projected
    ``X_test``. DEAP expects fitness values as a sequence, hence the list.

    Reads the notebook globals ``N``, ``X_train``, ``X_test``, ``y_train`` and
    ``y_test``.

    NOTE(review): the fitness is measured on the held-out test rows, so any
    search driven by this function is tuned on the test set — confirm that is
    intended before reporting test accuracy.
    """
    es = ESMetric(ind, N)
    knn = KNeighborsClassifier(n_neighbors=8, n_jobs=-1)
    knn.fit(es.transform(X_train), y_train)

    # Two penalised variants used to follow this return and never ran:
    # score - mean_squared_error(ind, np.ones(N)) and score - np.sum(np.absolute(ind)).
    return [knn.score(es.transform(X_test), y_test)]

In [None]:
# Register the DEAP fitness and individual types.
# Despite the name "FitnessMin", weights=(1.0,) makes DEAP maximise the fitness value.
creator.create("FitnessMin", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)

toolbox = base.Toolbox()
# Fitness function: the notebook-level knnEvaluation defined in the previous cell.
toolbox.register("evaluate", knnEvaluation)
# Evaluate individuals through a thread pool's map.
# NOTE(review): no shutdown() call for this executor appears in the notebook.
toolbox.register("map", ThreadPoolExecutor(max_workers=None).map)

In [None]:
# The cma module uses the numpy random number generator
np.random.seed(128)

# CMA-ES strategy for a diagonal metric: the search starts from a centroid of
# N ones with step size sigma=5.0.
# See http://www.lri.fr/~hansen/cmaes_inmatlab.html for background on CMA-ES.
strategy = cma.Strategy(centroid=[1.0]*N, sigma=5.0) # lambda_=20*N
toolbox.register("generate", strategy.generate, creator.Individual)
toolbox.register("update", strategy.update)

# Keep only the single best individual seen during the run.
hof = tools.HallOfFame(1)
# Per-generation statistics over the individuals' fitness values; only the mean and
# the maximum are recorded (std/min are left disabled below).
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("avg", np.mean)
# stats.register("std", numpy.std)
# stats.register("min", numpy.min)
stats.register("max", np.max)
#logger = tools.EvolutionLogger(stats.functions.keys())

In [None]:
# Run the generate/update loop for 50 generations with the toolbox configured above.
# `hof` is passed in as the hall of fame; `pop` and `logbook` are the two values
# returned by eaGenerateUpdate and are displayed in the last two cells.
pop, logbook = algorithms.eaGenerateUpdate(toolbox, ngen=50, stats=stats, halloffame=hof)

In [None]:
# print "Best individual is %s, %s" % (hof[0], hof[0].fitness.values)
# Only the final expression of a cell is rendered. The next two lines are evaluated
# (including a full knnEvaluation call on the absolute values of the best individual)
# but their results are discarded.
hof[0].fitness.values[0]
hof[0], knnEvaluation(np.abs(hof[0]))
# Cell output: (fitness of the best individual re-evaluated now, the best individual).
knnEvaluation(hof[0]), hof[0]

In [None]:
# Cell output: the population returned by eaGenerateUpdate in the run cell.
pop

In [None]:
# Cell output: the logbook returned by eaGenerateUpdate (statistics registered on `stats`).
logbook