In [None]:
# Change the working directory to the parent directory; the relative paths used in later
# cells ('models/...', 'datasets/...', 'playground/ga_runs') are presumably meant to be
# resolved from there.
# NOTE(review): this is not idempotent - re-running the cell moves up one more level.
%cd ..

In [None]:
from genetals.core import *
from genetals.callbacks import GAStatus, MultiObjectiveReport
from genetals.operators import TwoPointXover, BiasedMutation, ShuffleOperator, NSGAOperator
from genetals.initializers import RandomStdInit
from evgena.datasets import Dataset, images_to_BHWC
from evgena.models import Model, TfModel
from evgena.metrics import SSIM, mse
# from evgena.genetals import Im
from evgena.utils.large_files import maybe_download

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

In [None]:
class BestImgReport(CallbackBase):
    """Callback that displays one individual of the captured population as an image.

    Only a single image artist is kept on the axes. Calling ``imshow`` again without
    removing the previous image stacks a new artist on every call, which makes each
    redraw of the figure progressively slower.

    Args:
        ax: axes to draw into; when ``None`` a new figure with a single axes is created.
        best_picker: callable receiving the population's fitnesses and returning the index
            of the individual to show; defaults to ``fitness.argmax()``.
    """

    def __init__(self, ax: plt.Axes = None, best_picker = None):
        super(BestImgReport, self).__init__()
        
        if ax is None:
            self._fig, self._ax = plt.subplots(1, 1)
        else:
            self._fig, self._ax = ax.figure, ax
        
        self._best_picker = (lambda fitness: fitness.argmax()) if (best_picker is None) else best_picker
        # image artist drawn by the previous call; None until the first call
        self._image = None
        
    def __call__(self, ga: GeneticAlgorithm) -> None:
        """Draw the picked individual of ``ga.capture(-1)``, replacing the previous image."""
        # presumably the most recent population - TODO confirm the meaning of capture(-1)
        offspring = ga.capture(-1)
        best_i = self._best_picker(offspring.fitnesses)
        
        # drop the old artist so the axes never holds more than one image
        if self._image is not None:
            self._image.remove()
        
        self._image = self._ax.imshow(offspring.individuals[best_i], cmap='plasma', vmin=-1, vmax=1)

In [None]:
class MultiSigmaRandomInit(InitializerBase):
    """Initializer producing uniform noise in [-1, 1), scaled per individual and shifted by ``mu``.

    The scales in ``sigmas`` are handed out cyclically: individual ``i`` is multiplied by
    ``sigmas[i % len(sigmas)]``.

    Args:
        individual_shape: shape of a single individual (any sequence of ints).
        sigmas: sequence of scale factors cycled over the population.
        mu: offset added to every scaled sample (scalar or broadcastable array).
    """

    def __init__(self, individual_shape, sigmas = (1,), mu: np.ndarray = 0):
        super(MultiSigmaRandomInit, self).__init__()

        self._mu = mu
        self._sigmas = sigmas
        self._individual_shape = individual_shape

    def __call__(self, population_size: int, *args, **kwargs) -> np.ndarray:
        """Return an array of shape ``(population_size,) + individual_shape``."""
        shape = tuple(self._individual_shape)
        n_sigmas = len(self._sigmas)

        # ceil(population_size / n_sigmas) repetitions, trimmed to one scale per individual
        repetitions = (population_size + n_sigmas - 1) // n_sigmas
        per_individual_scale = np.tile(self._sigmas, repetitions)[:population_size]

        # samples from [0, 1) stretched to [-1, 1)
        noise = 2 * np.random.random((population_size,) + shape) - 1

        # trailing singleton axes make the scales broadcast over each individual's dimensions
        broadcastable_scale = per_individual_scale.reshape((population_size,) + (1,) * len(shape))

        return self._mu + noise * broadcastable_scale

In [None]:
class MultiSigmaRandomNormalInit(InitializerBase):
    """Initializer producing standard-normal noise, scaled per individual and shifted by ``mu``.

    The scales in ``sigmas`` are handed out cyclically: individual ``i`` is multiplied by
    ``sigmas[i % len(sigmas)]``.

    Args:
        individual_shape: shape of a single individual (any sequence of ints).
        sigmas: sequence of scale factors cycled over the population.
        mu: offset added to every scaled sample (scalar or broadcastable array).
    """

    def __init__(self, individual_shape, sigmas = (1,), mu: np.ndarray = 0):
        super(MultiSigmaRandomNormalInit, self).__init__()

        self._mu = mu
        self._sigmas = sigmas
        self._individual_shape = individual_shape

    def __call__(self, population_size: int, *args, **kwargs) -> np.ndarray:
        """Return an array of shape ``(population_size,) + individual_shape``."""
        shape = tuple(self._individual_shape)
        n_sigmas = len(self._sigmas)

        # ceil(population_size / n_sigmas) repetitions, trimmed to one scale per individual
        repetitions = (population_size + n_sigmas - 1) // n_sigmas
        per_individual_scale = np.tile(self._sigmas, repetitions)[:population_size]

        noise = np.random.standard_normal((population_size,) + shape)

        # trailing singleton axes make the scales broadcast over each individual's dimensions
        broadcastable_scale = per_individual_scale.reshape((population_size,) + (1,) * len(shape))

        return self._mu + noise * broadcastable_scale

In [None]:
class PrePopulationInit(InitializerBase):
    """Initializer that returns an already existing population unchanged.

    Args:
        prepopulation: the individuals to start from; its length must equal the
            population size requested later.
    """

    def __init__(self, prepopulation):
        super(PrePopulationInit, self).__init__()

        self._prepopulation = prepopulation

    def __call__(self, population_size: int, *args, **kwargs) -> np.ndarray:
        """Return the stored population; asserts that it has exactly ``population_size`` members."""
        # TODO maybe tile or so
        assert len(self._prepopulation) == population_size, 'Wrong pop size'

        return self._prepopulation

In [None]:
class ImageAugmentation:
    """TensorFlow graph that adds every augmentation to every base image.

    Both inputs are fed as rank-4, single-channel float32 batches. Augmentations are
    bilinearly resized to the height/width of the base images, given an extra axis at
    position 1 and added to the base images, so the sum broadcasts to one augmented
    image per (augmentation, base image) pair. The result is clipped to [0.0, 1.1].
    """

    def __init__(self):
        graph = tf.Graph()
        self.session = tf.Session(graph=graph)

        with graph.as_default():
            self._build()

    def _build(self):
        """Create the input placeholders and the augmentation op in the current default graph."""
        # input placeholders
        self.augmentations = tf.placeholder(tf.float32, [None, None, None, 1], name='augmentations')
        self.base_images = tf.placeholder(tf.float32, [None, None, None, 1], name='base_images')  # TODO link dimensions??

        # stretch the augmentations to the spatial size of the base images
        target_size = tf.shape(self.base_images)[1:3]
        stretched = tf.image.resize_images(
            self.augmentations, target_size,
            method=tf.image.ResizeMethod.BILINEAR, align_corners=True
        )

        # the extra axis pairs each augmentation with each base image during the addition
        combined = self.base_images + tf.expand_dims(stretched, 1)

        # NOTE(review): the upper clip bound is 1.1 - confirm this is not meant to be 1.0
        self.augmented_images = tf.clip_by_value(combined, 0.0, 1.1)

    def __call__(self, augmentations, base_images):
        """Run the graph and return the augmented images for all pairs of inputs.

        Both arguments are passed through ``images_to_BHWC`` first (presumably a conversion
        to batch-height-width-channel layout - TODO confirm).
        """
        feed = {
            self.augmentations: images_to_BHWC(augmentations),
            self.base_images: images_to_BHWC(base_images),
        }

        return self.session.run(self.augmented_images, feed_dict=feed)


# shared instance used by the objective function and the evaluation cells
augment_images = ImageAugmentation()

In [None]:
class Images2LabelObjectiveFnc(ObjectiveFncBase):
    """Two-objective function rating augmentations on a rotating sample of source images.

    Every individual is applied (through the module-level ``augment_images``) to each image
    of the current sample. Per individual, two averages over the sample are returned:
    the model output for ``target_label`` and the similarity between augmented and
    original images.

    The sample is renewed gradually: on every call each sample slot's time-to-live is
    multiplied by ``sample_ttl`` and the slot is replaced by the next source image when its
    ttl falls below a uniform random number.

    Args:
        model: callable model; its output is indexed as ``[:, target_label]``.
        similarity_measure: callable taking (augmented images, original images), both in
            batch shape, and returning one value per image pair.
        target_label: column of the model output to average.
        source_images: images the sample is drawn from.
        shuffle: whether the source images are visited in random order.
        sample_size: number of source images evaluated per call.
        sample_ttl: multiplicative decay applied to every sample's ttl on each call.

    Raises:
        ValueError: if ``sample_size`` exceeds the number of source images.
    """

    def __init__(
        self, model: Model, similarity_measure: Callable[[np.ndarray, np.ndarray], np.ndarray],
        target_label: int, source_images: np.ndarray, shuffle: bool = True,
        sample_size: int = 64, sample_ttl: float = 0.9):
        super(Images2LabelObjectiveFnc, self).__init__()

        # the initial sample takes the first `sample_size` positions of the source index,
        # so a larger sample could only fail later with an IndexError
        if sample_size > len(source_images):
            raise ValueError(
                'sample_size ({}) exceeds number of source images ({})'.format(sample_size, len(source_images))
            )

        self._metrics = similarity_measure
        self._model = model
        self._target_label = target_label
        self._source_images = source_images
        self._sample_size = sample_size
        self._sample_ttl = sample_ttl
        self._shuffle_source = shuffle

        # order in which the source images are visited
        if self._shuffle_source:
            self._source_index = np.random.permutation(len(self._source_images))
        else:
            self._source_index = np.arange(len(self._source_images))

        # each sample slot holds a position in `_source_index` and its remaining ttl
        self._samples = np.recarray((self._sample_size,), dtype=[('index', np.int32), ('ttl', np.float32)])
        self._samples.index = np.arange(self._sample_size)
        self._samples.ttl = 1

        # next not yet used position in `_source_index`
        self._source_i = self._sample_size

    def _refresh_samples(self) -> None:
        """Age all sample slots and refill the expired ones with the next source positions."""
        self._samples.ttl *= self._sample_ttl
        death_mask = self._samples.ttl < np.random.random(len(self._samples))

        u_source_i = self._source_i + np.sum(death_mask)
        if u_source_i > len(self._source_images):
            # ran past the end of the source: take the remaining tail and wrap around
            u_source_i -= len(self._source_images)
            babies = np.concatenate((np.arange(self._source_i, len(self._source_images)), np.arange(u_source_i)))
            # start a new pass in a fresh order, but only when shuffling was requested
            if self._shuffle_source:
                np.random.shuffle(self._source_index)
        else:
            babies = np.arange(self._source_i, u_source_i)
        self._source_i = u_source_i

        self._samples.index[death_mask] = babies
        self._samples.ttl[death_mask] = 1

    def __call__(self, individuals: np.ndarray) -> np.ndarray:
        """Return an array of shape ``(len(individuals), 2)``.

        Column 0 is the average target-label model output, column 1 the average similarity.
        """
        # fetch the images of the current sample (the refresh below affects the next call)
        images = self._source_images[self._source_index[self._samples.index]]

        # resolve ttl of samples
        self._refresh_samples()

        # augment images: one augmented image per (individual, sample image) pair
        augmented_images = augment_images(individuals, images)
        pair_shape = augmented_images.shape[:2]
        augmented_images_batch_shaped = augmented_images.reshape(-1, *augmented_images.shape[2:])

        # originals repeated once per individual so they line up with the augmented batch
        originals = np.expand_dims(images, 0).repeat(len(individuals), axis=0).reshape(augmented_images_batch_shaped.shape)

        # for each individual compute the similarity and the target-label output on every sample image
        norms = self._metrics(augmented_images_batch_shaped, originals).reshape(pair_shape)
        # use the model given to the constructor, not a notebook-level global
        logits = self._model(augmented_images_batch_shaped)[:, self._target_label].reshape(pair_shape)

        avg_norms = np.average(norms, axis=-1)
        avg_logits = np.average(logits, axis=-1)

        # create array by merging columns
        return np.stack((avg_logits, avg_norms), axis=-1)

In [None]:
# Model that the objective function and the evaluation cells query for class scores.
# The second and third arguments are presumably the names of the input and output
# end points inside the stored model - TODO confirm against TfModel.
model = TfModel('models/fashion_mnist_cnn/model', 'end_points/images', 'end_points/scores', batch_size=8192)

# Data set built from the file returned by maybe_download (presumably fetched only when missing).
fashion_mnist = Dataset.from_nprecord(maybe_download('datasets/fashion_mnist.npz'))

In [None]:
# source_class selects the images that get augmented; target_class is the model output
# column the objective function averages.
source_class = 0
target_class = 5
# training images of the sub data set built for `source_class`
images = Dataset.sub_dataset(fashion_mnist, [source_class], do_stratified=False).train.X

In [None]:
# Operator pipeline of the GA; each operator takes the previous one as its input:
# init -> shuffle -> two-point crossover -> biased mutation, and the NSGA operator
# receives both the initial population (graph.init_op) and the mutated offspring.
graph = OperatorGraph()

select_op = ShuffleOperator(graph.init_op)
# 0.6 is presumably the crossover probability - TODO confirm against TwoPointXover
xover_op = TwoPointXover(select_op, 0.6)
mutation_op = BiasedMutation(xover_op, sigma=0.1, l_bound=-1, u_bound=1)
moea_op = NSGAOperator(graph.init_op, mutation_op)

In [None]:
# Figure shared by the reporting callbacks below.
%matplotlib notebook

fig, ax = plt.subplots(1, 1, figsize=(10,5))
# fig.tight_layout()

# x axis: logarithmic, from 1e-10 to 1
ax.set_xlim(0.0000000001, 1)
ax.set_xlabel('Target class prediction probability')
# NOTE(review): the y axis is labelled 'mean SSIM', while the GA cell below passes negative
# MSE as the similarity measure - confirm which one is intended.
ax.set_ylim(-1, 1)
ax.set_ylabel('mean SSIM')
ax.set_xscale('log')
ax.grid(axis='both')
# green vertical marker at x = 0.5
ax.vlines(0.5, -1, 1, colors='g')

# MultiObjectiveReport draws into the axes, GAStatus is given the whole figure
callbacks = [MultiObjectiveReport(ax), GAStatus(fig)] # TODO BestImgReport(ax[1], best_picker=lambda fit: np.argmax(np.sum(fit, axis=-1)))]

- uniform vs. standard **normal** distributions??
- persisting ga - pickle, joblib?
- callback for intermediate individual checking
- ssim border sensitivity??

- ga to string method/repr

- crossover based on adjacency on pareto-optimal front
- new model (similar to current/different)
- ssim not same as skimage.measure.compare_ssim

- compare with gradient based methods
- joint approach? operator based on local search on loss functions
- run GA and train classifier simultaneously, use gradients in GA operator
- try to get class representative from scratch (model reverse engineering) - feature explanations, generating "real-world" examples
- try to train inverse mapping?

- z nuly tričko
- uniformní crossover

- specifikovat a určit co ano/co ne

In [None]:
# GA over 14x14 individuals. The initializer cycles through 100 exponentially spaced
# scales from roughly 0.13 up to 1; the similarity objective is the negative MSE.
ga = GeneticAlgorithm(
#     initializer=PrePopulationInit(first_run[0].individuals),
    initializer=MultiSigmaRandomNormalInit((14, 14), (np.exp(np.linspace(3, 5, 100)) - 1) / (np.exp(5) - 1)),
    operator_graph=graph,
    objective_fnc=Images2LabelObjectiveFnc(model, lambda x, y: - mse(x, y), target_class, images, sample_size=64, sample_ttl = 0.98),
    callbacks=callbacks,
    results_dir='playground/ga_runs'
)

In [None]:
# Timed run of the GA with 512 individuals and a cap of 256 generations.
%time final_pop, fitnesses, objectives = ga.run(population_size=512, generation_cap=256)

In [None]:
# Timed continuation of the same GA; 256 is presumably the number of additional
# generations - TODO confirm against GeneticAlgorithm.resume.
%time final_pop, fitnesses, objectives = ga.resume(256)

In [None]:
# Seven independent runs on 28x28 individuals; `ga` refers to the last run afterwards.
for i in range(7):
    ga = GeneticAlgorithm(
    #     initializer=PrePopulationInit(first_run[0].individuals),
        initializer=MultiSigmaRandomNormalInit((28, 28), (np.exp(np.linspace(0.5, 5, 100)) - 1) / (np.exp(5) - 1)),
        operator_graph=graph,
        # The similarity measure is a required positional argument; without it the remaining
        # arguments shift by one and the call fails. Negative MSE mirrors the single-run cell.
        # NOTE(review): confirm this is the measure intended for these runs.
        objective_fnc=Images2LabelObjectiveFnc(model, lambda x, y: - mse(x, y), target_class, images, sample_size=64, sample_ttl = 0.98),
        callbacks=callbacks,
        results_dir='playground/ga_runs'
    )
    
    ga.run(population_size=512, generation_cap=512)

In [None]:
# Timed continuation of whichever GA `ga` currently refers to; 64 is presumably the
# number of additional generations - TODO confirm.
%time result = ga.resume(64)

In [None]:
# Keep the current results together as a tuple; the commented-out PrePopulationInit lines in
# the GA cells read first_run[0].individuals.
first_run = final_pop, fitnesses, objectives

In [None]:
# Thresholds an individual has to exceed in objective column 0 and column 1 respectively.
prediction_bound = 0.5
ssim_bound = -1

# positions of the individuals passing both thresholds
filtered_indices = np.where(
    (final_pop.objectives[:, 0] > prediction_bound) & (final_pop.objectives[:, 1] > ssim_bound)
)[0]
filtered_individuals, filtered_objectives = (
    final_pop.individuals[filtered_indices],
    final_pop.objectives[filtered_indices],
)

In [None]:
# `test` is not defined by any earlier cell, so this cell fails on a fresh kernel.
# NOTE(review): assumes the data set exposes a `test` split with `X` and `y`, analogous to
# the `train` split used above - TODO confirm against evgena.datasets.Dataset.
test = fashion_mnist.test

# held-out images of the source class and the individuals that passed the filter
test_data = test.X[test.y == source_class]
test_individuals = filtered_individuals

# one augmented image per (individual, test image) pair, flattened into a single batch
augmented_images = augment_images(test_individuals, test_data)
augmented_images_batch_shaped = augmented_images.reshape(-1, *augmented_images.shape[2:4], 1)

# target-class model output for every pair, shaped (individuals, test images)
generalization = model(augmented_images_batch_shaped)[:, target_class].reshape(augmented_images.shape[:2])

In [None]:
# Mean target-class output, number of (individual, image) pairs above 0.5, and total number of pairs.
generalization.mean(), np.sum(generalization > 0.5), generalization.size

In [None]:
# Positions within the filtered set, ordered by ascending second objective (column 1).
np.argsort(filtered_objectives[:,1])
# filtered_indices[np.argsort(filtered_objectives[:,1])]

In [None]:
# Side-by-side view of one test image, one filtered individual and their combination.
%matplotlib notebook

# which filtered individual and which test image to show
individual_i = 6
image_i = 8

compare_fig, compare_ax = plt.subplots(1, 3, figsize=(13, 6))
# left: original test image
compare_ax[0].imshow(test_data[image_i], cmap='gray', vmin=0, vmax=1)
# middle: the augmentation itself, drawn on a [-1, 1] colour scale
compare_ax[1].imshow(filtered_individuals[individual_i], cmap='plasma', vmin=-1, vmax=1)
# right: augmented image for this (individual, image) pair; the channel axis is dropped for display
compare_ax[2].imshow(augmented_images[individual_i, image_i][:,:,0], cmap='gray', vmin=0, vmax=1)