In [1]:
import numpy as np
from tqdm import trange

import os
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow import keras

from tensorflow.keras import layers
from tensorflow.keras import activations

In [522]:
class Layer():
    def __init__(self, output_shape, input_shape, activation="relu"):
        w_init = tf.keras.initializers.Zeros()
        self.weights = tf.Variable(
            initial_value=w_init(shape=(input_shape, output_shape), dtype="float32"),
            trainable=True,
        )
        self.weights = self.weights + np.random.normal(size=(input_shape, output_shape), scale=0.3)

        b_init = tf.keras.initializers.Zeros()
        self.biases = tf.Variable(
            initial_value=b_init(shape=(output_shape,), dtype="float32"), trainable=True
        )

        self.output_shape = output_shape
        self.input_shape = input_shape
        self.size = self.weights.shape
        
        if activation == "relu":
            self.activation = tf.keras.activations.relu
        if activation == "tanh":
            self.activation = tf.keras.activations.tanh
        
    def add_weights(self, weights):
        self.weights = self.weights + weights
        
    def add_biases(self, biases):
        self.biases = self.biases + biases
    
    def extend(self):
        self.weights = np.append(self.weights, np.random.normal(size=(self.input_shape,1)), axis=1)
        self.biases = np.append(self.biases, 0)        
        self.output_shape += 1
        self.size = self.weights.shape
    
    def extend_input(self):
        self.weights = np.append(self.weights, np.random.normal(size=(1,self.output_shape)), axis=0)
        self.input_shape += 1
        self.size = self.weights.shape
        
    def decrease(self):
        self.weights = np.delete(self.weights, -1, axis=1)
        self.biases = np.delete(self.biases, -1)        
        self.output_shape -= 1
        self.size = self.weights.shape
    
    def decrease_input(self):
        self.weights = np.delete(self.weights, -1, axis=0)
        self.input_shape -= 1
        self.size = self.weights.shape
    
    def change_output_size(self, size):
        while self.output_shape != size:
            if self.output_shape > size:
                self.decrease()
            elif self.output_shape < size:
                self.extend()
                
    def call(self, inputs):
        return self.activation(tf.matmul(inputs, self.weights) + self.biases)
    
    def __repr__(self):
        return "Layer: [shape=" + str(self.weights.shape) + "]"

In [523]:
class Network():
    def __init__(self, input_shape):
        self.layers = []
        self.input_shape = input_shape
        
    def add_layer(self, size, weights=None, biases=None, activation="relu"):
        if len(self.layers) == 0:
            self.layers.append(
                Layer(
                    size,
                    self.input_shape,  
                    activation=activation, 
                    #weights=weights, 
                    #biases=biases
                )
            )
        else:
            self.layers.append(
                Layer(
                    size, 
                    self.layers[-1].output_shape, 
                    activation=activation,
                    #weights=weights, 
                    #biases=biases
                )
            )
            
    def predict(self, sample):
        result = sample
        for layer in self.layers:
            result = layer.call(result)
        return np.array(result)
    
    
    def mutate_weights(self, intensity=0.05):
        for layer in self.layers:
            layer.add_weights(np.random.normal(scale=intensity, size=layer.size))
            layer.add_biases(np.random.normal(scale=intensity, size=layer.output_shape))
            
    def mutate_layers(self, propa=0.5):
        for i in range(len(self.layers)-1):
            if np.random.choice([True, False], p=[propa,1-propa]):
                self.layers[i].extend()
                self.layers[i+1].extend_input()
            elif np.random.choice([True, False], p=[propa,1-propa]):
                if self.layers[i].output_shape > 3:
                    self.layers[i].decrease()
                    self.layers[i+1].decrease_input()
        
    def mutate_topology(self, propa=0.1, activation="tanh"):
        choice = np.random.choice([0, 1], p=[propa,1-propa])
        # Add a Layer
        if choice == 0:
            if len(self.layers) == 1:
                index = 1
                layer = Layer(input_shape=self.layers[0].output_shape, output_shape=self.layers[0].output_shape, activation=activation)
            else:
                index = np.random.randint(1, len(self.layers))
                layer = Layer(input_shape=self.layers[index-1].output_shape, output_shape=self.layers[index].input_shape, activation=activation)

            self.layers.insert(index, layer)
        # Remove a layer
        elif choice == 1:
            if len(self.layers) > 2:
                index = np.random.randint(1, len(self.layers) - 1)
                self.layers[index-1].change_output_size(self.layers[index+1].input_shape)
                del self.layers[index]
            
            
    
    def __repr__(self):
        return "Network: Layers= \n" + str(self.layers)

In [524]:
net = Network(input_shape=2)
net.add_layer(4)

net.add_layer(7)
net.add_layer(6)
net.add_layer(2)
net.add_layer(3)


#net.mutate_layers(propa=0.1)
net.mutate_topology(propa=1)
net.mutate_layers(propa=1)

print(net)
net.predict(np.ones((8,2)).astype(np.float32))


Network: Layers= 
[Layer: [shape=(2, 5)], Layer: [shape=(5, 5)], Layer: [shape=(5, 8)], Layer: [shape=(8, 7)], Layer: [shape=(7, 3)], Layer: [shape=(3, 3)]]


array([[6.0796938e-05, 0.0000000e+00, 0.0000000e+00],
       [6.0796938e-05, 0.0000000e+00, 0.0000000e+00],
       [6.0796938e-05, 0.0000000e+00, 0.0000000e+00],
       [6.0796938e-05, 0.0000000e+00, 0.0000000e+00],
       [6.0796938e-05, 0.0000000e+00, 0.0000000e+00],
       [6.0796938e-05, 0.0000000e+00, 0.0000000e+00],
       [6.0796938e-05, 0.0000000e+00, 0.0000000e+00],
       [6.0796938e-05, 0.0000000e+00, 0.0000000e+00]], dtype=float32)

In [619]:
import copy
from keras import backend as K


class NetworkPool():
    def __init__(self, input_shape, output_shape, population_size=100):
        self.networks = []
        for _ in range(population_size):
            
            net = Network(input_shape=input_shape)
            #net.add_layer(128, activation="relu")
            net.add_layer(output_shape, activation="relu")
            
            self.networks.append(net)            
        #self.networks = np.array(self.networks)
            
    
    def fit(self, data, epochs=100, num_survivors=10, num_children=10, batch_size=32, loss=tf.keras.losses.mean_squared_error):
        X, y = data
        
        batch_start = 0
        
        t = trange(epochs, desc='Loss', leave=True)
        for _ in t:            
            batch_X = np.array(X[batch_start:np.min([batch_start+batch_size, len(X)])]).astype('float32')
            batch_y = np.array(y[batch_start:np.min([batch_start+batch_size, len(y)])]).astype('float32')
            
            batch_start = (batch_start + batch_size) % len(X)
            
            losses = []
            for network in self.networks:
                losses.append(loss(batch_y, network.predict(batch_X)))
            
            losses = np.array(losses)#.reshape(len(self.networks), -1).astype('float32')
            
            #print(loss(predictions, [batch_y]))
            #print(loss(predictions, np.repeat(batch_y, 1))[0])
            #print(tf.math.reduce_mean(loss(predictions, np.repeat(batch_y, len(predictions))), axis=-1)[0])
            #losses = tf.math.reduce_mean(loss(predictions, batch_y), axis=-1)
            t.set_description("Population: loss_min={:.4f}, loss_avg={:.4f}".format(np.min(losses), np.mean(losses)))

            idx = np.argsort(losses)

            survivors = [self.networks[x] for x in idx[:num_survivors]] #self.networks[idx[:num_survivors]]
            
            self.best_network = self.networks[idx[0]]
            
            
            # Mutate all surviors and use them as the new networks
            # The best network survives
            self.networks = [self.best_network]
            for survivor1 in survivors:
                #for survivor2 in survivors:
                for _ in range(num_children):
                    #child = survivor1.crossbreed(survivor2, random_mutations=True)
                    child = copy.deepcopy(survivor1)
                    child.mutate_topology() 
                    child.mutate_layers()
                    child.mutate_weights()
                    self.networks.append(child)
            
            #self.networks = np.array(self.networks)

In [620]:
X = np.array([[0,0], [0,1], [1,0], [1,1]]).astype(np.float32)
y = np.array([[0.], [1.], [1.], [0.]])

In [621]:
loss=tf.keras.losses.MeanSquaredError()#reduction=tf.keras.losses.Reduction.NONE)
pool = NetworkPool(input_shape=2, output_shape=1, population_size=200)
pool.fit((X,y), epochs=100, loss=loss)

Population: loss_min=0.0000, loss_avg=0.1017: 100%|██████████| 100/100 [00:11<00:00,  8.81it/s]


In [622]:
print(pool.best_network.predict(X))
print(pool.best_network)

[[ 0.00142315]
 [ 0.99984443]
 [ 0.99959326]
 [-0.00163677]]
Network: Layers= 
[Layer: [shape=(2, 9)], Layer: [shape=(9, 1)]]


In [623]:
import tensorflow as tf
mnist = tf.keras.datasets.mnist

In [624]:
from sklearn.preprocessing import StandardScaler

(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape(-1, 28*28)
x_test = x_test.reshape(-1, 28*28)

scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)


y_train_onehot = np.zeros((y_train.size, y_train.max()+1))
y_train_onehot[np.arange(y_train.size),y_train] = 1

In [625]:
np.mean(x_train)

-2.1974863349995617e-18

In [626]:
print(x_train.shape)
print(y_train_onehot.shape)

(60000, 784)
(60000, 10)


In [627]:
pool = NetworkPool(input_shape=784,output_shape=10, population_size=100)

In [632]:
loss=tf.keras.losses.CategoricalCrossentropy()
pool.fit((x_train,y_train_onehot), epochs=100, batch_size=2000, num_survivors=10, num_children=10, loss=loss)

Population: loss_min=1.6739, loss_avg=2.3985: 100%|██████████| 100/100 [00:49<00:00,  2.00it/s]


In [633]:
pool.best_network

Network: Layers= 
[Layer: [shape=(784, 53)], Layer: [shape=(53, 10)]]

In [634]:
index = [2547,19910,12233,54220]

x_test = x_test.astype('float32')

#print(pool.best_network.predict([x_train[index]]))
print(np.argmax(pool.best_network.predict(x_test), axis=1))
print(y_test)
print(tf.keras.losses.mean_squared_error(np.argmax(pool.best_network.predict(x_test), axis=1).astype('int32'), y_test.astype('int32')))


[0 1 0 ... 0 0 1]
[7 2 1 ... 4 5 6]
tf.Tensor(23, shape=(), dtype=int32)


In [631]:
tf.matmul(np.ones((2,4)), np.ones((4,2)))

<tf.Tensor: shape=(2, 2), dtype=float64, numpy=
array([[4., 4.],
       [4., 4.]])>

In [191]:
np.array(X).shape

(4, 2)

In [600]:
loss=tf.keras.losses.CategoricalCrossentropy()#reduction=tf.keras.losses.Reduction.NONE)
y_true = np.repeat([[0,1,0]], 2, axis=0)#, [0,1,1]]
y_pred = [[0.1,1.2,0.1], [0.1,1.1,1.1]]
print(y_true)
print(y_pred)
print(loss(y_true, y_pred).numpy())

[[0 1 0]
 [0 1 0]]
[[0.1, 1.2, 0.1], [0.1, 1.1, 1.1]]
0.44587487


In [604]:
y_test_onehot = np.zeros((y_test.size, y_test.max()+1))
y_test_onehot[np.arange(y_test.size),y_test] = 1

y_true = y_test_onehot#np.repeat(y_test_onehot, len(x_test), axis=-1)
y_pred = pool.best_network.predict(x_test)
# Using 'auto'/'sum_over_batch_size' reduction type.
cce = tf.keras.losses.CategoricalCrossentropy()
cce(y_true, y_pred).numpy()


6.539699

In [602]:
y_test_onehot

array([[0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])