In [1272]:
import torch
import numpy as np
import uuid
from tqdm import trange
import copy
import tensorflow as tf
import networkx as nx

In [1334]:
def partial_activation(inputs, activation, passthroughs):
    """Apply ``activation`` element-wise except where a unit is a passthrough.

    Positions where ``passthroughs`` is greater than zero keep the raw value
    from ``inputs``; every other position takes ``activation(inputs)``.
    """
    activated = activation(inputs)
    keep_raw = passthroughs > 0
    return np.where(keep_raw, inputs, activated)

def relu(inputs):
    """Rectified linear unit: keep strictly positive entries, replace the rest with 0."""
    is_positive = inputs > 0
    return np.where(is_positive, inputs, 0)


class Layer():
    """Dense layer whose output units can individually bypass the activation.

    Attributes:
        weights: array of shape ``(input_shape, output_shape)``.
        bias: array of length ``output_shape`` (initialised to zeros).
        passthrough: array of length ``output_shape``; entries > 0 mark units
            whose pre-activation value is forwarded unchanged by ``call``.
        input_shape / output_shape: current number of inputs / outputs.
        size: mirror of ``weights.shape``, refreshed after every resize.
        locked: flag stored for callers; this class never reads it.
        activation: callable applied to the non-passthrough units.
        id: UUID string assigned at construction time.
    """

    def __init__(self, input_shape, output_shape, passthrough=True, activation="relu", locked=False):
        """Create the layer.

        Args:
            input_shape: number of input units.
            output_shape: number of output units.
            passthrough: if true, start with identity-like weights
                (``np.eye``) and mark every output as passthrough; otherwise
                start with all-zero weights and no passthrough units.
            activation: ``"relu"`` or ``"tanh"``.
            locked: stored on the instance as ``self.locked``.

        Raises:
            ValueError: if ``activation`` is not a supported name.
        """
        if passthrough:
            self.passthrough = np.ones(output_shape)
            self.weights = np.eye(input_shape, output_shape)
        else:
            self.passthrough = np.zeros(output_shape)
            self.weights = np.zeros((input_shape, output_shape))
        self.bias = np.zeros(output_shape)

        self.output_shape = output_shape
        self.input_shape = input_shape
        self.size = self.weights.shape

        self.locked = locked

        if activation == "relu":
            self.activation = relu
        elif activation == "tanh":
            # NumPy's tanh keeps the whole forward pass in ndarray land.
            self.activation = np.tanh
        else:
            # Previously an unknown name left ``self.activation`` unset and the
            # failure only surfaced later as an AttributeError inside call().
            raise ValueError(
                "Unsupported activation %r; expected 'relu' or 'tanh'" % (activation,)
            )

        self.id = str(uuid.uuid1())

    def increase_output(self, link):
        """Append one output unit that is connected only to input ``link``.

        The new unit gets zero bias and is not a passthrough.
        """
        column = np.zeros(shape=(self.input_shape, 1))
        # NOTE(review): the draw is centred on 1 (loc=1, scale=1), matching the
        # original positional call; confirm that a mean of 1 is intended.
        column[link] = np.random.normal(loc=1.0)
        self.weights = np.append(self.weights, column, axis=1)

        self.bias = np.append(self.bias, 0)
        self.passthrough = np.append(self.passthrough, 0)

        self.output_shape += 1
        self.size = self.weights.shape

    def increase_input(self, link=None):
        """Append one input row; if ``link`` is given, connect it to that output.

        Without ``link`` the new row is all zeros.
        """
        row = np.zeros(shape=(1, self.output_shape))
        if link is not None:
            # Same N(1, 1) initialisation as increase_output.
            row[0, link] = np.random.normal(loc=1.0)

        self.weights = np.append(self.weights, row, axis=0)

        self.input_shape += 1
        self.size = self.weights.shape

    def mutate_weights(self, scale=0.1):
        """Add zero-mean Gaussian noise (std ``scale``) to every non-zero weight.

        Exact zeros are left untouched; the bias is not modified.
        """
        noise = np.random.normal(scale=scale, size=self.weights.shape)
        self.weights = np.where(self.weights != 0, self.weights + noise, self.weights)

    def decrease_input(self):
        """Drop the last input row of the weight matrix."""
        self.weights = np.delete(self.weights, -1, axis=0)
        self.input_shape -= 1
        self.size = self.weights.shape

    def change_input_size(self, size):
        """Grow (with zero rows) or shrink (from the end) to ``size`` inputs.

        Raises:
            ValueError: if ``size`` is negative.
        """
        if size < 0:
            raise ValueError("size must be non-negative, got %r" % (size,))
        while self.input_shape > size:
            self.decrease_input()
        while self.input_shape < size:
            self.increase_input()

    def call(self, inputs):
        """Return ``inputs @ weights + bias`` with the activation applied to non-passthrough units."""
        pre_activation = np.matmul(inputs, self.weights) + self.bias
        return partial_activation(pre_activation, self.activation, self.passthrough)

    def __repr__(self):
        return "Layer: [shape=" + str(self.weights.shape) + "]"

In [1466]:
class Network():
    """Feed-forward stack of ``Layer`` objects with structural mutation operators.

    Attributes:
        layers: ordered list of layers applied by ``predict``.
        input_shape: width of the network input.
        output_shape: width recorded by the most recent ``add_layer`` call
            (starts equal to ``input_shape`` because an empty network is the
            identity).
        age: counter maintained by callers; not modified by this class.
        layer_counter: number of layers inserted by ``mutate_node``.
    """

    def __init__(self, input_shape):
        self.layers = []
        self.input_shape = input_shape
        # predict() on an empty network returns its input unchanged, so the
        # output width equals the input width until a layer is added.
        self.output_shape = input_shape

        self.age = 0
        self.layer_counter = 0

    def add_layer(self, size, weights=None, biases=None, activation="relu", passthrough=False, locked=False):
        """Append a layer with ``size`` outputs, wired to the previous layer.

        The first layer added is always created with ``locked=True``; later
        layers use the ``locked`` argument. ``activation`` is forwarded to the
        layer.

        NOTE(review): ``weights``, ``biases`` and ``passthrough`` are accepted
        but not used - every layer is built with ``Layer``'s own passthrough
        default. They are left unused here so existing callers keep their
        current behaviour; confirm whether they should be honoured.
        """
        if self.layers:
            new_layer = Layer(self.layers[-1].output_shape, size, activation=activation, locked=locked)
        else:
            new_layer = Layer(self.input_shape, size, activation=activation, locked=True)
        self.layers.append(new_layer)

        self.output_shape = size

    def mutate_link(self):
        """Add a new connection starting from a randomly chosen layer.

        A random layer (never the last) and one of its inputs are picked. While
        the following layer is unlocked, a new output unit is added to the
        current layer and a matching input to the next one; depending on a
        random draw the new path is either connected to an existing output of
        the next layer (and the mutation ends) or cascaded one layer deeper.
        Nothing happens when the following layer is locked or when the network
        has fewer than two layers.
        """
        if len(self.layers) < 2:
            # No (layer, next_layer) pair exists to connect.
            return

        index = np.random.randint(len(self.layers) - 1)

        layer = self.layers[index]
        next_layer = self.layers[index + 1]

        link_from = np.random.randint(layer.input_shape)

        while not next_layer.locked:
            link_to = np.random.randint(layer.output_shape + 1)  # Check if doubling/another distribution works best

            # NOTE(review): the threshold is the *network* output width, not the
            # current layer's width - kept as in the original; confirm intent.
            can_go_deeper = index + 2 < len(self.layers)
            if link_to >= self.output_shape and can_go_deeper:
                layer.increase_output(link_from)
                next_layer.increase_input()

                index += 1
                layer = self.layers[index]
                next_layer = self.layers[index + 1]
                # Continue the path from the input row that was just appended.
                link_from = layer.input_shape - 1
            else:
                # Either the draw selected a direct link, or there is no deeper
                # layer to cascade into (previously an IndexError).
                layer.increase_output(link_from)
                next_layer.increase_input(link=(link_to % next_layer.output_shape))
                return

    def mutate_node(self):
        """Turn a passthrough unit into an activated node, inserting a layer if needed.

        A random layer other than the first is chosen. If it still has a
        passthrough unit, one of them (searching cyclically from a random
        start) is switched off. Otherwise a new passthrough layer with a single
        activated unit is inserted in front of the chosen layer. Does nothing
        when the network has fewer than two layers.
        """
        if len(self.layers) < 2:
            return

        index = np.random.randint(1, len(self.layers))
        layer = self.layers[index]

        # Instead of adding an additional layer convert passthroughs to nodes first
        if np.max(layer.passthrough) == 1:
            pass_index = np.random.randint(layer.output_shape)
            while layer.passthrough[pass_index] != 1:
                pass_index = (pass_index + 1) % layer.output_shape
            layer.passthrough[pass_index] = 0
            return

        # If current layer does not have any passthrough nodes, add new layer
        new_layer = Layer(layer.input_shape, layer.input_shape, passthrough=True)
        node_index = np.random.randint(new_layer.output_shape)
        new_layer.passthrough[node_index] = 0

        self.layers.insert(index, new_layer)

        self.layer_counter += 1

    def mutate_weights(self):
        """Perturb the weights of every layer."""
        for layer in self.layers:
            layer.mutate_weights()

    def mutate(self, p_link=0.2, p_node=0.2, p_mutate=0.5):
        """Independently apply each mutation operator with its own probability.

        Args:
            p_link: probability of calling ``mutate_link``.
            p_node: probability of calling ``mutate_node``.
            p_mutate: probability of calling ``mutate_weights``.
        """
        if np.random.random() < p_link:
            self.mutate_link()
        if np.random.random() < p_node:
            self.mutate_node()
        # Fixed: this roll previously reused p_node, so p_mutate had no effect.
        if np.random.random() < p_mutate:
            self.mutate_weights()

    def trim_shapes(self):
        """Resize each layer's input to match the preceding layer's output."""
        if not self.layers:
            return

        previous_shape = self.layers[0].output_shape
        for layer in self.layers[1:]:
            if layer.input_shape != previous_shape:
                layer.change_input_size(previous_shape)

            previous_shape = layer.output_shape

    def predict(self, x):
        """Run ``x`` through every layer in order and return the result."""
        for layer in self.layers:
            x = layer.call(x)
        return x

    def print_graph(self):
        """Draw the non-zero, non-passthrough connections as a multipartite graph.

        Nodes are named ``"<layer id>_<unit index>"``. Connections stored in the
        last layer's weight matrix are not drawn.
        """
        G = nx.Graph()

        for depth in range(len(self.layers) - 1):
            layer = self.layers[depth]
            next_layer = self.layers[depth + 1]

            for row_index, col_index in np.argwhere(layer.weights != 0):
                if layer.passthrough[col_index] != 0:
                    continue
                source = str(layer.id) + "_" + str(row_index)
                target = str(next_layer.id) + "_" + str(col_index)
                # Use the layer's depth as the subset key so columns are laid
                # out in network order rather than by UUID string.
                G.add_node(source, subset=depth)
                G.add_node(target, subset=depth + 1)
                G.add_edge(source, target)

        pos = nx.multipartite_layout(G)
        nx.draw(G, pos=pos)

    def __repr__(self):
        return "Network: Layers= \n" + str(self.layers)

In [1467]:
def crossbreed(mom, dad):
    """Build a child network from two parents without modifying either.

    The child starts with a copy of ``mom``'s first layer. For every later
    layer of ``mom`` whose ``id`` also appears among ``dad``'s later layers,
    one of the two versions is chosen with equal probability. Layers that
    exist in only one parent are not inherited. Input sizes are reconciled
    with ``trim_shapes`` before the child is returned.
    """
    mother = copy.deepcopy(mom)
    father = copy.deepcopy(dad)

    child = Network(mother.input_shape)
    child.output_shape = mother.output_shape
    child.layers.append(mother.layers[0])

    father_candidates = father.layers[1:]
    for mother_layer in mother.layers[1:]:
        for father_layer in father_candidates:
            if mother_layer.id != father_layer.id:
                continue
            take_mother = np.random.choice([True, False], 1, p=[0.5, 0.5])
            child.layers.append(mother_layer if take_mother else father_layer)

    child.trim_shapes()
    return child

In [1468]:
# Smoke check: build the smallest possible layer (one input, one output).
layer = Layer(input_shape=1, output_shape=1)

In [1469]:
# Build a 10 -> 20 -> 30 -> 10 -> 10 network; the last layer is added with locked=True.
net = Network(10)
for width in (20, 30, 10):
    net.add_layer(width, passthrough=False)
net.add_layer(10, passthrough=False, locked=True)
print(net.predict(np.zeros(10)))

print(net)

# Four link mutations followed by four node mutations.
for _step in range(4):
    net.mutate_link()
for _step in range(4):
    net.mutate_node()

net.mutate(0.1, 0.1)
print(net.layers)
print(net.predict(np.zeros(10)))


#net.print_graph()


[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Network: Layers= 
[Layer: [shape=(10, 20)], Layer: [shape=(20, 30)], Layer: [shape=(30, 10)], Layer: [shape=(10, 10)]]
[Layer: [shape=(10, 21)], Layer: [shape=(21, 32)], Layer: [shape=(32, 10)], Layer: [shape=(10, 10)]]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


In [1487]:
class NetworkPool():
    """Population of ``Network`` objects evolved by selection, crossover and mutation.

    Attributes:
        networks: current population.
        best_network: lowest-loss network of the most recent generation
            (``None`` until ``fit`` has run at least one epoch).
    """

    def __init__(self, input_shape, output_shape, population_size=100):
        """Create ``population_size`` minimal two-layer networks.

        Each network has a 1-unit first layer and a locked ``output_shape``-unit
        output layer.
        """
        self.networks = []
        self.best_network = None
        for _ in range(population_size):
            net = Network(input_shape=input_shape)
            net.add_layer(1, passthrough=True)
            net.add_layer(output_shape, passthrough=True, locked=True)

            self.networks.append(net)

    def fit(self, data, epochs=100, num_survivors=10, num_children=10, batch_size=32, loss=tf.keras.losses.MeanSquaredError()):
        """Evolve the population for ``epochs`` generations.

        Each generation evaluates every network on the next mini-batch, keeps
        the ``num_survivors`` best, and rebuilds the population from:
        the unmodified best network, ``num_children`` mutated children for
        every ordered pair of survivors, and one mutated copy of each survivor
        whose ``age`` is below 5.

        Args:
            data: ``(X, y)`` pair of equally long sample sequences.
            epochs: number of generations.
            num_survivors: how many of the best networks reproduce.
            num_children: children per ordered survivor pair.
            batch_size: samples per generation; batches wrap around the data.
            loss: callable ``loss(y_true, y_pred)`` returning a scalar.

        Raises:
            ValueError: if ``X`` is empty or the population is empty.
        """
        X, y = data
        if len(X) == 0:
            raise ValueError("fit() needs at least one training sample")
        if not self.networks:
            raise ValueError("fit() needs a non-empty population")

        batch_start = 0

        t = trange(epochs, desc='Loss', leave=True)
        for _ in t:
            # Slicing clamps at the end of the data, so the last batch may be short.
            batch_end = batch_start + batch_size
            # np.float was removed in NumPy 1.24; the builtin float is equivalent.
            batch_X = np.array(X[batch_start:batch_end]).astype(float)
            batch_y = np.array(y[batch_start:batch_end]).astype(float)

            batch_start = batch_end % len(X)

            losses = np.array([
                loss(batch_y, network.predict(batch_X).astype(float))
                for network in self.networks
            ])

            t.set_description("Population: loss_min={:.4f}, loss_avg={:.4f}".format(np.min(losses), np.mean(losses)))

            idx = np.argsort(losses)

            survivors = [self.networks[x] for x in idx[:num_survivors]]

            self.best_network = self.networks[idx[0]]

            # The best network survives unchanged; everything else is rebuilt.
            self.networks = [self.best_network]
            for survivor1 in survivors:
                for survivor2 in survivors:
                    for _ in range(num_children):
                        child = crossbreed(survivor1, survivor2)
                        child.mutate(p_link=0.2, p_node=0.2, p_mutate=0.7)
                        self.networks.append(child)

            for survivor in survivors:
                if survivor.age < 5:
                    survivor.age += 1
                    # Mutate a copy: the survivors include best_network, and
                    # mutating it in place would alter the elite after it was
                    # evaluated (and alias it twice in the population).
                    veteran = copy.deepcopy(survivor)
                    veteran.mutate(p_link=1, p_node=0, p_mutate=1)
                    self.networks.append(veteran)

In [1488]:
# XOR truth table: four two-bit inputs and their targets.
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
y = np.array([
    [0.0],
    [1.0],
    [1.0],
    [0.0],
])

In [1489]:
# Evolve a population seeded with a single network on the XOR data.
pool = NetworkPool(
    input_shape=2,
    output_shape=1,
    population_size=1,
)
pool.fit(
    (X, y),
    epochs=100,
    num_survivors=5,
    num_children=5,
)

Population: loss_min=0.0000, loss_avg=0.0558: 100%|██████████| 100/100 [00:15<00:00,  6.27it/s]


In [1490]:
# Predictions of the best network from the last generation for the inputs in X.
pool.best_network.predict(X)

array([[0.        ],
       [0.99900067],
       [0.99797647],
       [0.        ]])

In [1491]:
# Inspect the evolved architecture: one entry per layer, shown with its weight-matrix shape.
pool.best_network.layers

[Layer: [shape=(2, 13)],
 Layer: [shape=(13, 16)],
 Layer: [shape=(16, 19)],
 Layer: [shape=(19, 24)],
 Layer: [shape=(24, 1)],
 Layer: [shape=(1, 1)]]

In [1492]:
# NOTE(review): TensorFlow is already imported as `tf` in the first cell; this
# second import only binds the full module name used on the next line.
import tensorflow
# Handle to the Keras MNIST dataset module (the data itself is loaded later via load_data()).
mnist = tensorflow.keras.datasets.mnist

In [1493]:
from sklearn.preprocessing import StandardScaler

# Load MNIST and flatten every 28x28 image into a 784-element row.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(-1, 28 * 28)
x_test = x_test.reshape(-1, 28 * 28)

# Standardise each pixel with statistics from the training split only.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# One-hot encode the training labels by selecting rows of an identity matrix.
y_train_onehot = np.eye(y_train.max() + 1)[y_train]

In [1494]:
# Fresh single-network population sized for the flattened MNIST data
# (784 inputs, 10 outputs).
pool = NetworkPool(
    input_shape=784,
    output_shape=10,
    population_size=1,
)
# Number of layers in the initial network.
len(pool.networks[0].layers)

2

In [1495]:
# NOTE(review): this categorical cross-entropy loss is created but never used -
# the `loss=loss` argument below is commented out, so fit() runs with its
# default loss instead.
loss=tf.keras.losses.CategoricalCrossentropy()
# Evolve on the standardised training images against the one-hot labels.
pool.fit((x_train,y_train_onehot), epochs=400, batch_size=100, num_survivors=5, num_children=4)#, loss=loss)

Population: loss_min=0.0923, loss_avg=0.0942: 100%|██████████| 400/400 [01:28<00:00,  4.53it/s]


In [1496]:
# Inspect the weight matrix of the best network's final layer.
pool.best_network.layers[-1].weights

array([[0.63481105, 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ]])

In [1497]:
# NOTE(review): `index` is not used by the statements in this cell.
index = [2547, 19910, 12233, 54220]

x_test = np.array(x_test)

# Compare the predicted class (arg-max over the outputs) with the true label
# for the first 35 test images.
print(pool.best_network.predict(x_test).argmax(axis=1)[:35].tolist())
print(y_test[:35].tolist())

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[7, 2, 1, 0, 4, 1, 4, 9, 5, 9, 0, 6, 9, 0, 1, 5, 9, 7, 3, 4, 9, 6, 6, 5, 4, 0, 7, 4, 0, 1, 3, 1, 3, 4, 7]
