In [1]:
# Operation codes the search is allowed to place on a graph edge.
#
# NOTE(review): an earlier, commented-out revision of this cell listed string
# names in the same slots -- "conv2d1x1", "conv2d3x3", "conv2dsep1x1",
# "conv2dsep3x3" -- so codes 1-4 presumably stand for those ops in that order;
# confirm against model_assembly.
# The 5x5 and strided variants ("conv2d5x5", "conv2dstride1x1/3x3/5x5",
# "conv2dsep5x5", "conv2dsepstride1x1/3x3/5x5") were disabled there as well.
primitives = [1, 2, 3, 4]

In [2]:
def getUntriedMoves(neighborsCache):
    """Enumerate every (position, primitive) move for the given positions.

    Args:
        neighborsCache: iterable of ``(i, j)`` pairs.

    Returns:
        A list of dicts with keys ``"i"``, ``"j"`` and ``"primitive"``.
        Positions appear in input order; for each position the primitives
        appear in the order of the module-level ``primitives`` list.
    """
    # A fresh dict literal per move gives every entry its own object, so no
    # deepcopy is needed (and this cell no longer relies on `deepcopy` being
    # imported by a later cell).
    return [
        {"i": i, "j": j, "primitive": p}
        for (i, j) in neighborsCache
        for p in primitives
    ]

In [3]:
from keras import backend as K
from keras.datasets import cifar10, mnist
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
import numpy as np
from keras import utils as np_utils

class Dataset():
    """Loads an image dataset and exposes Keras generators over a subset of it.

    After construction the instance holds:
      * ``X_train`` / ``y_train``: training data scaled to [0, 1] and centred
        on the per-pixel training mean; labels one-hot encoded.
      * ``X_val`` / ``X_test`` (and labels): the loader's test split, centred
        with the *training* mean and divided 50/50 by ``train_test_split``.
      * ``*_subset``: the leading ``percentage`` fraction of each split.
      * ``train_datagen`` / ``test_datagen``: ``ImageDataGenerator`` objects;
        only the training one augments, and only when ``augment_data`` is set.
    """

    def __init__(self, percentage = None, dataset = cifar10, augment_data = True):
        """
        Args:
            percentage: fraction of every split to keep. When omitted, a value
                is drawn uniformly from [0.1, 0.9) for *this* instance (a
                default written in the signature would be drawn only once, at
                class-definition time, and shared by all instances).
            dataset: object exposing ``load_data()`` that returns
                ``(X_train, y_train), (X_test, y_test)``.
            augment_data: whether the training generator applies random
                rotations / shifts / shear / zoom / horizontal flips.
        """
        if percentage is None:
            percentage = np.random.uniform(.1, .9)
        self.percentage = percentage

        (self.X_train, self.y_train), (self.X_test, self.y_test) = dataset.load_data()

        # Derive the class count from the labels instead of hard-coding 10.
        # assumes integer class labels starting at 0 -- TODO confirm for new datasets
        num_classes = int(max(self.y_train.max(), self.y_test.max())) + 1
        self.y_train = np_utils.to_categorical(self.y_train, num_classes)
        self.y_test = np_utils.to_categorical(self.y_test, num_classes)

        if augment_data:
            self.train_datagen = ImageDataGenerator(
                  rotation_range=40,
                  width_shift_range=0.2,
                  height_shift_range=0.2,
                  shear_range=0.2,
                  zoom_range=0.2,
                  horizontal_flip=True,
                  fill_mode='nearest')
        else:
            self.train_datagen = ImageDataGenerator()

        # Validation / test data is never augmented.
        self.test_datagen = ImageDataGenerator()

        self.X_train = self.X_train.astype('float32') / 255
        self.X_test = self.X_test.astype('float32') / 255

        # Centre both splits with the training mean only.
        X_train_mean = np.mean(self.X_train, axis = 0)
        self.X_train -= X_train_mean
        self.X_test -= X_train_mean

        # Carve a validation set out of the loader's test split.
        self.X_val, self.X_test, self.y_val, self.y_test = train_test_split(
            self.X_test, self.y_test, test_size = 0.5)

        self.X_train_subset = self.X_train[:int(len(self.X_train) * self.percentage)]
        self.y_train_subset = self.y_train[:int(len(self.y_train) * self.percentage)]

        self.X_val_subset = self.X_val[:int(len(self.X_val) * self.percentage)]
        self.y_val_subset = self.y_val[:int(len(self.y_val) * self.percentage)]

        self.X_test_subset = self.X_test[:int(len(self.X_test) * self.percentage)]
        self.y_test_subset = self.y_test[:int(len(self.y_test) * self.percentage)]

    def create_generators(self, batch_size = 32):
        """Build batch generators over the subset splits.

        Sets ``train_steps`` / ``val_steps`` (whole batches per epoch) and
        stores the generators on this instance as ``train_generator``,
        ``val_generator`` and ``test_generator``.

        Returns:
            ``(train_generator, val_generator, test_generator)``.
        """
        self.train_steps = int(len(self.X_train) * self.percentage) // batch_size
        self.val_steps = int(len(self.X_val) * self.percentage) // batch_size

        train_generator = self.train_datagen.flow(
                self.X_train_subset, self.y_train_subset,
                batch_size = batch_size)

        val_generator = self.test_datagen.flow(
                self.X_val_subset, self.y_val_subset,
                batch_size = batch_size)

        test_generator = self.test_datagen.flow(
                self.X_test_subset, self.y_test_subset,
                batch_size = batch_size)

        # Store on *this* instance; writing to a global `ds` only worked when
        # the instance happened to be bound to that exact name.
        self.train_generator = train_generator
        self.val_generator = val_generator
        self.test_generator = test_generator

        return train_generator, val_generator, test_generator
    
# Module-level dataset used by the training helpers in later cells:
# every sample is kept (percentage=1) and augmentation is switched off.
ds = Dataset(percentage=1, augment_data=False)

# Batches of 64. create_generators also records the generators and the
# per-epoch step counts on `ds`.
(train_generator,
 val_generator,
 test_generator) = ds.create_generators(batch_size=64)

Using TensorFlow backend.




In [4]:
import numpy as np
from copy import deepcopy

class Node:
    """One node of the search tree.

    Attributes:
        move: the move that led from the parent to this node (None for root).
        parentNode: parent ``Node`` or None.
        childNodes: list of expanded children.
        wins / visits: accumulated result and visit count (see ``Update``).
        weights: slot for model weights attached by the search; starts None.
        untriedMoves: private copy of the state's legal moves not yet expanded.
    """

    def __init__(self, move = None, parent = None, state = None):
        self.move = move
        self.parentNode = parent
        self.childNodes = []
        self.wins = 0
        self.visits = 0
        self.weights = None
        # Deep copy so removing tried moves never touches the state's list.
        # A missing state simply means "nothing to expand".
        self.untriedMoves = deepcopy(state.legalMoves) if state is not None else []

    def UCTSelectChild(self):
        """Return the child with the highest UCB1 score.

        Score: ``wins/visits + sqrt(2 * ln(parent visits) / visits)``.
        Every child must have been visited at least once, otherwise the
        score divides by zero.
        """
        # Local import: this cell does not otherwise bring sqrt/log into scope.
        from math import log, sqrt

        def ucb1(child):
            return child.wins / child.visits + sqrt(2 * log(self.visits) / child.visits)

        # sorted(...)[-1] rather than max(): on ties the last child wins.
        return sorted(self.childNodes, key=ucb1)[-1]

    def AddChild(self, m, s):
        """Create a child for move ``m`` reached in state ``s``; return it."""
        n = Node(move = m, parent = self, state = s)
        self.childNodes.append(n)
        return n

    def Update(self, result):
        """Record one visit and add ``result`` to the win total."""
        self.visits += 1
        self.wins += result

    def __repr__(self):
        return "[M:{} W/V:{}/{} U:{}]".format(
            self.move, self.wins, self.visits, self.untriedMoves)

    def TreeToString(self, indent):
        """Render this node and all descendants, one node per line."""
        return self.IndentString(indent) + str(self) + "".join(
            c.TreeToString(indent + 1) for c in self.childNodes)

    def IndentString(self, indent):
        """Newline followed by ``indent`` copies of ``"| "``."""
        return "\n" + "| " * max(indent, 0)

    def ChildrenToString(self):
        """One line per direct child."""
        return "".join(str(c) + "\n" for c in self.childNodes)

In [5]:
from keras.optimizers import SGD
from clr_callback import CyclicLR


def fitAutoEnc(model, X_train, X_test, clr):
    """Fit ``model`` to reproduce its own input.

    The inputs double as targets for both training (``X_train``) and
    validation (``X_test``). Trains for 2 epochs with batches of 64 and
    ``clr`` as the only callback. Returns nothing.
    """
    fit_options = dict(
        epochs=2,
        batch_size=64,
        validation_data=(X_test, X_test),
        callbacks=[clr],
    )
    model.fit(X_train, X_train, **fit_options)


def fitDataset(model, ds, clr):
    """Train ``model`` for 2 epochs from the generators stored on ``ds``.

    Reads ``train_generator`` / ``train_steps`` and ``val_generator`` /
    ``val_steps`` from ``ds`` and passes ``clr`` as the only callback.
    Returns nothing.
    """
    generator_options = {
        "generator": ds.train_generator,
        "steps_per_epoch": ds.train_steps,
        "epochs": 2,
        "verbose": 1,
        "callbacks": [clr],
        "validation_data": ds.val_generator,
        "validation_steps": ds.val_steps,
    }
    model.fit_generator(**generator_options)


def finetune(model, ds=ds, loss="binary_crossentropy", metrics=None):
    """Briefly train ``model`` on ``ds``, then unfreeze it and recompile.

    Steps, in order:
      1. compile with Nesterov SGD,
      2. print each layer's ``trainable`` flag,
      3. fit via ``fitDataset`` with a triangular cyclic learning rate
         (0.001 to 0.006, step size 2000),
      4. mark every layer trainable and recompile with Adam.

    Args:
        model: model exposing ``compile`` and ``layers``.
        ds: dataset handed to ``fitDataset``; the default is whatever the
            module-level ``ds`` was bound to when this cell was executed.
        loss: loss used for both compiles.
        metrics: metrics used for both compiles.

    Returns:
        The same ``model`` object, recompiled with Adam.
    """
    sgd = SGD(nesterov=True)
    cyclic_lr = CyclicLR(base_lr=0.001, max_lr=0.006,
                         step_size=2000., mode='triangular')

    compile_options = {"loss": loss, "metrics": metrics}
    model.compile(optimizer=sgd, **compile_options)

    # Show which layers take part in this short fit.
    for current_layer in model.layers:
        print(current_layer.trainable)

    fitDataset(model, ds, cyclic_lr)

    # Release every layer again; the flag change needs a fresh compile.
    for current_layer in model.layers:
        current_layer.trainable = True

    model.compile(optimizer="adam", **compile_options)
    return model

In [6]:
from copy import deepcopy
from IPython.core.debugger import set_trace

def load_weights(model, weights, debug = False, doFinetune = True,
                metrics = None, ):
    """Copy a flat list of donor weights into the leading layers of ``model``.

    Layers are visited in order. A layer that owns weights receives the next
    ``len(layer.get_weights())`` donor tensors when *all* of their shapes
    match; otherwise the layer is left untouched and the same donor tensors
    are offered to the following layer.

    Args:
        model: object exposing ``layers`` and ``get_weights()``; each layer
            exposes ``name``, ``trainable``, ``get_weights()``, ``set_weights()``.
        weights: flat list of arrays (the layout ``model.get_weights()``
            returns), or None to leave the model untouched.
        debug: print per-layer progress and the shapes of unused donor tensors.
        doFinetune: freeze every layer that received donor weights and run
            ``finetune`` on the model afterwards.
        metrics: forwarded to ``finetune`` when ``doFinetune`` is set.

    Returns:
        The model (the object returned by ``finetune`` when ``doFinetune``).

    Raises:
        AssertionError: if the model's first weight tensor is unchanged
            afterwards, i.e. nothing was loaded into the first weighted layer.
    """
    if weights is None:
        return model

    origModelWeights = deepcopy(model.get_weights())
    weight_idx = 0  # index of the next unused donor tensor

    for layer in model.layers:
        current = layer.get_weights()
        if not current:
            if debug: print(layer.name, "Weight Not Changed")
            continue

        # Consecutive donor tensors for this layer. A short slice means the
        # donor list is exhausted.
        candidate = weights[weight_idx:weight_idx + len(current)]
        shapes_match = (
            len(candidate) == len(current)
            and all(c.shape == w.shape for c, w in zip(candidate, current))
        )
        if not shapes_match:
            if debug: print(layer.name, "Weight Not Changed")
            continue

        layer.set_weights(candidate)
        if doFinetune:
            # Keep transplanted weights fixed during the warm-up fit.
            layer.trainable = False
        weight_idx += len(current)
        if debug: print("Weight group {} of {} used".format(weight_idx, len(weights)))
        if debug: print(layer.name, "Weight Changed")

    if debug:
        # Shapes of donor tensors that found no home.
        for weight in weights[weight_idx:]:
            print(weight.shape)

    assert (origModelWeights[0] != model.get_weights()[0]).any(), \
        "no donor weights were loaded into the first weighted layer"

    if doFinetune:
        model = finetune(model, metrics=metrics)

    return model

In [8]:
from math import *
import random
from model_assembly import fitWithCLR, assemble_model 
from copy import deepcopy
from IPython.core.debugger import set_trace
from os.path import exists, join
from os import mkdir

class EvoSearchState:
    """Mutable state of one architecture-search rollout.

    ``graph`` is a square matrix whose band ``i < j < i + 3`` holds the
    primitive code chosen for edge ``(i, j)``. The band is also addressed by
    a flat index in row-major order: flat index ``n`` is cell
    ``(n // 2, (n + 3) // 2)``. ``coords`` is the last cell written;
    ``(-2, -1)`` is the start sentinel (flat index -2).

    ``prev*`` track the best model seen within the current rollout and
    ``best*`` the best seen overall.
    """

    def __init__(self, kwargs=None):
        """Build a state from an optional dict of attribute overrides."""
        kwargs = {} if kwargs is None else kwargs
        self.net = kwargs.get("net", None)
        self.netReady = kwargs.get("netReady", False)
        self.graph = kwargs.get("graph", np.zeros((30, 30)))
        self.numNeighbors = kwargs.get("numNeighbors", 3)
        self.prevAcc = kwargs.get("prevAcc", None)
        self.prevGraph = kwargs.get("prevGraph", None)
        self.prevModel = kwargs.get("prevModel", None)
        self.bestAcc = kwargs.get("bestAcc", None)
        self.bestGraph = kwargs.get("bestGraph", None)
        self.bestModel = kwargs.get("bestModel", None)
        self.coords = kwargs.get("coords", (-2, -1))
        # Needs coords / graph / numNeighbors, so it is resolved last.
        legalMoves = kwargs.get("legalMoves")
        self.legalMoves = self.getLegalMoves() if legalMoves is None else legalMoves
        if not exists("weights"):
            mkdir("weights")

    def Clone(self):
        """Return an independent copy of this state.

        The graph and the legal-move list are copied so that moves applied to
        the clone do not leak back into this state. Models and the network
        are shared by reference.
        """
        kwargs = {"net": self.net,
                  "netReady": self.netReady,
                  "graph": self.graph.copy(),
                  "numNeighbors": self.numNeighbors,
                  "prevAcc": self.prevAcc,
                  "prevGraph": self.prevGraph,
                  "prevModel": self.prevModel,
                  "bestAcc": self.bestAcc,
                  "bestGraph": self.bestGraph,
                  "bestModel": self.bestModel,
                  "legalMoves": [dict(m) for m in self.legalMoves],
                  "coords": self.coords}
        return EvoSearchState(kwargs)

    def DoMove(self, move):
        """Write the move's primitive into the graph and refresh legal moves."""
        self.graph[move["i"]][move["j"]] = move["primitive"]
        self.coords = (move["i"], move["j"])
        self.legalMoves = self.getLegalMoves()

    def flattenMatrix(self, M, index=None):
        """Flatten the band ``i < j < i + 3`` of ``M`` in row-major order.

        Returns:
            ``(vector, position)`` where ``position`` is the place of the
            cell ``index`` inside the vector, or None when ``index`` is not
            given or is not a band cell.
        """
        flattened_vec = []
        returnIndex = None

        for i in range(M.shape[0]):
            for j in range(i + 1, min(i + 3, M.shape[1])):
                flattened_vec.append(M[i, j])
                if index is not None and i == index[0] and j == index[1]:
                    returnIndex = len(flattened_vec) - 1

        return np.asarray(flattened_vec), returnIndex

    def unflattenVector(self, v, index=None):
        """Inverse of ``flattenMatrix`` for a matrix shaped like ``self.graph``.

        Returns:
            ``(M, cell)`` where ``M`` is zero outside the band and ``cell``
            is the ``(i, j)`` of flat position ``index`` (None when not
            given or out of range).
        """
        rows, cols = self.graph.shape[0], self.graph.shape[1]
        M = np.zeros((rows, cols))
        returnIndex = None

        cnt = 0
        for i in range(rows):
            for j in range(i + 1, min(i + 3, cols)):
                if index is not None and index == cnt:
                    returnIndex = (i, j)
                M[i][j] = v[cnt]
                cnt += 1

        return np.asarray(M), returnIndex

    def getNeighborsCache(self):
        """Return up to ``numNeighbors`` band cells that follow ``coords``.

        Only forward neighbours (larger flat index) are offered.

        Raises:
            ValueError: if ``coords`` is neither a band cell nor one of the
                start sentinels.
        """
        (i, j) = self.coords
        graph = self.graph

        _, idx = self.flattenMatrix(graph, (i, j))
        if i == -2 and j == -1:
            idx = -2
        elif i == -1 and j in (0, 1):
            # Flat index -1. (-1, 1) is the cell this method itself emits for
            # that index; (-1, 0) is kept because the code always accepted it.
            idx = -1
        if idx is None:
            raise ValueError(
                "coords {} do not address a band cell".format(self.coords))

        band_len = int(graph.shape[0] * 2) - 3
        neighbors = []
        for k in range(1, self.numNeighbors + 1):
            flat = idx + k
            if flat < band_len:
                neighbors.append((flat // 2, (flat + 3) // 2))
        return neighbors

    def getLegalMoves(self):
        """Every (neighbour cell, primitive) combination as a move dict."""
        return [
            {"i": i, "j": j, "primitive": p}
            for (i, j) in self.getNeighborsCache()
            for p in primitives
        ]

    # Open design questions:
    #   * The first move should probably be random.
    #   * Should moves be limited to extending the current graph (no
    #     overwrites)? That would complicate the softmax and stop the network
    #     from repairing earlier choices, but might keep it from undoing its
    #     own training.

    def GetMove(self, untriedMoves):
        """Pick the next move: from the policy net when ready, else at random.

        NOTE(review): the network branch is never enabled by the visible code
        (``netReady`` defaults to False). It was repaired only as far as the
        obvious intent goes; the prediction layout is an assumption.
        """
        if self.netReady:
            predictions = self.net.predict(self.graph)
            # assumes predictions[0] scores each band position and
            # predictions[1] scores the primitives -- TODO confirm
            location = np.ravel(predictions[0])
            primitive = predictions[1]
            _, (i, j) = self.unflattenVector(location, int(np.argmax(location)))
            move = dict({})
            move["i"] = i
            move["j"] = j
            # NOTE(review): this is the highest *score*, not a primitive code;
            # probably should map argmax onto `primitives` -- confirm.
            move["primitive"] = np.max(primitive)
            return move
        else:
            return random.choice(untriedMoves)

    def isFinished(self, numNodes, weights):
        """Train the current graph and decide whether the rollout stops.

        Returns:
            ``(False, new_weights)`` when validation accuracy beat the
            rollout's previous value (the model's weights are also saved to
            ``weights/<numNodes>.h5``), otherwise ``(True, None)``.
        """
        model = assemble_model(self.graph.tolist())
        model.summary()
        if weights is not None:
            model = load_weights(model, weights, doFinetune=True, debug=False)

        acc = fitWithCLR(model, ds).history['val_acc'][-1]

        if acc is not None and (self.prevAcc is None or acc > self.prevAcc):
            print("prevAcc: {}, newAcc: {}".format(self.prevAcc, acc))
            self.prevAcc = acc
            # Snapshot: the live graph keeps changing with later moves.
            self.prevGraph = self.graph.copy()
            self.prevModel = model
            model.save_weights(join("weights", "{}.h5".format(numNodes)))
            return False, model.get_weights()
        else:
            print("Termination condition reached")
            return True, None

    def GetResult(self, numNodes):
        """Score the rollout: 1 if it set a new overall best, else 0.

        The final (rejected) move is deliberately ignored; the rollout is
        judged by the best model it trained. On success that trained model
        is saved to ``weights/best_model_node_<numNodes>.h5``.
        """
        improved = self.prevAcc is not None and (
            self.bestAcc is None or self.prevAcc > self.bestAcc)
        if improved:
            print("bestAcc: {}, newAcc: {}".format(self.bestAcc, self.prevAcc))
            self.bestAcc = self.prevAcc
            self.bestGraph = self.prevGraph
            self.bestModel = self.prevModel
            if self.bestModel is not None:
                # Save the trained model that earned the score, not a freshly
                # assembled (untrained) one.
                self.bestModel.save(
                    join("weights", "best_model_node_{}.h5".format(numNodes)))
            print("Success")
            return 1
        else:
            print("Failure")
            return 0

In [9]:
import pickle
from keras.models import load_model

def UCT(rootState, maxIters, verbose=False):
    """Run ``maxIters`` search iterations starting from ``rootState``.

    Resumes from ``MCTS.p`` / ``bestGraph.p`` / ``bestModel.h5`` /
    ``bestAcc.p`` in the working directory when all four load; otherwise the
    error is printed and a fresh tree is started. The tree is checkpointed
    every 10th iteration and on the last one; the best graph / model /
    accuracy are rewritten only when they improved on what is already saved.

    Args:
        rootState: state that every iteration is cloned from.
        maxIters: number of iterations.
        verbose: currently unused.

    Returns:
        ``(bestAcc, bestGraph, bestModel, rootNode)``; the first three are
        None when nothing was loaded and no iteration produced a result.
    """
    # SECURITY: pickle.load can execute arbitrary code. Only resume from
    # checkpoint files written by this notebook.
    try:
        with open("MCTS.p", "rb") as fh:
            rootNode = pickle.load(fh)
        with open("bestGraph.p", "rb") as fh:
            bestGraph = pickle.load(fh)
        bestModel = load_model("bestModel.h5")
        with open("bestAcc.p", "rb") as fh:
            bestAcc = pickle.load(fh)
    except Exception as e:
        # Best effort: any missing or unreadable checkpoint means start over.
        print(e)
        rootNode = Node(state=rootState)
        bestAcc = None
        bestGraph = None
        bestModel = None

    # Accuracy of what is currently on disk (None when nothing is).
    origAcc = bestAcc

    for i in range(maxIters):
        print("On iteration {}".format(i))
        numNodes = 0
        node = rootNode
        state = rootState.Clone()

        if bestAcc is not None:
            state.bestAcc = bestAcc
            state.bestGraph = bestGraph
            state.bestModel = bestModel

        # Selection: descend through fully expanded, non-leaf nodes via UCT.
        while node.untriedMoves == [] and node.childNodes != []:
            node = node.UCTSelectChild()
            state.DoMove(node.move)

        # Expansion + rollout: keep adding moves until training stops
        # improving or there is nothing left to try.
        if node.untriedMoves != []:
            finished = False
            while not finished and node.untriedMoves:
                move = state.GetMove(node.untriedMoves)
                node.untriedMoves.remove(move)
                # DoMove also refreshes state.legalMoves.
                state.DoMove(move)
                numNodes += 1
                if node != rootNode:
                    weights = node.parentNode.weights
                else:
                    weights = None
                finished, wgts = state.isFinished(numNodes, weights)
                node.weights = wgts
                node = node.AddChild(move, state)

        result = state.GetResult(numNodes)

        # Backpropagation.
        while node is not None:
            node.Update(result)
            node = node.parentNode

        bestAcc = state.bestAcc
        bestGraph = state.bestGraph
        bestModel = state.bestModel

        if i % 10 == 0 or i == maxIters - 1:
            with open("MCTS.p", "wb") as fh:
                pickle.dump(rootNode, fh)
            # None-safe: on a fresh run nothing is on disk yet (origAcc None).
            improved = bestAcc is not None and (origAcc is None or bestAcc > origAcc)
            if improved:
                if bestModel is not None:
                    bestModel.save("bestModel.h5")
                with open("bestGraph.p", "wb") as fh:
                    pickle.dump(bestGraph, fh)
                with open("bestAcc.p", "wb") as fh:
                    pickle.dump(bestAcc, fh)
                origAcc = bestAcc

    return bestAcc, bestGraph, bestModel, rootNode

In [10]:
def RunMCTS(maxIters=500, verbose=False):
    """Build a default ``EvoSearchState``, run ``UCT`` on it and print the best accuracy.

    Args:
        maxIters: number of search iterations handed to ``UCT``.
        verbose: forwarded to ``UCT``.

    Returns nothing; call ``UCT`` directly to get the graph, model and tree.
    """
    state = EvoSearchState()
    bestAcc, bestGraph, bestModel, rootNode = UCT(
        rootState=state, maxIters=maxIters, verbose=verbose)
    print("Best Acc: {}".format(bestAcc))

In [11]:
# Start the architecture search with RunMCTS's built-in settings.
# The output recorded below ends in KeyboardInterrupt (the run was interrupted).
RunMCTS()

On iteration 0
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 32, 32, 3)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 32, 32, 16)        64        
_________________________________________________________________
batch_normalization_1 (Batch (None, 32, 32, 16)        64        
_________________________________________________________________
activation_1 (Activation)    (None, 32, 32, 16)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 16384)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                163850    
Total params: 163,978
Trainable params: 163,946
Non-trainable params: 32
______________________________________________________

KeyboardInterrupt: 