In [3]:
# what I want is a set of variables that will provide a maximum percentage accuracy increase with the lowest
# number of iterations

# what if I took what I have and changed it to a conv1d for masking. 

# could give a softmax of num iters over the difference trainable layer indices
# could also give the lr number
# 

# the maximum percentage accuracy increase is the average of all increases from random actions, 


# iter_budget = 10

# example softmax [.2, .2, .2, .2, .2]
# all layers would train for 2 iterations each

# do I want it to be able to train multiple layers at once? in theory you could replicating training all at once
# through doing 1 iteration each for 

# no you couldnt since it is like freezing everything
# I am going under the assumption however that training one layer at a time would be close to as good.
# at least for tweaking existing models it should be pretty effective

# could have a sequence of layers that are either trained or not trained, and they have


# basically we need to choose whether or not to train a layer. the less which are marked to be trained 
# the less computation it requires.
# solving the lr issue would be great


# what do we want the input to be
# we want it to be all of the trainable layers
# could have a 2dconv which analyzes it

# layer_dict["idx"] = np.random.choice(trainable_indices)
# layer_dict["lr"] = np.random.uniform(1e-1, 1e-6)
# layer_dict["iters"] = np.random.randint(0, 3)
# layer_dict["acc"] = train(layer_dict)

# test = [
# # lr, train, conv/dense, magnitude 
# np.asarray([np.random.uniform(1e-1, 1e-6), np.random.choice([0, 1]), np.random.choice([0, 1]), 10]),
# np.asarray([np.random.uniform(1e-1, 1e-6), np.random.choice([0, 1]), np.random.choice([0, 1]), 10])
# ]
# var = np.asarray(test); var.shape

In [None]:
# Idea: LSTM that chooses which layer to train next and with what parameter settings. Could have it be for a set
# number of iterations to reduce the overhead. could pass in a conv2d feature extraction for use

# iters is number of times trained
# highest iters are trained before other modules come in
# also could have all iters be indepedent
# could have it be an option for the lstm

# could either have the previous layers predict future layers, or could have all layers together predict 
# the accuracy improvement. I want to get the variables most of all, so how would I get those?
# I need variations in those variables, and differences in accuracy as a result of those variables
# I can start by randomly having variables and taking their accuracy improvement as above

# maybe the model is input two sets of complete variables and it predicts which one is the best,
# and what the accuracy of each is

# also could compare against a base config, and estimate the accuracy of the unknown varset
# send in set of layers and have it predict the nex
# t layers value and the final accuracy

#         should the end of each iters be net better? or do we care about after all layers do their iters
#         the greedy approach could probably enable optimization and allow to compare accuracies at each step
#         but it would not align as well with my original vision. if any step reduces the accuracy of the model
#         you probably wouldnt want to do it in the future



# inputs = []
# targets = []

# for i, layer in enumerate(schedules):
#     if i % 2 != 1:
#         input = []
#         input.append(np.asarray([layer["idx"], layer["iters"], layer["lr"]]))
#         input.append(np.asarray([schedules[i+1]["idx"], schedules[i+1]["iters"], schedules[i+1]["lr"]]))
#         targets.append(layer["acc"])
    
# inputs = np.asarray(inputs)
# targets = np.asarray(targets)

In [1]:
from keras import backend as K
from keras.datasets import cifar10, mnist
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
import numpy as np
from keras import utils as np_utils

class Dataset():
    def __init__(self, percentage = np.random.uniform(.1, .9), dataset = cifar10, augment_data = True):
        self.percentage = percentage
        
        (self.X_train, self.y_train), (self.X_test, self.y_test) = dataset.load_data()

        print("WARNING: number of categories is hardcoded, should make dynamic")
        self.y_train = np_utils.to_categorical(self.y_train, 10)
        self.y_test = np_utils.to_categorical(self.y_test, 10)

        if augment_data:
            self.train_datagen = ImageDataGenerator(
                  rotation_range=40,
                  width_shift_range=0.2,
                  height_shift_range=0.2,
                  shear_range=0.2,
                  zoom_range=0.2,
                  horizontal_flip=True,
                  fill_mode='nearest')
        else:
            self.train_datagen = ImageDataGenerator()

        self.test_datagen = ImageDataGenerator()

        self.X_train = self.X_train.astype('float32') / 255
        self.X_test = self.X_test.astype('float32') / 255

        X_train_mean = np.mean(self.X_train, axis = 0)
        self.X_train -= X_train_mean
        self.X_test -= X_train_mean

        self.X_val, self.X_test, self.y_val, self.y_test = train_test_split(
            self.X_test, self.y_test, test_size = 0.5)
        
        self.X_train_subset = self.X_train[:int(len(self.X_train) * self.percentage)]
        self.y_train_subset = self.y_train[:int(len(self.y_train) * self.percentage)]
        
        self.X_val_subset = self.X_val[:int(len(self.X_val) * self.percentage)]
        self.y_val_subset = self.y_val[:int(len(self.y_val) * self.percentage)]
        
        self.X_test_subset = self.X_test[:int(len(self.X_test) * self.percentage)]
        self.y_test_subset = self.y_test[:int(len(self.y_test) * self.percentage)]
        
    def create_generators(self, batch_size = 32):
        self.train_steps = int(len(self.X_train) * self.percentage) // batch_size
        self.val_steps = int(len(self.X_val) * self.percentage) // batch_size
        
        train_generator = self.train_datagen.flow(
                self.X_train_subset, self.y_train_subset,
                batch_size = batch_size)

        validation_generator = self.test_datagen.flow(
                self.X_val_subset, self.y_val_subset,
                batch_size = batch_size)

        test_generator = self.test_datagen.flow(
                self.X_test_subset, self.y_test_subset,
                batch_size = batch_size)

        return train_generator, validation_generator, test_generator

Using TensorFlow backend.


In [2]:
from keras import layers
from keras import models

def create_conv_net():
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu',
                            input_shape=(32, 32, 3)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.Dense(10, activation='softmax'))
    model.compile(optimizer = "sgd",
         loss = "categorical_crossentropy",
         metrics = ["acc"])
    return model

ds = Dataset(percentage=1,
             augment_data=False)
train_generator, validation_generator, test_generator = ds.create_generators(
    batch_size=64)



In [44]:
from keras.models import Sequential, Model
from keras import layers
from keras.layers import Dense, MaxPooling2D, Input, Conv2D, Flatten, AveragePooling2D, LSTM
import numpy as np
from tqdm import tqdm
from IPython.core.debugger import set_trace
from keras.optimizers import Adam, SGD
from copy import deepcopy
from keras.models import load_model
import random
from copy import deepcopy, copy

try:
    print("Loading Best")
    net = load_model("best.h5")
    best_weights = net.get_weights()
except:
    print("Model not found, creating new model")
    net = create_conv_net()
    
for l in net.layers:
    l.trainable = False
    
def compile_model(lr=1e-3):
    net.compile(optimizer=SGD(lr),
    loss="categorical_crossentropy",
    metrics=["acc"])
    
compile_model()

def decision(probability):
    return random.random() < probability

def get_scaled_lr(lr):
    min = 1e-6
    max = 1e-1
    return get_scaled(lr, min, max)

def get_scaled(num, min, max):
    mean = (max + min)/2

    std = (((min - mean)**2 + (max - mean)**2)/2)**(1/2)

    zero_mean = num - mean
    scaled_num = zero_mean/std
    
    return scaled_num

def unscale(num, min, max):
    mean = (max + min)/2

    std = (((min - mean)**2 + (max - mean)**2)/2)**(1/2)
    
    return (num*std) + mean

def unscale_lr(lr):
    return unscale(lr, 1e-5, 1e-2)

# todo: would be nice to have a cached model instead of having to create a new one

prev_acc = net.evaluate_generator(test_generator, steps = 100)[1]

def train(schedule):
    global net
    global prev_acc
    
#     I given a random update, I should predict if it's good or bad
# once it is sufficiently good, I can filter down the type of updates I check
    
    lr = unscale_lr(schedule["lr"])
    idx = schedule["idx"]

    net.layers[idx].trainable = True
    compile_model(lr=lr)

#     length = unscale_length(schedule["length"])

    net.fit_generator(
        train_generator,
        steps_per_epoch=48,
        epochs=1,
        validation_data=validation_generator,
        validation_steps=36,
        verbose=0)

    net.layers[idx].trainable = False

    acc = net.evaluate_generator(test_generator, steps = 50)[1]
    
    print("Acc: {}, Prev Acc: {}".format(acc, prev_acc))
    if  acc > prev_acc:
        print("Saved New Best")
        net.save("best.h5")
        best_weights = net.get_weights()
        prev_acc = copy(acc)
        return 1
    else:
        try:
            print("Reverted Model")
            net = net.load_weights(best_weights)
        except:
            pass
        return 0

Loading Best


In [45]:
def get_index(location, random = False):
    location += 1
    location /= 2
    
    index = int(np.round(location * len(trainable_indices)))
    
#         index += np.random.choice([-1, 0, 0, 0, 0, 1])
    index = min(index, len(trainable_indices) - 1)
    index = max(index, 0)
    return index

def get_magnitude(index):
    mean_param_count = net.count_params() / len(trainable_indices)
    index_param_count = net.layers[index].count_params()
    
    mss = 0
    
    for i in range(len(trainable_indices)):
        mss += (net.layers[trainable_indices[i]].count_params() - mean_param_count)**2
        
    std = mss**(1/2)
    
    ipc_zero = index_param_count - mean_param_count
    scaled_ipc = ipc_zero/std
    return scaled_ipc

trainable_indices = []

times_trained = {}

for i, l in enumerate(net.layers):
    if l.count_params() > 0:
        trainable_indices.append(i)

for i in range(len(trainable_indices)):
    times_trained[i] = 0

def transform_schedule(schedule):
#     acc = train(schedule)
#     transformed_schedule = np.asarray([acc, schedule["location"], schedule["num_indices"], 
#                                        schedule["times_trained"], schedule["magnitude"], schedule["lr"]])
    
    transformed_schedule = np.asarray([schedule["lr"], schedule["curr_acc"],
                                       schedule["location"], schedule["magnitude"]])
    
    transformed_schedule = transformed_schedule
    
    return transformed_schedule

def make_schedule():
    schedule = {}
    
    schedule["lr"] = get_scaled_lr(np.random.uniform(1e-6, 1e-1))
    schedule["curr_acc"] = prev_acc
    schedule["location"] = np.random.uniform(-1, 1)
    schedule["idx"] = get_index(schedule["location"])
    schedule["magnitude"] = get_magnitude(schedule["idx"])
    transformed_schedule = transform_schedule(schedule)
    
    return np.expand_dims(np.expand_dims(np.asarray(transformed_schedule), axis=0), axis=0), schedule

def check_good_or_bad(schedule):
    return np.array([[train(schedule)]])

In [46]:
# def penalized_loss(noise):
#     def loss(y_true, y_pred):
#         return K.mean(K.square(y_pred - y_true) - K.square(y_true - noise), axis=-1)
#     return loss


# input1 = Input(batch_shape=(batch_size, timesteps, features))
# lstm =  LSTM(features, stateful=True, return_sequences=True)(input1)
# output1 = TimeDistributed(Dense(features, activation='sigmoid'))(lstm)
# output2 = TimeDistributed(Dense(features, activation='sigmoid'))(lstm)
# model = Model(input=[input1], output=[output1, output2])
# model.compile(loss=[penalized_loss(noise=output2), penalized_loss(noise=output1)], optimizer='rmsprop')

In [65]:
# example input:
# network architecture?
# current learning rate magnitude
# age of layer
# location of layer
# loss can be auxiliary
# inputs start random and the LSTM will be reinforced from good changes

from keras.layers import Input, Embedding, LSTM, Dense, TimeDistributed, concatenate
from keras.models import Model
from keras.layers import Conv1D
from keras.losses import mean_squared_error

# could either do one layer update at a time or the whole model at once
# one layer at a time might be more overhead, but might optimize more
# whole model might train easier too

#might need to make a custom layer where I construct the model and return the accuracy of it, and
# use that as the loss against perfect acc

# how could I split the lstm

# have an lstm that produces a schedule given a previous schedule

def sameness_loss(y_true, y_pred):
    return K.mean(K.pow(y_pred - y_true, -2), axis=-1)

def custom_loss(y_true, y_pred):
    if y_pred.shape[1].value == 2:
        return sameness_loss(y_true, y_pred)
    else:
        return mean_squared_error(y_true,y_pred)

def create_LSTM():
#     input is a random update
    input = Input(shape = (1, 4,))
    
    x = LSTM(256, dropout = 0.5, recurrent_dropout = 0.5, return_sequences = True, activation = "relu")(input)
    x = LSTM(256, dropout = 0.5, recurrent_dropout = 0.5, return_sequences = True, activation = "relu")(x)
    x = LSTM(256, dropout = 0.5, recurrent_dropout = 0.5, activation = "relu")(x)
#     output is how likely this update is good or bad, or could do each individual update?
    good_or_bad = Dense(1, activation = "sigmoid")(x)
    
#     need two losses: one that penalizes the mse between predicted_acc and actual_acc
#     predictions = Dense(int(len(trainable_indices)*2))(x)

# idea: have the model predict a percentage of the model, and the program selects the closest trainable
# index

# need to tie together predicting accuracy and proposing a new schedule
    
    model = Model(inputs=input, 
                  outputs=good_or_bad)
    
    model.compile(optimizer=Adam(lr = 1e-3), loss="binary_crossentropy", metrics=["mae"])
    
    return model
    
#     idea: have an aux loss which looks at predicted accuracy for the predicted vars and the loss is how
# far that is from a desired acc (.9)

# try:
#     lstm = load_model("best_lstm.h5")
# except:
lstm = create_LSTM()

In [63]:
lstm.compile(optimizer=Adam(lr = 8e-4), loss="binary_crossentropy", metrics=["mae"]) 

In [72]:
net = create_conv_net()
inputs = []
targets = []
num_good = 0
num_bad = 0
inputs_targets = []

prev_acc = 0.1

In [74]:
while True:
    input, schedule = make_schedule()
    good_or_bad = check_good_or_bad(schedule)
    
    if good_or_bad == 0 and num_bad < 50:
        inputs_targets.append((input, good_or_bad))
    elif good_or_bad == 1 and num_good < 50:
        inputs_targets.append((input, good_or_bad))
    else:
        break
        

    


Acc: 0.1183206106870229, Prev Acc: 0.1
Saved New Best
Acc: 0.1275, Prev Acc: 0.1183206106870229
Saved New Best
Acc: 0.1313613231552163, Prev Acc: 0.1275
Saved New Best
Acc: 0.16221374045801526, Prev Acc: 0.1313613231552163
Saved New Best
Acc: 0.1678125, Prev Acc: 0.16221374045801526
Saved New Best
Acc: 0.1696875, Prev Acc: 0.1678125
Saved New Best
Acc: 0.17239185750636132, Prev Acc: 0.1696875
Saved New Best
Acc: 0.178117048346056, Prev Acc: 0.17239185750636132
Saved New Best
Acc: 0.1684375, Prev Acc: 0.178117048346056
Reverted Model
Acc: 0.1753125, Prev Acc: 0.178117048346056
Reverted Model
Acc: 0.16762086513994912, Prev Acc: 0.178117048346056
Reverted Model
Acc: 0.1806615776081425, Prev Acc: 0.178117048346056
Saved New Best
Acc: 0.1903125, Prev Acc: 0.1806615776081425
Saved New Best
Acc: 0.1990625, Prev Acc: 0.1903125
Saved New Best
Acc: 0.191793893129771, Prev Acc: 0.1990625
Reverted Model
Acc: 0.19083969465648856, Prev Acc: 0.1990625
Reverted Model
Acc: 0.195625, Prev Acc: 0.1990625

KeyboardInterrupt: 

In [80]:
from random import shuffle
import pickle

shuffle(inputs_targets)

inputs = []
targets = []

for it in inputs_targets:
    inputs.append(it[0])
    targets.append(it[1])
    
# pickle.dump(inputs, open("inputs.p", "wb"))
# pickle.dump(targets, open("targets.p", "wb"))

In [64]:
num_cycles = 10000

for i in range(num_cycles):
    input, schedule = make_schedule()
    good_or_bad_pred = lstm.predict(input)[0][0]
    
    if decision(good_or_bad_pred + .01):
        good_or_bad = check_good_or_bad(schedule)
        print("Predicted: {}, Actual: {}".format(good_or_bad_pred, good_or_bad))
        lstm.fit(input, good_or_bad, verbose = 2)

Acc: 0.3212468193384224, Prev Acc: 0.33125
Reverted Model
Predicted: 0.0, Actual: [[0]]
Epoch 1/1
8s - loss: 0.0000e+00 - mean_absolute_error: 1.5048e-22
Acc: 0.32665394402035625, Prev Acc: 0.33125
Reverted Model
Predicted: 0.0, Actual: [[0]]
Epoch 1/1
0s - loss: 1.4525e-38 - mean_absolute_error: 1.2052e-19
Acc: 0.3290625, Prev Acc: 0.33125
Reverted Model
Predicted: 5.0873850909738394e-08, Actual: [[0]]
Epoch 1/1
0s - loss: 0.0000e+00 - mean_absolute_error: 3.0622e-29
Acc: 0.32665394402035625, Prev Acc: 0.33125
Reverted Model
Predicted: 2.092201828451934e-10, Actual: [[0]]
Epoch 1/1
0s - loss: 3.1328e-13 - mean_absolute_error: 5.5972e-07
Acc: 0.3244274809160305, Prev Acc: 0.33125
Reverted Model
Predicted: 2.850833284717691e-11, Actual: [[0]]
Epoch 1/1
0s - loss: 0.0000e+00 - mean_absolute_error: 1.0027e-24
Acc: 0.3336513994910941, Prev Acc: 0.33125
Saved New Best
Predicted: 1.713376219113498e-18, Actual: [[1]]
Epoch 1/1
0s - loss: 1.0000 - mean_absolute_error: 1.0000
Acc: 0.314375, Pre

KeyboardInterrupt: 

In [54]:
# lstm.save("best_lstm.h5")

In [None]:
# maybe external output which maps to the accuracy of the last model
# or have the lstm predict the accuracy of the last one and the between that

# i want an lstm gan which is fed it's previous predicted pieces(starts being random) and predicts the accuracy
# of it's inputs



# def randomize_lr(lr):
#     if decision(.7):
#         return lr
#     else: 
#         return get_scaled_lr(np.random.uniform(5e-5, 5e-1))

# def randomize_location(loc):
#     if decision(.7):
#         return loc
#     else:
#         return np.random.uniform(-1, 1)


        
# for i in range(len(trainable_indices)):
#     print("Cycle %d" % (i))

    
#     good_or_bad_pred = prediction[0]
# #     next_schedule[0][0] = np.random.uniform(-1, 1)
# #     next_schedule[0][1] = np.random.uniform(-1, 1)

#     schedules, acc = create_schedules(trainable, lr)

# #     schedule = {}
# #     schedule["location"] = location
# #     schedule["idx"] = get_index(schedule["location"])
# #     schedule["trainable"] = trainable
#     #     schedule["num_indices"] = len(trainable_indices)
#     #     schedule["times_trained"] = times_trained[schedule["idx"]]
# #     schedule["magnitude"] = get_magnitude(schedule["idx"])
#     #     + (np.random.uniform(-1, 1) / 1e3)
# #     schedule["lr"] = lr
#     #     + (np.random.uniform(-1, 1) * 1e-4

#     print(schedules, predicted_acc)
    
#     lstm_inputs = schedules

#     lstm_targets = []
#     lstm_targets.append(np.zeros((1, 5)))
#     lstm_targets.append(np.zeros((1, 5)))
#     lstm_targets.append(predicted_acc)
#     lstm_targets.append(np.array([[.9]]))
    
#     lstm.fit(lstm_inputs, lstm_targets)
    
# # diff between prediction accuracy and actual



# # lstm_inputs = np.asarray([np.expand_dims(lstm.predict(lstm_inputs, lstm_target), axis=0)])

# res = lstm.predict(input, target["vars"], target["acc"])

In [40]:
# issue: the output for the models is just continually going up. this is probably because it predicts that the
# accuracy will slowly rise. the output needs a direct loss or it will not be able to train properly
# the issue is that without a loss which will propagate to that part for that purpose, it will not learn as
# desired. The issue also is that we cannot provide good examples, we want the network to learn it.
# Maybe changing the accuracy to an aux loss and having the main loss be . . . . . . .

# need a loss that will penalize the predicted numbers for not increasing the accuracy
# the issue is that predicting accuracy just makes the model learn to predict the pattern of how the accuracy
# generally increases, rather than any deeper meaning. 

# want to produce a location and a learning rate, see how much they improved the accuracy, and penalize the 
# difference between that and an ideal rate on improvement (5%)

# right now it is just predicting that the accuracy will slowly increase
# what we want is to have it be heavily penalized for not improving faster, and be encouraged to try new things
# maybe we don't care what the predicted accuracy is? but then how will we connect the layer to the loss

# the loss right now is just not good enough to work
# An issue could be that I'm using single sequences rather than the whole thing

In [None]:
predictions = []

for input in inputs:
#     want lstm's loss to be based on changing variables to improve accuracy
# so need to input negative examples and good examples, or have the lstm randomly 
# choose
    predictions.append(lstm.evaluate(input, target))

for i, layer in enumerate(layers):
#     0 is iters, 1 is lr, 2 is trainable
    layers[i].age += predictions[i][0]
    layers[i]["iters"] = predictions[i][0]
    layers[i]["lr"] = predictions[i][1]
    layers[i]["trainable"] = predictions[i][2]
    conv_model.layers[layer["name"]].trainable = predictions[i][2]
    


In [None]:
from keras.models import Sequential, Model, clone_model
from keras import layers
from keras.layers import Dense, MaxPooling2D, Input, Conv2D, Flatten, AveragePooling2D
import numpy as np
from tqdm import tqdm
from IPython.core.debugger import set_trace
from keras.optimizers import Adam
from copy import deepcopy

ds = Dataset(percentage = 1,
             augment_data = False)
train_generator, validation_generator, test_generator = ds.create_generators(batch_size = 64);

model = Sequential()
model.add(Conv2D(32, 3, 
                 activation = 'relu', 
                 input_shape = (32, 32, 3)))
model.add(Conv2D(32, 3, activation = 'relu'))
model.add(Conv2D(32, 3, activation = 'relu'))
model.add(Conv2D(32, 3, activation = 'relu'))
model.add(Conv2D(32, 3, activation = 'relu'))
model.add(AveragePooling2D(3))
model.add(Flatten())
model.add(Dense(10, activation = 'softmax'))

def train():
    history = model.fit_generator(
    train_generator,
    steps_per_epoch = ds.train_steps // 4,
    epochs = 20,
    validation_data = validation_generator,
    validation_steps = ds.val_steps // 4,
    verbose = 1)   

try:
    def train_input():
        for i, layer in enumerate(model.layers):
        if i == 0:
            layer.trainable = True
        else:
            layer.trainable = False
            
        model.compile(optimizer = Adam(1e-4),
         loss = "categorical_crossentropy",
         metrics = ["acc"])
        train()
except KeyboardInterrupt as e:
    pass
  
try:
    def train_output():
        for i, layer in enumerate(model.layers):
        if i == (len(model.layers)-1):
            layer.trainable = True
        else:
            layer.trainable = False
        model.compile(optimizer = Adam(1e-4),
             loss = "categorical_crossentropy",
             metrics = ["acc"])
        train()
except KeyboardInterrupt as e:
    pass

try:
    def train_conv2d3x3():
        for i, layer in enumerate(model.layers):
        if "conv2d" in name:
            layer.trainable = True
        else:
            layer.trainable = False
        model.compile(optimizer = Adam(1e-4),
             loss = "categorical_crossentropy",
             metrics = ["acc"])
        train()
except KeyboardInterrupt as e:
    pass

In [26]:
for i, layer in enumerate(model.layers):
    if layer.trainable and i != 0 and i != (len(model.layers)-1):
        name = "".join(layer.name.split("_")[:-1])
        if "conv2d" in name:
            layer.trainable = False
            print("layer made untrainable", layer)
    else:
        layer.trainable = True
        print("layer made trainable", layer)   

model.compile(optimizer = Adam(5e-3),
     loss = "categorical_crossentropy",
     metrics = ["acc"])

history = model.fit_generator(
    train_generator,
    steps_per_epoch = ds.train_steps // 4,
    epochs = 20,
    validation_data = validation_generator,
    validation_steps = ds.val_steps // 4,
    verbose = 1)

layer made trainable <keras.layers.convolutional.Conv2D object at 0x7f803ac2a400>
layer made trainable <keras.layers.convolutional.Conv2D object at 0x7f803a869c18>
layer made trainable <keras.layers.convolutional.Conv2D object at 0x7f803a869d30>
layer made trainable <keras.layers.convolutional.Conv2D object at 0x7f803a869dd8>
layer made trainable <keras.layers.core.Dense object at 0x7f803a7bcd68>
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20

KeyboardInterrupt: 

In [29]:
from os.path import exists, join

weights = []
output_weights = None

for i, layer in enumerate(model.layers):
    name = "".join(layer.name.split("_")[:-1])
    if not exists(join("parameters", name)):
        mkdir(join("parameters", name))
    input_shape_str = "_".join(map(str, layer.input_shape[1:]))
    if "conv" in name.lower():
        if i == 0:
            input_weights = layer.get_weights()
        else:
            weights.append(layer.get_weights())
    elif "dense" in name.lower():
        output_weights = layer.get_weights()
#         weights_filename = save_parameters(layer, name, input_shape_str, "weights", 0)
#         biases_filename = save_parameters(layer, name, input_shape_str, "biases", 0)
print(len(weights))

def save_weights(input = False, output = False):
    if input:
        np.save("input_weights_conv2d3x3.npy", input_weights)
        
    current_weight = deepcopy(weights[0][0])
    current_bias = deepcopy(weights[0][1])
    for i in range(len(weights[1:]) - 1):
        if i == 0:
            weight_difference = current_weight - weights[i+1][0]
            bias_difference = current_bias - weights[i+1][1]
        else:
            weight_difference = (weight_difference + current_weight - weights[i+1][0]) / 2
            bias_difference = (bias_difference + current_bias - weights[i+1][1]) / 2
        current_weight = deepcopy(weights[i+1][0])
        current_bias = deepcopy(weights[i+1][1])
    np.save("starting_params_conv2d3x3.npy", weights[0])
    np.save("weight_difference_conv2d3x3.npy", weight_difference)
    np.save("bias_difference_conv2d3x3.npy", bias_difference)
    if output:
        np.save("output_weights|2048_10.npy", output_weights)
        
save_weights(input = True, output = True)

3


In [None]:
def save_parameters(layer, name, input_shape_str, param_type, motif_id):
    if param_type is "weights":
        params = layer.get_weights()[0]
    else:
        params = layer.get_weights()[1]
    params_filename = join("parameters", str(motif_id) + "~" + name, param_type + "_" + input_shape_str + "-0.npy")
    path = Path(params_filename)
    
    i = 1
    while True:
        if path.is_file():
            params_filename = params_filename.split("-")[0] + "-%d" % (i) + ".npy"
            path = Path(params_filename)
        else:
            np.save(params_filename, params)
            break
        i += 1
    return params_filename

In [22]:
from pathlib import Path

def save_parameters(layer, name, input_shape_str, param_type, motif_id):
    if param_type is "weights":
        params = layer.get_weights()[0]
    else:
        params = layer.get_weights()[1]
    params_filename = join("parameters", str(motif_id) + "~" + name, param_type + "_" + input_shape_str + "-0.npy")
    path = Path(params_filename)
    
    i = 1
    while True:
        if path.is_file():
            params_filename = params_filename.split("-")[0] + "-%d" % (i) + ".npy"
            path = Path(params_filename)
        else:
            np.save(params_filename, params)
            break
        i += 1
    return params_filename

from os.path import join, exists
from os import mkdir

parameter_inserts = []
for layer in model.layers:
    if layer.trainable:
        name = "".join(layer.name.split("_")[:-1])
        if not exists(join("parameters", name)):
            mkdir(join("parameters", name))
        input_shape_str = "_".join(map(str, layer.input_shape[1:]))

        weights_filename = save_parameters(layer, name, input_shape_str, "weights", 0)
        biases_filename = save_parameters(layer, name, input_shape_str, "biases", 0)

12 Tensor("conv2d_21/add:0", shape=(3, 3, 3, 32), dtype=float32)
13 Tensor("conv2d_22/add:0", shape=(3, 3, 3, 32), dtype=float32)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


FileNotFoundError: [Errno 2] No such file or directory: 'parameters/14~conv2d/weights_32_32_3-0.npy'