In [2]:
import dataset

# `dataset` is presumably a project-local module (TODO confirm). The `ds` object is
# read as a global by `fit` below, which uses its `train_gen`, `train_steps`,
# `val_gen` and `val_steps` attributes.
ds = dataset.Dataset()

In [None]:
# Idea: if the loss between two tensors is below a certain threshold, they can be considered
# the same, and the value will round to the nearest one.

In [3]:
def binary_crossentropy(y, p, eps=1e-7):
    """Element-wise binary cross-entropy between targets `y` and predictions `p`.

    Args:
        y: Target value(s); scalar or array broadcastable against `p`.
        p: Predicted value(s), interpreted as probabilities.
        eps: Predictions are clipped to ``[eps, 1 - eps]`` before the logs are taken.

    Returns:
        ``-(y*log(p) + (1-y)*log(1-p))`` computed element-wise, with no reduction.
    """
    # Without clipping, p == 0 or p == 1 makes np.log return -inf, and a zero
    # coefficient times -inf yields nan (e.g. y == 1, p == 1).
    p = np.clip(p, eps, 1 - eps)
    return -1*(y*np.log(p) + (1-y)*np.log(1-p))

In [12]:
from keras import layers, models, optimizers

# Image classifier for 32x32x3 inputs: three Conv2D + MaxPooling2D stages,
# then Flatten, Dropout, a 512-unit Dense layer and a 10-way softmax output.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(512, activation='relu'),
    layers.Dense(10, activation='softmax'),
])

# Note: `fit` below compiles the model again with its own optimizer and loss.
model.compile(
    optimizer=optimizers.RMSprop(lr=1e-4),
    loss='categorical_crossentropy',
    metrics=['acc'],
)

In [13]:
from keras.optimizers import SGD, Nadam
from clr_callback import CyclicLR


def fit(model, epochs=10, base_lr=0.001, max_lr=0.006, step_size=2000., data=None):
    """Compile `model` with Nadam and train it from generators under a cyclic LR.

    The model is (re)compiled here with a Nadam optimizer and categorical
    cross-entropy, with no metrics passed, so whatever compile settings the
    caller applied beforehand are not the ones used for this training run.

    Args:
        model: Keras model to compile and train.
        epochs: Number of epochs passed to `fit_generator`.
        base_lr: Lower learning-rate bound passed to `CyclicLR`.
        max_lr: Upper learning-rate bound passed to `CyclicLR`.
        step_size: `step_size` argument passed to `CyclicLR`.
        data: Object exposing `train_gen`, `train_steps`, `val_gen` and
            `val_steps`. Defaults to the notebook-level `ds`.
    """
    if data is None:
        # Preserve the original behavior of reading the notebook-level dataset.
        data = ds

    clr = CyclicLR(base_lr=base_lr, max_lr=max_lr,
                   step_size=step_size, mode='triangular')

    model.compile(optimizer=Nadam(),
                  loss="categorical_crossentropy")

    model.fit_generator(data.train_gen, data.train_steps,
                        validation_data=data.val_gen, validation_steps=data.val_steps,
                        epochs=epochs, callbacks=[clr], verbose=1)

In [14]:
# Train the CNN defined above. The recorded run below was stopped with a
# KeyboardInterrupt during epoch 5 of 10.
fit(model)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
 4/78 [>.............................] - ETA: 21s - loss: 1.4485

KeyboardInterrupt: 

In [18]:
# Print the shape of the first weight array of every layer that has weights.
for layer in model.layers:
    layer_weights = layer.get_weights()
    if layer_weights:
        print(layer_weights[0].shape)

(3, 3, 3, 32)
(3, 3, 32, 64)
(3, 3, 64, 128)
(512, 512)
(512, 10)


In [28]:
from keras.layers import Conv2D, Dense, Input, LSTM, TimeDistributed, Reshape, ConvLSTM2D
from keras.models import Model

def create_random_lstm():
    """Build, summarize and return a three-layer ConvLSTM2D model.

    The input is a length-2 sequence of 32x32x3 tensors. Two 128-filter
    ConvLSTM2D layers with 1x1 kernels keep the sequence dimension; a final
    3-filter ConvLSTM2D layer returns only the last step, giving a 32x32x3
    output. `summary()` is printed as a side effect.
    """
    sequence_in = Input(shape=(2, 32, 32, 3))

    hidden = sequence_in
    for _ in range(2):
        hidden = ConvLSTM2D(128, 1, return_sequences=True)(hidden)
    last_step = ConvLSTM2D(3, 1, return_sequences=False)(hidden)

    net = Model(inputs=sequence_in, outputs=last_step)
    net.summary()
    return net

# Build the ConvLSTM model. This rebinds the notebook-level `model`, replacing
# the CNN defined in the earlier cell.
model = create_random_lstm()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 2, 32, 32, 3)      0         
_________________________________________________________________
conv_lst_m2d_3 (ConvLSTM2D)  (None, 2, 32, 32, 128)    67584     
_________________________________________________________________
conv_lst_m2d_4 (ConvLSTM2D)  (None, 2, 32, 32, 128)    131584    
_________________________________________________________________
conv_lst_m2d_5 (ConvLSTM2D)  (None, 32, 32, 3)         1584      
Total params: 200,752
Trainable params: 200,752
Non-trainable params: 0
_________________________________________________________________


In [27]:
import numpy as np
from tqdm import tqdm
from IPython.core.debugger import set_trace
from copy import deepcopy

def generate_training_data(num_samples=10000, input_shape=(32, 32, 3)):
    """Simulate a noisy element-wise descent from a start tensor towards a target.

    A start tensor and a target tensor are drawn uniformly from [0, 1) using
    fixed seeds (0 and 1). At every step a noise tensor in [-0.25, 0.25) is
    drawn; the noise is applied only at positions where it lowers the
    element-wise `binary_crossentropy` against the target, and the result is
    clamped to [0, 1].

    Args:
        num_samples: Number of steps (and therefore samples) to generate.
        input_shape: Shape of the start, target and noise tensors.

    Returns:
        Tuple ``(inputs, targets)`` of arrays. ``inputs[i]`` stacks the state
        before step ``i`` and the noise proposed at that step, so its shape is
        ``(num_samples, 2) + input_shape``. ``targets[i]`` is the state after
        step ``i``, with shape ``(num_samples,) + input_shape``.
    """
    # Fixed seeds keep the start and target tensors identical across calls.
    np.random.seed(0)
    orig = np.random.uniform(size=input_shape)
    np.random.seed(1)
    target = np.random.uniform(size=input_shape)
    # Re-seed without an argument so the noise sequence is not tied to seed 1.
    # This also resets the global NumPy random state for the caller.
    np.random.seed()

    inputs = []
    targets = []

    for _ in tqdm(range(num_samples)):
        noise = (np.random.uniform(size=input_shape) - .5) * .5
        # `orig` is rebound to a new array each step rather than mutated in
        # place, so storing references is safe and no defensive copy is needed.
        inputs.append([orig, noise])
        orig_loss = binary_crossentropy(target, orig)
        orig_noise_loss = binary_crossentropy(target, np.clip(orig + noise, 0, 1))
        # 1 where applying the noise strictly lowers the loss, 0 elsewhere.
        optimal_mask = np.where(orig_loss > orig_noise_loss, 1, 0)
        orig = np.clip(orig + noise * optimal_mask, 0, 1)
        targets.append(orig)
    return np.array(inputs), np.array(targets)

# With the default arguments, `inputs` has shape (10000, 2, 32, 32, 3) and
# `targets` has shape (10000, 32, 32, 3).
inputs, targets = generate_training_data()

  
100%|██████████| 10000/10000 [00:12<00:00, 775.80it/s]


In [31]:
# NOTE(review): `fit` as defined earlier in this notebook trains from the `ds`
# generators for 10 epochs and never reads `inputs`/`targets`. The recorded
# output below (Epoch 1/1, 10000 samples) does not match that definition, so a
# different `fit` was presumably live in the kernel when this cell ran.
fit(model)

Epoch 1/1
   64/10000 [..............................] - ETA: 3524s - loss: 5.2338

KeyboardInterrupt: 

In [None]:
# Assuming this worked perfectly, I would use the output mask to ignore certain parameter updates, and probably
# scale the learning rate based on the confidence level.
# Could I do this internally, though?
# Maybe I could learn to generate random uniform numbers, and from each random uniform number
# add and divide it to whatever the current number is, with the goal of something (thought left unfinished).