In [None]:
import math

import tensorflow as tf
from tensorflow import keras

In [None]:
# Wrapper so that the initialization strategy, the regularizer and the
# activation function do not have to be spelled out for every single layer.

from functools import partial

RegulizedDense = partial(
    keras.layers.Dense,
    activation="elu",
    kernel_initializer="he_normal",
    kernel_regularizer=keras.regularizers.l2(0.01),
)

# Every dense layer is preceded by a dropout layer (regularization with
# dropout). The output layer overrides the wrapper's activation and
# initializer and keeps its L2 kernel regularizer.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dropout(rate=0.2))
model.add(RegulizedDense(300))
model.add(keras.layers.Dropout(rate=0.2))
model.add(RegulizedDense(100))
model.add(keras.layers.Dropout(rate=0.2))
model.add(RegulizedDense(10, activation="softmax", kernel_initializer="glorot_uniform"))

In [None]:
# Dense layer using ReLU together with He-normal weight initialization.
keras.layers.Dense(
    10,
    activation="relu",
    kernel_initializer="he_normal",
)

In [None]:
# Leaky ReLU is added as a layer of its own right after the Dense layer,
# which is therefore created without an activation argument.
model = keras.models.Sequential([
    # The keyword is `kernel_initializer` (singular); the plural spelling is
    # rejected by Dense as an unknown argument.
    keras.layers.Dense(10, kernel_initializer="he_normal"),
    keras.layers.LeakyReLU(alpha=0.3),  # leaky ReLU activation applied to the Dense output
])

In [None]:
# Dense layer using SELU together with LeCun-normal weight initialization.
layer = keras.layers.Dense(
    10,
    activation="selu",
    kernel_initializer="lecun_normal",
)

In [None]:
# Batch normalization directly after the input and after every hidden layer
# (i.e. after the hidden layers' ReLU activations).
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    # Each BN layer always adds its four parameters
    # (gamma, beta, moving mean, moving variance).
    keras.layers.BatchNormalization(),
    keras.layers.Dense(300, activation="relu"),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(100, activation="relu"),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(10, activation="softmax"),
])

# summary() prints the table itself and returns None; wrapping it in print()
# would only append a stray "None" line to the cell output.
model.summary()

In [None]:
# Only gamma and beta are learned (trainable); the variance and the mean are
# merely updated during training.
[
    (variable.name, variable.trainable)
    for variable in model.layers[1].variables
]

In [None]:
# Show the `updates` attribute of the layer at index 1 of `model`.
# NOTE(review): `Layer.updates` is reported as deprecated by later TF 2.x
# releases — confirm it is still available in the targeted version.
model.layers[1].updates

In [None]:
# BN layer placed *before* the activation function: the hidden Dense layers
# are created without an activation and the ELU activation follows as a
# separate layer after BatchNormalization.

model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.BatchNormalization(),
    # The hidden Dense layers are created without a bias term (use_bias=False).
    keras.layers.Dense(300, kernel_initializer="he_normal", use_bias=False),
    keras.layers.BatchNormalization(),
    keras.layers.Activation("elu"),
    keras.layers.Dense(100, kernel_initializer="he_normal", use_bias=False),
    keras.layers.BatchNormalization(),
    keras.layers.Activation("elu"),
    keras.layers.Dense(10, activation="softmax"),
])

# summary() prints the table itself and returns None; wrapping it in print()
# would only append a stray "None" line to the cell output.
model.summary()

Transfer Learning

In [None]:
# Load the saved model A and work on a clone, which avoids that model A is
# modified along with model B. The weights are copied over explicitly after
# cloning.
model_A = keras.models.load_model("MyKerasModel.h5")
model_A_clone = keras.models.clone_model(model_A)
model_A_clone.set_weights(model_A.get_weights())

# Model B reuses every layer of the clone except the last one and receives a
# new single-unit sigmoid output layer.
model_B_on_A = keras.models.Sequential([
    *model_A_clone.layers[:-1],
    keras.layers.Dense(1, activation="sigmoid"),
])

In [None]:
# Freeze every layer except the new output layer to avoid large error
# gradients.
for layer in model_B_on_A.layers[:-1]:
    layer.trainable = False

# After changing `trainable`, the model always has to be compiled again.
model_B_on_A.compile(
    loss="binary_crossentropy",
    optimizer="sgd",
    metrics=["accuracy"],
)

In [None]:
# Train for a few epochs while all layers but the output layer are frozen.
history = model_B_on_A.fit(
    X_train_B,
    y_train_B,
    epochs=4,
    validation_data=(X_valid_B, y_valid_B),
)

# Unfreeze the previously frozen layers so that they are trained as well.
for layer in model_B_on_A.layers[:-1]:
    layer.trainable = True

# After unfreezing, use a smaller learning rate (the default is 1e-2) and
# compile the model again before continuing the training.
optimizer = keras.optimizers.SGD(learning_rate=1e-4)
model_B_on_A.compile(
    loss="binary_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)
history = model_B_on_A.fit(
    X_train_B,
    y_train_B,
    epochs=16,
    validation_data=(X_valid_B, y_valid_B),
)


Optimierer und LR wählen

In [None]:
# Three optimizer variants; each assignment replaces the previous one, so
# `optimizer` ends up holding the Adam instance.

# Momentum optimization
optimizer = keras.optimizers.SGD(
    learning_rate=0.001,
    momentum=0.9,
)

# Nesterov momentum
optimizer = keras.optimizers.SGD(
    learning_rate=0.001,
    momentum=0.9,
    nesterov=True,
)

# Adam optimization
optimizer = keras.optimizers.Adam(
    learning_rate=0.001,
    beta_1=0.9,
    beta_2=0.999,
)

In [None]:
# Power scheduling of the learning rate: lr = 0.001 / (1 + 1e-4 * step).
# The `decay` constructor argument is only honoured by the legacy Keras
# optimizers; current optimizers reject or ignore it. An InverseTimeDecay
# schedule with decay_steps=1 produces the same per-step learning rate and is
# accepted by old and new optimizer implementations alike.
optimizer = keras.optimizers.SGD(
    learning_rate=keras.optimizers.schedules.InverseTimeDecay(
        initial_learning_rate=0.001,
        decay_steps=1,
        decay_rate=1e-4,
    )
)

In [None]:
def exponential_decay(epoch, *, lr0=0.01, s=20):
    """Return the exponentially decayed learning rate for ``epoch``.

    The rate starts at ``lr0`` and shrinks by a factor of 10 every ``s``
    epochs: ``lr0 * 0.1 ** (epoch / s)``. With the defaults this is exactly
    the former hard-coded schedule ``0.01 * 0.1 ** (epoch / 20)``.

    Args:
        epoch: Epoch index the rate is requested for.
        lr0: Learning rate at epoch 0 (keyword-only).
        s: Number of epochs after which the rate has dropped by a factor
            of 10 (keyword-only).

    Returns:
        The learning rate for ``epoch`` as a float.
    """
    # lr0 and s are keyword-only on purpose: a caller that passes a second
    # positional argument (LearningRateScheduler first tries
    # ``schedule(epoch, lr)``) gets a TypeError instead of silently
    # overwriting lr0 with the current learning rate.
    return lr0 * 0.1 ** (epoch / s)

# Callback that applies the exponential-decay schedule.
lr_scheduler = keras.callbacks.LearningRateScheduler(exponential_decay)

# `epochs` is passed explicitly: with fit()'s default of a single epoch the
# schedule would only ever be evaluated for epoch 0 and the learning rate
# would never decrease. 25 matches the epoch count of the 1cycle run in this
# notebook.
# NOTE(review): `x_train` is lower-case while the other fit calls in this
# notebook use `X_...` names — confirm which array is meant.
history = model.fit(x_train, y_train, epochs=25, callbacks=[lr_scheduler])

Regularisierung

In [None]:
# L1 and L2 norm regularization: here an L2 penalty with factor 0.01 on the
# layer's kernel weights. The keyword is `kernel_regularizer` (singular); the
# plural spelling is rejected by Dense as an unknown argument.
layer = keras.layers.Dense(
    100,
    activation="elu",
    kernel_initializer="he_normal",
    kernel_regularizer=keras.regularizers.l2(0.01),
)

In [None]:
# 1cycle scheduling

class OneCycleScheduler(keras.callbacks.Callback):
    """Callback that sets the optimizer's learning rate before every batch.

    The rate is linearly interpolated over three phases, counted in batches:

    1. ``[0, half_iteration)``: ``start_rate`` up to ``max_rate``
    2. ``[half_iteration, 2 * half_iteration)``: ``max_rate`` back down to
       ``start_rate``
    3. from ``2 * half_iteration`` towards ``iterations``: ``start_rate``
       down to ``last_rate``
    """

    def __init__(self, iterations, max_rate, start_rate=None,
                 last_iterations=None, last_rate=None):
        """Configure the schedule.

        Args:
            iterations: Total number of batches the schedule spans.
            max_rate: Peak learning rate reached at the end of phase 1.
            start_rate: Initial learning rate. ``None`` (or any falsy value)
                selects ``max_rate / 10``.
            last_iterations: Number of batches reserved for the final phase.
                ``None`` (or any falsy value) selects ``iterations // 10 + 1``.
            last_rate: Learning rate targeted at the end of the final phase.
                ``None`` (or any falsy value) selects ``start_rate / 1000``.
        """
        # Let the Callback base class initialise its own state.
        super().__init__()
        self.iterations = iterations
        self.max_rate = max_rate
        self.start_rate = start_rate or max_rate / 10
        self.last_iterations = last_iterations or iterations // 10 + 1
        # Length of the ramp-up phase and of the equally long ramp-down phase.
        self.half_iteration = (iterations - self.last_iterations) // 2
        self.last_rate = last_rate or self.start_rate / 1000
        # Number of batches seen so far.
        self.iteration = 0

    def _interpolate(self, iter1, iter2, rate1, rate2):
        """Linearly interpolate between ``rate1`` (at ``iter1``) and ``rate2``
        (at ``iter2``) for the current ``self.iteration``."""
        return ((rate2 - rate1) * (self.iteration - iter1)
                / (iter2 - iter1) + rate1)

    def on_batch_begin(self, batch, logs=None):
        """Compute the rate for the current batch and write it to the optimizer.

        ``batch`` and ``logs`` are not used; the callback keeps its own
        batch counter in ``self.iteration``.
        """
        if self.iteration < self.half_iteration:
            rate = self._interpolate(0, self.half_iteration, self.start_rate, self.max_rate)
        elif self.iteration < 2 * self.half_iteration:
            rate = self._interpolate(self.half_iteration, 2 * self.half_iteration,
                                     self.max_rate, self.start_rate)
        else:
            rate = self._interpolate(2 * self.half_iteration, self.iterations,
                                     self.start_rate, self.last_rate)
        self.iteration += 1
        # The backend is addressed through the imported `keras` module; the
        # short alias `K` is not defined anywhere in this notebook.
        keras.backend.set_value(self.model.optimizer.learning_rate, rate)

n_epochs = 25

# Total number of batches = batches per epoch * number of epochs. The length
# is taken from the array that is actually passed to fit() below.
# NOTE(review): `batch_size` is not defined anywhere in the visible notebook —
# it has to be set before this cell runs.
onecycle = OneCycleScheduler(
    math.ceil(len(X_train_scaled) / batch_size) * n_epochs,
    max_rate=0.05,
)
history = model.fit(
    X_train_scaled,
    y_train,
    epochs=n_epochs,
    batch_size=batch_size,
    validation_data=(X_valid_scaled, y_valid),
    callbacks=[onecycle],
)