In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras import regularizers
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import keras_tuner
from scipy.stats import reciprocal
from sklearn.model_selection import RandomizedSearchCV
# Report the library versions and GPU visibility of this kernel.
for _lib in (tf, keras):
    print(_lib.__version__)
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))




# Task 1

## 2.
1. Fashion MNIST
- Load the data
- Create the training, validation and test sets
- Define the class names


In [None]:
# Load Fashion MNIST and split the official training set into train/validation.
fashion_mnist = keras.datasets.fashion_mnist
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist.load_data()
# The first 5000 samples become the validation set; pixels are divided by 255.
X_valid, X_train = X_train_full[:5000] / 255.0, X_train_full[5000:] / 255.0
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]
# The test images must receive the same scaling as the training images,
# otherwise every later evaluate()/score() call sees inputs on a different scale.
X_test = X_test / 255.0
# Human-readable label for each class index 0..9.
class_names = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
                   "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]

#num_classes = 10
#y_train = keras.utils.to_categorical(y_train, num_classes)
#y_valid = keras.utils.to_categorical(y_valid, num_classes)
#y_test = keras.utils.to_categorical(y_test, num_classes)

- Create the model

In [None]:
def build_model(n_hidden=1, n_neurons=30, learning_rate=3e-3, input_shape=[28, 28], opt = "SGD"): 
    """Build and compile a fully connected softmax classifier.

    Args:
        n_hidden: Number of hidden ReLU Dense layers (0 gives a linear softmax model).
        n_neurons: Units in every hidden layer.
        learning_rate: Learning rate handed to the optimizer.
        input_shape: Shape of one sample (without the batch axis); it is flattened.
        opt: Name of a class in ``keras.optimizers`` (e.g. "SGD", "Adam").

    Returns:
        A compiled ``keras.models.Sequential`` model with 10 softmax outputs.

    Raises:
        AttributeError: If ``opt`` does not name an attribute of ``keras.optimizers``.
    """
    model = keras.models.Sequential()
    model.add(keras.layers.Flatten(input_shape=input_shape))
    for _ in range(n_hidden):
        model.add(keras.layers.Dense(n_neurons, activation="relu"))
    model.add(keras.layers.Dense(10, activation="softmax"))
    # Look the optimizer class up by name instead of eval()-ing a built string,
    # and use `learning_rate` (the `lr` keyword is deprecated).
    optimizer_cls = getattr(keras.optimizers, opt)
    optimizer = optimizer_cls(learning_rate=learning_rate)
    model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=['accuracy'])
    return model

In [None]:
# Wrap the model factory so scikit-learn search utilities can drive it.
# (Despite the name, this is a classifier wrapper.)
keras_reg = keras.wrappers.scikit_learn.KerasClassifier(build_fn=build_model)


In [None]:
# Train the wrapped baseline model; stop once the monitored metric has not
# improved by at least 0.001 for 5 consecutive epochs.
early_stopping = keras.callbacks.EarlyStopping(patience=5, min_delta=0.001)
keras_reg.fit(X_train, y_train, epochs=100,
              validation_data=(X_valid, y_valid),
              callbacks=[early_stopping])
# KerasClassifier.score returns mean accuracy, not an MSE; the variable name
# is kept as-is in case other cells refer to it.
mse_test = keras_reg.score(X_test, y_test)
# `X_new` was never defined (NameError); use a few test images as the
# "new" samples to predict on.
X_new = X_test[:3]
y_pred = keras_reg.predict(X_new)

In [None]:
# Search space for the randomized search: depth, width, log-uniform
# learning rate and optimizer name (all are keyword arguments of the model factory).
param_distribs = dict(
    n_hidden=[0, 1, 2, 3],
    n_neurons=np.arange(1, 100),
    learning_rate=reciprocal(3e-4, 3e-2),
    opt=["SGD", "Adam"],
)

In [None]:
# this runs for 43 minutes
# 10 random configurations, each scored with 3-fold cross-validation.
search_callbacks = [keras.callbacks.EarlyStopping(patience=3, min_delta=0.001)]
rnd_search_cv = RandomizedSearchCV(
    estimator=keras_reg,
    param_distributions=param_distribs,
    n_iter=10,
    cv=3,
)
rnd_search_cv.fit(
    X_train,
    y_train,
    epochs=300,
    batch_size=50,
    validation_data=(X_valid, y_valid),
    callbacks=search_callbacks,
)

In [None]:
# Show the winning hyperparameters followed by their mean cross-validated score.
for search_result in (rnd_search_cv.best_params_, rnd_search_cv.best_score_):
    print(search_result)

In [None]:
# Pull the underlying Keras model out of the best scikit-learn estimator,
# score it on the test set and persist it.
best_estimator = rnd_search_cv.best_estimator_
model = best_estimator.model
model.evaluate(X_test, y_test)
model.save("MLP_brute.h5")

Optimizing the search with KerasTuner:
- L1 and L2 regularization slowed the search down considerably

In [None]:
hp = keras_tuner.HyperParameters()
def build_model(hp):
    """Build a tunable MLP classifier for KerasTuner.

    NOTE: this definition replaces the earlier ``build_model`` that takes
    plain keyword arguments; after this cell runs, the name refers to the
    KerasTuner version.

    Args:
        hp: A ``keras_tuner.HyperParameters`` object used to sample the number
            of layers, units per layer, activation, dropout flag and
            learning rate.

    Returns:
        A compiled ``keras.Sequential`` model with a 10-way softmax output.
    """
    stack = [layers.Flatten()]
    # Tune the depth; every hidden layer gets its own width but all layers
    # share one activation choice.
    depth = hp.Int("num_layers", 1, 6)
    for i in range(depth):
        width = hp.Int(f"units_{i}", min_value=30, max_value=515, step=31)
        act = hp.Choice("activation", ["relu", "tanh"])
        stack.append(layers.Dense(units=width, activation=act))
    # Optional single dropout layer right before the output layer.
    use_dropout = hp.Boolean("dropout")
    if use_dropout:
        stack.append(layers.Dropout(rate=0.25))
    stack.append(layers.Dense(10, activation="softmax"))
    model = keras.Sequential(stack)

    # Learning rate sampled on a log scale.
    learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")
    adam = keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(
        optimizer=adam,
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

# Smoke test: the builder must work with default hyperparameter values.
build_model(keras_tuner.HyperParameters())

Choosing a tuner: https://medium.com/swlh/hyperparameter-tuning-in-keras-tensorflow-2-with-keras-tuner-randomsearch-hyperband-3e212647778f

In [None]:
# Bayesian optimisation over the MLP search space, maximising validation
# accuracy; results are stored under Task1/MLP.
mlp_tuner_config = dict(
    hypermodel=build_model,
    objective="val_accuracy",
    max_trials=10,
    num_initial_points=2,
    seed=90,
    directory="Task1",
    project_name="MLP",
    #overwrite=True,
)
Tuner = keras_tuner.BayesianOptimization(**mlp_tuner_config)


In [None]:
# Print the hyperparameters registered in the MLP tuner's search space.
Tuner.search_space_summary()


In [None]:
# Run the MLP hyperparameter search; each trial trains for up to 30 epochs
# and is scored on the validation split.
Tuner.search(
    X_train,
    y_train,
    epochs=30,
    validation_data=(X_valid, y_valid),
)


In [None]:

# Retrieve the three best models found by the search; index 0 is the best.
models_mlp = Tuner.get_best_models(num_models=3)
best_model_mlp = models_mlp[0]
# Build with the Fashion MNIST input shape so that summary() can report
# layer output shapes and parameter counts.
mlp_input_shape = (None, 28, 28)
best_model_mlp.build(input_shape=mlp_input_shape)
best_model_mlp.summary()
Tuner.results_summary()


In [None]:
# Score the best MLP on the test set, then persist it as an HDF5 file.
best_model_mlp.evaluate(x=X_test, y=y_test)
best_model_mlp.save(filepath="best_mlp.h5")

In [None]:
# Continue training the best MLP for 30 epochs and plot the learning curves.
history = best_model_mlp.fit(
    X_train,
    y_train,
    epochs=30,
    validation_data=(X_valid, y_valid),
)

history_df = pd.DataFrame(history.history)
ax = history_df.plot(figsize=(8, 5))
ax.grid(True)
ax.set_ylim(0, 1)   # set the vertical range to [0-1]
ax.set_xlim(0, 29)  # one tick range per epoch index 0..29
plt.show()

## CNN

In [None]:
hp = keras_tuner.HyperParameters()
def build_model_cnn(hp):
    """Build a tunable CNN classifier for 28x28x1 images.

    Architecture: one fixed Conv2D + MaxPooling2D stem, 1-3 additional tunable
    Conv2D + MaxPooling2D blocks, Flatten, 1-2 tunable Dense layers (each
    optionally followed by Dropout) and a 10-way softmax output.

    Hyperparameter names are unique per layer. KerasTuner returns the already
    registered value when a name is requested twice, so reusing a name would
    silently tie two layers (or two layer counts) together.

    NOTE(review): the hyperparameter names differ from earlier versions of
    this function; results stored from a previous search in the same tuner
    directory may not match this search space — consider `overwrite=True`.

    Args:
        hp: A ``keras_tuner.HyperParameters`` object to sample from.

    Returns:
        A compiled ``keras.Sequential`` model.
    """
    model = keras.Sequential()

    # Stem: first convolution block, declares the input shape.
    model.add(
        layers.Conv2D(
            filters=hp.Int("conv_1_filter", min_value=32, max_value=128, step=16),
            kernel_size=hp.Choice("conv_1_kernel", values=[3, 5]),
            activation="relu",
            input_shape=[28, 28, 1],
            padding="same",
        )
    )
    model.add(layers.MaxPooling2D(pool_size=2))

    # Additional convolution blocks, numbered from 2 so their hyperparameter
    # names cannot collide with the stem's "conv_1_*" names.
    num_conv_layers = hp.Int("num_conv_layers", 1, 3)
    for i in range(2, num_conv_layers + 2):
        model.add(
            layers.Conv2D(
                filters=hp.Int(f"conv_{i}_filter", min_value=32, max_value=128, step=16),
                kernel_size=hp.Choice(f"conv_{i}_kernel", values=[3, 5]),
                activation="relu",
                padding="same",
            )
        )
        model.add(
            layers.MaxPooling2D(
                pool_size=hp.Choice(f"pool_{i}_size", values=[2, 4, 6]),
                # 'same' padding keeps the spatial size >= 1; with the default
                # 'valid' padding, stacked pools of size 4/6 can shrink the
                # feature map to zero and make the trial fail.
                padding="same",
            )
        )

    model.add(layers.Flatten())

    # Dense head; its depth has its own hyperparameter name so it is tuned
    # independently of the number of convolution blocks.
    num_dense_layers = hp.Int("num_dense_layers", 1, 2)
    for i in range(num_dense_layers):
        model.add(
            layers.Dense(
                units=hp.Int(f"units_{i}", min_value=60, max_value=515, step=20),
                activation=hp.Choice("activation", ["relu", "tanh"]),
            )
        )
        # One shared on/off switch for dropout, with a per-layer rate.
        if hp.Boolean("dropout"):
            model.add(
                layers.Dropout(rate=hp.Choice(f"rate_{i}", values=[0.25, 0.5, 0.75]))
            )

    model.add(layers.Dense(10, activation="softmax"))

    # Learning rate sampled on a log scale.
    learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

# Smoke test: the builder must work with default hyperparameter values.
build_model_cnn(keras_tuner.HyperParameters())

In [None]:
# Bayesian optimisation over the CNN search space, maximising validation
# accuracy; results are stored under Task1/CNN.
cnn_tuner_config = dict(
    hypermodel=build_model_cnn,
    objective="val_accuracy",
    max_trials=20,
    num_initial_points=2,
    seed=90,
    directory="Task1",
    project_name="CNN",
    #overwrite=True,
    #executions_per_trial,
)
tuner = keras_tuner.BayesianOptimization(**cnn_tuner_config)

In [None]:
# Print the hyperparameters registered in the CNN tuner's search space.
tuner.search_space_summary()

In [None]:
# Run the CNN hyperparameter search; each trial trains for up to 30 epochs
# with mini-batches of 100 and is scored on the validation split.
tuner.search(
    X_train,
    y_train,
    epochs=30,
    validation_data=(X_valid, y_valid),
    batch_size=100,
)


In [None]:
# Get the top 3 models; index 0 is the best one.
models_cnn = tuner.get_best_models(num_models=3)
best_model_cnn = models_cnn[0]
# Build model. The CNN's first layer declares input_shape=[28, 28, 1], so the
# build shape needs the trailing channel axis (the 3-D shape previously used
# here was copied from the MLP cell).
best_model_cnn.build(input_shape=(None, 28, 28, 1))
best_model_cnn.summary()
#tuner.results_summary()

In [None]:
# Score the best CNN on the test set, then persist it as an HDF5 file.
best_model_cnn.evaluate(x=X_test, y=y_test)
best_model_cnn.save(filepath="best_cnn.h5")

## CIFAR10

In [None]:
# Load CIFAR-10 and split the official training set into train/validation.
# NOTE: this rebinds X_train / X_valid / y_train / y_valid, which previously
# held the Fashion MNIST splits.
cifar10 = keras.datasets.cifar10
(x_train_full_cif, y_train_full_cif), (x_test_full_cif, y_test_full_cif) = cifar10.load_data()
X_valid, X_train = x_train_full_cif[:5000] / 255.0, x_train_full_cif[5000:] / 255.0
y_valid, y_train = y_train_full_cif[:5000], y_train_full_cif[5000:]
# Scale the test images like the training images; otherwise the later
# evaluate() call is fed inputs on a different scale than the model was fit on.
x_test_full_cif = x_test_full_cif / 255.0


In [None]:
# Reload the tuned Fashion MNIST models from disk for reuse on CIFAR-10.
model_mlp_cifar, model_cnn_cifar = (
    keras.models.load_model(saved_path)
    for saved_path in ("best_mlp.h5", "best_cnn.h5")
)

In [None]:
# NOTE(review): the model in best_cnn.h5 was built with input_shape=[28, 28, 1],
# while CIFAR-10 images are presumably 32x32x3 — this fit likely fails unless the
# architecture is rebuilt for the new input shape; confirm.
history = model_cnn_cifar.fit(X_train, y_train, epochs=30, validation_data=(X_valid, y_valid))
pd.DataFrame(history.history).plot(figsize=(8, 5))
plt.grid(True)
plt.gca().set_ylim(0, 1)  # set the vertical range to [0-1]
# plt.show() used to sit inside the trailing comment above and never ran.
plt.show()
model_cnn_cifar.evaluate(x_test_full_cif, y_test_full_cif)