In [None]:
import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.constraints import MaxNorm
from tensorflow.keras.losses import Loss
from tensorflow.keras.layers import Conv2D, Flatten, Dense 
from tensorflow.keras.models import Sequential
from tensorflow.keras.activations import relu, tanh
from tensorflow.keras.initializers import Constant, RandomNormal
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler
from tensorflow.keras.optimizers import SGD
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import log_loss
from multiprocessing import Process, Value
from datetime import datetime
import numpy as np
import math
import os
from PIL import Image
from google.colab import drive
import pickle

# Mount Google Drive: the dataset archive is read from it and the results
# pickle / figure are written back to it by later cells.
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
def normalized_relu(x):
  """ReLU activation multiplied by sqrt(2).

  NOTE(review): the sqrt(2) gain presumably normalises the second moment of
  the activation for standard-normal inputs (E[relu(z)^2] = 1/2) -- confirm.
  """
  gain = math.sqrt(2)
  return gain * relu(x)

def normalized_tanh(x):
  """tanh activation divided by the constant 0.6279.

  NOTE(review): 0.6279 looks like sqrt(E[tanh(z)^2]) for standard-normal z,
  i.e. the tanh counterpart of the sqrt(2) gain in normalized_relu -- confirm.
  """
  scale = 0.6279
  return tanh(x) / scale

def build_model(input_size, filter_size, r = 32):
  """Build the (untrained) network: Conv2D -> Flatten -> Dense -> Dense(10).

  Args:
    input_size: shape of one input sample; its last entry is used as the
      channel count in the conv initialiser (assumes channels-last -- confirm).
    filter_size: kernel size handed to Conv2D.
    r: number of conv filters and also the width of the hidden Dense layer.

  Returns:
    The Sequential model; the final Dense(10) layer has no activation.
  """
  net = Sequential()

  # feature extraction layer; weights ~ N(0, channels / prod(input_size)).
  conv_std = math.sqrt(input_size[-1] / np.prod(input_size))
  net.add(Conv2D(
    r,
    filter_size,
    activation = normalized_relu,
    kernel_initializer = RandomNormal(0, conv_std),
    bias_initializer = Constant(0),
    # biases start at 0 and a max-norm of 0 keeps them there
    bias_constraint = MaxNorm(0),
    input_shape = input_size,
  ))
  net.add(Flatten())

  # hidden fully connected layer; weights ~ N(0, 1 / fan_in), fan_in being the
  # width of the Flatten layer that was just added.
  flat_width = net.layers[-1].output_shape[-1]
  dense_std = math.sqrt(1 / flat_width)
  net.add(Dense(
    r,
    activation = normalized_relu,
    kernel_initializer = RandomNormal(0, dense_std),
    bias_initializer = Constant(0),
    bias_constraint = MaxNorm(0),
  ))

  # output layer:
  net.add(Dense(10))
  return net

In [None]:
# data:
!cp /content/drive/MyDrive/cifar10/final.zip /content
!unzip final.zip

[1;30;43mDie letzten 5000 Zeilen der Streamingausgabe wurden abgeschnitten.[0m
  inflating: final/train/3/644.jpg   
  inflating: final/train/3/645.jpg   
  inflating: final/train/3/646.jpg   
  inflating: final/train/3/647.jpg   
  inflating: final/train/3/648.jpg   
  inflating: final/train/3/649.jpg   
  inflating: final/train/3/65.jpg    
  inflating: final/train/3/650.jpg   
  inflating: final/train/3/651.jpg   
  inflating: final/train/3/652.jpg   
  inflating: final/train/3/653.jpg   
  inflating: final/train/3/654.jpg   
  inflating: final/train/3/655.jpg   
  inflating: final/train/3/656.jpg   
  inflating: final/train/3/657.jpg   
  inflating: final/train/3/658.jpg   
  inflating: final/train/3/659.jpg   
  inflating: final/train/3/66.jpg    
  inflating: final/train/3/660.jpg   
  inflating: final/train/3/661.jpg   
  inflating: final/train/3/662.jpg   
  inflating: final/train/3/663.jpg   
  inflating: final/train/3/664.jpg   
  inflating: final/train/3/665.jpg   
  infla

In [None]:
# input transformation:
img_size = (32, 32)
def load_data(root_dir):
  """Load every image below ``root_dir`` as a grayscale array of size ``img_size``.

  Each sub-directory of ``root_dir`` must be named with an integer; that integer
  becomes the label of all files inside it. Files directly in ``root_dir`` are
  ignored.

  Args:
    root_dir: directory containing one sub-directory per class.

  Returns:
    (images, labels): ``images`` has a trailing channel axis of length 1,
    ``labels`` holds the integer label of each image in the same order.

  Raises:
    ValueError: if a sub-directory name is not an integer.
  """
  images, labels = [], []
  for subdir, dirs, files in os.walk(root_dir):
    # os.walk visits `dirs` in the order left in this list; sorting it (and the
    # files below) makes the sample order deterministic. That matters because
    # the CV splits persisted in the results pickle are index arrays into the
    # returned data and are reused in later sessions.
    dirs.sort()
    if subdir == root_dir:
      continue
    label = int(os.path.basename(os.path.normpath(subdir)))
    for file in sorted(files):
      # context manager closes the underlying file handle right away
      with Image.open(os.path.join(subdir, file)) as img:
        pixels = np.asarray(img.convert("L").resize(img_size))
      # add the channel axis
      images.append(pixels[..., None])
      labels.append(label)
  return np.array(images), np.array(labels)

def sphere_transformation(data):
    """Map angles on the last axis to Cartesian coordinates on the unit sphere.

    For d angles (a_0, ..., a_{d-1}) on the last axis the result has d + 1
    entries on that axis:
        out_0 = cos(a_0)
        out_k = sin(a_0) * ... * sin(a_{k-1}) * cos(a_k)    for 0 < k < d
        out_d = sin(a_0) * ... * sin(a_{d-1})
    All leading axes are kept as they are.

    Args:
        data: array of angles; the last axis enumerates the angles.

    Returns:
        A new array of NumPy's default float dtype with the last axis one longer.
    """
    n_angles = data.shape[-1]
    sphere = np.empty((*data.shape[:-1], n_angles + 1))

    sphere[..., 0] = np.cos(data[..., 0])
    for k in range(1, n_angles):
        leading_sines = np.sin(data[..., :k])
        sphere[..., k] = leading_sines.prod(axis = -1) * np.cos(data[..., k])
    # the closing coordinate is the product of all sines
    sphere[..., n_angles] = np.sin(data).prod(axis = -1)
    return sphere

train_images, train_labels = load_data("final/train")
test_images, test_labels = load_data("final/test")

# Rescale pixel values from [0, 255] to angles in [-pi, pi].
# The division by 255.0 must come first: the 8-bit grayscale images load as
# uint8, so `2 * images` would be evaluated in uint8 and wrap around for every
# pixel value above 127 before the conversion to float.
# NOTE: results cached before this fix were computed from the wrapped values.
train_images = (train_images / 255.0 * 2 - 1) * math.pi
test_images = (test_images / 255.0 * 2 - 1) * math.pi

# Turn each angle into unit-sphere coordinates (adds one entry on the last axis).
train_images = sphere_transformation(train_images)
test_images = sphere_transformation(test_images)

In [None]:
# setups:
# shape of a single (transformed) training sample
input_size = train_images[0].shape
# square convolution kernels that are compared against each other
filter_sizes = tuple((side, side) for side in (2, 3, 5, 9, 16))
# number of stratified CV folds used for the hyperparameter search
n_folds = 5
# upper bound on training epochs for the full model (early stopping may end sooner)
epochs = 500

# hyperparams:
# mini-batch sizes tried for the full model
batch_sizes = (16, 32, 64)
# candidate numbers of epochs over which the learning rate decays to its minimum
decay_epochs = (10, 25, 50, 100, 150, 200, 300, 500)
# candidate values of LogisticRegression's C for the last-layer model
Cs = (
  1E-3, 1E-2, 1E-1,
  1,
  1E+1, 1E+2, 1E+3,
)

In [None]:
# read current results:
# `results` is the single checkpoint object of this notebook; it is re-pickled
# to Drive by the training cells so that an interrupted run can be resumed.
if not os.path.exists("/content/drive/MyDrive/CIFAR10relu.pickle"):
  # first run: start from empty records and draw the CV folds once, so that
  # every later (resumed) session evaluates on exactly the same splits
  results = {}
  folds = StratifiedKFold(n_splits = n_folds, shuffle = True)
  splits = list(folds.split(train_images, train_labels))
  results.update({
    # hyperparameter search, last-layer model
    "HYP_LAST_LOSSES": [],
    "HYP_LAST_SPLITS": splits,
    "HYP_LAST_TIME": 0,
    # hyperparameter search, full model (same folds as above)
    "HYP_FULL_LOSSES": [],
    "HYP_FULL_SPLITS": splits,
    "HYP_FULL_TIME": 0,
    # final evaluation on the test set
    "LAST_LOSSES": [],
    "LAST_TIME": 0,
    "FULL_LOSSES": [],
    "FULL_TIME": 0,
  })
else:
  # NOTE: pickle.load executes arbitrary code from the file; only load a
  # checkpoint that this notebook wrote itself.
  with open("/content/drive/MyDrive/CIFAR10relu.pickle", "rb") as file:
    results = pickle.load(file)

In [None]:
# learning rate scheduler:
# bounds between which the learning rate is decayed
max_lr = 1E-2
min_lr = 1E-5
def lr_decay(epoch, lr, epochs, max_lr, min_lr):
  """Exponential learning-rate decay, one step per epoch.

  The per-epoch factor is chosen so that ``epochs - 1`` steps scale ``max_lr``
  down to ``min_lr``.

  Args:
    epoch: index of the current epoch; epoch 0 leaves ``lr`` untouched.
    lr: learning rate used in the previous epoch.
    epochs: number of epochs over which the decay is spread (must not be 1,
      otherwise the exponent divides by zero).
    max_lr: learning rate at the start of the schedule.
    min_lr: lower bound; a step that would not stay above it is skipped.

  Returns:
    The learning rate to use for ``epoch``.
  """
  if epoch == 0:
    return lr
  decay = (min_lr / max_lr) ** (1 / (epochs - 1))
  candidate = lr * decay
  # keep the previous rate once another step would reach or undercut the floor
  return candidate if candidate > min_lr else lr

# full model evaluation:
def full_model_eval(input_size, filter_size, train_images, train_labels, test_images, test_labels, de, bs, test_loss, test_accuracy):
  """Train the whole network with SGD and report its test loss and accuracy.

  Args:
    input_size, filter_size: forwarded to build_model.
    train_images, train_labels: training data; a stratified 25% of it is held
      out as validation set for early stopping.
    test_images, test_labels: data the trained model is evaluated on.
    de: number of epochs over which the learning rate decays (see lr_decay).
    bs: mini-batch size.
    test_loss, test_accuracy: objects whose ``.value`` attribute receives the
      results (the callers pass multiprocessing.Value instances).
  """
  # split training set:
  fit_x, held_x, fit_y, held_y = train_test_split(train_images, train_labels, stratify = train_labels, test_size = 0.25)

  def schedule(epoch, lr):
    return lr_decay(epoch, lr, de, max_lr, min_lr)

  stopper = EarlyStopping(monitor = "val_loss", min_delta = 0.001, patience = 10, restore_best_weights = True)
  training_callbacks = [stopper, LearningRateScheduler(schedule)]

  # train model:
  net = build_model(input_size, filter_size)
  net.compile(
    optimizer = SGD(learning_rate = max_lr),
    loss = SparseCategoricalCrossentropy(from_logits = True),
    metrics = ["accuracy"],
  )
  net.fit(
    fit_x,
    fit_y,
    epochs = epochs,
    batch_size = bs,
    validation_data = (held_x, held_y),
    callbacks = training_callbacks,
    verbose = 0,
  )

  # evaluate on test set:
  loss_value, accuracy_value = net.evaluate(test_images, test_labels, verbose = 0)
  test_loss.value = loss_value
  test_accuracy.value = accuracy_value

# last layer model evaluation:
def last_layer_eval(input_size, filter_size, train_images, train_labels, test_images, test_labels, C, test_loss, test_accuracy):
  """Fit only a linear classifier on top of the randomly initialised network.

  The network from build_model is never trained here; the outputs of its
  second-to-last layer serve as fixed features for a standardised logistic
  regression.

  Args:
    input_size, filter_size: forwarded to build_model.
    train_images, train_labels: data the logistic regression is fitted on.
    test_images, test_labels: data the fitted pipeline is evaluated on.
    C: value of LogisticRegression's ``C`` parameter.
    test_loss, test_accuracy: objects whose ``.value`` attribute receives the
      log loss and the accuracy (the callers pass multiprocessing.Value instances).
  """
  # feature extractor: the untrained network cut off before its output layer
  base = build_model(input_size, filter_size)
  feature_extractor = Model(inputs = base.input, outputs = base.layers[-2].output)

  # train model:
  train_features = feature_extractor.predict(train_images, verbose = 0)
  classifier = make_pipeline(
    StandardScaler(),
    LogisticRegression(C = C, fit_intercept = False, max_iter = 10000, verbose = 0),
  )
  classifier.fit(train_features, np.squeeze(train_labels))

  # evaluate on test set:
  test_features = feature_extractor.predict(test_images, verbose = 0)
  test_targets = np.squeeze(test_labels)
  test_loss.value = log_loss(test_targets, classifier.predict_proba(test_features))
  test_accuracy.value = classifier.score(test_features, test_targets)

In [None]:
# Hyperparameter search for the last-layer model (resumable).
# results["HYP_LAST_LOSSES"][i][j][k] holds the validation loss for CV fold i,
# filter size j and regularisation value Cs[k]. Entries that already exist
# (loaded from the checkpoint) are skipped, so re-running the cell continues
# where the previous run stopped.
for i, (train, val) in enumerate(results["HYP_LAST_SPLITS"]):

  # open the record for this fold if it does not exist yet
  if len(results["HYP_LAST_LOSSES"]) <= i:
    results["HYP_LAST_LOSSES"].append([])

  for j, filter_size in enumerate(filter_sizes):

    # open the record for this filter size if it does not exist yet
    if len(results["HYP_LAST_LOSSES"][i]) <= j:
      results["HYP_LAST_LOSSES"][i].append([])

    # train last layer models:
    for k, C in enumerate(Cs):

      # already evaluated in an earlier run
      if len(results["HYP_LAST_LOSSES"][i][j]) > k:
        continue

      before = datetime.now()
      # shared doubles the child process writes into; if the child never writes,
      # the recorded loss stays at +inf
      loss = Value("d", math.inf)
      accuracy = Value("d", 0.)
      # each evaluation runs in its own process (presumably so that TensorFlow's
      # memory is released between runs -- confirm)
      p = Process(target = last_layer_eval, args = (input_size, filter_size, train_images[train], train_labels[train], train_images[val], train_labels[val], C, loss, accuracy))
      p.start()
      p.join()
      results["HYP_LAST_LOSSES"][i][j].append(loss.value)
      after = datetime.now()

      # accumulated wall-clock time in minutes
      results["HYP_LAST_TIME"] += (after - before).total_seconds() / 60
      # checkpoint after every single evaluation
      with open("/content/drive/MyDrive/CIFAR10relu.pickle", "wb") as file:
        pickle.dump(results, file, protocol = pickle.HIGHEST_PROTOCOL)

In [None]:
# For every filter size pick the C with the lowest validation loss averaged over the folds.
# NOTE(review): the recorded run selected 0.001 -- the smallest candidate in Cs --
# for every filter size, so the optimum may lie below the searched grid.
fold_mean_last = np.mean(results["HYP_LAST_LOSSES"], axis = 0)
best_C_idx = np.argmin(fold_mean_last, axis = -1)
C_best = np.array(Cs)[best_C_idx]
print(C_best)
# search time in minutes
print(results["HYP_LAST_TIME"])

[0.001 0.001 0.001 0.001 0.001]
12.30390413333333


In [None]:
# optimize full model hyperparams:
# Resumable grid search: results["HYP_FULL_LOSSES"][i][j][k][l] holds the
# validation loss for CV fold i, filter size j, batch size batch_sizes[k] and
# decay length decay_epochs[l]. Entries that already exist are skipped.
for i, (train, val) in enumerate(results["HYP_FULL_SPLITS"]):

  # open the record for this fold if it does not exist yet
  if len(results["HYP_FULL_LOSSES"]) <= i:
    results["HYP_FULL_LOSSES"].append([])

  for j, filter_size in enumerate(filter_sizes):

    # open the record for this filter size if it does not exist yet
    if len(results["HYP_FULL_LOSSES"][i]) <= j:
      results["HYP_FULL_LOSSES"][i].append([])
    
    # train full models:
    for k, bs in enumerate(batch_sizes):

      # open the record for this batch size if it does not exist yet
      if len(results["HYP_FULL_LOSSES"][i][j]) <= k:
        results["HYP_FULL_LOSSES"][i][j].append([])
      
      for l, de in enumerate(decay_epochs):

        # already evaluated in an earlier run
        if len(results["HYP_FULL_LOSSES"][i][j][k]) > l:
          continue

        before = datetime.now()
        # shared doubles the child process writes into; if the child never
        # writes, the recorded loss stays at +inf
        loss = Value("d", math.inf)
        accuracy = Value("d", 0.)
        # train and evaluate in a separate process
        p = Process(target = full_model_eval, args = (input_size, filter_size, train_images[train], train_labels[train], train_images[val], train_labels[val], de, bs, loss, accuracy))
        p.start()
        p.join()
        results["HYP_FULL_LOSSES"][i][j][k].append(loss.value)
        after = datetime.now()

        # accumulated wall-clock time in minutes
        results["HYP_FULL_TIME"] += (after - before).total_seconds() / 60
        # checkpoint after every single evaluation
        with open("/content/drive/MyDrive/CIFAR10relu.pickle", "wb") as file:
          pickle.dump(results, file, protocol = pickle.HIGHEST_PROTOCOL)

In [None]:
# Average the validation losses over the folds -> (filter size, batch size, decay epochs),
# then pick the best (batch size, decay epochs) pair separately for every filter size.
means = np.mean(results["HYP_FULL_LOSSES"], axis = 0)
n_filter_sizes = means.shape[0]
# argmin over the flattened (batch size, decay epochs) grid ...
flat_best = means.reshape((n_filter_sizes, -1)).argmin(axis = -1)
# ... converted back into one index per hyperparameter
bs_idx, de_idx = np.unravel_index(flat_best, means.shape[1:])
batch_size_best = np.array(batch_sizes)[bs_idx]
decay_epochs_best = np.array(decay_epochs)[de_idx]
print(batch_size_best)
print(decay_epochs_best)
# search time in minutes
print(results["HYP_FULL_TIME"])

[16 16 16 16 16]
[500  25  25  25  25]
323.00586001666727


In [None]:
# final evaluation:
# For every filter size, fit the last-layer model and the full model
# `repetitions` times with the selected hyperparameters and record the test
# loss of each run. results["LAST_LOSSES"][i][j] / results["FULL_LOSSES"][i][j]
# belong to filter size i and repetition j; existing entries are skipped so the
# cell can be resumed.
repetitions = 100

for i, filter_size in enumerate(filter_sizes):

  if len(results["LAST_LOSSES"]) <= i:
    results["LAST_LOSSES"].append([])
  if len(results["FULL_LOSSES"]) <= i:
    results["FULL_LOSSES"].append([])

  for j in range(repetitions):

    need_last = len(results["LAST_LOSSES"][i]) <= j
    need_full = len(results["FULL_LOSSES"][i]) <= j
    if not (need_last or need_full):
      # repetition already recorded: skip the costly copy of the training set
      continue

    # shuffle train data:
    # work on local copies; the global arrays stay untouched so that the stored
    # CV split indices keep referring to the same samples when cells are re-run
    indices = np.arange(len(train_labels))
    np.random.shuffle(indices)
    shuffled_images = train_images[indices]
    shuffled_labels = train_labels[indices]

    if need_last:
      # train last layer models:
      before = datetime.now()
      # if the child process never writes, the recorded loss stays at +inf
      loss = Value("d", math.inf)
      accuracy = Value("d", 0.)
      p = Process(target = last_layer_eval, args = (input_size, filter_size, shuffled_images, shuffled_labels, test_images, test_labels, C_best[i], loss, accuracy))
      p.start()
      p.join()
      # index with i (not -1) so a resumed run always appends to the right filter size
      results["LAST_LOSSES"][i].append(loss.value)
      after = datetime.now()
      # NOTE: accumulated in seconds, unlike the HYP_*_TIME entries (minutes)
      results["LAST_TIME"] += (after - before).total_seconds()

    if need_full:
      # train full models:
      before = datetime.now()
      loss = Value("d", math.inf)
      accuracy = Value("d", 0.)
      p = Process(target = full_model_eval, args = (input_size, filter_size, shuffled_images, shuffled_labels, test_images, test_labels, decay_epochs_best[i], batch_size_best[i], loss, accuracy))
      p.start()
      p.join()
      results["FULL_LOSSES"][i].append(loss.value)
      after = datetime.now()
      # NOTE: accumulated in seconds, unlike the HYP_*_TIME entries (minutes)
      results["FULL_TIME"] += (after - before).total_seconds()

    # checkpoint after every repetition, like the hyperparameter searches do,
    # so that a disconnect does not discard hours of finished runs
    with open("/content/drive/MyDrive/CIFAR10relu.pickle", "wb") as file:
      pickle.dump(results, file, protocol = pickle.HIGHEST_PROTOCOL)

# mean test loss per filter size and total time (seconds) for each model type
print(np.mean(results["LAST_LOSSES"], axis = -1))
print(results["LAST_TIME"])
print(np.mean(results["FULL_LOSSES"], axis = -1))
print(results["FULL_TIME"])

with open("/content/drive/MyDrive/CIFAR10relu.pickle", "wb") as file:
  pickle.dump(results, file, protocol = pickle.HIGHEST_PROTOCOL)

In [None]:
import matplotlib.pyplot as plt

# One point per filter size: mean test loss of the last-layer model (x axis)
# against the mean test loss of the fully trained model (y axis).
last_layer_means = np.mean(results["LAST_LOSSES"], axis = -1)
full_model_means = np.mean(results["FULL_LOSSES"], axis = -1)

fig, ax = plt.subplots()
ax.scatter(last_layer_means, full_model_means)
# label the figure so that the saved PNG can be read on its own
ax.set_xlabel("mean test loss, last layer only")
ax.set_ylabel("mean test loss, full model")
ax.set_title("CIFAR-10 (ReLU): last-layer vs. full training")
fig.savefig("scatterCIFAR10relu.png")

In [None]:
# Copy the saved scatter plot to Google Drive.
!cp /content/scatterCIFAR10relu.png /content/drive/MyDrive

In [None]:
from google.colab import runtime

# Ask Colab to unassign this runtime now that the notebook has finished.
# NOTE(review): presumably anything not yet copied to Drive is lost after this
# call -- confirm the results pickle and the figure were written beforehand.
runtime.unassign()