In [None]:
import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.constraints import MaxNorm
from tensorflow.keras.layers import Flatten, Dense 
from tensorflow.keras.models import Sequential
from tensorflow.keras.activations import relu, tanh
from tensorflow.keras.initializers import Constant, RandomNormal
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler
from tensorflow.keras.optimizers import SGD
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import log_loss
from multiprocessing import Process, Value
from datetime import datetime
import numpy as np
import math
import os
from PIL import Image
from google.colab import drive
import pickle
import pandas as pd

# Mount Google Drive; the dataset is copied from it and the results
# checkpoint and figure are written to paths under /content/drive/MyDrive.
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
def normalized_relu(x):
  """ReLU activation multiplied by sqrt(2).

  For a standard-normal input z, E[relu(z)^2] = 1/2, so the sqrt(2) factor
  gives the output a unit second moment (presumably the reason for the
  "normalized" in the name).
  """
  scale = math.sqrt(2)
  return scale * relu(x)

def normalized_tanh(x):
  """tanh activation divided by the fixed constant 0.6279.

  NOTE(review): 0.6279 presumably approximates sqrt(E[tanh(z)^2]) for a
  standard-normal z, mirroring the scaling of normalized_relu -- confirm.
  """
  tanh_scale = 0.6279
  return tanh(x) / tanh_scale

def build_model(input_size, r = 32):
  """Build a Sequential network with one hidden dense layer and a 3-unit output.

  Args:
    input_size: shape tuple of a single input sample.
    r: number of units in the hidden layer.

  Returns:
    An uncompiled Sequential model. The output layer has no activation, so
    it produces raw scores (logits) for three classes.
  """
  model = Sequential()
  # standard deviation of the Gaussian used to initialize the hidden weights:
  kernel_std = math.sqrt(input_size[-1] / np.prod(input_size))
  # hidden fully connected layer; its bias starts at 0 and the MaxNorm(0)
  # constraint caps the bias norm at 0, which keeps it at zero:
  hidden_layer = Dense(
    r,
    activation = normalized_relu,
    kernel_initializer = RandomNormal(0, kernel_std),
    bias_initializer = Constant(0),
    bias_constraint = MaxNorm(0),
    input_shape = input_size,
  )
  model.add(hidden_layer)
  # output layer:
  output_layer = Dense(3)
  model.add(output_layer)
  return model

In [None]:
# data: copy the Iris CSV from the mounted Drive into /content,
# where load_data("iris.csv") is later pointed at it via a relative path.
!cp /content/drive/MyDrive/iris/iris.csv /content

In [None]:
# input transformation:
def load_data(path):
  """Load the Iris CSV at `path` and return (features, labels).

  The CSV must contain a "Species" column as its last column. The first
  column is dropped (presumably a row id -- confirm against the file) and
  every column in between is treated as a numeric feature.

  Args:
    path: path of the CSV file.

  Returns:
    features: float32 array of shape (n_samples, n_features, 1).
    labels: int array of shape (n_samples,) holding 0 (Iris-setosa),
      1 (Iris-versicolor) or 2 (Iris-virginica).

  Raises:
    ValueError: if "Species" contains a value other than the three above.
  """
  species_codes = {"Iris-setosa": 0, "Iris-versicolor": 1, "Iris-virginica": 2}
  df = pd.read_csv(path)
  # Encode ONLY the label column. Assigning through a boolean row mask
  # (`df[mask] = 0`) overwrites every column of the matching rows, which
  # would replace the features themselves with the class index.
  codes = df["Species"].map(species_codes)
  if codes.isna().any():
    unknown = sorted(set(df.loc[codes.isna(), "Species"].astype(str)))
    raise ValueError("unknown Species value(s): " + ", ".join(unknown))
  df["Species"] = codes
  data = df.to_numpy(dtype = np.float32)
  # drop the first column and the label column; add a trailing axis of size 1:
  features = data[:, 1:-1][:, :, None]
  labels = data[:, -1].astype(int)
  return features, labels

def sphere_transformation(data):
    """Map angles along the last axis to Cartesian coordinates on a unit sphere.

    The last axis of `data` (length d) is read as d angles and replaced by
    d + 1 coordinates:
      out[0] = cos(a_0)
      out[k] = sin(a_0) * ... * sin(a_{k-1}) * cos(a_k)   for 0 < k < d
      out[d] = sin(a_0) * ... * sin(a_{d-1})
    The squares of these coordinates sum to 1.

    Args:
        data: array of angles with shape (..., d).

    Returns:
        Array of shape (..., d + 1) with numpy's default float dtype.
    """
    n_angles = data.shape[-1]
    sphere = np.empty((*data.shape[:-1], n_angles + 1))
    sphere[..., 0] = np.cos(data[..., 0])
    for k in range(1, n_angles):
        leading_sines = np.sin(data[..., :k]).prod(axis = -1)
        sphere[..., k] = leading_sines * np.cos(data[..., k])
    sphere[..., -1] = np.sin(data).prod(axis = -1)
    return sphere

features, labels = load_data("iris.csv")
# divide each feature by its maximum and map it affinely to an angle, so that
# the largest value of every feature becomes pi:
features = (2 * features / np.max(features, axis = 0) - 1) * math.pi

# Fix the seed of the train/test split. The results checkpoint stores
# cross-validation fold indices into train_features together with the losses
# computed on them; with an unseeded split, a resumed run would index a
# different training set and mix results from different splits.
split_seed = 0
train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size = 0.2, random_state = split_seed)

# convert the angles to sphere coordinates and flatten each sample to a vector:
train_features = sphere_transformation(train_features)
train_features = train_features.reshape((train_features.shape[0], -1))
test_features = sphere_transformation(test_features)
test_features = test_features.reshape((test_features.shape[0], -1))

In [None]:
# experiment setup:
input_size = train_features[0].shape        # shape of a single (flattened) sample
rs = tuple(2 ** k for k in range(1, 12))    # hidden-layer widths 2, 4, ..., 2048
n_folds = 5                                 # number of stratified CV folds
epochs = 500                                # maximum number of training epochs per fit

# hyperparameter grids:
batch_sizes = (16, 32, 64)                  # SGD batch sizes
decay_epochs = (25, 50, 75, 100)            # epoch counts passed to lr_decay as `epochs`
Cs = (1E-3, 1E-2, 1E-1, 1, 1E+1, 1E+2, 1E+3)  # values of C for LogisticRegression

In [None]:
# read current results, or start a fresh record if no checkpoint exists yet:
if not os.path.exists("/content/drive/MyDrive/Irisrelu.pickle"):
  # draw the cross-validation folds once; both hyperparameter searches
  # reuse the same splits, and they are stored alongside the losses:
  folds = StratifiedKFold(n_splits = n_folds, shuffle = True)
  splits = list(folds.split(train_features, train_labels))
  results = {
    "HYP_LAST_LOSSES": [],
    "HYP_LAST_SPLITS": splits,
    "HYP_FULL_LOSSES": [],
    "HYP_FULL_SPLITS": splits,
    "LAST_LOSSES": [],
    "FULL_LOSSES": [],
  }
else:
  with open("/content/drive/MyDrive/Irisrelu.pickle", "rb") as file:
    results = pickle.load(file)

In [None]:
# learning rate scheduler:
# learning-rate bounds used by the optimizer and the scheduler:
max_lr = 1e-2
min_lr = 1e-5

def lr_decay(epoch, lr, epochs, max_lr, min_lr):
  """Return the learning rate for the next epoch under geometric decay.

  The decay factor is (min_lr / max_lr) ** (1 / (epochs - 1)), i.e. applying
  it epochs - 1 times takes max_lr down to min_lr.

  Args:
    epoch: index of the epoch about to start; epoch 0 leaves lr unchanged.
    lr: current learning rate.
    epochs: number of epochs over which max_lr would decay to min_lr.
    max_lr: upper learning-rate bound used to derive the decay factor.
    min_lr: lower bound; a decay step is skipped when its result would not
      exceed this value.

  Returns:
    lr multiplied by the decay factor, or lr itself when epoch is 0 or the
    decayed value would not be greater than min_lr.
  """
  if epoch == 0:
    return lr
  factor = (min_lr / max_lr) ** (1 / (epochs - 1))
  decayed = lr * factor
  return decayed if decayed > min_lr else lr

# full model evaluation:
def full_model_eval(r, input_size, train_features, train_labels, test_features, test_labels, de, bs, test_loss, test_accuracy):
  """Train the whole network with SGD and report its loss/accuracy on the test data.

  Args:
    r: hidden-layer width passed to build_model.
    input_size: shape of a single input sample.
    train_features, train_labels: training data; a random 25% of it is held
      out as the validation set that drives early stopping.
    test_features, test_labels: data the trained model is evaluated on.
    de: epoch count handed to lr_decay as its `epochs` argument.
    bs: batch size.
    test_loss, test_accuracy: objects whose `.value` attribute receives the
      evaluation results (the callers pass multiprocessing.Value instances).
  """
  # split training set:
  X_train, X_val, y_train, y_val = train_test_split(train_features, train_labels, test_size = 0.25)
  # callbacks: stop once val_loss stalls (keeping the best weights) and decay the learning rate:
  stopper = EarlyStopping(monitor = "val_loss", min_delta = 0.001, patience = 10, restore_best_weights = True)
  scheduler = LearningRateScheduler(lambda epoch, lr: lr_decay(epoch, lr, de, max_lr, min_lr))
  # train model:
  network = build_model(input_size, r)
  network.compile(
    optimizer = SGD(learning_rate = max_lr),
    loss = SparseCategoricalCrossentropy(from_logits = True),
    metrics = ["accuracy"],
  )
  network.fit(
    X_train,
    y_train,
    epochs = epochs,
    batch_size = bs,
    validation_data = (X_val, y_val),
    callbacks = [stopper, scheduler],
    verbose = 0,
  )
  # evaluate on test set:
  loss_value, accuracy_value = network.evaluate(test_features, test_labels, verbose = 0)
  test_loss.value = loss_value
  test_accuracy.value = accuracy_value

# last layer model evaluation:
def last_layer_eval(r, input_size, train_features, train_labels, test_features, test_labels, C, test_loss, test_accuracy):
  """Fit only a linear classifier on top of the untrained hidden layer.

  A freshly initialized network is built and never trained; the outputs of
  its second-to-last layer serve as fixed features for a standardized
  logistic regression.

  Args:
    r: hidden-layer width passed to build_model.
    input_size: shape of a single input sample.
    train_features, train_labels: data the logistic regression is fitted on.
    test_features, test_labels: data the fitted classifier is evaluated on.
    C: value of the `C` argument of LogisticRegression.
    test_loss, test_accuracy: objects whose `.value` attribute receives the
      log loss and accuracy (the callers pass multiprocessing.Value instances).
  """
  # feature extractor: the network truncated after its second-to-last layer
  network = build_model(input_size, r)
  feature_extractor = Model(inputs = network.input, outputs = network.layers[-2].output)
  classifier = make_pipeline(
    StandardScaler(),
    LogisticRegression(C = C, fit_intercept = False, max_iter = 10000, verbose = 0),
  )
  # train model:
  hidden_train = feature_extractor.predict(train_features, verbose = 0)
  classifier.fit(hidden_train, np.squeeze(train_labels))
  # evaluate on test set:
  hidden_test = feature_extractor.predict(test_features, verbose = 0)
  y_test = np.squeeze(test_labels)
  test_loss.value = log_loss(y_test, classifier.predict_proba(hidden_test))
  test_accuracy.value = classifier.score(hidden_test, y_test)

In [None]:
# optimize last layer hyperparams:
# Grid search over C for the last-layer model: one validation loss per
# (fold i, width j, C index k), stored in results["HYP_LAST_LOSSES"][i][j][k].
# Entries already present in the checkpoint are skipped, so the cell can be
# re-run to resume an interrupted search.
for i, (train, val) in enumerate(results["HYP_LAST_SPLITS"]):

  if len(results["HYP_LAST_LOSSES"]) <= i:
    results["HYP_LAST_LOSSES"].append([])

  for j, r in enumerate(rs):

    if len(results["HYP_LAST_LOSSES"][i]) <= j:
      results["HYP_LAST_LOSSES"][i].append([])

    # train last layer models:
    for k, C in enumerate(Cs):

      # already computed in an earlier run:
      if len(results["HYP_LAST_LOSSES"][i][j]) > k:
        continue

      # run each evaluation in its own process; results come back through shared values:
      loss = Value("d", math.inf)
      accuracy = Value("d", 0.)
      p = Process(target = last_layer_eval, args = (r, input_size, train_features[train], train_labels[train], train_features[val], train_labels[val], C, loss, accuracy))
      p.start()
      p.join()
      # a crashed worker leaves `loss` at its inf placeholder; stop instead of
      # checkpointing that placeholder, so a re-run retries this entry:
      if p.exitcode != 0:
        raise RuntimeError(f"last_layer_eval worker exited with code {p.exitcode} (fold {i}, r = {r}, C = {C})")
      results["HYP_LAST_LOSSES"][i][j].append(loss.value)

      # checkpoint after every evaluation:
      with open("/content/drive/MyDrive/Irisrelu.pickle", "wb") as file:
        pickle.dump(results, file, protocol = pickle.HIGHEST_PROTOCOL)

In [None]:
# for every width in rs, pick the C with the lowest validation loss averaged over the folds:
mean_last_losses = np.mean(results["HYP_LAST_LOSSES"], axis = 0)
best_C_index = np.argmin(mean_last_losses, axis = -1)
C_best = np.array(Cs)[best_C_index]
print(C_best)

[1000. 1000. 1000. 1000. 1000. 1000. 1000. 1000.    1. 1000. 1000.]


In [None]:
# optimize full model hyperparams:
# Grid search over batch size and decay epochs for the full model: one
# validation loss per (fold i, width j, batch size k, decay epochs l), stored
# in results["HYP_FULL_LOSSES"][i][j][k][l]. Entries already present in the
# checkpoint are skipped, so the cell can be re-run to resume the search.
for i, (train, val) in enumerate(results["HYP_FULL_SPLITS"]):

  if len(results["HYP_FULL_LOSSES"]) <= i:
    results["HYP_FULL_LOSSES"].append([])

  for j, r in enumerate(rs):

    if len(results["HYP_FULL_LOSSES"][i]) <= j:
      results["HYP_FULL_LOSSES"][i].append([])

    for k, bs in enumerate(batch_sizes):

      if len(results["HYP_FULL_LOSSES"][i][j]) <= k:
        results["HYP_FULL_LOSSES"][i][j].append([])

      # train full models:
      for l, de in enumerate(decay_epochs):

        # already computed in an earlier run:
        if len(results["HYP_FULL_LOSSES"][i][j][k]) > l:
          continue

        # run each training in its own process; results come back through shared values:
        loss = Value("d", math.inf)
        accuracy = Value("d", 0.)
        p = Process(target = full_model_eval, args = (r, input_size, train_features[train], train_labels[train], train_features[val], train_labels[val], de, bs, loss, accuracy))
        p.start()
        p.join()
        # a crashed worker leaves `loss` at its inf placeholder; stop instead of
        # checkpointing that placeholder, so a re-run retries this entry:
        if p.exitcode != 0:
          raise RuntimeError(f"full_model_eval worker exited with code {p.exitcode} (fold {i}, r = {r}, batch size = {bs}, decay epochs = {de})")
        results["HYP_FULL_LOSSES"][i][j][k].append(loss.value)

        # checkpoint after every evaluation:
        with open("/content/drive/MyDrive/Irisrelu.pickle", "wb") as file:
          pickle.dump(results, file, protocol = pickle.HIGHEST_PROTOCOL)

In [None]:
# validation loss averaged over the folds; axes: (width, batch size, decay epochs)
means = np.mean(results["HYP_FULL_LOSSES"], axis = 0)
# per width, locate the minimum over the flattened (batch size, decay epochs) grid ...
grid_shape = means.shape[1:]
flat_best = means.reshape((means.shape[0], -1)).argmin(axis = -1)
# ... and convert the flat position back into one index per hyperparameter:
indices = np.unravel_index(flat_best, grid_shape)
batch_index, decay_index = indices[0], indices[1]
batch_size_best = np.array(batch_sizes)[batch_index]
decay_epochs_best = np.array(decay_epochs)[decay_index]
print(batch_size_best)
print(decay_epochs_best)

[32 32 16 16 16 16 64 16 16 32 16]
[100  50  50 100 100  75 100 100  50  75  25]


In [None]:
# final evaluation: for every width, repeatedly train both model variants on
# the whole training set with the selected hyperparameters and record the
# test loss. Entries already present in the checkpoint are skipped.
repetitions = 25

for i in range(repetitions):

  if len(results["LAST_LOSSES"]) <= i:
    results["LAST_LOSSES"].append([])
  if len(results["FULL_LOSSES"]) <= i:
    results["FULL_LOSSES"].append([])

  for j, r in enumerate(rs):

    updated = False

    if len(results["LAST_LOSSES"][i]) <= j:
      loss = Value("d", math.inf)
      accuracy = Value("d", 0.)
      p = Process(target = last_layer_eval, args = (r, input_size, train_features, train_labels, test_features, test_labels, C_best[j], loss, accuracy))
      p.start()
      p.join()
      # a crashed worker leaves `loss` at its inf placeholder; do not record it:
      if p.exitcode != 0:
        raise RuntimeError(f"last_layer_eval worker exited with code {p.exitcode} (repetition {i}, r = {r})")
      results["LAST_LOSSES"][i].append(loss.value)
      updated = True

    if len(results["FULL_LOSSES"][i]) <= j:
      loss = Value("d", math.inf)
      accuracy = Value("d", 0.)
      p = Process(target = full_model_eval, args = (r, input_size, train_features, train_labels, test_features, test_labels, decay_epochs_best[j], batch_size_best[j], loss, accuracy))
      p.start()
      p.join()
      if p.exitcode != 0:
        raise RuntimeError(f"full_model_eval worker exited with code {p.exitcode} (repetition {i}, r = {r})")
      results["FULL_LOSSES"][i].append(loss.value)
      updated = True

    # checkpoint as soon as new results exist, like the hyperparameter
    # searches do; otherwise the skip logic above cannot resume anything
    # after an interrupted session:
    if updated:
      with open("/content/drive/MyDrive/Irisrelu.pickle", "wb") as file:
        pickle.dump(results, file, protocol = pickle.HIGHEST_PROTOCOL)

# mean test loss per width, averaged over the repetitions:
print(np.mean(results["LAST_LOSSES"], axis = 0))
print(np.mean(results["FULL_LOSSES"], axis = 0))

with open("/content/drive/MyDrive/Irisrelu.pickle", "wb") as file:
  pickle.dump(results, file, protocol = pickle.HIGHEST_PROTOCOL)

In [None]:
import matplotlib.pyplot as plt

# mean test loss per hidden width, averaged over the repetitions:
x = rs
y1 = np.mean(results["LAST_LOSSES"], axis = 0)
y2 = np.mean(results["FULL_LOSSES"], axis = 0)

# label both curves and the axes so the saved figure can be read on its own:
fig, ax = plt.subplots()
ax.plot(x, y1, label = "last layer only")
ax.plot(x, y2, label = "full model")
ax.set_xlabel("hidden width r")
ax.set_ylabel("mean test loss")
ax.set_title("Test loss vs. hidden width")
ax.legend()
fig.savefig("scatterIrisrelu.png")

In [None]:
# copy the saved figure from /content to the mounted Drive
!cp /content/scatterIrisrelu.png /content/drive/MyDrive

In [None]:
from google.colab import runtime
# release the Colab runtime; by this point the results pickle and the figure
# have been written to Drive by the preceding cells
runtime.unassign()