In this project, we plan to study generalization in different
learning algorithms. In particular, we will examine the connection
between generalization performance and various complexity metrics,
such as MixUp.

Reference:
https://parthnatekar.github.io/generalization.html?fbclid=IwAR1FFx31BjHcquOa0hng9aKT6mJREAF1w1jUggo7D1yKcJ5kPRm3B9wznSI

Link to mixup: https://arxiv.org/abs/1710.09412

Suggestions:

- Scalable in the size of the hyperparameter search.
- Can we use TensorFlow?
- Can we even use Keras Tuner for the hyperparameter search?

Possible data sets:

- Covid19:
  https://www.kaggle.com/sudalairajkumar/novel-corona-virus-2019-dataset
- Image data of some kind, e.g. faces. A linear combination of images at
  least has a clear interpretation.
- Breast Cancer Wisconsin (Diagnostic) Data Set: predicting
  benign/malignant based on digitized images.

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import sklearn
from tensorflow.keras import regularizers
from pdb import set_trace
from sparkdl import HorovodRunner
#from kerastuner.tuners import RandomSearch
#from kerastuner.engine.hyperparameters import HyperParameters
#from kerastuner import Hyperband
import time

In [None]:
# Print every compute device TensorFlow reports for this machine, to check
# whether a GPU is visible before training.
from tensorflow.python.client import device_lib

local_device_protos = device_lib.list_local_devices()

print(local_device_protos)

  

>     [name: "/device:CPU:0"
>     device_type: "CPU"
>     memory_limit: 268435456
>     locality {
>     }
>     incarnation: 12704549050924573955
>     , name: "/device:XLA_CPU:0"
>     device_type: "XLA_CPU"
>     memory_limit: 17179869184
>     locality {
>     }
>     incarnation: 11015930834247940706
>     physical_device_desc: "device: XLA_CPU device"
>     , name: "/device:XLA_GPU:0"
>     device_type: "XLA_GPU"
>     memory_limit: 17179869184
>     locality {
>     }
>     incarnation: 9436066314374116018
>     physical_device_desc: "device: XLA_GPU device"
>     , name: "/device:GPU:0"
>     device_type: "GPU"
>     memory_limit: 4898684928
>     locality {
>       bus_id: 1
>       links {
>       }
>     }
>     incarnation: 15433283977610770069
>     physical_device_desc: "device: 0, name: Tesla T4, pci bus id: 0000:00:1e.0, compute capability: 7.5"
>     ]

In [None]:
class mix_Sequential(tf.keras.Sequential):
    """Sequential model whose training step mixes samples at an intermediate layer.

    In each training step the batch and a permuted copy of it are passed
    through the first ``ind`` layers, the two activations (and the two label
    tensors) are blended with per-sample random weights, and the blend is
    passed through the remaining layers. With ``ind == 0`` the blend is applied
    to the raw inputs.
    """

    def __init__(self,*params):
        super(mix_Sequential,self).__init__(*params)

    def train_step(self,data):
        """Run one optimisation step on mixed activations and mixed labels.

        Args:
            data: An ``(X, Y)`` pair. ``Y`` is blended with float32 weights, so
                it is expected to be a float tensor (e.g. one-hot labels).

        Returns:
            A dict mapping each metric name to its current result.
        """
        X,Y = data
        # NOTE(review): the layer index is drawn with NumPy, i.e. in Python.
        # If Keras compiles train_step into a graph, the draw presumably
        # happens once at trace time rather than once per batch; confirm.
        ind = np.random.randint(len(self.layers))

        batch_size = tf.shape(X)[0]
        # One mixing weight per sample, uniform in [0, 1).
        lam = tf.random.uniform((batch_size,), minval=0, maxval=1, dtype=tf.dtypes.float32)

        # Draw a single permutation and apply it to inputs and labels alike.
        # Shuffling X and Y with two separate ops only keeps the pairs aligned
        # if both ops happen to produce the same permutation.
        perm = tf.random.shuffle(tf.range(batch_size), seed=1)
        Xs,Ys = tf.gather(X, perm),tf.gather(Y, perm)

        with tf.GradientTape() as tape:
            # Both copies go through the layers before the mixing point.
            for layer in self.layers[:ind]:
                X = layer(X)
                Xs = layer(Xs)

            # Labels must be blended with exactly the same weights as the
            # activations.
            X = self.linear_combine(lam,X,Xs)
            Y = self.linear_combine(lam,Y,Ys)

            for layer in self.layers[ind:]:
                X = layer(X)

            loss = self.compiled_loss(Y, X, regularization_losses=self.losses)

        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        # Metrics are computed against the mixed labels.
        self.compiled_metrics.update_state(Y, X)

        return {m.name: m.result() for m in self.metrics}

    def linear_combine(self,lam,X,Xs):
        """Return ``lam*X + (1-lam)*Xs`` with ``lam`` applied per sample.

        ``lam`` has one entry per sample; it is reshaped to
        ``(-1, 1, ..., 1)`` so that it broadcasts over the remaining axes
        of ``X``.
        """
        l = tf.reshape(lam, (-1, *([1]*(len(X.shape)-1)) ))
        Xl = l*X + (1-l)*Xs
        return Xl
        

class mixup_Sequential(tf.keras.Sequential):
    """Sequential model whose training step applies mixup to the input batch.

    In each training step the inputs and labels are blended with a permuted
    copy of the same batch, using per-sample random weights, before the
    forward pass.
    """

    def __init__(self,*params):
        super(mixup_Sequential,self).__init__(*params)

    def train_step(self,data):
        """Run one optimisation step on a mixed-up batch.

        Args:
            data: An ``(X, Y)`` pair. ``Y`` is blended with float32 weights, so
                it is expected to be a float tensor (e.g. one-hot labels).

        Returns:
            A dict mapping each metric name to its current result.
        """
        X,Y = data
        batch_size = tf.shape(X)[0]
        # One mixing weight per sample, uniform in [0, 1).
        lam = tf.random.uniform((batch_size,), minval=0, maxval=1, dtype=tf.dtypes.float32)

        # Draw a single permutation and apply it to inputs and labels alike.
        # Shuffling X and Y with two separate ops only keeps the pairs aligned
        # if both ops happen to produce the same permutation.
        perm = tf.random.shuffle(tf.range(batch_size), seed=1)
        Xs,Ys = tf.gather(X, perm),tf.gather(Y, perm)

        # Labels must be blended with exactly the same weights as the inputs.
        X = self.linear_combine(lam,X,Xs)
        Y = self.linear_combine(lam,Y,Ys)
        with tf.GradientTape() as tape:
            for layer in self.layers:
                X = layer(X)

            loss = self.compiled_loss(Y, X, regularization_losses=self.losses)

        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        # Metrics are computed against the mixed labels.
        self.compiled_metrics.update_state(Y, X)

        return {m.name: m.result() for m in self.metrics}

    def linear_combine(self,lam,X,Xs):
        """Return ``lam*X + (1-lam)*Xs`` with ``lam`` applied per sample.

        ``lam`` has one entry per sample; it is reshaped to
        ``(-1, 1, ..., 1)`` so that it broadcasts over the remaining axes
        of ``X``.
        """
        l = tf.reshape(lam, (-1, *([1]*(len(X.shape)-1)) ))
        Xl = l*X + (1-l)*Xs
        return Xl


In [None]:
# Load CIFAR-10 as (images, labels) pairs for the train and test splits.
trainData,testData = tf.keras.datasets.cifar10.load_data()
trainX,trainY = trainData
testX,testY = testData

# Images: cast to float32 and rescale so that pixel value 0 maps to -1 and
# 255 maps to +1. x/255*2 spans [0, 2], so the offset has to be 1; an offset
# of 2 would place the data in [-2, 0].
trainX = tf.cast(trainX,tf.float32)/255 * 2 - 1
testX = tf.cast(testX,tf.float32)/255 * 2 - 1

# Labels: one-hot encode into 10 classes. The [:,0,:] indexing removes the
# extra axis that one_hot inherits from the labels' second dimension.
trainY_oh = tf.cast(tf.one_hot(trainY,10)[:,0,:],tf.float32)
testY_oh = tf.cast(tf.one_hot(testY,10)[:,0,:],tf.float32)

# Keep a flat float32 copy of the integer training labels as well.
trainY = tf.cast(trainY,tf.float32)[:,0]

print(testY_oh.shape)

In [None]:
def build_model(hp):
    """Build and compile a CNN classifier from tuner hyperparameters.

    Hyperparameters read from ``hp``:
        number_dense: number of hidden dense layers (1-10).
        number_neuron_dense: units per hidden dense layer (10-50, step 10).
        number_conv: number of 3x3 convolution layers (1-20); layer ``i``
            has ``16 + 16*i`` filters.
        type_of_model: "reg" (plain Sequential), "mix" (mix_Sequential) or
            "mixup" (mixup_Sequential).
        lambd: L2 penalty coefficient (0-0.001) applied to every kernel.

    Args:
        hp: Hyperparameter container providing ``Int``, ``Float`` and
            ``Choice``.

    Returns:
        A compiled model with a 10-way softmax output, categorical
        cross-entropy loss and an accuracy metric.

    Raises:
        Exception: If ``type_of_model`` is not one of the known names.
    """
    number_dense_layers = hp.Int("number_dense",1,10)
    number_units_dense = hp.Int("number_neuron_dense",min_value = 10,max_value = 50,step = 10)
    number_conv = hp.Int("number_conv",1,20)
    type_of_model = hp.Choice("type_of_model", ["reg","mix","mixup"])
    lambd = hp.Float("lambd",0,0.001)

    model_classes = {
        "reg": tf.keras.Sequential,
        "mix": mix_Sequential,
        "mixup": mixup_Sequential,
    }
    if type_of_model not in model_classes:
        raise Exception(f"No model type called {type_of_model}")
    model = model_classes[type_of_model]()

    def l2_penalty():
        # A fresh regularizer per layer. Without it the sampled `lambd` has no
        # effect on the model and the tuner searches a dead dimension.
        return regularizers.l2(lambd)

    for i in range(number_conv):
        model.add(tf.keras.layers.Conv2D(16+16*i,kernel_size = 3,activation= "relu",padding="same",
                                         kernel_regularizer = l2_penalty()))

    model.add(tf.keras.layers.Flatten())

    for _ in range(number_dense_layers):
        model.add(tf.keras.layers.Dense(units = number_units_dense,activation= "relu",
                                        kernel_regularizer = l2_penalty()))
    model.add(tf.keras.layers.Dense(units = 10,activation= "softmax",
                                    kernel_regularizer = l2_penalty()))
    # No optimizer is passed, so Keras falls back to its default.
    model.compile(loss= "CategoricalCrossentropy",metrics=['accuracy'])
    return model

In [None]:
# Launch the TensorBoard UI on the "logstb" log directory.
# NOTE(review): the bare `tensorboard` line is not a Python statement; it
# presumably stands for a notebook magic such as `%load_ext tensorboard`.
# Confirm against the original notebook.
tensorboard
%tensorboard --logdir logstb

In [None]:
# TensorBoard logging into "logstb": histograms every 2 epochs, graph written,
# metrics updated once per epoch, batch 2 profiled.
tbcallback = tf.keras.callbacks.TensorBoard(
    log_dir="logstb",
    histogram_freq=2,
    write_graph=True,
    write_images=False,
    update_freq='epoch',
    profile_batch=2,
    embeddings_freq=0,
)

# Stop a trial once validation accuracy has failed to improve by at least
# 0.01 for 5 consecutive epochs, and roll back to the best weights seen.
early_stop_cb = tf.keras.callbacks.EarlyStopping(
    monitor="val_accuracy",
    min_delta=0.01,
    patience=5,
    verbose=0,
    mode="auto",
    baseline=None,
    restore_best_weights=True,
)

# Random search over build_model's hyperparameters: 50 trials, one training
# run per trial, ranked by validation accuracy.
# NOTE(review): RandomSearch is only imported in a commented-out line among
# the visible imports; it must be imported before this cell runs.
tuner = RandomSearch(
    build_model,
    objective = "val_accuracy",
    max_trials = 50,
    executions_per_trial = 1,
    directory = "logs",
)

# The test split is passed as validation data for every trial.
tuner.search(
    x = trainX,
    y = trainY_oh,
    epochs = 50,
    batch_size = 64,
    validation_data = (testX,testY_oh),
    callbacks = [tbcallback,early_stop_cb],
)

In [None]:
import tensorflow.keras as keras
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
import numpy as np
import tensorflow.keras.backend as K 

# Load MNIST and keep the untouched training arrays under the *_p names.
(x_train_p, y_train_p), (x_test, y_test) = mnist.load_data()

# The full training set is used; slice here (e.g. [0:1000]) to work on a subset.
x_train = x_train_p
y_train = y_train_p

num_classes = 10
image_size = 784 #28*28

# One-hot encode the labels.
y_test = keras.utils.to_categorical(y_test, num_classes)
y_train = keras.utils.to_categorical(y_train, num_classes)

# Convert the images to float32, divide by 255, and flatten each image into a
# vector of length image_size.
x_train = (x_train.astype('float32') / 255).reshape(x_train.shape[0], image_size)
x_test = (x_test.astype('float32') / 255).reshape(x_test.shape[0], image_size)

def create_dense(layer_sizes):
    """Build and compile a fully connected softmax classifier.

    Args:
        layer_sizes: Sequence with the number of units of each hidden ReLU
            layer, in order. Entries are converted with ``int()``, so
            integer-valued floats (e.g. from ``width * np.ones(depth)``) are
            accepted. An empty sequence yields a model consisting of the
            softmax output layer only.

    Returns:
        A compiled Sequential model that takes inputs of length
        ``image_size`` and outputs ``num_classes`` probabilities. It is
        trained with SGD (learning rate 0.003, momentum 0.95) on categorical
        cross-entropy and tracks accuracy.
    """
    model = Sequential()
    # Declare the input shape separately so the model does not depend on a
    # first hidden layer being present.
    model.add(keras.Input(shape=(image_size,)))
    for s in layer_sizes:
        model.add(Dense(units = int(s), activation = 'relu'))
    model.add(Dense(units=num_classes, activation='softmax'))
    # `learning_rate` replaces the deprecated `lr` keyword.
    model.compile(optimizer=keras.optimizers.SGD(learning_rate = 0.003, momentum = 0.95), loss='categorical_crossentropy', metrics=['accuracy'])
    return model
# Mini-batch size and number of epochs used when fitting the dense models.
batch_size = 256
epochs = 20

In [None]:
from ray import tune
from ray.tune import CLIReporter
import random
import tensorflow as tf
# Query the GPU device name; the return value is discarded.
# NOTE(review): presumably a sanity check that a GPU is available; confirm.
tf.test.gpu_device_name()

import sys

# Replace stdout's fileno() with a stub that returns False.
# NOTE(review): presumably a workaround for the notebook's stdout object not
# offering a usable fileno() for Ray; confirm before removing.
sys.stdout.fileno = lambda: False

# Limit the number of rows.
reporter = CLIReporter(max_progress_rows=10)
# Add a custom metric column, in addition to the default metrics.
# Note that this must be a metric that is returned in your training results.
reporter.add_metric_column("mixup_accuracy")
reporter.add_metric_column("test_accuracy")

# Randomizes the degree of mixup in test data, by choosing a
# random mean for the truncated normal used in mixup_data().
# To make the hyperparameter search fair, the same degree of
# test mixup should be used for all choices of hyperparameters.
# The mean is a scalar drawn uniformly from [0, 0.5).
mean_test = tf.random.uniform(shape=[], maxval=0.5, dtype=tf.dtypes.float32)

def linear_combine(lam,X,Xs):
    """Blend two tensors sample-wise: ``lam`` weights ``Xs``, ``1 - lam`` weights ``X``.

    ``lam`` holds one weight per sample and is reshaped to ``(-1, 1, ..., 1)``
    so that it broadcasts over all remaining axes of ``X``.
    """
    trailing_ones = [1] * (len(X.shape) - 1)
    weights = tf.reshape(lam, (-1, *trailing_ones))
    return weights * Xs + (1 - weights) * X

def mixup_data( X, Y, mean ):
    """Return a mixed-up copy of ``(X, Y)``, or the inputs unchanged if ``mean == 0``.

    Each sample is blended with a partner taken from a random permutation of
    the same data. The per-sample weight of the partner is drawn from a
    truncated normal with the given ``mean`` and standard deviation
    ``0.5 * mean``. Labels are blended with exactly the same weights and
    partners as the inputs.
    """
    # No mixup; training and/or testing on unmixed data.
    if mean == 0:
        return X, Y

    n_samples = tf.shape(X)[0]
    lam = tf.random.truncated_normal((n_samples,), mean=mean, stddev=0.5*mean, dtype=tf.dtypes.float32)

    # Permute indices once and gather with them, so that inputs and labels
    # are guaranteed to get the same partner.
    idx = tf.random.shuffle(tf.range(start=0, limit=n_samples, dtype=tf.int32), seed = 1)
    X_partner = tf.gather(X, idx)
    Y_partner = tf.gather(Y, idx)

    # Mixup: form convex combinations of the data and of the labels.
    return linear_combine(lam,X,X_partner), linear_combine(lam,Y,Y_partner)

def training_function(config, checkpoint_dir=None):
    """Ray Tune trainable: fit a dense net on mixed-up data and report accuracies.

    Args:
        config: Dict with keys ``"width"`` (units per hidden layer),
            ``"depth"`` (number of hidden layers) and ``"mean"`` (mean mixup
            weight for the training data; 0 disables mixup).
        checkpoint_dir: Accepted for Ray Tune's function API; not used.

    Reports to Tune:
        train_accuracy: Final-epoch accuracy on the (mixed) training data.
        mean_loss: Same value as ``train_accuracy``. Kept only so existing
            result consumers keep working; despite its name it is not a loss.
        mixup_accuracy: Accuracy on test data mixed with ``mean_test``.
        test_accuracy: Accuracy on the unmixed test data.
    """
    # Hyperparameters
    width, depth = config["width"], config["depth"]
    # Plain integer layer sizes: `depth` hidden layers of `width` units each.
    model = create_dense( [width] * depth )

    mean = config["mean"]
    # The training set is mixed once, up front, not once per batch.
    x_train_mix, y_train_mix = mixup_data( x_train, y_train, mean )
    history = model.fit(x_train_mix, y_train_mix, batch_size=batch_size, epochs=epochs, verbose=False)

    x_mix, y_mix = mixup_data( x_test, y_test, mean_test )
    # Compute loss (accuracy) for...
    mix_loss, mix_acc = model.evaluate( x_mix, y_mix ) # ...mixed test data
    test_loss, test_acc = model.evaluate( x_test, y_test ) # ...unmixed test data
    train_acc = history.history['accuracy'][-1] # ...(mixed) training data
    tune.report(mean_loss=train_acc, train_accuracy=train_acc,
                mixup_accuracy=mix_acc, test_accuracy = test_acc)


# Grid search over width, depth and training mixup mean; each trial is
# allotted one CPU and one GPU, and results are written to ./ray_results.
analysis = tune.run(
    training_function,
    config={
        "width": tune.grid_search([400]), # Change back to 300, 400
        "depth": tune.grid_search([1]), # Change back to 1, 2
        "mean": tune.grid_search([0, 0.1, 0.2, 0.35, 0.5])
    },
    resources_per_trial={'cpu': 1, 'gpu': 1},
    local_dir='ray_results',
    progress_reporter=reporter,
)

# The best configuration is the one with the highest accuracy on mixed test data.
best_config = analysis.get_best_config(metric="mixup_accuracy", mode="max")
print("Best config: ", best_config)

# Get a dataframe for analyzing trial results.
df = analysis.results_df
print("Degree of mixup for test data: mean_test = ", mean_test)

  

>     2021-01-09 13:13:19,423	INFO services.py:1173 -- View the Ray dashboard at http://127.0.0.1:8265
>     2021-01-09 13:13:22,109	INFO logger.py:627 -- pip install 'ray[tune]' to see TensorBoard files.

In [None]:
# Temporary code - testing how to work with data in FileStore.
#df = spark.read.format("image").load("/FileStore/tables/Group20/seg_test/seg_test/buildings/20061.jpg")
#display(df)

# Root directory of the test images.
dir_test = "/FileStore/tables/Group20/seg_test/seg_test/"

# List the entries of the "mountain" sub-directory and count them.
files = dbutils.fs.ls(dir_test + "mountain/")
n = len(files)

print("Number of pictures: ", n)



  

>     Number of pictures:  525

In [None]:
# Load everything under dir_test with Spark's image data source and show the
# resulting schema.
df = spark.read.format("image").load(dir_test)
df.printSchema()

# The DataFrame has a single struct column, `image`; no column is named after
# a file. To show one picture, filter on the file path in `image.origin`.
display(df.filter(df["image.origin"].endswith("20061.jpg")).select("image"))


  

>     root
>      |-- image: struct (nullable = true)
>      |    |-- origin: string (nullable = true)
>      |    |-- height: integer (nullable = true)
>      |    |-- width: integer (nullable = true)
>      |    |-- nChannels: integer (nullable = true)
>      |    |-- mode: integer (nullable = true)
>      |    |-- data: binary (nullable = true)