In [None]:
%matplotlib inline

# 0. Import libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook as tqdm

In [None]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import log_loss, accuracy_score

In [None]:
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Lambda, BatchNormalization, Dropout
from keras.optimizers import Adam, SGD, Optimizer
from keras.legacy import interfaces
from keras.preprocessing import image
from keras.callbacks import LearningRateScheduler
from keras.losses import categorical_crossentropy
from keras import backend as K

In [None]:
import SGDR_keras

# 1.Download & preprocess MNIST data

Download MNIST data

In [None]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()
(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

Keras model needs one more dimension for number of channels.<br>
MNIST data has only one channel, so expand one dimension

In [None]:
X_test = np.expand_dims(X_test,3)
X_train = np.expand_dims(X_train,3)

Check the shape of input data

In [None]:
X_train.shape

Onehot encode the target data

In [None]:
# Fit a one-hot encoder on the training labels (reshaped to a single column).
# NOTE(review): scikit-learn 1.2 renamed the `sparse` argument to `sparse_output`
# and later releases dropped the old name — adjust if running on a newer release.
enc = OneHotEncoder(sparse=False)
enc.fit(y_train.reshape(-1, 1))

In [None]:
y_train = enc.transform(y_train.reshape(-1, 1))
y_test = enc.transform(y_test.reshape(-1, 1))

In [None]:
y_train[:5]

Use `ImageDataGenerator` to build the batch iterators:<br>
`batches` for the training data and `test_batches` for the test data.

In [None]:
batch_size = 3000
gen = image.ImageDataGenerator()
batches = gen.flow(X_train, y_train, batch_size=batch_size)
test_batches = image.ImageDataGenerator().flow(X_test, y_test, batch_size=batch_size)

Input data should be normalized before it is fed into the model.<br>
Compute the mean and standard deviation of the training input data.

In [None]:
mean_px = X_train.mean().astype(np.float32)
std_px = X_train.std().astype(np.float32)

Define a function which returns the normalized input data.<br>
This function will be used as the input layer of the model.

In [None]:
def norm_input(x):
    """Standardize an input tensor with the training-set pixel statistics.

    Subtracts the notebook-level `mean_px` and divides by the notebook-level
    `std_px`; both names are looked up when the function is called.
    """
    centered = x - mean_px
    return centered / std_px

# 2. Define model

In [None]:
def get_model():
    """Build the (uncompiled) fully connected classifier used in this notebook.

    Layer stack: input normalization via `norm_input` on 28x28x1 inputs,
    flatten, two identical hidden groups (Dense 64 relu -> Dense 64 relu ->
    BatchNormalization -> Dropout 0.2) and a 10-unit softmax output.
    """
    model = Sequential()
    model.add(Lambda(norm_input, input_shape=(28, 28, 1)))
    model.add(Flatten())

    # The two hidden groups are identical, so build them in a loop.
    for _ in range(2):
        model.add(Dense(64, activation='relu'))
        model.add(Dense(64, activation='relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.2))

    model.add(Dense(10, activation='softmax'))
    return model

Get SGD and SGDR model

In [None]:
model_sgd = get_model()
model_sgdr = get_model()

Copy weight of `model_sgd` to `model_sgdr` to make identical initial starting point

In [None]:
w = model_sgd.get_weights()
model_sgdr.set_weights(w)

# 3. Train model

In [None]:
model_index = 1

In [None]:
weight_path = 'weights/'

Since Keras records training history only once per epoch, the number of epochs has to be changed if more history information is needed.<br>
The `get_epochs` function returns the hypothetical number of epochs for a given real number of epochs.

In [None]:
def get_epochs(n_epochs):
    """Convert a number of real epochs into hypothetical (Keras) epochs.

    One real epoch is `n_batch` iterations and one hypothetical epoch is
    `steps_per_epoch` iterations; both are notebook-level names read at call
    time. The result is truncated to an int.
    """
    total_iterations = n_epochs * n_batch
    return int(total_iterations / steps_per_epoch)

`n_batch` is the number of batches in the full training dataset.<br>
Since the number of training samples is 60000, `n_batch` equals 60000 / `batch_size`.

In [None]:
n_batch = len(batches); n_batch

After `n_batch` iterations, the model has been trained for one real epoch.<br>
After `steps_per_epoch` iterations, the model has been trained for one hypothetical epoch, at which point Keras records the training history.

### 3.1 Train SGD model

define a list which will record the training history information of SGD model

In [None]:
sgd_hist = []

`steps_per_epoch` defines the number of steps in one hypothetical epoch.<br>
Note that one real epoch is one full pass over the training data.<br>
If `steps_per_epoch` is small, then Keras will record training history information more often.

In [None]:
steps_per_epoch = 10

Compile the model with SGD optimizer and train the model

In [None]:
sgd = SGD(lr=0.1)
model_sgd.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
sgd_hist.append(model_sgd.fit_generator(batches, epochs=get_epochs(200),
                                        validation_data=test_batches, steps_per_epoch=steps_per_epoch, verbose=1))
model_sgd.evaluate_generator(batches)[1], model_sgd.evaluate_generator(test_batches)[1]

Training is over!<br>
Save the weights of the model.

In [None]:
# model_sgd.save_weights(f'{weight_path}mnist-sgd{model_index}.h5')

In [None]:
# model_sgd.load_weights(f'{weight_path}mnist-sgd{model_index}.h5')

### 3.2 Train SGDR model

In [None]:
sgdr_hist = []

In [None]:
steps_per_epoch = 10
lr = 0.2

`iter_per_epoch` of SGDR defines the number of iterations in one learning-rate cycle.<br>
If it is the same as `n_batch`, which is the number of iterations in one real epoch, then SGDR resets the learning rate every epoch.

In [None]:
sgdr = SGDR_keras.SGDR(lr=lr, iter_per_epoch=n_batch)
model_sgdr.compile(sgdr, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
sgdr_hist.append(model_sgdr.fit_generator(batches, epochs=get_epochs(1),
                                          validation_data=test_batches, steps_per_epoch=steps_per_epoch, verbose=1))
model_sgdr.evaluate_generator(batches)[1], model_sgdr.evaluate_generator(test_batches)[1]

In [None]:
sgdr = SGDR_keras.SGDR(lr=lr, iter_per_epoch=2*n_batch)
model_sgdr.compile(sgdr, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
sgdr_hist.append(model_sgdr.fit_generator(batches, epochs=get_epochs(2),
                                          validation_data=test_batches, steps_per_epoch=steps_per_epoch, verbose=1))
model_sgdr.evaluate_generator(batches)[1], model_sgdr.evaluate_generator(test_batches)[1]

In [None]:
sgdr = SGDR_keras.SGDR(lr=lr, iter_per_epoch=4*n_batch)
model_sgdr.compile(sgdr, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
sgdr_hist.append(model_sgdr.fit_generator(batches, epochs=get_epochs(4),
                                          validation_data=test_batches, steps_per_epoch=steps_per_epoch, verbose=1))
model_sgdr.evaluate_generator(batches)[1], model_sgdr.evaluate_generator(test_batches)[1]

In [None]:
sgdr = SGDR_keras.SGDR(lr=lr, iter_per_epoch=8*n_batch)
model_sgdr.compile(sgdr, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
sgdr_hist.append(model_sgdr.fit_generator(batches, epochs=get_epochs(8),
                                          validation_data=test_batches, steps_per_epoch=steps_per_epoch, verbose=1))
model_sgdr.evaluate_generator(batches)[1], model_sgdr.evaluate_generator(test_batches)[1]

In [None]:
sgdr = SGDR_keras.SGDR(lr=lr, iter_per_epoch=16*n_batch)
model_sgdr.compile(sgdr, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train this SGDR stage and record its history.
# NOTE(review): the optimizer compiled for this stage uses iter_per_epoch=16*n_batch,
# but only 15 real epochs are trained here (the earlier stages train 1, 2, 4 and 8
# epochs, matching their cycle length) — confirm this is intentional.
sgdr_hist.append(model_sgdr.fit_generator(batches, epochs=get_epochs(15),
                                          validation_data=test_batches, steps_per_epoch=steps_per_epoch, verbose=1))
# Display the second evaluation value (the compiled 'accuracy' metric) on the train and test iterators.
model_sgdr.evaluate_generator(batches)[1], model_sgdr.evaluate_generator(test_batches)[1]

In [None]:
sgdr = SGDR_keras.SGDR(lr=lr, iter_per_epoch=32*n_batch)
model_sgdr.compile(sgdr, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train this SGDR stage and record its history.
# NOTE(review): the optimizer compiled for this stage uses iter_per_epoch=32*n_batch,
# but only 31 real epochs are trained here (the earlier stages train 1, 2, 4 and 8
# epochs, matching their cycle length) — confirm this is intentional.
sgdr_hist.append(model_sgdr.fit_generator(batches, epochs=get_epochs(31),
                                          validation_data=test_batches, steps_per_epoch=steps_per_epoch, verbose=1))
# Display the second evaluation value (the compiled 'accuracy' metric) on the train and test iterators.
model_sgdr.evaluate_generator(batches)[1], model_sgdr.evaluate_generator(test_batches)[1]

In [None]:
sgdr = SGDR_keras.SGDR(lr=lr, iter_per_epoch=64*n_batch)
model_sgdr.compile(sgdr, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train this SGDR stage and record its history.
# NOTE(review): the optimizer compiled for this stage uses iter_per_epoch=64*n_batch,
# but only 63 real epochs are trained here (the earlier stages train 1, 2, 4 and 8
# epochs, matching their cycle length) — confirm this is intentional.
sgdr_hist.append(model_sgdr.fit_generator(batches, epochs=get_epochs(63),
                                          validation_data=test_batches, steps_per_epoch=steps_per_epoch, verbose=1))
# Display the second evaluation value (the compiled 'accuracy' metric) on the train and test iterators.
model_sgdr.evaluate_generator(batches)[1], model_sgdr.evaluate_generator(test_batches)[1]

Training is over!<br>
Save the weights of the model

In [None]:
# model_sgdr.save_weights(f'{weight_path}mnist-sgdr{model_index}.h5')

In [None]:
# model_sgdr.load_weights(f'{weight_path}mnist-sgdr{model_index}.h5')

# 4. Explore results

In [None]:
def _history_values(history, plot_type, prefix=''):
    """Return `history[prefix + plot_type]`, accepting either accuracy key spelling.

    Older Keras releases log accuracy as 'acc', newer ones as 'accuracy'; when
    the requested spelling is absent the other one is tried before giving up.
    """
    aliases = {'acc': 'accuracy', 'accuracy': 'acc'}
    candidates = [plot_type]
    if plot_type in aliases:
        candidates.append(aliases[plot_type])

    for name in candidates:
        key = f'{prefix}{name}'
        if key in history:
            return history[key]
    raise KeyError(f'{prefix}{plot_type}')


def plot_hist(result, plot_type='loss', title='enter title!'):
    """Plot the training and validation curves of several consecutive fits.

    Args:
        result: iterable of Keras `History` objects (anything exposing a
            `.history` dict); their curves are concatenated in order.
        plot_type: metric name to plot, e.g. 'loss' or 'acc'. The validation
            curve is read from the matching 'val_<plot_type>' entry. 'acc' and
            'accuracy' are treated as interchangeable.
        title: figure title.

    Raises:
        KeyError: if a history contains neither spelling of the requested metric.
    """
    train_res, test_res = [], []

    for hist in result:
        # extend() appends in place instead of rebuilding the list for every fit.
        train_res.extend(_history_values(hist.history, plot_type))
        test_res.extend(_history_values(hist.history, plot_type, prefix='val_'))

    plt.figure(figsize=(10,6))
    plt.title(title, size=15)
    plt.plot(train_res)
    plt.plot(test_res)
    plt.ylabel(plot_type)
    plt.xlabel('iterations')
    plt.legend(['train', 'test'], loc='upper right')

In [None]:
plot_hist(sgd_hist, 'loss', 'SGD loss')
plt.ylim(0, 0.5)

In [None]:
plot_hist(sgd_hist, 'acc', 'SGD Accuracy')
plt.ylim(0.7, 1)

In [None]:
plot_hist(sgdr_hist, 'loss', 'SGDR loss')
plt.ylim(0, 0.5)

In [None]:
plot_hist(sgdr_hist, 'acc', 'SGDR Accuracy')
plt.ylim(0.7, 1)

# 5. filter normalization

`make_direction` function returns a filter-normalized, randomly generated direction corresponding to the given model

In [None]:
def make_direction(model):
    """Build a random direction in weight space, rescaled to the model's weight norms.

    The returned list has one array per entry of `model.get_weights()` (same
    order, same shapes), so it can be added to the weights element-wise:

    * Conv2D: for every (input channel, filter) slice of the kernel a Gaussian
      slice is drawn and rescaled to the Frobenius norm of the matching slice
      of the model kernel.
    * Dense: one Gaussian matrix is drawn for the whole kernel and rescaled to
      the Frobenius norm of the model kernel.
    * Biases (only when the layer has one) are Gaussian draws multiplied by the
      same scale factor as the kernel direction they belong to.
    * Every other layer that owns weights (e.g. BatchNormalization) gets
      all-zero arrays, i.e. it is left unperturbed. Emitting these zeros keeps
      the list aligned with `model.get_weights()`.

    Args:
        model: Keras model; its layers are inspected through `get_weights()`.

    Returns:
        list of numpy arrays, one per model weight array.
    """
    direction = []
    for layer in model.layers:
        w = layer.get_weights()

        # convolutional layer
        if isinstance(layer, Conv2D):
            kernel = w[0]
            has_bias = len(w) > 1  # layers built with use_bias=False have no w[1]

            filter_w = np.zeros(kernel.shape)
            bias_w = np.zeros(w[1].shape) if has_bias else None

            # The kernel is indexed as [row, col, input channel, filter], so take
            # both counts from its shape; this does not depend on the data format
            # of the layer input.
            n_in, n_filters = kernel.shape[2], kernel.shape[3]

            for f in range(n_filters):
                for i in range(n_in):
                    kernel_slice = kernel[:, :, i, f]

                    # randomly generate a direction for this slice
                    temp_direction = np.random.normal(size=kernel_slice.shape)
                    if has_bias:
                        temp_bias = np.random.normal(size=w[1][f].shape)

                    # norms of the random slice and of the model slice
                    norm_model = np.linalg.norm(kernel_slice, ord='fro')
                    norm_direction = np.linalg.norm(temp_direction, ord='fro')

                    # rescale the random slice to the norm of the model slice
                    filter_w[:, :, i, f] = temp_direction / norm_direction * norm_model
                    if has_bias:
                        # Re-assigned for every input channel: the value from the
                        # last channel is the one that remains.
                        bias_w[f] = temp_bias / norm_direction * norm_model

            direction.append(filter_w)
            if has_bias:
                direction.append(bias_w)

        # fully connected layer
        elif isinstance(layer, Dense):
            kernel = w[0]
            has_bias = len(w) > 1

            # randomly generate a direction for the whole kernel
            temp_direction = np.random.normal(size=kernel.shape)
            if has_bias:
                temp_bias = np.random.normal(size=w[1].shape)

            # norms of the random matrix and of the model kernel
            norm_model = np.linalg.norm(kernel, ord='fro')
            norm_direction = np.linalg.norm(temp_direction, ord='fro')

            direction.append(temp_direction / norm_direction * norm_model)
            if has_bias:
                direction.append(temp_bias / norm_direction * norm_model)

        # BatchNormalization and any other layer: zero direction per weight array
        # (a no-op for layers without weights)
        else:
            direction.extend(np.zeros(arr.shape) for arr in w)

    return direction

`direction_step` function returns a model whose weights are `original_model_weights + alpha * direction`

In [None]:
def direction_step(direction, model, alpha):
    """Return a fresh model whose weights are `model`'s weights moved along `direction`.

    Args:
        direction: list of arrays, one per entry of `model.get_weights()` and
            shaped like it.
        model: model providing the starting weights; it is not modified.
        alpha: scalar step size applied to every array of `direction`.

    Returns:
        A newly built and compiled model (from `get_model()`) holding
        `weights + alpha * direction`.

    Raises:
        ValueError: if `direction` and the model weights differ in length.
    """
    # get original model weights
    weight = model.get_weights()
    if len(direction) != len(weight):
        raise ValueError(
            f'direction has {len(direction)} arrays but the model has {len(weight)} weight arrays')

    step_model = get_model()
    # The model is only evaluated, never trained, so the optimizer is a placeholder.
    # Using the string alias gives each model its own optimizer and removes the
    # dependency on a notebook-level `sgd` variable having been created earlier.
    step_model.compile('sgd', loss='categorical_crossentropy', metrics=['accuracy'])

    new_weights = [w + alpha * d for w, d in zip(weight, direction)]
    step_model.set_weights(new_weights)

    return step_model

# 6. 1-D FN plot

In [None]:
direction = make_direction(model_sgd)

In [None]:
alpha_list = np.linspace(-1, 1, num=35)

In [None]:
def step_1d_fn(model, alpha_list, direction):
    """Evaluate `model` at each step size in `alpha_list` along `direction`.

    For every alpha a shifted model is built with `direction_step` and evaluated
    on the notebook-level `batches` (train) and `test_batches` (test) iterators.

    Returns:
        (loss_train, acc_train, loss_test, acc_test): four lists with one entry
        per alpha, in the order of `alpha_list`.
    """
    train_metrics, test_metrics = [], []

    for alpha in tqdm(alpha_list):
        shifted_model = direction_step(direction, model, alpha)
        train_metrics.append(shifted_model.evaluate_generator(batches))
        test_metrics.append(shifted_model.evaluate_generator(test_batches))

    # Each evaluation result holds the loss at index 0 and the accuracy at index 1.
    loss_train = [metrics[0] for metrics in train_metrics]
    acc_train = [metrics[1] for metrics in train_metrics]
    loss_test = [metrics[0] for metrics in test_metrics]
    acc_test = [metrics[1] for metrics in test_metrics]

    return loss_train, acc_train, loss_test, acc_test

In [None]:
loss_sgd_train, acc_sgd_train, loss_sgd_test, acc_sgd_test = step_1d_fn(model_sgd, alpha_list, direction)

In [None]:
loss_sgdr_train, acc_sgdr_train, loss_sgdr_test, acc_sgdr_test = step_1d_fn(model_sgdr, alpha_list, direction)

In [None]:
plt.figure(figsize=(9,5))
plt.plot(alpha_list, loss_sgdr_test, label='test loss sgdr')
plt.plot(alpha_list, loss_sgd_test, label='test loss sgd')
plt.legend()

In [None]:
plt.figure(figsize=(9,5))
plt.plot(alpha_list, loss_sgdr_train, label='train loss sgdr')
plt.plot(alpha_list, loss_sgd_train, label='train loss sgd')
plt.legend()

In [None]:
plt.figure(figsize=(9,5))
plt.plot(alpha_list, acc_sgdr_test, label='test acc sgdr')
plt.plot(alpha_list, acc_sgd_test, label='test acc sgd')
plt.legend()

In [None]:
plt.figure(figsize=(9,5))
plt.plot(alpha_list, acc_sgdr_train, label='train acc sgdr')
plt.plot(alpha_list, acc_sgd_train, label='train acc sgd')
plt.legend()

# 7. 2D FN plot

In [None]:
path = 'data/'

In [None]:
direction1 = make_direction(model_sgd)
direction2 = make_direction(model_sgd)

In [None]:
alpha_list = np.linspace(-1, 1, num=13)

In [None]:
def step_2d_fn(model, alpha_list, direction1, direction2):
    """Evaluate `model` on a 2-D grid of steps along two directions.

    For every pair (a1, a2) from `alpha_list` x `alpha_list` the weights
    `w + a1 * direction1 + a2 * direction2` are evaluated on the notebook-level
    `test_batches` iterator.

    Args:
        model: model providing the centre point of the grid; not modified.
        alpha_list: step sizes used along both directions.
        direction1, direction2: lists of arrays aligned with `model.get_weights()`.

    Returns:
        (loss_test, acc_test): nested lists where element [i][j] belongs to
        a1 = alpha_list[i] (direction1) and a2 = alpha_list[j] (direction2).
    """
    loss_test, acc_test = [], []

    for a1 in tqdm(alpha_list):
        tmp_loss, tmp_acc = [], []
        for a2 in alpha_list:

            # Combine both displacements first so that only one model has to be
            # built per grid point (instead of one per direction). The weights
            # agree with two successive steps up to floating-point rounding.
            combined = [a1 * d1 + a2 * d2 for d1, d2 in zip(direction1, direction2)]
            temp_model = direction_step(combined, model, 1.0)

            eval_test = temp_model.evaluate_generator(test_batches)

            tmp_loss.append(eval_test[0])
            tmp_acc.append(eval_test[1])

        loss_test.append(tmp_loss)
        acc_test.append(tmp_acc)

    return loss_test, acc_test

In [None]:
loss_sgd_test, acc_sgd_test = step_2d_fn(model_sgd, alpha_list, direction1, direction2)

In [None]:
# np.save(f'{path}mnist-loss_sgd_test.npy', np.array(loss_sgd_test))

In [None]:
# loss_sgd_test=np.load(f'{path}mnist-loss_sgd_test.npy')

In [None]:
loss_sgdr_test, acc_sgdr_test = step_2d_fn(model_sgdr, alpha_list, direction1, direction2)

In [None]:
# np.save(f'{path}mnist-loss_sgdr_test.npy', np.array(loss_sgdr_test))

In [None]:
# loss_sgdr_test=np.load(f'{path}mnist-loss_sgdr_test.npy')

In [None]:
plt.figure(figsize=(8,8))
# levels = np.arange(0,18,1.5)
# levels=(np.arange(1, 12, 1))
c = plt.contour(alpha_list, alpha_list, loss_sgd_test)
plt.clabel(c, inline=1, fontsize=10)
# step_2d_fn fills rows by the direction1 step and columns by the direction2 step;
# contour draws columns along x and rows along y.
plt.xlabel('alpha (direction2)')
plt.ylabel('alpha (direction1)')
plt.title('SGD test countour plot', fontsize=15)

In [None]:
plt.figure(figsize=(8,8))
# Contour levels are left to matplotlib's defaults: the `levels` array that used to
# be assigned here was never passed to plt.contour, so it had no effect on the plot.
c = plt.contour(alpha_list, alpha_list, loss_sgdr_test)
plt.clabel(c, inline=1, fontsize=10)
# step_2d_fn fills rows by the direction1 step and columns by the direction2 step;
# contour draws columns along x and rows along y.
plt.xlabel('alpha (direction2)')
plt.ylabel('alpha (direction1)')
plt.title('SGDR test countour plot', fontsize=15)

# 8. Linear interpolation

In [None]:
def make_combination(model1, model2, alpha):
    """Return a compiled model holding a linear blend of two models' weights.

    Every weight array is `alpha * model1 + (1 - alpha) * model2`, so alpha=1
    reproduces `model1` and alpha=0 reproduces `model2`.
    """
    new_model = get_model()
    weights_a, weights_b = model1.get_weights(), model2.get_weights()

    # Blend as many weight arrays as the freshly built model owns.
    n_arrays = len(new_model.get_weights())
    blended = [alpha * weights_a[k] + (1-alpha) * weights_b[k] for k in range(n_arrays)]

    new_model.set_weights(blended)
    new_model.compile('sgd', loss='categorical_crossentropy', metrics=['accuracy'])
    return new_model

In [None]:
alpha_list = np.linspace(-1, 2, 40)

In [None]:
# Evaluate loss/accuracy on the training iterator along the straight line through
# the two trained models. make_combination weights model_sgd by `a` and model_sgdr
# by `1 - a`, so a=1 is the SGD model and a=0 the SGDR model; values of `a`
# outside [0, 1] extrapolate beyond them.
loss_list, acc_list = [], []
for a in tqdm(alpha_list):
    model_tmp = make_combination(model_sgd, model_sgdr, a)
    loss, acc = model_tmp.evaluate_generator(batches)
    # Drop the reference to the temporary model before the next iteration.
    del model_tmp
    
    loss_list.append(loss)
    acc_list.append(acc)

In [None]:
plt.figure(figsize = (9, 6))
plt.plot(alpha_list, acc_list, label='acc')
plt.ylabel('accuracy')
plt.legend(loc=2)