In [None]:
%matplotlib inline

# 0. Import libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook as tqdm
import pickle

In [None]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import log_loss, accuracy_score
from sklearn.decomposition import PCA

In [None]:
from keras.datasets import cifar10
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Lambda, Input, BatchNormalization
from keras.optimizers import Adam, SGD, Optimizer
from keras.legacy import interfaces
from keras.preprocessing import image
from keras.callbacks import LearningRateScheduler, ModelCheckpoint
from keras.losses import categorical_crossentropy
from keras import backend as K

In [None]:
import SGDR_keras

# 1.Download & preprocess CIFAR-10 data

Download CIFAR-10 data

In [None]:
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

Check the shape of input data

In [None]:
X_train.shape

Onehot encode the target data

In [None]:
enc = OneHotEncoder(sparse=False)
enc.fit(y_train.reshape(-1, 1))

In [None]:
y_train = enc.transform(y_train.reshape(-1, 1))
y_test = enc.transform(y_test.reshape(-1, 1))

In [None]:
y_train[:5]

Use `ImageDataGenerator` to create the batch iterators:<br>
`batches` for the training data and `test_batches` for the test data.

In [None]:
# Number of samples requested per batch for both iterators below.
batch_size = 2000
# Plain generator: no augmentation arguments are passed.
gen = image.ImageDataGenerator()
# Iterator over the training images and their one-hot labels.
batches = gen.flow(X_train, y_train, batch_size=batch_size)
# Iterator over the test images and labels; passed as validation data and to evaluate_generator later.
test_batches = image.ImageDataGenerator().flow(X_test, y_test, batch_size=batch_size)

Input data should be normalized before it is fed into the model.<br>
Compute the per-channel mean and standard deviation of the training input data.

In [None]:
# Per-channel statistics of the training images; the channel is the last axis.
mean_px = np.array([X_train[..., ch].mean() for ch in range(3)], dtype=np.float32)
std_px = np.array([X_train[..., ch].std() for ch in range(3)], dtype=np.float32)

Define a function that returns the normalized input data.<br>
This function will be used as the input layer of the model.

In [None]:
def norm_input(x):
    """Standardize `x` using the module-level `mean_px` and `std_px` arrays."""
    centered = x - mean_px
    return centered / std_px

# 2. Define model

In [None]:
def get_model():
    """Build a fresh, uncompiled CNN for 32x32x3 inputs with a 10-way softmax output.

    The network starts with the `norm_input` Lambda layer, followed by four
    stages of (Conv2D, Conv2D, MaxPooling2D) with 32, 64, 128 and 256 filters,
    then Flatten -> Dense(256) -> BatchNormalization -> Dense(10, softmax).
    """
    layers = [Lambda(norm_input, input_shape=(32, 32, 3))]

    # Each stage: two 3x3 'same'-padded ReLU convolutions, then a max-pool.
    for n_filters in (32, 64, 128, 256):
        layers.append(Conv2D(n_filters, 3, activation='relu', padding='same'))
        layers.append(Conv2D(n_filters, 3, activation='relu', padding='same'))
        layers.append(MaxPooling2D())

    # Classifier head.
    layers.append(Flatten())
    layers.append(Dense(256, activation='relu'))
    layers.append(BatchNormalization())
    layers.append(Dense(10, activation='softmax'))

    return Sequential(layers)

Get SGD and SGDR model

In [None]:
model_sgd = get_model()
model_sgdr = get_model()

Give the SGD and SGDR models the same initial weights so that both start from an identical point

In [None]:
w = model_sgd.get_weights()
model_sgdr.set_weights(w)

# 3. Train model

In [None]:
model_index = 2

In [None]:
weight_path = 'weights/'

After `n_batch` iterations, the model has been trained for one real epoch.<br>
After `steps_per_epoch` iterations, the model has been trained for one hypothetical epoch, so the training history is recorded at that point.

In [None]:
# Checkpoint path template for the SGD run; the '{epoch:02d}' placeholder is left
# unformatted here so the callback can fill it in. The 'SGD-' prefix is what the
# checkpoint-loading loop later in the notebook reads back.
filepath = f'{weight_path}cifar10_path_hist/' + 'SGD-{epoch:02d}.hdf5'
# Weights-only checkpoints, not restricted to the best epoch.
checkpoint = ModelCheckpoint(filepath, save_best_only=False, save_weights_only=True)

### 3.1 Train SGD model

Compile the model with SGD optimizer and train the model

In [None]:
# Plain SGD baseline with learning rate 0.1. `sgd` stays at module level because
# direction_step() later compiles its scratch models with it.
sgd = SGD(lr=0.1)
model_sgd.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
model_sgd.fit_generator(batches, epochs=50, callbacks=[checkpoint], validation_data=test_batches, verbose=1)

Training is over!<br>
Save the weights of the model.

In [None]:
# model_sgd.save_weights(f'{weight_path}cifar10-sgd{model_index}.h5')

In [None]:
# model_sgd.load_weights(f'{weight_path}cifar10-sgd{model_index}.h5')

### 3.2 Train SGDR model

In [None]:
n_batch = len(batches); n_batch

In [None]:
lr = 0.2

`iter_per_epoch` of SGDR defines the number of iterations in one learning-rate cycle.<br>
If it is the same as `n_batch`, the number of iterations in one real epoch, then SGDR resets the learning rate every epoch.

In [None]:
# Cycle 1: fresh SGDR optimizer whose learning-rate cycle lasts n_batch iterations
# (one real epoch). Recompiling attaches it to the already-initialized model_sgdr.
sgdr = SGDR_keras.SGDR(lr=lr, iter_per_epoch=n_batch)
model_sgdr.compile(sgdr, loss='categorical_crossentropy', metrics=['accuracy'])
# Weights-only checkpoints for this cycle; the 'SGDR-1-' prefix is what the
# checkpoint-loading loop later in the notebook looks for.
filepath = f'{weight_path}cifar10_path_hist/' + 'SGDR-1-{epoch:02d}.hdf5'
checkpoint = ModelCheckpoint(filepath, save_best_only=False, save_weights_only=True)

In [None]:
model_sgdr.fit_generator(batches, epochs=1, callbacks=[checkpoint], validation_data=test_batches, verbose=1)

In [None]:
# Cycle 2: fresh SGDR optimizer with a learning-rate cycle of 2*n_batch iterations
# (two real epochs); training continues from model_sgdr's current weights.
sgdr = SGDR_keras.SGDR(lr=lr, iter_per_epoch=2*n_batch)
model_sgdr.compile(sgdr, loss='categorical_crossentropy', metrics=['accuracy'])
# Weights-only checkpoints for this cycle, stored under the 'SGDR-2-' prefix.
filepath = f'{weight_path}cifar10_path_hist/' + 'SGDR-2-{epoch:02d}.hdf5'
checkpoint = ModelCheckpoint(filepath, save_best_only=False, save_weights_only=True)

In [None]:
model_sgdr.fit_generator(batches, epochs=2, callbacks=[checkpoint], validation_data=test_batches, verbose=1)

In [None]:
# Cycle 3: fresh SGDR optimizer with a learning-rate cycle of 4*n_batch iterations
# (four real epochs); training continues from model_sgdr's current weights.
sgdr = SGDR_keras.SGDR(lr=lr, iter_per_epoch=4*n_batch)
model_sgdr.compile(sgdr, loss='categorical_crossentropy', metrics=['accuracy'])
# Weights-only checkpoints for this cycle, stored under the 'SGDR-4-' prefix.
filepath = f'{weight_path}cifar10_path_hist/' + 'SGDR-4-{epoch:02d}.hdf5'
checkpoint = ModelCheckpoint(filepath, save_best_only=False, save_weights_only=True)

In [None]:
model_sgdr.fit_generator(batches, epochs=4, callbacks=[checkpoint], validation_data=test_batches, verbose=1)

In [None]:
# Cycle 4: fresh SGDR optimizer with a learning-rate cycle of 8*n_batch iterations
# (eight real epochs); training continues from model_sgdr's current weights.
sgdr = SGDR_keras.SGDR(lr=lr, iter_per_epoch=8*n_batch)
model_sgdr.compile(sgdr, loss='categorical_crossentropy', metrics=['accuracy'])
# Weights-only checkpoints for this cycle, stored under the 'SGDR-8-' prefix.
filepath = f'{weight_path}cifar10_path_hist/' + 'SGDR-8-{epoch:02d}.hdf5'
checkpoint = ModelCheckpoint(filepath, save_best_only=False, save_weights_only=True)

In [None]:
model_sgdr.fit_generator(batches, epochs=8, callbacks=[checkpoint], validation_data=test_batches, verbose=1)

In [None]:
# Cycle 5: fresh SGDR optimizer with a learning-rate cycle of 16*n_batch iterations
# (sixteen real epochs); training continues from model_sgdr's current weights.
sgdr = SGDR_keras.SGDR(lr=lr, iter_per_epoch=16*n_batch)
model_sgdr.compile(sgdr, loss='categorical_crossentropy', metrics=['accuracy'])
# Weights-only checkpoints for this cycle, stored under the 'SGDR-16-' prefix.
filepath = f'{weight_path}cifar10_path_hist/' + 'SGDR-16-{epoch:02d}.hdf5'
checkpoint = ModelCheckpoint(filepath, save_best_only=False, save_weights_only=True)

In [None]:
model_sgdr.fit_generator(batches, epochs=16, callbacks=[checkpoint], validation_data=test_batches, verbose=1)

In [None]:
# Cycle 6: fresh SGDR optimizer with a learning-rate cycle of 32*n_batch iterations
# (thirty-two real epochs); training continues from model_sgdr's current weights.
sgdr = SGDR_keras.SGDR(lr=lr, iter_per_epoch=32*n_batch)
model_sgdr.compile(sgdr, loss='categorical_crossentropy', metrics=['accuracy'])
# Weights-only checkpoints for this cycle, stored under the 'SGDR-32-' prefix.
filepath = f'{weight_path}cifar10_path_hist/' + 'SGDR-32-{epoch:02d}.hdf5'
checkpoint = ModelCheckpoint(filepath, save_best_only=False, save_weights_only=True)

In [None]:
model_sgdr.fit_generator(batches, epochs=32, callbacks=[checkpoint], validation_data=test_batches, verbose=1)

Training is over!<br>
Save the weights of the model

In [None]:
# model_sgdr.save_weights(f'{weight_path}cifar10-sgdr{model_index}.h5')

In [None]:
# model_sgdr.load_weights(f'{weight_path}cifar10-sgdr{model_index}.h5')

# 4. get PCA direction

### 4.1 SGD PCA direction

Read weight files, then make all those weights into one single (num_params x num_epochs) matrix.

In [None]:
# Build the (num_params, num_epochs) matrix of flattened SGD checkpoints.
#
# One model instance is enough: load_weights() overwrites its parameters for
# every checkpoint, so there is no need to construct 50 separate Keras models.
new_model = get_model()

# Collect one column per checkpoint and stack once at the end. Growing the matrix
# with np.column_stack inside the loop re-copies everything on each iteration.
sgd_columns = []
for i in tqdm(range(50)):
    # Read each hdf5 file (checkpoint files are numbered from 01)
    w_path = f'{weight_path}cifar10_path_hist/SGD-{i+1:0>2}.hdf5'
    new_model.load_weights(w_path)

    # Flatten kernel then bias of every Conv2D/Dense layer (BN weights are not considered).
    # Cast to float64 to keep the dtype the matrix had when it was grown from np.array([]).
    weights = np.concatenate([
        w.ravel()
        for l in new_model.layers
        if isinstance(l, (Conv2D, Dense))
        for w in l.get_weights()[:2]
    ]).astype(np.float64)
    sgd_columns.append(weights)

# Always assigned fresh, so re-running the cell needs no `del weights_sgd` guard.
weights_sgd = np.column_stack(sgd_columns)

In [None]:
# Build the (num_params, num_epochs) matrix of flattened SGDR checkpoints.

# Cycle lengths in epochs; cycle c wrote checkpoints SGDR-c-01 ... SGDR-c-<c>.
cycle_list = [1, 2, 4, 8, 16, 32]

# One model instance is enough: load_weights() overwrites its parameters for
# every checkpoint, so there is no need to construct a new Keras model each time.
new_model = get_model()

# Collect one column per checkpoint and stack once at the end. Growing the matrix
# with np.column_stack inside the loop re-copies everything on each iteration.
sgdr_columns = []
for c in cycle_list:
    for i in range(c):
        # Read each hdf5 file
        w_path = f'{weight_path}cifar10_path_hist/SGDR-{c}-{i+1:0>2}.hdf5'
        new_model.load_weights(w_path)

        # Flatten kernel then bias of every Conv2D/Dense layer (BN weights are not considered).
        # Cast to float64 to keep the dtype the matrix had when it was grown from np.array([]).
        weights = np.concatenate([
            w.ravel()
            for l in new_model.layers
            if isinstance(l, (Conv2D, Dense))
            for w in l.get_weights()[:2]
        ]).astype(np.float64)
        sgdr_columns.append(weights)

# Always assigned fresh, so re-running the cell needs no `del weights_sgdr` guard.
weights_sgdr = np.column_stack(sgdr_columns)

Check the shape of `weights_sgd`. This should be (number of params, number of epochs).

In [None]:
weights_sgd.shape, weights_sgdr.shape

Then compute variation of weights

In [None]:
# Displacement of every checkpoint from the FINAL checkpoint (last column), with the
# all-zero final column dropped. The previous hard-coded column index 4 used only the
# first five checkpoints and ignored the rest of the trajectory, although the loss
# surface is later drawn around the final trained models.
delta_sgd = (weights_sgd - weights_sgd[:, [-1]])[:, :-1]
delta_sgdr = (weights_sgdr - weights_sgdr[:, [-1]])[:, :-1]

Do pca for `delta_sgd`

In [None]:
# Two principal directions of the SGD displacement vectors (one row per checkpoint).
pca = PCA(n_components=2).fit(delta_sgd.T)
# Store the components column-wise so that [:, k] selects the k-th direction.
sgd_pca_direction = pca.components_.T
# Coordinates of both trajectories in the SGD PCA basis.
# NOTE(review): the raw weights are projected here, not the displacements the PCA
# was fitted on — confirm this is the intended origin for the scatter plots.
sgd_sgdpca = pca.transform(weights_sgd.T)
sgdr_sgdpca = pca.transform(weights_sgdr.T)
sgd_pca_direction.shape

In [None]:
# Two principal directions of the SGDR displacement vectors (one row per checkpoint).
pca = PCA(n_components=2).fit(delta_sgdr.T)
# Store the components column-wise so that [:, k] selects the k-th direction.
sgdr_pca_direction = pca.components_.T
# Coordinates of both trajectories in the SGDR PCA basis.
# NOTE(review): the raw weights are projected here, not the displacements the PCA
# was fitted on — confirm this is the intended origin for the scatter plots.
sgd_sgdrpca = pca.transform(weights_sgd.T)
sgdr_sgdrpca = pca.transform(weights_sgdr.T)
sgdr_pca_direction.shape

In [None]:
path = 'data/'

In [None]:
# np.save(f'{path}sgd_pca_direction.npy', sgd_pca_direction)
# np.save(f'{path}sgd_sgdpca.npy', sgd_sgdpca)
# np.save(f'{path}sgdr_sgdpca.npy', sgdr_sgdpca)

# np.save(f'{path}sgdr_pca_direction.npy', sgdr_pca_direction)
# np.save(f'{path}sgd_sgdrpca.npy', sgd_sgdrpca)
# np.save(f'{path}sgdr_sgdrpca.npy', sgdr_sgdrpca)

In [None]:
# sgd_pca_direction = np.load(f'{path}sgd_pca_direction.npy')
# sgd_sgdpca = np.load(f'{path}sgd_sgdpca.npy')
# sgdr_sgdpca = np.load(f'{path}sgdr_sgdpca.npy')

# sgdr_pca_direction = np.load(f'{path}sgdr_pca_direction.npy')
# sgd_sgdrpca = np.load(f'{path}sgd_sgdrpca.npy')
# sgdr_sgdrpca = np.load(f'{path}sgdr_sgdrpca.npy')

Reshape `sgd_pca_direction` to the shapes of the original weights

In [None]:
def reshape_direction(direction):
    """Split a flat direction vector into a list of per-weight arrays.

    A throwaway model from `get_model()` supplies the shapes. For each Conv2D
    or Dense layer the next `size` entries of `direction` are taken for every
    weight array of the layer and reshaped to that array's shape. For each
    BatchNormalization layer, zero arrays of matching shapes are emitted
    without consuming anything from `direction`.

    Returns the list of arrays in layer order.
    """
    template = get_model()

    pieces = []
    offset = 0  # position of the next unread entry in `direction`
    for layer in template.layers:
        layer_weights = layer.get_weights()

        if isinstance(layer, (Conv2D, Dense)):
            for ref in layer_weights:
                chunk = direction[offset:offset + ref.size]
                offset += ref.size
                pieces.append(chunk.reshape(ref.shape))

        elif isinstance(layer, BatchNormalization):
            pieces.extend(np.zeros(shape=ref.shape) for ref in layer_weights)

    return pieces

In [None]:
sgd_direction1 = reshape_direction(sgd_pca_direction[:,0])
sgd_direction2 = reshape_direction(sgd_pca_direction[:,1])

In [None]:
sgdr_direction1 = reshape_direction(sgdr_pca_direction[:,0])
sgdr_direction2 = reshape_direction(sgdr_pca_direction[:,1])

Filter-normalize `sgd_pca_direction`

In [None]:
def _norm_ratio(target_norm, direction_norm):
    """Return target_norm / direction_norm elementwise, using 0 where direction_norm is 0."""
    target_norm = np.asarray(target_norm, dtype=np.float64)
    direction_norm = np.asarray(direction_norm, dtype=np.float64)
    nonzero = direction_norm > 0
    # Divide by a harmless 1 where the norm is zero, then mask those entries out.
    safe_norm = np.where(nonzero, direction_norm, 1.0)
    return np.where(nonzero, target_norm / safe_norm, 0.0)


def normalize_direction(model, direction):
    """Rescale `direction` so its pieces have the same norms as the weights of `model`.

    Parameters
    ----------
    model : Keras model with the same layer structure as `get_model()`.
    direction : list of arrays accepted by `get_model().set_weights(...)`, i.e.
        one array per weight of the model (as produced by `reshape_direction`).

    Returns
    -------
    list of arrays, in the same order as `direction`:
      * Conv2D: every 2-D kernel slice `[:, :, i, f]` of the direction is scaled
        to the Frobenius norm of the matching slice of the model kernel. The
        bias of filter `f` is scaled by the factor of the LAST input channel,
        which is what the earlier per-channel loop left behind.
        NOTE(review): confirm whether a whole-filter norm was intended instead.
      * Dense: kernel and bias are scaled by the ratio of the Frobenius norms of
        the model kernel and the direction kernel.
      * BatchNormalization: zero arrays, so these weights are never perturbed.

    A direction slice whose norm is zero is left at zero rather than becoming
    NaN/inf from a division by zero.
    """
    normalized_direction = []

    # Load the direction into a scratch model so it can be read back layer by layer.
    tmp_model = get_model()
    tmp_model.set_weights(direction)

    for l, tmp_l in zip(model.layers, tmp_model.layers):
        w = l.get_weights()
        d = tmp_l.get_weights()

        # if layer is convolutional layer
        if isinstance(l, Conv2D):
            # Frobenius norm of every [:, :, i, f] slice -> shape (in_channels, filters)
            norm_model = np.linalg.norm(w[0], axis=(0, 1))
            norm_direction = np.linalg.norm(d[0], axis=(0, 1))
            ratio = _norm_ratio(norm_model, norm_direction)

            # (kh, kw, in, filters) * (in, filters) broadcasts over the kernel window
            normalized_direction.append(d[0] * ratio)
            # bias of filter f uses the factor of the last input channel
            normalized_direction.append(d[1] * ratio[-1])

        # if layer is FC
        elif isinstance(l, Dense):
            ratio = _norm_ratio(np.linalg.norm(w[0], ord='fro'),
                                np.linalg.norm(d[0], ord='fro'))
            normalized_direction.append(d[0] * ratio)
            normalized_direction.append(d[1] * ratio)

        # if layer is BN: keep the direction at zero for every BN weight
        elif isinstance(l, BatchNormalization):
            normalized_direction.extend(np.zeros(bn_w.shape) for bn_w in w)

    return normalized_direction

In [None]:
sgd_direction1 = normalize_direction(model_sgd, sgd_direction1)
sgd_direction2 = normalize_direction(model_sgd, sgd_direction2)

In [None]:
sgdr_direction1 = normalize_direction(model_sgdr, sgdr_direction1)
sgdr_direction2 = normalize_direction(model_sgdr, sgdr_direction2)

# 5. Draw plot

The `direction_step` function returns a model whose weights are `original_model_weights + alpha * direction`

In [None]:
def direction_step(direction, model, alpha):
    """Return a new compiled model whose weights are `model`'s weights plus `alpha * direction`.

    `direction` is indexed in the same order as `model.get_weights()`. The
    returned model comes from `get_model()` and is compiled with the
    module-level `sgd` optimizer, categorical cross-entropy and accuracy.
    """
    # original weights shifted along the direction, one array per weight
    shifted = [
        base + alpha * direction[idx]
        for idx, base in enumerate(model.get_weights())
    ]

    step_model = get_model()
    step_model.compile(sgd, loss='categorical_crossentropy', metrics=['accuracy'])
    step_model.set_weights(shifted)

    return step_model

In [None]:
alpha_list = np.linspace(-5, 2, num=15)

In [None]:
def step_2d_fn(model, alpha_list, direction1, direction2):
    """Evaluate test loss and accuracy on a 2-D grid of steps around `model`.

    For every pair (a1, a2) from `alpha_list` x `alpha_list` the weights
    `model_weights + a1 * direction1 + a2 * direction2` are evaluated on the
    module-level `test_batches` iterator.

    Parameters
    ----------
    model : Keras model at the centre of the grid; it is not modified.
    alpha_list : iterable of step sizes, used for both directions.
    direction1, direction2 : lists of arrays in `model.get_weights()` order.

    Returns
    -------
    (loss_test, acc_test) : nested lists indexed `[i][j]`, where `i` is the
        position of the direction1 step and `j` that of the direction2 step.
    """
    base_weights = model.get_weights()

    # One scratch model is reused for every grid point. Building and compiling
    # two fresh models per point grows the Keras graph without bound.
    # The optimizer is never used because the model is only evaluated.
    eval_model = get_model()
    eval_model.compile('sgd', loss='categorical_crossentropy', metrics=['accuracy'])

    loss_test, acc_test = [], []

    for a1 in tqdm(alpha_list):
        # weights after the step along direction1, shared by the whole row
        row_weights = [w + a1 * d1 for w, d1 in zip(base_weights, direction1)]

        tmp_loss, tmp_acc = [], []
        for a2 in alpha_list:
            eval_model.set_weights(
                [w + a2 * d2 for w, d2 in zip(row_weights, direction2)]
            )

            eval_test = eval_model.evaluate_generator(test_batches)

            tmp_loss.append(eval_test[0])
            tmp_acc.append(eval_test[1])

        loss_test.append(tmp_loss)
        acc_test.append(tmp_acc)

    return loss_test, acc_test

In [None]:
loss_sgd_test, acc_sgd_test = step_2d_fn(model_sgd, alpha_list, sgd_direction1, sgd_direction2)

In [None]:
# np.save(f'{path}pca-loss_sgd_test.npy', np.array(loss_sgd_test))

In [None]:
# loss_sgd_test=np.load(f'{path}pca-loss_sgd_test.npy')

In [None]:
plt.figure(figsize=(8,8))
# step_2d_fn returns values indexed [direction1 step][direction2 step], while
# contour() reads Z as Z[y index][x index]. Transpose so that direction 1 lies on
# the x-axis, matching the scatter of first vs. second PCA coordinate below.
# NOTE(review): this cell contours accuracy while the SGDR cell contours loss — confirm.
c = plt.contour(alpha_list, alpha_list, np.transpose(acc_sgd_test))
plt.clabel(c, inline=1, fontsize=10)
plt.scatter(sgd_sgdpca.T[0], sgd_sgdpca.T[1], label='SGD')
plt.scatter(sgdr_sgdpca.T[0], sgdr_sgdpca.T[1], label='SGDR')
plt.xlabel('direction 1')
plt.ylabel('direction 2')
plt.legend()
plt.title('SGD test countour plot', fontsize=15)

In [None]:
loss_sgdr_test, acc_sgdr_test = step_2d_fn(model_sgdr, alpha_list, sgdr_direction1, sgdr_direction2)

In [None]:
# np.save(f'{path}cifar10-loss_sgdr_test_pca.npy', np.array(loss_sgdr_test))

In [None]:
# loss_sgdr_test=np.load(f'{path}cifar10-loss_sgdr_test_pca.npy')

In [None]:
plt.figure(figsize=(8,8))
# step_2d_fn returns values indexed [direction1 step][direction2 step], while
# contour() reads Z as Z[y index][x index]. Transpose so that direction 1 lies on
# the x-axis, matching the scatter of first vs. second PCA coordinate below.
c = plt.contour(alpha_list, alpha_list, np.transpose(loss_sgdr_test))
plt.clabel(c, inline=1, fontsize=10)
# Trajectories projected onto the SGDR PCA basis. The previous names `sgd_pca_data`
# and `sgdr_pca_data` were never defined, and scatter() needs both x and y.
plt.scatter(sgd_sgdrpca.T[0], sgd_sgdrpca.T[1], label='SGD')
plt.scatter(sgdr_sgdrpca.T[0], sgdr_sgdrpca.T[1], label='SGDR')
plt.xlabel('direction 1')
plt.ylabel('direction 2')
plt.legend()
plt.title('SGDR test countour plot', fontsize=15)