In [1]:
from hfunc import preprocessing
import tensorflow as tf
import pathlib
import os
import copy
import numpy as np
# Ask TensorFlow to grow its GPU memory allocation on demand instead of
# reserving the whole device up front (behaviour per the TensorFlow GPU guide).
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

In [2]:
import tensorflow.keras as keras
from keras.backend import clear_session
import gc

# Reset Keras Session
def reset_keras():
    """Clear the global Keras session, then run the garbage collector twice.

    The value returned by each ``gc.collect()`` call is printed.
    """
    clear_session()
    for _ in range(2):
        print(gc.collect())

Using TensorFlow backend.


In [3]:
import glob

def remove_logs(log_dir):
    """Delete every file matched by the glob pattern ``log_dir``.

    Args:
        log_dir: A ``glob`` pattern (for example ``'./logs/model_3/*'``).

    Real directories that match the pattern are skipped: ``os.remove`` cannot
    delete a directory and would otherwise raise on the first one it met,
    leaving the clean-up half done. Symbolic links are still removed.
    """
    for path in glob.glob(log_dir):
        if os.path.isdir(path) and not os.path.islink(path):
            continue
        os.remove(path)

In [4]:
# Location of the Kvasir v2 image folder on the author's machine.
# NOTE(review): absolute, user-specific Windows path — adjust before running elsewhere.
DIR_PATH = pathlib.Path(
    'C:\\Users\\lucas\\Documents\\Masters\\data\\kvasir-dataset-v2'
)

# Load the images through the project helper; the two 224 arguments are
# presumably the target height and width (the output below shows 224x224x3).
ds = preprocessing.load_dataset_images(DIR_PATH, 224, 224)

# Sanity check: print the shape and label of the first 8 samples.
for image, label in ds.take(8):
    print("Image shape:", image.numpy().shape)
    print('Label:', label.numpy())

# Drop the loop variables so the sample tensors do not stay referenced.
del image, label

# Split into train / validation / test subsets.
# NOTE(review): 0.7 and 0.15 look like the train and validation fractions,
# with the remainder used for testing — confirm against hfunc.preprocessing.
train_ds, val_ds, test_ds = preprocessing.train_val_test_split(
    ds,
    0.7,
    0.15
)

# Common prefix for the cache targets passed to prepare_for_model_use below.
# NOTE(review): another absolute, user-specific path.
cachefile = "C:\\Users\\lucas\\Documents\\Masters\\cache\\kvasir"

# Training pipeline: cached under '<prefix>_train' with prefetch requested;
# shuffle/repeat are left at the helper's defaults.
train_ds = preprocessing.prepare_for_model_use(
    train_ds,
    cache=cachefile+'_train',
    prefetch=True
)
# Validation pipeline: cached under '<prefix>_val', no shuffling, no repeat.
val_ds = preprocessing.prepare_for_model_use(
    val_ds,
    cache=cachefile+'_val',
    shuffle=False,
    repeat=False
)
# Test pipeline: caching, shuffling, prefetching and repeating all disabled.
test_ds = preprocessing.prepare_for_model_use(
    test_ds,
    cache=False,
    shuffle=False,
    prefetch=False,
    repeat=False
)

Image shape: (224, 224, 3)
Label: [ True False False False False False False False]
Image shape: (224, 224, 3)
Label: [False  True False False False False False False]
Image shape: (224, 224, 3)
Label: [False False  True False False False False False]
Image shape: (224, 224, 3)
Label: [False False False  True False False False False]
Image shape: (224, 224, 3)
Label: [False False False False  True False False False]
Image shape: (224, 224, 3)
Label: [False False False False False  True False False]
Image shape: (224, 224, 3)
Label: [False False False False False False  True False]
Image shape: (224, 224, 3)
Label: [False False False False False False False  True]


In [6]:
# Run counter; it is incremented when a model is built and is used to name
# the TensorBoard log directory (./logs/model_{ind}).
ind = 14

In [7]:
# Bump the run counter so this model gets its own TensorBoard log directory.
# Note: re-running this cell increments it again.
ind += 1
# Seed NumPy and TensorFlow. Seeding NumPy alone does not fix the randomly
# initialised weights, because Keras initialisers draw from TensorFlow's
# random generator.
np.random.seed(2020)
tf.random.set_seed(2020)
# VGG16 trained from scratch (no pretrained weights) with an 8-class output.
vgg16 = tf.keras.applications.VGG16(weights=None, classes=8)

In [8]:
# Print the layer-by-layer architecture and parameter counts.
vgg16.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

In [9]:
# Adam is configured here but is not passed to any compile() call in the
# cells visible in this notebook.
adam = tf.keras.optimizers.Adam(
    learning_rate=0.01,
    beta_1=0.8,
    beta_2=0.9,
)
# SGD with Nesterov momentum is the optimizer the model is compiled with.
sgd = tf.keras.optimizers.SGD(
    learning_rate=0.001,
    momentum=0.5,
    nesterov=True,
)
vgg16.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [10]:
# Stop after 7 epochs without improvement of the callback's default monitored
# quantity, and roll the model back to the best weights seen.
earl_stop = tf.keras.callbacks.EarlyStopping(
    patience=7,
    restore_best_weights=True,
)
# One TensorBoard log directory per run, keyed by the run counter `ind`.
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=f"./logs/model_{ind}",
)

In [11]:
reset_keras()
# Train only when no saved checkpoint index exists; otherwise a later cell
# loads the stored weights instead.
if not os.path.isfile('./weights/my_weights.index'):
    hist = vgg16.fit(
        train_ds,
        epochs=200,
        validation_data=val_ds,
        steps_per_epoch=175,
        validation_steps=35,
        callbacks=[tensorboard_callback, earl_stop],
        # NOTE(review): Keras documents use_multiprocessing for generator /
        # Sequence inputs only — confirm it has any effect for train_ds.
        use_multiprocessing=True,
    )

6432
0
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200


In [None]:
import copy
def estimate_node_importance(model, tester_model, layer_sizes, tol_low, tol_high, data, folder_path=None):
    """Classify every node by how the loss changes when that node is zeroed.

    For each node the matching kernel slice ``[..., i]`` and bias entry ``[i]``
    are set to zero in a copy of ``model``'s weights, the copy is loaded into
    ``tester_model`` and evaluated on ``data``. With
    ``change = baseline_loss - loss_without_node`` a node is counted as:

    * "zero"      when ``tol_low <= change <= tol_high``,
    * "worse"     when ``change > 0`` (the loss drops once the node is gone),
    * "important" otherwise (the loss rises once the node is gone).

    Args:
        model: Trained model; only ``evaluate`` and ``get_weights`` are used.
        tester_model: Model with the same weight layout; used as scratch space
            via ``set_weights`` / ``evaluate``.
        layer_sizes: Node count per layer. Entry 0 addresses the kernel/bias
            pair located just before the last two weight arrays, entry 1 the
            pair before that, and so on. This assumes the weight list
            alternates kernel, bias and that the final pair belongs to the
            output layer — confirm for models other than the notebook's VGG16.
        tol_low: Lower bound of the "zero" band.
        tol_high: Upper bound of the "zero" band.
        data: Dataset passed to ``evaluate`` (38 steps per evaluation).
        folder_path: Optional directory holding ``zero.txt``, ``worse.txt`` and
            ``important.txt`` (one line of node indices per layer). When given,
            earlier progress is read back and evaluation resumes after the
            nodes already recorded; new results are appended as they are
            produced. The directory must already exist.

    Returns:
        ``(amounts, places)``. ``amounts[k]`` is ``(num_zeros, num_worse,
        num_important)`` and ``places[k]`` is ``(zero_nodes, worse_nodes,
        important_nodes)`` for ``layer_sizes[k]``.
    """
    l, _ = model.evaluate(data, verbose=0, steps=38)
    or_weights = model.get_weights()
    weight_len = len(or_weights) - 3
    amounts = []
    places = []

    def read_progress(name, missing_msg):
        # Splitting on '\n' (rather than readlines) yields a trailing empty
        # entry after a terminated line, so len(entries) - 1 is the number of
        # layers whose line has already been closed with a newline.
        try:
            with open(folder_path + '/' + name, 'r') as handle:
                return [list(map(int, line.split())) for line in handle.read().split('\n')]
        except (OSError, ValueError):
            print(missing_msg)
            return []

    def append_text(name, text):
        with open(folder_path + '/' + name, 'a+') as handle:
            handle.write(text)

    temp_z = []
    temp_imp = []
    temp_wr = []
    if folder_path:
        # Read from the same file names that are written below.
        temp_z = read_progress('zero.txt', 'No Zero file')
        temp_imp = read_progress('important.txt', "No Important File")
        temp_wr = read_progress('worse.txt', "No Worse File")

    for layer, size in enumerate(layer_sizes):
        # A file may hold fewer lines than its siblings (e.g. no "worse" node
        # recorded yet for the layer in progress), so index defensively.
        z = list(temp_z[layer]) if layer < len(temp_z) else []
        wr = list(temp_wr[layer]) if layer < len(temp_wr) else []
        imp = list(temp_imp[layer]) if layer < len(temp_imp) else []
        done = len(z) + len(wr) + len(imp)

        kernel_idx = weight_len - (2*layer + 1)
        bias_idx = weight_len - 2*layer

        print(f'Layer {len(layer_sizes)-layer}')
        # Nodes are evaluated in index order, so the first `done` nodes are
        # exactly the ones already recorded; resume right after them.
        for i in range(done, size):
            # Copy only the two arrays that get modified; set_weights does not
            # need private copies of the untouched ones.
            w = list(or_weights)
            w[kernel_idx] = or_weights[kernel_idx].copy()
            w[kernel_idx][..., i] = 0
            w[bias_idx] = or_weights[bias_idx].copy()
            w[bias_idx][i] = 0
            tester_model.set_weights(w)
            nl, _ = tester_model.evaluate(data, verbose=0, steps=38)
            change = l - nl
            print(f'Node {i}: {change}')
            if tol_low <= change <= tol_high:
                z.append(i)
                target = 'zero.txt'
            elif change > 0:
                wr.append(i)
                target = 'worse.txt'
            else:
                imp.append(i)
                target = 'important.txt'
            if folder_path:
                append_text(target, str(i)+" ")

        if folder_path:
            # Terminate this layer's line only where that has not happened in
            # an earlier run; a second newline would shift every later layer.
            for name, entries in (('zero.txt', temp_z), ('worse.txt', temp_wr), ('important.txt', temp_imp)):
                if layer >= len(entries) - 1:
                    append_text(name, "\n")
        amounts.append((len(z), len(wr), len(imp)))
        places.append((z, wr, imp))

    return amounts, places

In [12]:
# A checkpoint index on disk means training was skipped above, so restore the
# stored weights; otherwise persist the weights that were just trained.
if os.path.isfile('./weights/my_weights.index'):
    vgg16.load_weights('./weights/my_weights')
else:
    vgg16.save_weights('./weights/my_weights')

In [16]:
# Node count of each prunable layer, ordered from the layer closest to the
# output back to the first convolution (the pruning helpers index weights
# backwards from the end of the weight list).
layer_sizes = [4096, 4096, 512, 512, 512, 512, 512, 512, 256, 256, 256, 128, 128, 64, 64]
# Second VGG16 with the same architecture; the pruning helpers load candidate
# weights into it and evaluate them, leaving `vgg16` itself untouched.
vgg16_test = tf.keras.applications.VGG16(weights=None, classes=8)
# NOTE(review): this reuses the same `sgd` optimizer instance as `vgg16`;
# in the visible cells vgg16_test is only evaluated, never trained.
vgg16_test.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
# Upper / lower bounds of the loss-change band treated as "no effect".
th = 1e-5
tl = -1e-5

In [None]:
# Upper / lower bounds of the loss-change band treated as "no effect".
th = 1e-5
tl = -1e-5
reset_keras()
# estimate_node_importance takes folder_path as its 7th positional argument;
# omitting it raised a TypeError. Pass None to run without progress files.
amounts, places = estimate_node_importance(vgg16, vgg16_test, layer_sizes, tl, th, val_ds, None)

In [None]:
# `amounts` is ordered from the last layer to the first; walk it backwards so
# the report starts at the first layer.
for i, counts in enumerate(amounts[::-1]):
    nz, nw, ni = counts
    print(f'######### LAYER {i} #########')
    print("Zero Nodes:", nz)
    print("Worse Nodes:", nw)
    print("Important Nodes:", ni)

In [63]:
def node_pruning(model, tester_model, data, layer_sizes, tol, ignore_cutoff, method='exhaustive', folder_path=None):
    """Zero out nodes, layer by layer, while the evaluation loss allows it.

    A node is "removed" by setting its kernel slice ``[..., pos]`` and bias
    entry ``[pos]`` to zero. Candidates are scored with
    ``gain = current_loss - loss_without_node`` measured on ``tester_model``.

    Methods:
        ``'exhaustive'``: scan every remaining node of the layer, remove the
            one with the largest gain provided it is ``>= tol``, and repeat
            until a full scan finds none. Nothing is written to
            ``folder_path`` in this mode.
        ``'greedy'``: walk the remaining nodes in order. A node with
            ``gain >= tol`` is removed at once and the walk restarts from the
            first remaining node. A node with ``gain <= ignore_cutoff`` is
            dropped from the candidate list for this layer. Any other node is
            kept and re-tested after the next removal.
        Any other value leaves every layer untouched.

    Args:
        model: Trained model; provides the baseline loss/accuracy and the
            starting weights.
        tester_model: Model with the same weight layout, used as scratch space
            (``set_weights`` / ``evaluate`` / ``load_weights`` /
            ``save_weights``).
        data: Dataset passed to ``evaluate`` (38 steps per evaluation).
        layer_sizes: Node count per layer. Entry 0 addresses the kernel/bias
            pair located just before the last two weight arrays, entry 1 the
            pair before that, and so on (assumes alternating kernel, bias
            arrays — confirm for models other than the notebook's VGG16).
        tol: Minimum gain required to remove a node.
        ignore_cutoff: Greedy only; gains at or below this value exclude the
            node from further testing in the current layer.
        method: ``'exhaustive'`` or ``'greedy'``.
        folder_path: Optional existing directory used to resume a run. It holds
            ``removed.txt`` (one line of removed node indices per layer, closed
            by a newline once the layer is finished), ``loss.txt`` and
            ``accuracy.txt`` (space-separated histories) and a ``weights``
            checkpoint.

    Returns:
        ``(best_weights, bas, bls, amounts, places)``: the pruned weight list,
        the accuracy and loss histories, the number of removed nodes per layer
        and the removed node indices per layer.
    """
    loss, acc = model.evaluate(data, verbose=0, steps=38)
    best_weights = model.get_weights()
    weight_len = len(best_weights) - 3
    bas = [acc]
    bls = [loss]
    ol = loss
    oa = acc
    amounts = []
    places = []
    temp_remove = []

    def append_text(name, text):
        with open(folder_path + '/' + name, 'a+') as handle:
            handle.write(text)

    def evaluate_without(weights, kernel_idx, bias_idx, pos):
        # Copy only the two arrays that are modified; the rest are passed
        # through unchanged, so `weights` itself is never altered here.
        w = list(weights)
        w[kernel_idx] = weights[kernel_idx].copy()
        w[kernel_idx][..., pos] = 0
        w[bias_idx] = weights[bias_idx].copy()
        w[bias_idx][pos] = 0
        tester_model.set_weights(w)
        return tester_model.evaluate(data, verbose=0, steps=38)

    if folder_path:
        # Retrieving removed nodes. Splitting on '\n' leaves a trailing entry
        # for the layer still in progress, so every entry except the last one
        # belongs to a finished layer.
        try:
            with open(folder_path+'/removed.txt', 'r') as r:
                temp_remove = [list(map(int, line.split())) for line in r.read().split('\n')]
        except (OSError, ValueError):
            print('No node removal file')
            temp_remove = []
        # Retrieving loss history
        try:
            with open(folder_path+'/loss.txt', 'r') as r:
                bls = list(map(np.double, r.readline().split()))
            ol = bls[-1]
        except (OSError, ValueError, IndexError):
            print("No Loss File")
            bls = [loss]
            append_text('loss.txt', str(loss)+' ')
        # Retrieving accuracy history
        try:
            with open(folder_path+'/accuracy.txt', 'r') as r:
                bas = list(map(np.double, r.readline().split()))
            oa = bas[-1]
        except (OSError, ValueError, IndexError):
            print("No Accuracy File")
            bas = [acc]
            append_text('accuracy.txt', str(acc)+' ')
        # Retrieving best weights
        try:
            tester_model.load_weights(folder_path+'/weights')
            best_weights = tester_model.get_weights()
        except Exception:
            print('No weights found')

    for layer, size in enumerate(layer_sizes):
        kernel_idx = weight_len - (2*layer + 1)
        bias_idx = weight_len - 2*layer

        if folder_path and layer < len(temp_remove):
            nodes_removed = temp_remove[layer]
            # Only the last recorded layer can still be in progress.
            end_not_reached = layer + 1 >= len(temp_remove)
            if nodes_removed:
                # Keep the weights consistent with the recorded removals even
                # when the checkpoint could not be loaded (idempotent).
                best_weights[kernel_idx][..., nodes_removed] = 0
                best_weights[bias_idx][nodes_removed] = 0
        else:
            nodes_removed = []
            end_not_reached = True
        num_removed = len(nodes_removed)

        print(f'Starting removal in Layer {len(layer_sizes) - layer}')
        if method == 'exhaustive':
            while end_not_reached:
                best_change = tol
                best_pos = -1
                best_acc = 0
                best_loss = 1e20
                # Scan every node index of the layer, skipping removed ones.
                for current_pos in range(size):
                    if current_pos in nodes_removed:
                        continue
                    nl, na = evaluate_without(best_weights, kernel_idx, bias_idx, current_pos)
                    if ol - nl >= best_change:
                        best_change = ol - nl
                        best_pos = current_pos
                        best_acc = na
                        best_loss = nl
                if best_pos < 0:
                    # A full scan found no node worth removing.
                    end_not_reached = False
                    continue
                nodes_removed += [best_pos]
                best_weights[kernel_idx][..., best_pos] = 0
                best_weights[bias_idx][best_pos] = 0
                ol = best_loss
                oa = best_acc
                bas += [best_acc]
                bls += [best_loss]
                num_removed += 1
        elif method == 'greedy':
            already_removed = set(nodes_removed)
            nodes_to_estimate = [n for n in range(size) if n not in already_removed]
            if end_not_reached:
                idx = 0
                # Bound the walk by the candidates actually left, so resuming
                # a partially pruned layer cannot run past the list.
                while idx < len(nodes_to_estimate):
                    current_pos = nodes_to_estimate[idx]
                    nl, na = evaluate_without(best_weights, kernel_idx, bias_idx, current_pos)
                    print(f'Node {current_pos}: {ol - nl}')
                    if ol - nl >= tol:
                        oa = na
                        ol = nl
                        nodes_removed += [current_pos]
                        nodes_to_estimate.pop(idx)
                        best_weights[kernel_idx][..., current_pos] = 0
                        best_weights[bias_idx][current_pos] = 0
                        bas += [oa]
                        bls += [ol]
                        num_removed += 1
                        # Restart: earlier "undecided" nodes may now qualify.
                        idx = 0
                        if folder_path:
                            append_text('removed.txt', str(current_pos) + ' ')
                            append_text('loss.txt', str(ol) + ' ')
                            append_text('accuracy.txt', str(oa) + ' ')
                            # tester_model currently holds exactly the new
                            # best weights (set by evaluate_without).
                            tester_model.save_weights(folder_path+'/weights')
                    elif ol - nl <= ignore_cutoff:
                        # Clearly harmful to remove: never test it again.
                        nodes_to_estimate.pop(idx)
                    else:
                        idx += 1
                if folder_path:
                    # Close this layer's line so a resume treats it as done.
                    append_text('removed.txt', '\n')
        print(f'Removed {num_removed} nodes for Layer {len(layer_sizes) - layer}')
        amounts.append(num_removed)
        places.append(nodes_removed)

    return best_weights, bas, bls, amounts, places

In [64]:
# The progress files live in ./Pruning; create the directory up front because
# node_pruning opens files inside it without creating it.
os.makedirs('./Pruning', exist_ok=True)
# './Pruning' replaces '.\Pruning': '\P' is an invalid escape sequence and the
# backslash form only resolves to a sub-directory on Windows.
best_weights, bas, bls, amounts, places = node_pruning(vgg16, vgg16_test, val_ds, layer_sizes, tl, -3e-4, method='greedy', folder_path='./Pruning')

No node removal file
No Accuracy File
No weights found
Starting removal in Layer 15
Node 0: -0.0007110238075256348
Node 1: -4.079937934875488e-05
Node 2: 8.526444435119629e-05
Node 1: -3.910064697265625e-05
Node 3: 1.9073486328125e-06
Node 1: -3.8951635360717773e-05
Node 4: 0.00017723441123962402
Node 1: -3.933906555175781e-05
Node 5: 0.00034439563751220703
Node 1: -3.457069396972656e-05
Node 6: -7.346272468566895e-05
Node 7: 0.001059114933013916
Node 1: -2.5779008865356445e-05
Node 6: -6.729364395141602e-05
Node 8: 0.0006658434867858887
Node 1: -2.321600914001465e-05
Node 6: -6.559491157531738e-05
Node 9: 8.103251457214355e-05
Node 1: -1.4901161193847656e-06
Node 6: -8.627772331237793e-05
Node 10: -0.00012156367301940918
Node 11: -0.00015014410018920898
Node 12: -0.0006375908851623535
Node 13: -0.0003071129322052002
Node 14: 0.00039443373680114746
Node 6: -8.040666580200195e-05
Node 10: -0.00012120604515075684
Node 11: -0.0001475214958190918
Node 15: -0.000576704740524292
Node 16: 4.4

KeyboardInterrupt: 