# Chapter 4.9

## Importing Libraries

In [1]:
from __future__ import absolute_import, division, print_function
from __future__ import unicode_literals

In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import copy
import tqdm
from hfunc import models
from hfunc import metrics
import time

## Self-created functions

In [3]:
def estimate_node_importance(model, tester_model, layer_sizes, tol_low, tol_high, x, y, back=True, forward=True, single=False, layer=None):
    
    l, a = model.evaluate(x, y, verbose=0, batch_size=256)
    or_weights = model.get_weights()
    num_zeros, num_worse, num_important = (0, 0, 0)
    z = []
    wr = []
    imp = []
    amounts = []
    places = []
    if single:
        size = layer_sizes[layer]
        num_zeros, num_worse, num_important = (0, 0, 0)
        z = []
        wr = []
        imp = []
        for i in range(size):
            w = copy.deepcopy(or_weights)
            if back:
                w[2*layer][...,i] = 0
            w[2*layer+1][i] = 0
            if forward:
                w[2*layer+2][...,i,:] = 0
            tester_model.set_weights(w)
            nl, na = tester_model.evaluate(x, y, verbose=0, batch_size=256)
            change = l - nl
            if change <= tol_high and change >= tol_low:
                num_zeros += 1
                z += [i]
            elif change > 0:
                num_worse += 1
                wr += [i]
            else:
                num_important += 1
                imp += [i]
        amounts.append((num_zeros, num_worse, num_important))
        places.append((z, wr, imp))
        
        return amounts[0], places[0]
    else:
        for layer, size in enumerate(layer_sizes):
            num_zeros, num_worse, num_important = (0, 0, 0)
            z = []
            wr = []
            imp = []
            for i in range(size):
                w = copy.deepcopy(or_weights)
                if back:
                    w[2*layer][...,i] = 0
                w[2*layer+1][i] = 0
                if forward:
                    w[2*layer+2][...,i,:] = 0
                tester_model.set_weights(w)
                nl, na = tester_model.evaluate(x, y, verbose=0, batch_size=256)
                change = l - nl
                if change <= tol_high and change >= tol_low:
                    num_zeros += 1
                    z += [i]
                elif change > 0:
                    num_worse += 1
                    wr += [i]
                else:
                    num_important += 1
                    imp += [i]
            amounts.append((num_zeros, num_worse, num_important))
            places.append((z, wr, imp))
    
        return amounts, places

In [17]:
def optimize_weights(model, tester_model, layer_sizes, filter_sizes, tol_low, tol_high, x, y, input_size, output_size, min_imp_percentage=0.9, increasing=False, input_nodes=False):
    
    or_min_imp_percentage = min_imp_percentage
    
    for l, size in enumerate(layer_sizes):
        num_iter = 0
        min_imp_percentage = or_min_imp_percentage
        fil = filter_sizes[l]
        if l < len(layer_sizes) - 1:
            fil_out = filter_sizes[l+1]
        else:
            fil_out = None
        #print(fil_out)
        if l == 0:
            #print('First time')
            tmp_a, tmp_p = estimate_node_importance(model, tester_model, layer_sizes, low_tol, high_tol, x_val, y_val, layer=l, single=True)
        else:
            tmp_a, tmp_p = estimate_node_importance(model, tester_model, layer_sizes, low_tol, high_tol, x_val, y_val, back=False, layer=l, single=True)

        imp_z_ratio = (tmp_a[2] + tmp_a[0]) / layer_sizes[l]
        imp_ratio = (tmp_a[2]) / layer_sizes[l]
        #print(imp_z_ratio)
        #print(imp_ratio)
        #print('#####################')
        
        if l < len(layer_sizes) - 1:
            next_size = layer_sizes[l+1]
        else:
            next_size = output_size
            
        while imp_ratio <= min_imp_percentage:
            
            if num_iter == 50:
                min_imp_percentage -= 0.02
            
            w = model.get_weights()
            weight_len = len(w) - 3
            if fil:
                if fil_out:
                    limit2 = np.sqrt(6 / ((size*fil*fil)+(next_size*fil_out*fil_out)))
                else:
                    limit2 = np.sqrt(6 / ((size*fil*fil)+(next_size)))
            else:
                limit2 = np.sqrt(6 / ((size)+(next_size)))
            if l == 0:
                if input_nodes:
                    limit1 = np.sqrt(6 / (input_nodes+size))
                else:
                    limit1 = np.sqrt(6 / (input_size+size))
                

            if tmp_a[1]:
                
                if fil_out:
                    size_out = (fil_out, fil_out, tmp_a[1], next_size)
                else:
                    size_out = (tmp_a[1], next_size)
                
                w[2*l+2][..., tmp_p[1], :] = list(np.random.uniform(-limit2, limit2, size_out))
                if l == 0:
                    if fil:
                        size_in = (fil, fil, input_size, tmp_a[1])
                    else:
                        size_in = (input_size, tmp_a[1])
                    w[2*l][..., tmp_p[1]] = list(np.random.uniform(-limit1, limit1, size_in))

            if tmp_a[0]:
                
                if fil_out:
                    size_out = (fil_out, fil_out, tmp_a[0], next_size)
                else:
                    size_out = (tmp_a[0], next_size)
                    
                w[2*l+2][..., tmp_p[0], :] = list(np.random.uniform(-limit2, limit2, size_out))
                if l == 0:
                    if fil:
                        size_in = (fil, fil, input_size, tmp_a[0])
                    else:
                        size_in = (input_size, tmp_a[0])
                    w[2*l][..., tmp_p[0]] = list(np.random.uniform(-limit1, limit1, size_in))
            
            model.set_weights(w)
            if l == 0:
                tmp_a, tmp_p = estimate_node_importance(model, tester_model, layer_sizes, low_tol, high_tol, x_val, y_val, layer=l, single=True)
            else:
                tmp_a, tmp_p = estimate_node_importance(model, tester_model, layer_sizes, low_tol, high_tol, x_val, y_val, back=False, layer=l, single=True)
            imp_z_ratio = (tmp_a[2] + tmp_a[0]) / layer_sizes[l]
            imp_ratio = (tmp_a[2]) / layer_sizes[l]
            #print(imp_z_ratio)
            #print(imp_ratio)
            #print('#####################')
            if increasing:
                min_imp_percentage += increasing
            num_iter += 1

## Single-layer ANN

### MNIST

In [5]:
np.random.seed(2020)

In [6]:
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0  # Converting interger values to floats (0 to 1)

In [7]:
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, train_size=0.85, stratify=None)

In [8]:
tester_model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
tester_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

In [9]:
trials = 25
low_tol = -1e-5
high_tol = 1e-5
filter_sizes = [None]
layer_sizes = [128]
accs_opt = np.zeros((trials))
losses_opt = np.zeros((trials))
accs = np.zeros((trials))
losses = np.zeros((trials))
for trial in tqdm.trange(trials):
    
    model = tf.keras.models.Sequential([
            tf.keras.layers.Flatten(input_shape=(28, 28)),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(10, activation='softmax')
        ])
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    ) 
    
    w = model.get_weights()
    tester_model.set_weights(w)
    tester_model.fit(x_train, y_train, verbose=0, epochs=5)
    losses[trial], accs[trial] = tester_model.evaluate(x_test, y_test, verbose=0)
    
    optimize_weights(model, tester_model, layer_sizes, filter_sizes, low_tol, high_tol, x_val, y_val, 784, 10)
    
    model.fit(x_train, y_train, verbose=0, epochs=5)
    losses_opt[trial], accs_opt[trial] = model.evaluate(x_test, y_test, verbose=0)

  0%|                                                                                           | 0/25 [00:00<?, ?it/s]

None


  4%|███▎                                                                               | 1/25 [01:33<37:13, 93.07s/it]

None


  8%|██████▋                                                                            | 2/25 [02:39<32:34, 84.97s/it]

None


 12%|█████████▉                                                                         | 3/25 [03:45<29:05, 79.36s/it]

None


 16%|█████████████▎                                                                     | 4/25 [05:00<27:17, 77.97s/it]

None


 20%|████████████████▌                                                                  | 5/25 [06:23<26:30, 79.54s/it]

None


 24%|███████████████████▉                                                               | 6/25 [07:54<26:19, 83.14s/it]

None


 28%|███████████████████████▏                                                           | 7/25 [09:00<23:22, 77.90s/it]

None


 32%|██████████████████████████▌                                                        | 8/25 [10:06<21:04, 74.37s/it]

None


 36%|█████████████████████████████▉                                                     | 9/25 [11:21<19:51, 74.45s/it]

None


 40%|████████████████████████████████▊                                                 | 10/25 [12:52<19:53, 79.60s/it]

None


 44%|████████████████████████████████████                                              | 11/25 [14:15<18:48, 80.58s/it]

None


 48%|███████████████████████████████████████▎                                          | 12/25 [15:56<18:44, 86.48s/it]

None


 52%|██████████████████████████████████████████▋                                       | 13/25 [17:27<17:35, 87.94s/it]

None


 56%|█████████████████████████████████████████████▉                                    | 14/25 [18:33<14:53, 81.26s/it]

None


 60%|█████████████████████████████████████████████████▏                                | 15/25 [19:47<13:11, 79.11s/it]

None


 64%|████████████████████████████████████████████████████▍                             | 16/25 [21:09<12:00, 80.06s/it]

None


 68%|███████████████████████████████████████████████████████▊                          | 17/25 [22:40<11:06, 83.34s/it]

None


 72%|███████████████████████████████████████████████████████████                       | 18/25 [24:03<09:41, 83.13s/it]

None


 76%|██████████████████████████████████████████████████████████████▎                   | 19/25 [25:34<08:33, 85.57s/it]

None


 80%|█████████████████████████████████████████████████████████████████▌                | 20/25 [27:22<07:41, 92.36s/it]

None


 84%|████████████████████████████████████████████████████████████████████▉             | 21/25 [28:36<05:47, 86.98s/it]

None


 88%|████████████████████████████████████████████████████████████████████████▏         | 22/25 [29:59<04:16, 85.64s/it]

None


 92%|███████████████████████████████████████████████████████████████████████████▍      | 23/25 [31:22<02:49, 84.96s/it]

None


 96%|██████████████████████████████████████████████████████████████████████████████▋   | 24/25 [32:46<01:24, 84.43s/it]

None


100%|██████████████████████████████████████████████████████████████████████████████████| 25/25 [34:09<00:00, 81.99s/it]


In [15]:
A = pd.DataFrame(np.array([accs, accs_opt]).T, columns=['Unoptimized Weights', 'Optimized Weights'])
A.to_csv('../../../results/acc_ANN_iter_weights_mnist.csv')
A.describe()

Unnamed: 0,Unoptimized Weights,Optimized Weights
count,25.0,25.0
mean,0.975436,0.975128
std,0.001496,0.001529
min,0.9727,0.9724
25%,0.9743,0.9737
50%,0.9752,0.9756
75%,0.9768,0.9765
max,0.9774,0.9773


In [16]:
L = pd.DataFrame(np.array([losses, losses_opt]).T, columns=['Unoptimized Weights', 'Optimized Weights'])
L.to_csv('../../../results/loss_ANN_iter_weights_mnist.csv')
L.describe()

Unnamed: 0,Unoptimized Weights,Optimized Weights
count,25.0,25.0
mean,0.079142,0.081441
std,0.004107,0.005087
min,0.071206,0.073149
25%,0.076419,0.076339
50%,0.078328,0.081898
75%,0.081304,0.084012
max,0.086604,0.092926


### Fashion MNIST

In [23]:
np.random.seed(2020)

In [24]:
fmnist = tf.keras.datasets.fashion_mnist
(x_train, y_train), (x_test, y_test) = fmnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0  # Converting interger values to floats (0 to 1)

In [25]:
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, train_size=0.85, stratify=None)

In [26]:
tester_model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
tester_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

In [27]:
trials = 25
low_tol = -1e-5
high_tol = 1e-5
filter_sizes = [None]
layer_sizes = [128]
accs_opt = np.zeros((trials))
losses_opt = np.zeros((trials))
accs = np.zeros((trials))
losses = np.zeros((trials))
for trial in tqdm.trange(trials):
    
    model = tf.keras.models.Sequential([
            tf.keras.layers.Flatten(input_shape=(28, 28)),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(10, activation='softmax')
        ])
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    
    w = model.get_weights()
    tester_model.set_weights(w)
    tester_model.fit(x_train, y_train, verbose=0, epochs=5)
    losses[trial], accs[trial] = tester_model.evaluate(x_test, y_test, verbose=0)
    
    optimize_weights(model, tester_model, layer_sizes, filter_sizes, low_tol, high_tol, x_val, y_val, 784, 10)
    
    model.fit(x_train, y_train, verbose=0, epochs=5)
    losses_opt[trial], accs_opt[trial] = model.evaluate(x_test, y_test, verbose=0)

100%|██████████████████████████████████████████████████████████████████████████████████| 25/25 [37:18<00:00, 89.54s/it]


In [28]:
A = pd.DataFrame(np.array([accs, accs_opt]).T, columns=['Unoptimized Weights', 'Optimized Weights'])
A.to_csv('../../../results/acc_ANN_iter_weights_fmnist.csv')
A.describe()

Unnamed: 0,Unoptimized Weights,Optimized Weights
count,25.0,25.0
mean,0.868888,0.869064
std,0.006007,0.006144
min,0.8571,0.8562
25%,0.8643,0.8669
50%,0.8691,0.8698
75%,0.8745,0.8731
max,0.8789,0.8782


In [29]:
L = pd.DataFrame(np.array([losses, losses_opt]).T, columns=['Unoptimized Weights', 'Optimized Weights'])
L.to_csv('../../../results/loss_ANN_iter_weights_fmnist.csv')
L.describe()

Unnamed: 0,Unoptimized Weights,Optimized Weights
count,25.0,25.0
mean,0.364696,0.362768
std,0.014066,0.016294
min,0.342609,0.341724
25%,0.357195,0.350479
50%,0.361867,0.359648
75%,0.371032,0.366726
max,0.402705,0.4062


## Multi-layer Perceptron

### MNIST

In [5]:
np.random.seed(2020)

In [6]:
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0  # Converting interger values to floats (0 to 1)

In [7]:
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, train_size=0.85, stratify=None)

In [8]:
tester_model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])
tester_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

In [9]:
trials = 25
low_tol = -1e-5
high_tol = 1e-5
filter_sizes = [None, None, None]
layer_sizes = [128, 64, 32]
accs_opt = np.zeros((trials))
losses_opt = np.zeros((trials))
accs = np.zeros((trials))
losses = np.zeros((trials))
for trial in tqdm.trange(trials):
    
    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    
    w = model.get_weights()
    tester_model.set_weights(w)
    tester_model.fit(x_train, y_train, verbose=0, epochs=5)
    losses[trial], accs[trial] = tester_model.evaluate(x_test, y_test, verbose=0)
    
    optimize_weights(model, tester_model, layer_sizes, filter_sizes, low_tol, high_tol, x_val, y_val, 784, 10)
    
    model.fit(x_train, y_train, verbose=0, epochs=5)
    losses_opt[trial], accs_opt[trial] = model.evaluate(x_test, y_test, verbose=0)

100%|███████████████████████████████████████████████████████████████████████████████| 25/25 [1:56:08<00:00, 278.75s/it]


In [10]:
A = pd.DataFrame(np.array([accs, accs_opt]).T, columns=['Unoptimized Weights', 'Optimized Weights'])
A.to_csv('../../../results/acc_MLP_iter_weights_mnist.csv')
A.describe()

Unnamed: 0,Unoptimized Weights,Optimized Weights
count,25.0,25.0
mean,0.974096,0.973632
std,0.001981,0.002461
min,0.9685,0.9673
25%,0.9734,0.9726
50%,0.9743,0.9741
75%,0.9757,0.9748
max,0.9773,0.9778


In [11]:
L = pd.DataFrame(np.array([losses, losses_opt]).T, columns=['Unoptimized Weights', 'Optimized Weights'])
L.to_csv('../../../results/loss_MLP_iter_weights_mnist.csv')
L.describe()

Unnamed: 0,Unoptimized Weights,Optimized Weights
count,25.0,25.0
mean,0.09075,0.090004
std,0.007703,0.009228
min,0.077841,0.076328
25%,0.086862,0.084064
50%,0.09064,0.089402
75%,0.095472,0.093296
max,0.108661,0.117765


### Fashion MNIST

In [12]:
np.random.seed(2020)

In [13]:
fmnist = tf.keras.datasets.fashion_mnist
(x_train, y_train), (x_test, y_test) = fmnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0  # Converting interger values to floats (0 to 1)

In [14]:
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, train_size=0.85, stratify=None)

In [15]:
tester_model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])
tester_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

In [16]:
trials = 25
low_tol = -1e-5
high_tol = 1e-5
filter_sizes = [None, None, None]
layer_sizes = [128, 64, 32]
accs_opt = np.zeros((trials))
losses_opt = np.zeros((trials))
accs = np.zeros((trials))
losses = np.zeros((trials))
for trial in tqdm.trange(trials):
    
    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    
    w = model.get_weights()
    tester_model.set_weights(w)
    tester_model.fit(x_train, y_train, verbose=0, epochs=5)
    losses[trial], accs[trial] = tester_model.evaluate(x_test, y_test, verbose=0)
    
    optimize_weights(model, tester_model, layer_sizes, filter_sizes, low_tol, high_tol, x_val, y_val, 784, 10)
    
    model.fit(x_train, y_train, verbose=0, epochs=5)
    losses_opt[trial], accs_opt[trial] = model.evaluate(x_test, y_test, verbose=0)

100%|███████████████████████████████████████████████████████████████████████████████| 25/25 [3:37:47<00:00, 522.72s/it]


In [17]:
A = pd.DataFrame(np.array([accs, accs_opt]).T, columns=['Unoptimized Weights', 'Optimized Weights'])
A.to_csv('../../../results/acc_MLP_iter_weights_fmnist.csv')
A.describe()

Unnamed: 0,Unoptimized Weights,Optimized Weights
count,25.0,25.0
mean,0.868784,0.869532
std,0.004377,0.0064
min,0.8592,0.8505
25%,0.8655,0.8674
50%,0.8693,0.8705
75%,0.8716,0.8743
max,0.8759,0.8782


In [18]:
L = pd.DataFrame(np.array([losses, losses_opt]).T, columns=['Unoptimized Weights', 'Optimized Weights'])
L.to_csv('../../../results/loss_MLP_iter_weights_fmnist.csv')
L.describe()

Unnamed: 0,Unoptimized Weights,Optimized Weights
count,25.0,25.0
mean,0.367025,0.364099
std,0.011535,0.016962
min,0.347072,0.341189
25%,0.358815,0.355939
50%,0.366326,0.36181
75%,0.372493,0.368561
max,0.391896,0.426484


## Convolutional Neural Network

### MNIST

In [11]:
np.random.seed(2020)

In [12]:
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train[..., np.newaxis], x_test[..., np.newaxis]
x_train, x_test = x_train / 255.0, x_test / 255.0  # Converting interger values to floats (0 to 1)

In [13]:
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, train_size=0.85, stratify=None)

In [14]:
tester_model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu', input_shape=(28, 28, 1)),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu'),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(128, 3, padding='same', activation='relu'),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(256, 3, padding='same', activation='relu'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])
tester_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

In [15]:
trials = 10
low_tol = -1e-5
high_tol = 1e-5
filter_sizes = [3, 3, 3, 3, None]
layer_sizes = [32, 64, 128, 256, 64]
accs_opt = np.zeros((trials))
losses_opt = np.zeros((trials))
accs = np.zeros((trials))
losses = np.zeros((trials))
for trial in tqdm.trange(trials):
    
    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu', input_shape=(28, 28, 1)),
        tf.keras.layers.MaxPool2D(),
        tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu'),
        tf.keras.layers.MaxPool2D(),
        tf.keras.layers.Conv2D(128, 3, padding='same', activation='relu'),
        tf.keras.layers.MaxPool2D(),
        tf.keras.layers.Conv2D(256, 3, padding='same', activation='relu'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')
        ])
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    
    w = model.get_weights()
    tester_model.set_weights(w)
    tester_model.fit(x_train, y_train, verbose=0, epochs=5)
    losses[trial], accs[trial] = tester_model.evaluate(x_test, y_test, verbose=0)
    
    optimize_weights(model, tester_model, layer_sizes, filter_sizes, low_tol, high_tol, x_val, y_val, 1, 10, increasing=False, min_imp_percentage=0.6, input_nodes=784)
    
    model.fit(x_train, y_train, verbose=0, epochs=5)
    losses_opt[trial], accs_opt[trial] = model.evaluate(x_test, y_test, verbose=0)

100%|███████████████████████████████████████████████████████████████████████████████| 10/10 [1:43:26<00:00, 620.69s/it]


In [16]:
A = pd.DataFrame(np.array([accs, accs_opt]).T, columns=['Unoptimized Weights', 'Optimized Weights'])
A.to_csv('../../../results/acc_CNN_iter_weights_mnist.csv')
A.describe()

Unnamed: 0,Unoptimized Weights,Optimized Weights
count,10.0,10.0
mean,0.99059,0.98987
std,0.001165,0.001374
min,0.9889,0.9881
25%,0.9896,0.988525
50%,0.99085,0.9901
75%,0.9909,0.99065
max,0.9927,0.9918


In [17]:
L = pd.DataFrame(np.array([losses, losses_opt]).T, columns=['Unoptimized Weights', 'Optimized Weights'])
L.to_csv('../../../results/loss_CNN_iter_weights_mnist.csv')
L.describe()

Unnamed: 0,Unoptimized Weights,Optimized Weights
count,10.0,10.0
mean,0.031711,0.037217
std,0.003131,0.005522
min,0.026262,0.027868
25%,0.029747,0.033312
50%,0.032108,0.037163
75%,0.033612,0.041179
max,0.037126,0.045922


### Fashion MNIST

In [5]:
np.random.seed(2020)

In [6]:
fmnist = tf.keras.datasets.fashion_mnist
(x_train, y_train), (x_test, y_test) = fmnist.load_data()
x_train, x_test = x_train[..., np.newaxis], x_test[..., np.newaxis]
x_train, x_test = x_train / 255.0, x_test / 255.0  # Converting interger values to floats (0 to 1)

In [7]:
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, train_size=0.85, stratify=None)

In [8]:
tester_model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu', input_shape=(28, 28, 1)),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu'),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(128, 3, padding='same', activation='relu'),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(256, 3, padding='same', activation='relu'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])
tester_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

In [9]:
trials = 10
low_tol = -1e-5
high_tol = 1e-5
filter_sizes = [3, 3, 3, 3, None]
layer_sizes = [32, 64, 128, 256, 64]
accs_opt = np.zeros((trials))
losses_opt = np.zeros((trials))
accs = np.zeros((trials))
losses = np.zeros((trials))
for trial in tqdm.trange(trials):
    
    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu', input_shape=(28, 28, 1)),
        tf.keras.layers.MaxPool2D(),
        tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu'),
        tf.keras.layers.MaxPool2D(),
        tf.keras.layers.Conv2D(128, 3, padding='same', activation='relu'),
        tf.keras.layers.MaxPool2D(),
        tf.keras.layers.Conv2D(256, 3, padding='same', activation='relu'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')
        ])
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    
    w = model.get_weights()
    tester_model.set_weights(w)
    tester_model.fit(x_train, y_train, verbose=0, epochs=5)
    losses[trial], accs[trial] = tester_model.evaluate(x_test, y_test, verbose=0)
    
    optimize_weights(model, tester_model, layer_sizes, filter_sizes, low_tol, high_tol, x_val, y_val, 1, 10, increasing=False, min_imp_percentage=0.6, input_nodes=784)
    
    model.fit(x_train, y_train, verbose=0, epochs=5)
    losses_opt[trial], accs_opt[trial] = model.evaluate(x_test, y_test, verbose=0)

100%|███████████████████████████████████████████████████████████████████████████████| 10/10 [2:10:28<00:00, 782.83s/it]


In [10]:
A = pd.DataFrame(np.array([accs, accs_opt]).T, columns=['Unoptimized Weights', 'Optimized Weights'])
A.to_csv('../../../results/acc_CNN_iter_weights_fmnist.csv')
A.describe()

Unnamed: 0,Unoptimized Weights,Optimized Weights
count,10.0,10.0
mean,0.91095,0.91098
std,0.00302,0.002849
min,0.905,0.9069
25%,0.908875,0.90915
50%,0.9112,0.9107
75%,0.91355,0.912475
max,0.9143,0.9168


In [11]:
L = pd.DataFrame(np.array([losses, losses_opt]).T, columns=['Unoptimized Weights', 'Optimized Weights'])
L.to_csv('../../../results/loss_CNN_iter_weights_fmnist.csv')
L.describe()

Unnamed: 0,Unoptimized Weights,Optimized Weights
count,10.0,10.0
mean,0.255865,0.255596
std,0.007882,0.006449
min,0.243792,0.246551
25%,0.249812,0.250484
50%,0.255222,0.256682
75%,0.263459,0.258626
max,0.266264,0.26719


### CIFAR 10

In [35]:
np.random.seed(2020)

In [36]:
cifar = tf.keras.datasets.cifar10
(x_train, y_train), (x_test, y_test) = cifar.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0  # Converting interger values to floats (0 to 1)

In [37]:
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, train_size=0.85, stratify=None)

In [38]:
tester_model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu', input_shape=(32, 32, 3)),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu'),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(128, 3, padding='same', activation='relu'),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(256, 3, padding='same', activation='relu'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])
tester_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

In [39]:
trials = 5
low_tol = -1e-5
high_tol = 1e-5
filter_sizes = [3, 3, 3, 3, None]
layer_sizes = [32, 64, 128, 256, 64]
accs_opt = np.zeros((trials))
losses_opt = np.zeros((trials))
accs = np.zeros((trials))
losses = np.zeros((trials))
for trial in tqdm.trange(trials):
    
    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu', input_shape=(32, 32, 3)),
        tf.keras.layers.MaxPool2D(),
        tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu'),
        tf.keras.layers.MaxPool2D(),
        tf.keras.layers.Conv2D(128, 3, padding='same', activation='relu'),
        tf.keras.layers.MaxPool2D(),
        tf.keras.layers.Conv2D(256, 3, padding='same', activation='relu'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')
        ])
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    
    w = model.get_weights()
    tester_model.set_weights(w)
    tester_model.fit(x_train, y_train, verbose=0, epochs=5)
    losses[trial], accs[trial] = tester_model.evaluate(x_test, y_test, verbose=0)
    
    optimize_weights(model, tester_model, layer_sizes, filter_sizes, low_tol, high_tol, x_val, y_val, 3, 10, increasing=False, min_imp_percentage=0.4, input_nodes=3072)
    
    model.fit(x_train, y_train, verbose=0, epochs=5)
    losses_opt[trial], accs_opt[trial] = model.evaluate(x_test, y_test, verbose=0)

  0%|                                                                                            | 0/5 [14:55<?, ?it/s]


KeyboardInterrupt: 

In [None]:
A = pd.DataFrame(np.array([accs, accs_opt]).T, columns=['Unoptimized Weights', 'Optimized Weights'])
A.to_csv('../../../results/acc_CNN_iter_weights_cifar.csv')
A.describe()

In [None]:
L = pd.DataFrame(np.array([losses, losses_opt]).T, columns=['Unoptimized Weights', 'Optimized Weights'])
L.to_csv('../../../results/loss_CNN_iter_weights_cifar.csv')
L.describe()