In [1]:
from __future__ import absolute_import, division, print_function
from __future__ import unicode_literals

## Install Tensorflow

In [2]:
import tensorflow as tf
import numpy as np
import copy

## Load MNIST Dataset from the tensorflow datasets

In [3]:
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0  # Converting interger values to floats (0 to 1)

## Building the NN model (in this case a simple ANN)

In [4]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax')
])

## Setting up the optimizer and loss

In [5]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

## Training the model

In [6]:
model.fit(x_train, y_train, epochs=10)

Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1ab2a37a0c8>

## Evaluating the model

In [7]:
loss, acc = model.evaluate(x_test, y_test, verbose=2)

10000/10000 - 1s - loss: 0.0701 - accuracy: 0.9785


## Removing random number of nodes

In [182]:
n = 64
old = model.get_weights()

In [299]:
def remove_nodes(acc, loss, weights, n, to_test, x_train, y_train, v=0):
    check = 0
    new_loss = loss
    new_acc = acc
    best_score = 1e20
    best_model = copy.deepcopy(weights)
    while check < to_test:   
        new = copy.deepcopy(weights)
        to_drop = np.random.choice(len(new[1]), n, replace=False)
        for i in to_drop:
            new[0][:,i] = 0
            new[1][i] = 0
            new[2][i,:] = 0
        model.set_weights(new)
        new_loss, new_acc = model.evaluate(x_train, y_train, verbose=v)
        score = ((new_loss / loss) - 1) + ((new_acc / acc) - 1)
        if best_score > score:
            best_score = score
            best_model = copy.deepcopy(new)
            nodes_removed = to_drop.copy()
        check = check + 1
    return best_model, best_score, nodes_removed

In [150]:
check = 0
new_loss = loss
new_acc = acc
best_score = 1e20
best_model = copy.deepcopy(old)
while check < 100 and loss <= new_loss and acc >= new_acc:   
    new = copy.deepcopy(old)
    #for i in range(len(old)):
    #    new[i] = old[i].copy()
    to_drop = np.random.choice(len(new[1]), n, replace=False)
    for i in to_drop:
        new[0][:,i] = 0
        new[1][i] = 0
        new[2][i,:] = 0
    model.set_weights(new)
    new_loss, new_acc = model.evaluate(x_test, y_test, verbose=2)
    score = ((new_loss / loss) - 1) + ((new_acc / acc) - 1)
    if best_score > score:
        best_score = score
        best_model = copy.deepcopy(new)
    check = check + 1

10000/10000 - 0s - loss: 0.2113 - accuracy: 0.9519
10000/10000 - 0s - loss: 0.2117 - accuracy: 0.9511
10000/10000 - 0s - loss: 0.2398 - accuracy: 0.9410
10000/10000 - 0s - loss: 0.2234 - accuracy: 0.9451
10000/10000 - 0s - loss: 0.2492 - accuracy: 0.9394
10000/10000 - 0s - loss: 0.2221 - accuracy: 0.9494
10000/10000 - 0s - loss: 0.2384 - accuracy: 0.9431
10000/10000 - 0s - loss: 0.2517 - accuracy: 0.9285
10000/10000 - 0s - loss: 0.1937 - accuracy: 0.9526
10000/10000 - 0s - loss: 0.2069 - accuracy: 0.9509
10000/10000 - 0s - loss: 0.2534 - accuracy: 0.9355
10000/10000 - 0s - loss: 0.1823 - accuracy: 0.9541
10000/10000 - 0s - loss: 0.2461 - accuracy: 0.9292
10000/10000 - 0s - loss: 0.2586 - accuracy: 0.9285
10000/10000 - 0s - loss: 0.2883 - accuracy: 0.9278
10000/10000 - 0s - loss: 0.3213 - accuracy: 0.9015
10000/10000 - 0s - loss: 0.2768 - accuracy: 0.9268
10000/10000 - 0s - loss: 0.2958 - accuracy: 0.9192
10000/10000 - 0s - loss: 0.2661 - accuracy: 0.9284
10000/10000 - 0s - loss: 0.2104

In [151]:
best_model[1]

array([ 5.14561906e-02,  5.68875819e-02,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00, -1.59633637e-03, -1.32929876e-01, -5.46002425e-02,
       -5.79598919e-02, -7.24315643e-02,  4.81533911e-03,  0.00000000e+00,
       -1.66753039e-01,  0.00000000e+00,  0.00000000e+00,  1.76903591e-01,
        1.09480381e-01, -3.91760580e-02, -8.20288658e-02,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  6.64555505e-02,  1.73894558e-02,
       -6.39781430e-02,  0.00000000e+00,  1.12484261e-01, -8.43775086e-03,
        6.02599643e-02,  0.00000000e+00, -2.02100664e-01,  1.28891826e-01,
        0.00000000e+00,  2.36458983e-02,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  1.03695244e-01,  4.80066426e-02, -1.01917274e-02,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -8.61475617e-02,
       -5.66104650e-02,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
       -2.18478031e-02,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00, -

In [149]:
old[0][130]

array([-0.02772735,  0.03969655, -0.04373507, -0.05227876, -0.01540127,
        0.01939916, -0.12599927,  0.00782087, -0.28775257,  0.10967262,
        0.01176385,  0.01894165,  0.03409952,  0.09057614, -0.00560972,
        0.05311511, -0.10194276,  0.04806275, -0.0351391 , -0.06116132,
        0.15312247,  0.04763832, -0.13207886, -0.06956799,  0.12679482,
       -0.18160178, -0.14505222, -0.01961163, -0.09998371, -0.05521173,
        0.10041504, -0.05807279, -0.17420274,  0.13425428,  0.11885659,
        0.05859397,  0.05600267,  0.17821111, -0.01711084,  0.13274275,
        0.01539186,  0.0376355 ,  0.09195647,  0.05777685,  0.06526637,
       -0.06228716, -0.00629885, -0.07197394,  0.05084231, -0.08569249,
       -0.28437993, -0.06442408,  0.04789071, -0.05077734, -0.03325139,
        0.14767456, -0.07351412, -0.09289319, -0.22171189,  0.09691015,
        0.04114012, -0.21430951,  0.06850304,  0.07310192,  0.00652712,
        0.07554794,  0.11685141, -0.08168121,  0.08132137,  0.04

In [153]:
best_score = 0
best_model = copy.deepcopy(old)
to_test = 25
for i in range(1, 65):
    temp_model, temp_score = remove_nodes(acc, loss, old, i, to_test)
    if temp_score < best_score:
        best_model = temp_model
        best_score = temp_score
        print("Found new best model")

10000/10000 - 0s - loss: 0.0713 - accuracy: 0.9783
10000/10000 - 0s - loss: 0.0712 - accuracy: 0.9787
10000/10000 - 0s - loss: 0.0707 - accuracy: 0.9787
10000/10000 - 0s - loss: 0.0702 - accuracy: 0.9792
10000/10000 - 0s - loss: 0.0713 - accuracy: 0.9790
10000/10000 - 0s - loss: 0.0703 - accuracy: 0.9789
10000/10000 - 0s - loss: 0.0698 - accuracy: 0.9791
10000/10000 - 0s - loss: 0.0711 - accuracy: 0.9785
10000/10000 - 0s - loss: 0.0702 - accuracy: 0.9789
10000/10000 - 0s - loss: 0.0713 - accuracy: 0.9790
10000/10000 - 0s - loss: 0.0696 - accuracy: 0.9792
10000/10000 - 0s - loss: 0.0709 - accuracy: 0.9787
10000/10000 - 0s - loss: 0.0696 - accuracy: 0.9791
10000/10000 - 0s - loss: 0.0708 - accuracy: 0.9790
10000/10000 - 0s - loss: 0.0700 - accuracy: 0.9793
10000/10000 - 0s - loss: 0.0707 - accuracy: 0.9786
10000/10000 - 0s - loss: 0.0699 - accuracy: 0.9792
10000/10000 - 0s - loss: 0.0697 - accuracy: 0.9789
10000/10000 - 0s - loss: 0.0701 - accuracy: 0.9790
10000/10000 - 0s - loss: 0.0698

## Creating new restricted model

In [195]:
old = model.get_weights()

In [216]:
n = 2
best_weights, _, nodes_removed = remove_nodes(acc, loss, old, n, 50)

new_weights = [np.zeros((best_weights[0].shape[0], best_weights[0].shape[1] - n)), np.zeros((best_weights[1].shape[0] - n)), np.zeros((best_weights[2].shape[0] - n, best_weights[2].shape[1])), best_weights[3]]

j = 0
for i in range(len(best_weights[1])):
    if i not in nodes_removed:
        new_weights[0][:, j] = best_weights[0][:, i]
        new_weights[1][j] = best_weights[1][i]
        new_weights[2][j, :] = best_weights[2][i, :]
        j = j + 1
    
new_model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128 - n, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax')
])

new_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
new_model.set_weights(new_weights)

10000/10000 - 1s - loss: 0.0666 - accuracy: 0.9799
10000/10000 - 0s - loss: 0.0670 - accuracy: 0.9804
10000/10000 - 0s - loss: 0.0660 - accuracy: 0.9798
10000/10000 - 0s - loss: 0.0656 - accuracy: 0.9798
10000/10000 - 0s - loss: 0.0661 - accuracy: 0.9800
10000/10000 - 0s - loss: 0.0668 - accuracy: 0.9797
10000/10000 - 0s - loss: 0.0692 - accuracy: 0.9791
10000/10000 - 0s - loss: 0.0647 - accuracy: 0.9802
10000/10000 - 0s - loss: 0.0669 - accuracy: 0.9802
10000/10000 - 0s - loss: 0.0663 - accuracy: 0.9793
10000/10000 - 0s - loss: 0.0653 - accuracy: 0.9806
10000/10000 - 0s - loss: 0.0669 - accuracy: 0.9796
10000/10000 - 0s - loss: 0.0655 - accuracy: 0.9799
10000/10000 - 0s - loss: 0.0665 - accuracy: 0.9801
10000/10000 - 0s - loss: 0.0675 - accuracy: 0.9796
10000/10000 - 0s - loss: 0.0661 - accuracy: 0.9793
10000/10000 - 0s - loss: 0.0659 - accuracy: 0.9801
10000/10000 - 0s - loss: 0.0667 - accuracy: 0.9797
10000/10000 - 0s - loss: 0.0661 - accuracy: 0.9802
10000/10000 - 0s - loss: 0.0649

In [242]:
def shrink_model(model, shrinkage_factor, x_train, y_train, size, to_test, v=0):
    
    n = shrinkage_factor
    loss, acc = model.evaluate(x_train, y_train, verbose=2)
    old = model.get_weights()
    best_weights, _, nodes_removed = remove_nodes(acc, loss, old, n, to_test, x_train, y_train, v)

    new_weights = [np.zeros((best_weights[0].shape[0], best_weights[0].shape[1] - n)), np.zeros((best_weights[1].shape[0] - n)), np.zeros((best_weights[2].shape[0] - n, best_weights[2].shape[1])), best_weights[3]]

    j = 0
    for i in range(len(best_weights[1])):
        if i not in nodes_removed:
            new_weights[0][:, j] = best_weights[0][:, i]
            new_weights[1][j] = best_weights[1][i]
            new_weights[2][j, :] = best_weights[2][i, :]
            j = j + 1

    new_model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(size - n, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10, activation='softmax')
    ])

    new_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    new_model.set_weights(new_weights)
    return new_model, size-n

In [217]:
new_model.evaluate(x_test, y_test, verbose=2)

10000/10000 - 1s - loss: 0.0643 - accuracy: 0.9794


[0.06433191793382867, 0.9794]

In [218]:
new_model.fit(x_train, y_train, epochs=3)

Train on 60000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x2a950764408>

In [219]:
new_model.evaluate(x_test, y_test, verbose=2)

10000/10000 - 0s - loss: 0.0758 - accuracy: 0.9802


[0.07577427882198827, 0.9802]

In [245]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
size = 128
to_test = 25
for _ in range(4):
    model.fit(x_train, y_train, epochs=1)
    model, size = shrink_model(model, 8, x_train, y_train, size, to_test)
    print(len(model.get_weights()[1]))
model.fit(x_train, y_train, epochs=1)
model.evaluate(x_test, y_test, verbose=2)

Train on 60000 samples
60000/60000 - 2s - loss: 0.1337 - accuracy: 0.9624
60000/60000 - 2s - loss: 0.1440 - accuracy: 0.9610
60000/60000 - 2s - loss: 0.1446 - accuracy: 0.9606
60000/60000 - 2s - loss: 0.1446 - accuracy: 0.9596
60000/60000 - 2s - loss: 0.1464 - accuracy: 0.9599
60000/60000 - 2s - loss: 0.1412 - accuracy: 0.9625
60000/60000 - 2s - loss: 0.1460 - accuracy: 0.9597
60000/60000 - 2s - loss: 0.1465 - accuracy: 0.9599
60000/60000 - 2s - loss: 0.1433 - accuracy: 0.9606
60000/60000 - 2s - loss: 0.1484 - accuracy: 0.9600
60000/60000 - 2s - loss: 0.1432 - accuracy: 0.9612
60000/60000 - 2s - loss: 0.1429 - accuracy: 0.9603
60000/60000 - 2s - loss: 0.1427 - accuracy: 0.9614
60000/60000 - 2s - loss: 0.1453 - accuracy: 0.9600
60000/60000 - 2s - loss: 0.1454 - accuracy: 0.9611
60000/60000 - 2s - loss: 0.1398 - accuracy: 0.9624
60000/60000 - 2s - loss: 0.1470 - accuracy: 0.9610
60000/60000 - 2s - loss: 0.1482 - accuracy: 0.9590
60000/60000 - 2s - loss: 0.1446 - accuracy: 0.9614
60000/60

[0.08368590022437275, 0.975]

In [8]:
fashion_mnist = tf.keras.datasets.fashion_mnist
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

In [272]:
fashion_mnist = tf.keras.datasets.fashion_mnist
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
rep = 6

best_models = []
sizes = []
scores = []
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
start_weights = copy.deepcopy(model.get_weights())
model.fit(x_train, y_train, epochs=7)
loss, acc = model.evaluate(x_test, y_test, verbose=2)
print("#############################")
best_models += [model]
scores += [(loss, acc)]
sizes +=[128]
for i in range(1, 16):
    print(f"Starting to shrinking the model by {i}")
    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10, activation='softmax')
    ])

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.set_weights(start_weights)
    size = 128
    to_test = 25
    for _ in range(rep):
        model.fit(x_train, y_train, epochs=1)
        model, size = shrink_model(model, i, x_train, y_train, size, to_test)
    model.fit(x_train, y_train, epochs=1)
    loss, acc = model.evaluate(x_test, y_test, verbose=2)
    print("#############################")
    best_models += [model]
    scores += [(loss, acc)]
    sizes +=[128-(i*rep)]

Train on 60000 samples
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
10000/10000 - 0s - loss: 0.7238 - accuracy: 0.7112
#############################
Starting to shrinking the model by 1
Train on 60000 samples
60000/60000 - 2s - loss: 0.9136 - accuracy: 0.6790
Train on 60000 samples
60000/60000 - 2s - loss: 0.8217 - accuracy: 0.6990
Train on 60000 samples
60000/60000 - 2s - loss: 0.7798 - accuracy: 0.7017
Train on 60000 samples
60000/60000 - 3s - loss: 0.7434 - accuracy: 0.7127
Train on 60000 samples
60000/60000 - 4s - loss: 0.6480 - accuracy: 0.7321
Train on 60000 samples
60000/60000 - 2s - loss: 0.8912 - accuracy: 0.6470
Train on 60000 samples
10000/10000 - 0s - loss: 0.7619 - accuracy: 0.7253
#############################
Starting to shrinking the model by 2
Train on 60000 samples
60000/60000 - 3s - loss: 0.8591 - accuracy: 0.6884
Train on 60000 samples
60000/60000 - 2s - loss: 0.7847 - accuracy: 0.6908
Train on 60000 samples
60000/60000 - 3s - loss: 0.6563 -

In [278]:
print(scores)
print(sizes)

[(0.7237960828781128, 0.7112), (0.7618597736358642, 0.7253), (0.673652571439743, 0.745), (0.7662316777706146, 0.7151), (0.7450256084918976, 0.7132), (0.6963049783706665, 0.7402), (0.7658660774230956, 0.7014), (0.7515249763488769, 0.7133), (0.7926195541381836, 0.6945), (0.709289279460907, 0.7216), (0.7170024285316468, 0.7155), (0.8477518383979797, 0.6852), (0.8180793400764466, 0.6878), (0.7546764973640442, 0.708), (0.7301193749427796, 0.7202), (0.7619338217735291, 0.7205)]
[128, 122, 116, 110, 104, 98, 92, 86, 80, 74, 68, 62, 56, 50, 44, 38]


In [279]:
scores_plain = [scores[0]]
for i in range(1, len(scores)):
    print(f"Starting plain train of Dense size {sizes[i]}")
    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(sizes[i], activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10, activation='softmax')
    ])

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.fit(x_train, y_train, epochs=7)
    loss, acc = model.evaluate(x_test, y_test, verbose=2)
    scores_plain += [(loss, acc)]
    print("###############################")

Starting plain train of Dense size 122
Train on 60000 samples
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
10000/10000 - 1s - loss: 0.6936 - accuracy: 0.7615
###############################
Starting plain train of Dense size 116
Train on 60000 samples
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
10000/10000 - 1s - loss: 0.6456 - accuracy: 0.7546
###############################
Starting plain train of Dense size 110
Train on 60000 samples
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
10000/10000 - 1s - loss: 0.6715 - accuracy: 0.7370
###############################
Starting plain train of Dense size 104
Train on 60000 samples
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
10000/10000 - 1s - loss: 0.7064 - accuracy: 0.7583
###############################
Starting plain train of Dense size 98
Train on 60000 samples
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
10000/100

In [280]:
print(scores)
print(scores_plain)

[(0.7237960828781128, 0.7112), (0.7618597736358642, 0.7253), (0.673652571439743, 0.745), (0.7662316777706146, 0.7151), (0.7450256084918976, 0.7132), (0.6963049783706665, 0.7402), (0.7658660774230956, 0.7014), (0.7515249763488769, 0.7133), (0.7926195541381836, 0.6945), (0.709289279460907, 0.7216), (0.7170024285316468, 0.7155), (0.8477518383979797, 0.6852), (0.8180793400764466, 0.6878), (0.7546764973640442, 0.708), (0.7301193749427796, 0.7202), (0.7619338217735291, 0.7205)]
[(0.7237960828781128, 0.7112), (0.6935532826900482, 0.7615), (0.6456035744667054, 0.7546), (0.6715431744098663, 0.737), (0.7063772183418274, 0.7583), (0.6979923491477966, 0.7233), (0.7351104323387146, 0.719), (0.6707839384555817, 0.7276), (0.7102795698642731, 0.7248), (0.7371831143856049, 0.7241), (0.7254973567962647, 0.7178), (0.7660896290302277, 0.6617), (0.7337547297000885, 0.7126), (0.920928341293335, 0.6198), (0.8901723062515259, 0.6578), (0.8812190089702606, 0.6335)]


In [287]:
for i in range(len(scores)):
    print("Loss change:", (scores_plain[i][0] - scores[i][0])/scores_plain[i][0] *100, "--- Acc change:", -(scores_plain[i][1] - scores[i][1]) / scores_plain[i][1] * 100)

Loss change: 0.0 --- Acc change: -0.0
Loss change: -9.84877336040848 --- Acc change: -4.753773659467697
Loss change: -4.344616120845869 --- Acc change: -1.2721950188279152
Loss change: -14.100136367845293 --- Acc change: -2.971505932509899
Loss change: -5.471352861689768 --- Acc change: -5.947518348693848
Loss change: 0.2417463141523362 --- Acc change: 2.3365136235952377
Loss change: -4.183812898224496 --- Acc change: -2.447844296693802
Loss change: -12.036817410863183 --- Acc change: -1.9653640687465668
Loss change: -11.592616170791008 --- Acc change: -4.180458188056946
Loss change: 3.78384072835765 --- Acc change: -0.3452558536082506
Loss change: 1.1709109874818622 --- Acc change: -0.32042686361819506
Loss change: -10.659615568889249 --- Acc change: 3.5514529794454575
Loss change: -11.492206722922866 --- Acc change: -3.4802138805389404
Loss change: 18.052636288270783 --- Acc change: 14.230403304100037
Loss change: 17.979994455536563 --- Acc change: 9.486163407564163
Loss change: 13.5

In [303]:
to_remove_list = np.arange(1, 65)
num_rep = 100
loss_diff = np.zeros(num_rep)
acc_diff = np.zeros(num_rep)
loss_change = np.zeros(num_rep)
acc_change = np.zeros(num_rep)
nodes_removed_list = []
num_nodes_removed = np.zeros(num_rep)

In [304]:
print(to_remove_list)

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64]


In [305]:
for i in range(num_rep):
    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
    
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.fit(x_train, y_train, epochs=10)
    loss, acc = model.evaluate(x_test, y_test, verbose=2)
    
    n = np.random.choice(to_remove_list, 1)
    
    best_weights, _, nodes_removed = remove_nodes(acc, loss, model.get_weights(), n, 1, x_train, y_train, 0)
    
    model.set_weights(best_weights)
    print(n)
    
    loss_new, acc_new = model.evaluate(x_test, y_test, verbose=2)
    
    loss_diff[i] = loss - loss_new
    acc_diff[i] = acc_new - acc
    loss_change[i] = loss_diff[i] / loss * 100
    acc_change[i] = acc_diff[i] / acc * 100
    num_nodes_removed[i] = n
    nodes_removed_list += [nodes_removed]
    

Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
10000/10000 - 1s - loss: 0.5208 - accuracy: 0.8294
[20]
10000/10000 - 3s - loss: 0.8701 - accuracy: 0.7210
Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
10000/10000 - 3s - loss: 0.5803 - accuracy: 0.7977
[38]
10000/10000 - 1s - loss: 1.0578 - accuracy: 0.6841
Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
10000/10000 - 1s - loss: 0.5793 - accuracy: 0.8219
[62]
10000/10000 - 1s - loss: 2.2972 - accuracy: 0.3904
Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
10000/10000 - 1s - loss: 0.5418 - accuracy: 0.8113
[14]
10000/10000 - 1s - loss: 0.9529 - accuracy: 0.6612
Train on 60000 samples
Epoch 1/10
Ep

In [323]:
for i in range(1, 65):
    print(f"{i} nodes removed")
    print("Loss changes:",loss_change[num_nodes_removed == i])
    print("Accuracy changes:",acc_change[num_nodes_removed == i])
    print("#########################")

1 nodes removed
Loss changes: [0. 0.]
Accuracy changes: [0. 0.]
#########################
2 nodes removed
Loss changes: [0. 0.]
Accuracy changes: [0. 0.]
#########################
3 nodes removed
Loss changes: [0.]
Accuracy changes: [0.]
#########################
4 nodes removed
Loss changes: [0.04453448]
Accuracy changes: [0.01216022]
#########################
5 nodes removed
Loss changes: [-0.18496728]
Accuracy changes: [0.]
#########################
6 nodes removed
Loss changes: []
Accuracy changes: []
#########################
7 nodes removed
Loss changes: [-42.59455126  -8.18437804  -4.82913439]
Accuracy changes: [-10.12460556  -2.43695922  -1.39381151]
#########################
8 nodes removed
Loss changes: [ 9.83436119e-05 -1.00845591e+02]
Accuracy changes: [  0.         -19.79977819]
#########################
9 nodes removed
Loss changes: [-0.55514907]
Accuracy changes: [-0.08978924]
#########################
10 nodes removed
Loss changes: []
Accuracy changes: []
##############

In [342]:
model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
    
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=10)
loss, acc = model.evaluate(x_test, y_test, verbose=2)
end_not_reached = True
improved = False
size = 128
tol = -1e-30
current_pos = 0
best_pos = -1
best_change = tol
original = model.get_weights()
bas = [acc]
bls = [loss]
best_weights = model.get_weights()
nodes_removed = []
best_acc = 0
best_loss = 1e20
ol = loss
oa = acc
num_removed = 0
while end_not_reached or improved:
    if not(end_not_reached):
        end_not_reached = True
        improved = False
        current_pos = 0
        size -= 1
        nodes_removed += [best_pos]
        best_weights[0][:,best_pos] = 0
        best_weights[1][best_pos] = 0
        best_weights[2][best_pos,:] = 0
        best_pos = -1
        tol -= best_change
        ol = best_loss
        oa = best_acc
        bas += [best_acc]
        bls += [best_loss]
        print("Improvement has occured!! Accuracy:", best_acc, "--- Loss:", best_loss, '--- Change:', best_change, '--- New tol:', tol)
        best_change = tol
        num_removed += 1
    if current_pos in nodes_removed:
        current_pos += 1
        if current_pos - num_removed >= size:
            end_not_reached = False
        continue
    w = copy.deepcopy(best_weights)
    w[0][:,current_pos] = 0
    w[1][current_pos] = 0
    w[2][current_pos,:] = 0
    model.set_weights(w)
    nl, na = model.evaluate(x_test, y_test, verbose=0)
    if 0.8*(na - oa) + 0.2*(ol - nl) >= best_change:
        best_change = 0.8*(na - oa) + 0.2*(ol - nl)
        print(best_change)
        best_pos = current_pos
        improved = True
        best_acc = na
        best_loss = nl
        print("Found something better")
    current_pos += 1
    if current_pos - num_removed >= size:
        end_not_reached = False
    if current_pos%20 == 0:
        print("Did 20 iterations")

model.set_weights(best_weights)
loss2, acc2 = model.evaluate(x_test, y_test, verbose=2)

Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
10000/10000 - 3s - loss: 0.5573 - accuracy: 0.8277
0.0
Found something better
0.0
Found something better
0.0
Found something better
0.0
Found something better
0.0
Found something better
0.0
Found something better
0.0
Found something better
0.0
Found something better
0.0
Found something better
1.5454864501940337e-06
Found something better
Did 20 iterations
Did 20 iterations
Did 20 iterations
Did 20 iterations
0.0005999746513366766
Found something better
0.00776419440269469
Found something better
Did 20 iterations
Did 20 iterations
Improvement has occured!! Accuracy: 0.8304 --- Loss: 0.5292710494995118 --- Change: 0.00776419440269469 --- New tol: -0.00776419440269469
0.0
Found something better
0.0
Found something better
0.0
Found something better
0.0
Found something better
0.0
Found something better
0.0
Found something better
0.0
Found something better
0.0

In [347]:
print(loss - loss2)
print(acc2 - acc)
print((loss - loss2)/loss * 100)
print((acc2 - acc)/acc * 100)
print(num_removed)
print(best_weights[1])

0.024536472129821796
0.00079995394
4.402802500385656
0.09664780809544027
116
[ 0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
 -0.55492324  0.          0.          0.          0.17086436  0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.         -0.5797819   0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.37631464  0.
  0.21805815  0.          0.          0.          0.7172741   0.
  0.8216421   0.          0.          0.          0.         -0.48516443
  0.          0.          0.          0.          0.          0.
  0. 

In [352]:
or_model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
    
or_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
or_model.set_weights(original)
print(or_model.get_weights()[1])

[-1.35520445e-02 -8.29671975e-03 -5.35568409e-02 -3.01919542e-02
 -3.61214168e-02 -1.26946131e-02 -3.29394825e-02  9.89562250e-04
  9.76379216e-03 -1.55554097e-02 -2.13282239e-02 -3.72213423e-02
 -2.39025634e-02 -1.00066233e-02 -3.21980081e-02 -2.01076251e-02
 -9.82459355e-03 -4.72169966e-02 -3.93438041e-02 -3.32839303e-02
 -8.45289230e-02 -3.72487418e-02 -4.38574478e-02  9.48753580e-03
 -5.54923236e-01 -1.77981462e-02 -4.05660905e-02 -3.53319235e-02
  1.70864359e-01 -4.53706421e-02  7.00719946e-04  1.48898317e-02
 -4.26049978e-02 -4.43987511e-02 -2.29403414e-02 -5.45145497e-02
  1.91695374e-02  1.64827798e-02 -1.67541616e-02  1.27871626e-03
 -8.86084046e-03 -3.19623761e-02 -4.19521742e-02 -1.56642459e-02
 -5.44507839e-02 -1.68461706e-02 -3.23471017e-02 -1.64868310e-02
 -2.58421432e-02 -5.79781890e-01  4.44697728e-03 -3.34452763e-02
 -5.26826642e-03 -2.52922121e-02 -2.47266442e-02 -6.63382933e-03
 -2.96660177e-02 -3.14345919e-02 -1.28267228e-03 -3.54811139e-02
 -2.05833837e-02  9.42861

In [351]:
model.evaluate(x_train, y_train, verbose=2)

60000/60000 - 4s - loss: 0.4512 - accuracy: 0.8487


[0.4511857439637184, 0.84866667]

In [353]:
or_model.evaluate(x_train, y_train, verbose=2)

60000/60000 - 3s - loss: 0.4545 - accuracy: 0.8495


[0.45453119107882184, 0.8494833]

In [386]:
model.evaluate(x_test, y_test, verbose=2)

10000/10000 - 1s - loss: 0.5328 - accuracy: 0.8285


[0.5327556645870208, 0.8285]

In [387]:
or_model.evaluate(x_test, y_test, verbose=2)

10000/10000 - 1s - loss: 0.5573 - accuracy: 0.8277


[0.5572921367168426, 0.8277]

In [373]:
red_model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(12, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
    
red_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
red_model.fit(x_train, y_train, epochs=100, verbose=1)
red_model.evaluate(x_test, y_test, verbose=2)

Train on 60000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/10

[0.7859716332435608, 0.7123]

In [365]:
red_model.fit(x_train, y_train, epochs=15, verbose=1)
red_model.evaluate(x_test, y_test, verbose=2)

Train on 60000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
10000/10000 - 0s - loss: 0.9421 - accuracy: 0.6255


[0.9421217614173889, 0.6255]

In [375]:
!pip install -q pyyaml h5py  
# Required to save models in HDF5 format

In [376]:
from __future__ import absolute_import, division, print_function, unicode_literals

import os

import tensorflow as tf
from tensorflow import keras

print(tf.version.VERSION)

2.1.0


In [377]:
model.save_weights('./reduced/fashion_mnist_128_12')

In [378]:
or_model.save_weights('./original/fashion_mnist_128_12')

In [379]:
new_weights = [np.zeros((best_weights[0].shape[0], best_weights[0].shape[1] - num_removed)), np.zeros((best_weights[1].shape[0] - num_removed)), np.zeros((best_weights[2].shape[0] - num_removed, best_weights[2].shape[1])), best_weights[3]]

j = 0
for i in range(len(best_weights[1])):
    if i not in nodes_removed:
        new_weights[0][:, j] = best_weights[0][:, i]
        new_weights[1][j] = best_weights[1][i]
        new_weights[2][j, :] = best_weights[2][i, :]
        j = j + 1

In [385]:
red_model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(12, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
    
red_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
red_model.set_weights(new_weights)
red_model.save_weights('./full_reduced/fashion_mnist_128_12')
red_model.evaluate(x_test, y_test, verbose=2)

10000/10000 - 1s - loss: 0.5328 - accuracy: 0.8285


[0.5327556574344635, 0.8285]

In [399]:
red_model.set_weights(new_weights)
red_model.fit(x_train, y_train, epochs=15, verbose=1, batch_size=4096)
red_model.evaluate(x_test, y_test, verbose=2, batch_size=256)

Train on 60000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
10000/10000 - 0s - loss: 0.4444 - accuracy: 0.8531


[0.44439594821929934, 0.8531]

In [407]:
new_model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
new_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
new_model.set_weights(best_weights)
new_model.evaluate(x_test, y_test, verbose=2)
new_model.fit(x_train, y_train, epochs=10, verbose=1, batch_size=2048)
new_model.evaluate(x_test, y_test, verbose=2)

10000/10000 - 1s - loss: 0.5328 - accuracy: 0.8285
Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
10000/10000 - 1s - loss: 0.4515 - accuracy: 0.8530


[0.4514933671712875, 0.853]

In [411]:
loss, acc = new_model.evaluate(x_test, y_test, verbose=2)
end_not_reached = True
improved = False
size = 128
tol = -1e-30
current_pos = 0
best_pos = -1
best_change = tol
original2 = new_model.get_weights()
bas2 = [acc]
bls2 = [loss]
best_weights2 = new_model.get_weights()
nodes_removed2 = []
best_acc = 0
best_loss = 1e20
ol = loss
oa = acc
num_removed2 = 0
while end_not_reached or improved:
    if not(end_not_reached):
        end_not_reached = True
        improved = False
        current_pos = 0
        size -= 1
        nodes_removed2 += [best_pos]
        best_weights2[0][:,best_pos] = 0
        best_weights2[1][best_pos] = 0
        best_weights2[2][best_pos,:] = 0
        best_pos = -1
        tol -= best_change
        ol = best_loss
        oa = best_acc
        bas2 += [best_acc]
        bls2 += [best_loss]
        print("Improvement has occured!! Accuracy:", best_acc, "--- Loss:", best_loss, '--- Change:', best_change, '--- New tol:', tol)
        best_change = tol
        num_removed2 += 1
    if current_pos in nodes_removed2:
        current_pos += 1
        if current_pos - num_removed2 >= size:
            end_not_reached = False
        continue
    w = copy.deepcopy(best_weights2)
    w[0][:,current_pos] = 0
    w[1][current_pos] = 0
    w[2][current_pos,:] = 0
    new_model.set_weights(w)
    nl, na = new_model.evaluate(x_test, y_test, verbose=0)
    print(f"Node {current_pos}:", 0.8*(na - oa) + 0.2*(ol - nl))
    if 0.8*(na - oa) + 0.2*(ol - nl) > best_change:
        best_change = 0.8*(na - oa) + 0.2*(ol - nl)
        print(best_change)
        best_pos = current_pos
        improved = True
        best_acc = na
        best_loss = nl
        print("Found something better")
    current_pos += 1
    if current_pos - num_removed2 >= size:
        end_not_reached = False
    if current_pos%20 == 0:
        print("Did 20 iterations")

new_model.set_weights(best_weights2)
loss2, acc2 = new_model.evaluate(x_test, y_test, verbose=2)

10000/10000 - 1s - loss: 0.4515 - accuracy: 0.8530
Node 0: 0.0
0.0
Found something better
Node 1: 0.0
Node 2: 0.0
Node 3: 0.0
Node 4: 0.0
Node 5: 0.0
Node 6: 0.0
Node 7: 0.0
Node 8: 0.0
Node 9: 0.0
Node 10: 0.0
Node 11: 0.0
Node 12: 0.0
Node 13: 0.0
Node 14: 0.0
Node 15: 0.0
Node 16: 0.0
Node 17: 0.0
Node 18: 0.0
Node 19: 0.0
Did 20 iterations
Node 20: 0.0
Node 21: 0.0
Node 22: 0.0
Node 23: 0.0
Node 24: -0.18667189320087435
Node 25: 0.0
Node 26: 0.0
Node 27: 0.0
Node 28: -0.009689953212738045
Node 29: 0.0
Node 30: 0.0
Node 31: 0.0
Node 32: 0.0
Node 33: 0.0
Node 34: 0.0
Node 35: 0.0
Node 36: 0.0
Node 37: 0.0
Node 38: 0.0
Node 39: 0.0
Did 20 iterations
Node 40: 0.0
Node 41: 0.0
Node 42: 0.0
Node 43: 0.0
Node 44: 0.0
Node 45: 0.0
Node 46: 0.0
Node 47: 0.0
Node 48: 0.0
Node 49: -0.10380605744838717
Node 50: 0.0
Node 51: 0.0
Node 52: 0.0
Node 53: 0.0
Node 54: 0.0
Node 55: 0.0
Node 56: 0.0
Node 57: 0.0
Node 58: 0.0
Node 59: 0.0
Did 20 iterations
Node 60: 0.0
Node 61: 0.0
Node 62: 0.0
Node 63

KeyboardInterrupt: 

In [12]:
tester_model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
tester_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [417]:
l, a = or_model.evaluate(x_test, y_test, verbose=2)
or_weights = or_model.get_weights()
size = 128
for i in range(128):
    w = copy.deepcopy(or_weights)
    w[0][:,i] = 0
    w[1][i] = 0
    w[2][i,:] = 0
    tester_model.set_weights(w)
    nl, na = tester_model.evaluate(x_test, y_test, verbose=0)
    print(f"Node {i}:", 0.6*(na - a) + 0.4*(l - nl))

10000/10000 - 1s - loss: 0.5573 - accuracy: 0.8277
Node 0: 0.0
Node 1: 0.0
Node 2: 0.0
Node 3: 0.0
Node 4: 0.0
Node 5: 0.0
Node 6: 0.0
Node 7: 0.0
Node 8: 0.0
Node 9: 3.0909729003880674e-06
Node 10: 0.0
Node 11: 0.0
Node 12: 0.0
Node 13: 0.0
Node 14: 0.0
Node 15: 0.0
Node 16: 0.0
Node 17: 0.0
Node 18: 0.0
Node 19: 0.0
Node 20: 0.0
Node 21: 0.0
Node 22: 0.0
Node 23: 0.0
Node 24: -0.21466120378494266
Node 25: 0.0
Node 26: 0.0
Node 27: 0.0
Node 28: -0.0012001917839050249
Node 29: 0.0
Node 30: 0.0
Node 31: 0.0
Node 32: 0.0
Node 33: 0.0
Node 34: 0.0
Node 35: 0.0
Node 36: 0.0
Node 37: 0.0
Node 38: 0.0
Node 39: 0.0
Node 40: -3.36151123070394e-07
Node 41: 0.0
Node 42: 0.0
Node 43: -2.3934936523417607e-06
Node 44: 0.0
Node 45: 0.0
Node 46: 0.0
Node 47: 0.0
Node 48: 0.0
Node 49: -0.10663373857498168
Node 50: -4.79598999025832e-06
Node 51: 0.0
Node 52: 0.0
Node 53: 0.0
Node 54: 0.0
Node 55: 0.0
Node 56: 0.0
Node 57: -1.3974075317380042e-05
Node 58: 0.0
Node 59: 0.0
Node 60: 0.0
Node 61: -0.002966

In [423]:
num_test = 20
num_zeros = np.zeros(num_test)
num_worse = np.zeros(num_test)
num_important = np.zeros(num_test)
losses = np.zeros(num_test)
accs = np.zeros(num_test)
zero_nodes = []
worsening_nodes = []
important_nodes = []
tol = -1e-4
for j in range(num_test):
    blank_model = tf.keras.models.Sequential([
            tf.keras.layers.Flatten(input_shape=(28, 28)),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(10, activation='softmax')
        ])
    blank_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    blank_model.fit(x_train, y_train, epochs=10)
    l, a = blank_model.evaluate(x_test, y_test, verbose=2)
    losses[j] = l
    accs[j] = a
    z = []
    wr = []
    imp = []
    for i in range(128):
        w = blank_model.get_weights()
        w[0][:,i] = 0
        w[1][i] = 0
        w[2][i,:] = 0
        tester_model.set_weights(w)
        nl, na = tester_model.evaluate(x_test, y_test, verbose=0)
        change = 0.8*(na - a) + 0.2*(l - nl)
        if change <= 0 and change >= tol:
            num_zeros[j] += 1
            z += [i]
        elif change > 0:
            num_worse[j] += 1
            wr += [i]
        else:
            num_important[j] += 1
            imp += [i]
    zero_nodes += [z]
    worsening_nodes += [wr]
    important_nodes += [imp]

Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
10000/10000 - 1s - loss: 0.5564 - accuracy: 0.8208
Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
10000/10000 - 1s - loss: 0.5633 - accuracy: 0.8223
Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
10000/10000 - 1s - loss: 0.5656 - accuracy: 0.8165
Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
10000/10000 - 1s - loss: 0.5629 - accuracy: 0.8139
Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
10000/10000 - 1s - loss: 0.5729 - accuracy: 0.8215
Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/

In [424]:
print(num_zeros)
print(num_worse)
print(num_important)

[110. 113. 106. 109. 108. 112. 106. 104. 108. 112. 109. 113. 110. 109.
 109. 111. 113. 108. 107. 113.]
[3. 2. 6. 5. 7. 3. 5. 7. 2. 3. 4. 2. 1. 2. 3. 4. 4. 4. 6. 1.]
[15. 13. 16. 14. 13. 13. 17. 17. 18. 13. 15. 13. 17. 17. 16. 13. 11. 16.
 15. 14.]


# Trying to reduce overfitting through node removal

In [10]:
model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
    
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=200)

Train on 60000 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/20

<tensorflow.python.keras.callbacks.History at 0x1ab4ec54a48>

In [11]:
model.evaluate(x_test, y_test, verbose=2)

10000/10000 - 0s - loss: 1.1750 - accuracy: 0.8174


[1.1749584731578826, 0.8174]

In [17]:
l, a = model.evaluate(x_test, y_test, verbose=2)
or_weights = model.get_weights()
size = 128
for i in range(128):
    w = copy.deepcopy(or_weights)
    w[0][:,i] = 0
    w[1][i] = 0
    w[2][i,:] = 0
    tester_model.set_weights(w)
    nl, na = tester_model.evaluate(x_test, y_test, verbose=0)
    print(f"Node {i}:", 0.*(na - a) + 1.0*(l - nl))

10000/10000 - 0s - loss: 1.1750 - accuracy: 0.8174
Node 0: -0.0022306003236771013
Node 1: 0.0
Node 2: -0.23230817093372347
Node 3: 0.0
Node 4: 0.0
Node 5: 0.0002523882102965835
Node 6: 0.0
Node 7: 0.0
Node 8: 0.0
Node 9: 0.0
Node 10: 0.0
Node 11: 0.0
Node 12: 0.00016146503448477567
Node 13: 0.0
Node 14: 0.0
Node 15: 0.0
Node 16: 0.0
Node 17: 0.0
Node 18: 0.0
Node 19: 0.0
Node 20: 0.0
Node 21: 0.0
Node 22: 0.0
Node 23: 0.0
Node 24: 0.0
Node 25: 0.0
Node 26: 0.0
Node 27: 0.0
Node 28: 0.0
Node 29: -0.10123041025638593
Node 30: 0.0
Node 31: 0.0
Node 32: 0.0
Node 33: 0.0
Node 34: 0.0
Node 35: 0.0
Node 36: 0.0
Node 37: 0.0
Node 38: 0.0
Node 39: 0.0
Node 40: 0.0
Node 41: 0.0
Node 42: 0.0
Node 43: 0.0
Node 44: 0.0
Node 45: -0.010169116518497478
Node 46: -0.20006351330280317
Node 47: 0.0
Node 48: 0.0
Node 49: 0.0
Node 50: 0.0
Node 51: 0.0
Node 52: 0.0
Node 53: 0.0
Node 54: 0.0
Node 55: -0.02401607532978074
Node 56: 0.0
Node 57: 0.0
Node 58: -0.5975315176916123
Node 59: -0.26955549643039717
Node

In [35]:
loss, acc = model.evaluate(x_test, y_test, verbose=2)
end_not_reached = True
improved = False
size = 128
tol = -1e-30
current_pos = 0
best_pos = -1
best_change = tol
original2 = model.get_weights()
bas2 = [acc]
bls2 = [loss]
best_weights2 = model.get_weights()
nodes_removed2 = []
best_acc = 0
best_loss = 1e20
ol = loss
oa = acc
num_removed2 = 0
while end_not_reached or improved:
    if not(end_not_reached):
        end_not_reached = True
        improved = False
        current_pos = 0
        size -= 1
        nodes_removed2 += [best_pos]
        best_weights2[0][:,best_pos] = 0
        best_weights2[1][best_pos] = 0
        best_weights2[2][best_pos,:] = 0
        best_pos = -1
        #tol -= best_change
        ol = best_loss
        oa = best_acc
        bas2 += [best_acc]
        bls2 += [best_loss]
        print("Improvement has occured!! Accuracy:", best_acc, "--- Loss:", best_loss, '--- Change:', best_change, '--- New tol:', tol)
        best_change = tol
        num_removed2 += 1
    if current_pos in nodes_removed2:
        current_pos += 1
        if current_pos - num_removed2 >= size:
            end_not_reached = False
        continue
    w = copy.deepcopy(best_weights2)
    w[0][:,current_pos] = 0
    w[1][current_pos] = 0
    w[2][current_pos,:] = 0
    tester_model.set_weights(w)
    nl, na = tester_model.evaluate(x_test, y_test, verbose=0)
    if 0.1*(na - oa) + 0.9*(ol - nl) > best_change:
        best_change = 0.1*(na - oa) + 0.9*(ol - nl)
        print(best_change)
        best_pos = current_pos
        improved = True
        best_acc = na
        best_loss = nl
        print("Found something better")
    current_pos += 1
    if current_pos - num_removed2 >= size:
        end_not_reached = False
    if current_pos%20 == 0:
        print("Did 20 iterations")

tester_model.set_weights(best_weights2)
loss2, acc2 = tester_model.evaluate(x_test, y_test, verbose=2)

10000/10000 - 0s - loss: 1.1750 - accuracy: 0.8174
0.0
Found something better
0.0002523882102965835
Found something better
Did 20 iterations
Did 20 iterations
Did 20 iterations
0.0029544466590880392
Found something better
Did 20 iterations
0.1240757795238494
Found something better
Did 20 iterations
Did 20 iterations
Improvement has occured!! Accuracy: 0.7968 --- Loss: 1.034807611322403 --- Change: 0.1240757795238494 --- New tol: -1e-30
0.0
Found something better
0.000252382249832106
Found something better
Did 20 iterations
Did 20 iterations
Did 20 iterations
0.0029544418573378864
Found something better
Did 20 iterations
0.0032456126403809017
Found something better
Did 20 iterations
0.008490776958465673
Found something better
Did 20 iterations
Improvement has occured!! Accuracy: 0.7842 --- Loss: 1.0239734141826629 --- Change: 0.008490776958465673 --- New tol: -1e-30
0.0
Found something better
0.0002523882102965835
Found something better
Did 20 iterations
Did 20 iterations
0.009005169973

In [36]:
for i in nodes_removed2:
    best_weights2[0][:,i] = np.random.normal(0, 2/np.sqrt(28*28 + 128), 784)
    best_weights2[1][i] = 0
    best_weights2[2][i,:] = np.random.normal(0, 2/np.sqrt(138), 10)

In [39]:
new_model = tf.keras.models.Sequential()
new_model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
new_model.add(tf.keras.layers.Dense(128, activation='relu'))
new_model.add(tf.keras.layers.Dense(10, activation='softmax'))

new_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

new_model.set_weights(best_weights2)

new_model.evaluate(x_test, y_test, verbose=2)
new_model.fit(x_train, y_train, epochs=5)
new_model.evaluate(x_test, y_test, verbose=2)

10000/10000 - 1s - loss: 214.6456 - accuracy: 0.2633
Train on 60000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
10000/10000 - 1s - loss: 0.9824 - accuracy: 0.8258


[0.9823681085109711, 0.8258]

In [25]:
loss, acc = model.evaluate(x_test, y_test, verbose=2)
end_not_reached = True
improved = False
size = 128
tol = -1e-30
current_pos = 0
best_pos = -1
best_change = tol
original2 = model.get_weights()
bas2 = [acc]
bls2 = [loss]
best_weights2 = model.get_weights()
nodes_removed2 = []
best_acc = 0
best_loss = 1e20
l = loss
a = acc
num_removed2 = 0

for i in range(128):
    w = copy.deepcopy(original2)
    w[0][:,i] = 0
    w[1][i] = 0
    w[2][i,:] = 0
    tester_model.set_weights(w)
    nl, na = tester_model.evaluate(x_test, y_test, verbose=0)
    change = 0.*(na - a) + 1.0*(l - nl)
    print(f"Node {i}:", change)
    if change > tol:
        nodes_removed2 += [i]
        num_removed2 += 1
        
for i in nodes_removed2:
    best_weights2[0][:,i] = 0
    best_weights2[1][i] = 0
    best_weights2[2][i,:] = 0

tester_model.set_weights(best_weights2)
loss2, acc2 = tester_model.evaluate(x_test, y_test, verbose=2)

10000/10000 - 0s - loss: 1.1750 - accuracy: 0.8174
Node 0: -0.0024451125621796077
Node 1: 0.0
Node 2: -0.24894241242408754
Node 3: 0.0
Node 4: 0.0
Node 5: 0.00026931838989252554
Node 6: 0.0
Node 7: 0.0
Node 8: 0.0
Node 9: 0.0
Node 10: 0.0
Node 11: 0.0
Node 12: 0.00017940559387197297
Node 13: 0.0
Node 14: 0.0
Node 15: 0.0
Node 16: 0.0
Node 17: 0.0
Node 18: 0.0
Node 19: 0.0
Node 20: 0.0
Node 21: 0.0
Node 22: 0.0
Node 23: 0.0
Node 24: 0.0
Node 25: 0.0
Node 26: 0.0
Node 27: 0.0
Node 28: 0.0
Node 29: -0.10543379244804396
Node 30: 0.0
Node 31: 0.0
Node 32: 0.0
Node 33: 0.0
Node 34: 0.0
Node 35: 0.0
Node 36: 0.0
Node 37: 0.0
Node 38: 0.0
Node 39: 0.0
Node 40: 0.0
Node 41: 0.0
Node 42: 0.0
Node 43: 0.0
Node 44: 0.0
Node 45: -0.011132357144355787
Node 46: -0.2188261268138887
Node 47: 0.0
Node 48: 0.0
Node 49: 0.0
Node 50: 0.0
Node 51: 0.0
Node 52: 0.0
Node 53: 0.0
Node 54: 0.0
Node 55: -0.02602897639274615
Node 56: 0.0
Node 57: 0.0
Node 58: -0.6538350210666657
Node 59: -0.29442833447456374
Node

In [31]:
l, a = model.evaluate(x_test, y_test, verbose=2)
or_weights = model.get_weights()
size = 128
worst_remove = -1
wc = 0
w2 = model.get_weights()
for i in range(128):
    w = copy.deepcopy(or_weights)
    w[0][:,i] = 0
    w[1][i] = 0
    w[2][i,:] = 0
    tester_model.set_weights(w)
    nl, na = tester_model.evaluate(x_test, y_test, verbose=0)
    print(f"Node {i}:", 0.*(na - a) + 1.0*(l - nl))
    if 0.*(na - a) + 1.0*(l - nl) < wc:
        worst_remove = i
        wc = (l - nl)
w2[0][:,worst_remove] = 0
w2[1][worst_remove] = 0
w2[2][worst_remove,:] = 0
tester_model.set_weights(w2)
loss2, acc2 = tester_model.evaluate(x_test, y_test, verbose=2)
l = loss2
a = acc2
for i in range(128):
    w = copy.deepcopy(w2)
    w[0][:,i] = 0
    w[1][i] = 0
    w[2][i,:] = 0
    tester_model.set_weights(w)
    nl, na = tester_model.evaluate(x_test, y_test, verbose=0)
    print(f"Node {i}:", 0.*(na - a) + 1.0*(l - nl))

10000/10000 - 0s - loss: 1.1750 - accuracy: 0.8174
Node 0: -0.0024451125621796077
Node 1: 0.0
Node 2: -0.24894241242408754
Node 3: 0.0
Node 4: 0.0
Node 5: 0.00026931838989252554
Node 6: 0.0
Node 7: 0.0
Node 8: 0.0
Node 9: 0.0
Node 10: 0.0
Node 11: 0.0
Node 12: 0.00017940559387197297
Node 13: 0.0
Node 14: 0.0
Node 15: 0.0
Node 16: 0.0
Node 17: 0.0
Node 18: 0.0
Node 19: 0.0
Node 20: 0.0
Node 21: 0.0
Node 22: 0.0
Node 23: 0.0
Node 24: 0.0
Node 25: 0.0
Node 26: 0.0
Node 27: 0.0
Node 28: 0.0
Node 29: -0.10543379244804396
Node 30: 0.0
Node 31: 0.0
Node 32: 0.0
Node 33: 0.0
Node 34: 0.0
Node 35: 0.0
Node 36: 0.0
Node 37: 0.0
Node 38: 0.0
Node 39: 0.0
Node 40: 0.0
Node 41: 0.0
Node 42: 0.0
Node 43: 0.0
Node 44: 0.0
Node 45: -0.011132357144355787
Node 46: -0.2188261268138887
Node 47: 0.0
Node 48: 0.0
Node 49: 0.0
Node 50: 0.0
Node 51: 0.0
Node 52: 0.0
Node 53: 0.0
Node 54: 0.0
Node 55: -0.02602897639274615
Node 56: 0.0
Node 57: 0.0
Node 58: -0.6538350210666657
Node 59: -0.29442833447456374
Node

# Testing on higher node counts

In [265]:
size = 1024

In [266]:
model2 = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(size, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
    
model2.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model2.fit(x_train, y_train, epochs=10, batch_size=None)

Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1ab43677248>

In [233]:
model2.fit(x_train, y_train, epochs=5, batch_size=1024)

Train on 60000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x1ab390ce788>

In [238]:
model2.fit(x_train, y_train, epochs=5, batch_size=256)

Train on 60000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x1ab391002c8>

In [243]:
model2.fit(x_train, y_train, epochs=5, batch_size=32)

Train on 60000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x1ab474b4c88>

In [267]:
tester_model2 = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(size, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
    
tester_model2.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [268]:
model2.evaluate(x_test, y_test, verbose=2, batch_size=256)

10000/10000 - 0s - loss: 0.5534 - accuracy: 0.8573


[0.5534319600820541, 0.8573]

In [269]:
l, a = model2.evaluate(x_test, y_test, verbose=2, batch_size=256)
or_weights = model2.get_weights()
tol_low = -1e-5
tol_high = 1e-5
num_zeros, num_worse, num_important = (0, 0, 0)
z = []
wr = []
imp = []
for i in range(size):
    w = copy.deepcopy(or_weights)
    w[0][:,i] = 0
    w[1][i] = 0
    w[2][i,:] = 0
    tester_model2.set_weights(w)
    nl, na = tester_model2.evaluate(x_test, y_test, verbose=0, batch_size=256)
    print(f"Node {i}:", 1.*(na - a) + 0*(l - nl))
    change = l - nl
    if change <= tol_high and change >= tol_low:
        num_zeros += 1
        z += [i]
    elif change > 0:
        num_worse += 1
        wr += [i]
    else:
        num_important += 1
        imp += [i]

10000/10000 - 0s - loss: 0.5534 - accuracy: 0.8573
Node 0: 0.0
Node 1: -0.000299990177154541
Node 2: 0.0
Node 3: -9.995698928833008e-05
Node 4: 0.0
Node 5: 0.0
Node 6: 0.0
Node 7: -0.001199960708618164
Node 8: 0.00010001659393310547
Node 9: 0.0
Node 10: 0.0
Node 11: 0.0
Node 12: 0.0
Node 13: 0.0
Node 14: 0.0
Node 15: -0.000299990177154541
Node 16: 0.0
Node 17: 0.0
Node 18: -0.00019997358322143555
Node 19: 0.000800013542175293
Node 20: 0.0
Node 21: 0.0
Node 22: 0.0
Node 23: -0.000599980354309082
Node 24: -0.00019997358322143555
Node 25: 0.0
Node 26: 0.0
Node 27: 0.0
Node 28: 0.00010001659393310547
Node 29: -0.001199960708618164
Node 30: 0.0
Node 31: -9.995698928833008e-05
Node 32: 0.0
Node 33: -0.000299990177154541
Node 34: 0.0
Node 35: 0.0
Node 36: 0.0
Node 37: 0.00010001659393310547
Node 38: 0.0
Node 39: 0.00010001659393310547
Node 40: 0.0
Node 41: 0.0
Node 42: 0.0
Node 43: 0.0
Node 44: -9.995698928833008e-05
Node 45: -9.995698928833008e-05
Node 46: 0.0
Node 47: 0.0
Node 48: 0.0
Node 

In [270]:
print("Zero Nodes:", num_zeros)
print("Worse Nodes:", num_worse)
print("Important Nodes:", num_important)

Zero Nodes: 345
Worse Nodes: 296
Important Nodes: 383


In [271]:
print("######## IMPORTANT NODES ########")
for i in imp:
    w = copy.deepcopy(or_weights)
    w[0][:,i] = 0
    w[1][i] = 0
    w[2][i,:] = 0
    tester_model2.set_weights(w)
    nl, na = tester_model2.evaluate(x_test, y_test, verbose=0)
    print(f"Node {i}:", 0.*(na - a) + 1.0*(l - nl))

######## IMPORTANT NODES ########
Node 1: -0.00011387667655948785
Node 3: -0.00010128152370458832
Node 6: -3.623464107516128e-05
Node 7: -0.001998766946792685
Node 8: -0.001308627104759319
Node 9: -0.0010139400839805912
Node 11: -0.00024395577907565524
Node 12: -8.690321445470328e-05
Node 15: -0.00061111836433414
Node 17: -0.0001145778417588339
Node 21: -0.00017343966960914337
Node 22: -6.050019264225437e-05
Node 23: -0.0013901935100556484
Node 28: -0.0004116062402725884
Node 29: -0.0014539723873139287
Node 32: -2.040214538578944e-05
Node 33: -0.0019836359739303644
Node 43: -0.0001338123559951887
Node 45: -0.00010271003246309363
Node 46: -8.029680252075178e-05
Node 47: -1.0653376579350393e-05
Node 49: -1.1082053184563456e-05
Node 52: -0.002979259657859834
Node 54: -0.0003367197751998896
Node 55: -0.00018534734249120444
Node 56: -3.561494350434735e-05
Node 58: -0.004257445406913862
Node 70: -2.6546597480803236e-05
Node 72: -0.00643334937095652
Node 77: -0.00010503306388864875
Node 79: -

In [273]:
print("######## WORSE NODES ########")
tot = 0
for i in wr:
    w = copy.deepcopy(or_weights)
    w[0][:,i] = 0
    w[1][i] = 0
    w[2][i,:] = 0
    tester_model2.set_weights(w)
    nl, na = tester_model2.evaluate(x_test, y_test, verbose=0)
    print(f"Node {i}:", 0.*(na - a) + 1.0*(l - nl))
    tot += (l - nl)
print(tot)
print(tot / num_worse)

######## WORSE NODES ########
Node 0: 4.13911819457935e-05
Node 4: 0.00023835399150839987
Node 5: 0.00025930373668670015
Node 10: 0.00020082871913906786
Node 14: 0.00014250781536095936
Node 18: 0.00026486358642574537
Node 19: 0.0016526093482970294
Node 24: 0.00011118769645690474
Node 30: 4.8133587837173586e-05
Node 31: 0.00013076276779167006
Node 35: 0.00019327156543724744
Node 37: 5.299410820003292e-05
Node 39: 0.0006802566289900724
Node 44: 0.0002921863317488782
Node 51: 0.0002678472757339234
Node 57: 0.0009409231901168491
Node 59: 8.678886890400861e-05
Node 60: 0.0002991854667663274
Node 62: 0.00048684692382805483
Node 63: 3.161346912383678e-05
Node 65: 0.0010018054485320116
Node 66: 0.0033053781270980798
Node 67: 0.00037543771266934645
Node 68: 0.0002577392339705442
Node 74: 0.0006493238449095662
Node 75: 0.002567309141158991
Node 76: 1.1251425743097343e-05
Node 78: 0.0003851246595382163
Node 80: 0.0008524636268615327
Node 85: 0.0014576805830001094
Node 87: 6.532504558554031e-05
No

In [275]:
loss, acc = model2.evaluate(x_test, y_test, verbose=2, batch_size=512)
end_not_reached = True
improved = False
tol = -1e-5
current_pos = 0
best_pos = -1
best_change = tol
original2 = model2.get_weights()
bas2 = [acc]
bls2 = [loss]
best_weights2 = model2.get_weights()
nodes_removed2 = []
best_acc = 0
best_loss = 1e20
ol = loss
oa = acc
num_removed2 = 0
while end_not_reached or improved:
    if not(end_not_reached):
        end_not_reached = True
        improved = False
        current_pos = 0
        size -= 1
        nodes_removed2 += [best_pos]
        best_weights2[0][:,best_pos] = 0
        best_weights2[1][best_pos] = 0
        best_weights2[2][best_pos,:] = 0
        best_pos = -1
        #tol -= best_change
        ol = best_loss
        oa = best_acc
        bas2 += [best_acc]
        bls2 += [best_loss]
        print("Improvement has occured!! Accuracy:", best_acc, "--- Loss:", best_loss, '--- Change:', best_change, '--- New tol:', tol)
        best_change = tol
        num_removed2 += 1
    if current_pos in nodes_removed2:
        current_pos += 1
        if current_pos - num_removed2 >= size:
            end_not_reached = False
        continue
    w = copy.deepcopy(best_weights2)
    w[0][:,current_pos] = 0
    w[1][current_pos] = 0
    w[2][current_pos,:] = 0
    tester_model2.set_weights(w)
    nl, na = tester_model2.evaluate(x_test, y_test, verbose=0, batch_size=512)
    if 0.3*(na - oa) + 0.7*(ol - nl) > best_change:
        best_change = 0.3*(na - oa) + 0.7*(ol - nl)
        print(best_change)
        best_pos = current_pos
        improved = True
        best_acc = na
        best_loss = nl
        print("Found something better")
    current_pos += 1
    if current_pos - num_removed2 >= size:
        end_not_reached = False
    if current_pos%200 == 0:
        print("Did 200 iterations")

tester_model2.set_weights(best_weights2)
loss2, acc2 = tester_model2.evaluate(x_test, y_test, verbose=2)

10000/10000 - 0s - loss: 0.5534 - accuracy: 0.8573
2.899231910711286e-05
Found something better
0.00016685455322269503
Found something better
0.0001815255498886281
Found something better
0.0013968333435058875
Found something better
0.002253782577514662
Found something better
0.002828232312202461
Found something better
Did 20 iterations
0.003118926424980206
Found something better
0.0044161053323746305
Found something better
Did 20 iterations
Did 20 iterations
0.005940026946067834
Found something better
Did 20 iterations
Did 20 iterations
Improvement has occured!! Accuracy: 0.8581 --- Loss: 0.5452890724182129 --- Change: 0.005940026946067834 --- New tol: -1e-05
1.4953613292068013e-08
Found something better
0.00016685455322269503
Found something better
0.00018178560256957297
Found something better
0.0012985332298279027
Found something better
0.0022349351835251217
Found something better
0.0029833795356750343
Found something better
Did 20 iterations
0.0031984505271911547
Found something bet

KeyboardInterrupt: 

In [264]:
num_removed2

32

## Junk + Testing

In [19]:
type(model.get_weights()[0][:,0])

numpy.ndarray

In [22]:
old = model.get_weights()
old[0][:,0] = 0

In [26]:
old[1][0] = 0

In [31]:
old[2][0,:] = 0

In [30]:
np.shape(old[2])

(128, 10)

In [32]:
model.set_weights(old)

In [33]:
model.evaluate(x_test, y_test, verbose=2)

10000/10000 - 1s - loss: 0.0739 - accuracy: 0.9776


[0.0738823753261473, 0.9776]

In [34]:
model.fit(x_train, y_train, epochs=5)

Train on 60000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x2a95d055888>

In [35]:
model.evaluate(x_test, y_test, verbose=2)

10000/10000 - 0s - loss: 0.0727 - accuracy: 0.9798


[0.07267645624614087, 0.9798]

In [36]:
new = model.get_weights()