# Chapter 5.3

## Importing Libraries

In [1]:
from __future__ import absolute_import, division, print_function
from __future__ import unicode_literals

In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import copy
import tqdm
from hfunc import models
from hfunc import metrics
import time

## Self-created functions

In [149]:
def estimate_node_importance_for_class(model, tester_model, layer_sizes, tol_low, tol_high, x, y, k):
    """Sort the nodes of every layer by how ablating them changes the accuracy on class ``k``.

    Nodes are ablated one at a time: the node's slice ``[..., node]`` of the layer
    kernel and its bias entry are zeroed in a copy of ``model``'s weights, the copy
    is loaded into ``tester_model`` and the accuracy over the samples labelled ``k``
    is measured again. ``change`` is that accuracy minus the accuracy of ``model``.

    Layer ``0`` uses ``weights[-4]`` (kernel) and ``weights[-3]`` (bias), layer ``1``
    the pair before it, and so on; the last two weight arrays are never touched
    (presumably the output layer's kernel and bias).

    Args:
        model: object providing ``predict`` and ``get_weights``; never modified.
        tester_model: object providing ``set_weights`` and ``predict``; its weights
            are overwritten on every trial.
        layer_sizes: number of nodes per layer, ordered from the last prunable layer
            backwards.
        tol_low, tol_high: a change inside ``[tol_low, tol_high]`` counts as "zero".
        x, y: inputs and integer labels used for the measurement.
        k: class label whose accuracy is tracked.

    Returns:
        ``(amounts, places, avg_imp, avg_zero, avg_wr)`` where, per layer,
        ``amounts`` holds ``(num_zero, num_worse, num_important)``, ``places`` holds
        the matching node-index lists ``(zero, worse, important)`` and the three
        arrays hold the mean change of each group (0 for an empty group).
        "worse" nodes are those whose removal raised the accuracy, "important"
        nodes those whose removal lowered it.
    """
    # y is not modified below, so the class selection can be computed once.
    class_rows = y.reshape(-1) == k
    class_labels = y[y == k]

    baseline_labels = np.argmax(model.predict(x), axis=1)
    baseline_acc = np.mean(baseline_labels[class_rows] == class_labels)

    reference = model.get_weights()
    bias_base = len(reference) - 3

    n_layers = len(layer_sizes)
    avg_imp = np.zeros(n_layers)
    avg_zero = np.zeros(n_layers)
    avg_wr = np.zeros(n_layers)
    amounts = []
    places = []

    for layer, size in enumerate(layer_sizes):
        kernel_idx = bias_base - (2*layer + 1)
        bias_idx = bias_base - 2*layer
        neutral, harmful, important = [], [], []

        for node in range(size):
            trial = copy.deepcopy(reference)
            trial[kernel_idx][..., node] = 0
            trial[bias_idx][node] = 0
            tester_model.set_weights(trial)

            trial_labels = np.argmax(tester_model.predict(x), axis=1)
            change = np.mean(trial_labels[class_rows] == class_labels) - baseline_acc
            print(f'Node {node} class {k} importance:', change)

            if tol_low <= change <= tol_high:
                neutral.append(node)
                avg_zero[layer] += change
            elif change > 0:
                harmful.append(node)
                avg_wr[layer] += change
            else:
                important.append(node)
                avg_imp[layer] += change

        amounts.append((len(neutral), len(harmful), len(important)))
        places.append((neutral, harmful, important))

        # Turn the accumulated sums into means; empty groups keep their 0.
        if important:
            avg_imp[layer] /= len(important)
        if neutral:
            avg_zero[layer] /= len(neutral)
        if harmful:
            avg_wr[layer] /= len(harmful)

    return amounts, places, avg_imp, avg_zero, avg_wr

In [155]:
def estimate_node_to_consider_pruning(model, tester_model, layer_sizes, tol, x, y, k):
    """List, per layer, the nodes whose individual removal keeps class-``k`` accuracy.

    Every node is ablated on its own (kernel slice ``[..., node]`` and bias entry
    zeroed in a copy of ``model``'s weights, loaded into ``tester_model``). The node
    is kept as a candidate when the class-``k`` accuracy after ablation minus the
    accuracy of ``model`` is at least ``tol``.

    Args:
        model: object providing ``predict`` and ``get_weights``; never modified.
        tester_model: object providing ``set_weights`` and ``predict``; its weights
            are overwritten on every trial.
        layer_sizes: number of nodes per layer, ordered from the last prunable layer
            backwards (layer 0 maps to ``weights[-4]`` / ``weights[-3]``).
        tol: minimum accuracy change for a node to be listed.
        x, y: inputs and integer labels used for the measurement.
        k: class label whose accuracy is tracked.

    Returns:
        A list with one list of node indices per entry of ``layer_sizes``.
    """
    # Selection of the class-k samples; y is not modified, so compute it once.
    class_rows = y.reshape(-1) == k
    class_labels = y[y == k]

    def class_accuracy(net):
        labels = np.argmax(net.predict(x), axis=1)
        return np.mean(labels[class_rows] == class_labels)

    baseline_acc = class_accuracy(model)
    reference = model.get_weights()
    bias_base = len(reference) - 3

    to_consider = []
    for layer, size in enumerate(layer_sizes):
        kernel_idx = bias_base - (2*layer + 1)
        bias_idx = bias_base - 2*layer
        candidates = []
        for node in range(size):
            trial = copy.deepcopy(reference)
            trial[kernel_idx][..., node] = 0
            trial[bias_idx][node] = 0
            tester_model.set_weights(trial)
            if class_accuracy(tester_model) - baseline_acc >= tol:
                candidates.append(node)
        to_consider.append(candidates)
    return to_consider

In [171]:
def node_class_pruning(model, tester_model, x, y, layer_sizes, tol, k, ignore_cutoff=-1e-2):
    """Greedily prune nodes pre-selected for class ``k`` while the overall loss allows it.

    Candidates come from ``estimate_node_to_consider_pruning`` (nodes whose single
    removal changes the class-``k`` accuracy by at least ``tol``). Each candidate is
    then zeroed on top of the nodes already pruned and judged on the loss returned
    by ``tester_model.evaluate``:

    * ``old_loss - new_loss >= tol``: the node is pruned for good and the scan of
      the remaining candidates of the layer restarts from the first one;
    * ``old_loss - new_loss <= ignore_cutoff``: the node is dropped from the
      candidate list and never tried again for this layer;
    * otherwise the node stays a candidate and the scan moves on.

    Note that ``tol`` is used both as an accuracy threshold (candidate selection)
    and as a loss threshold (pruning decision).

    Args:
        model: trained model (``evaluate``, ``get_weights``); never modified.
        tester_model: model with the same weight layout; its weights are overwritten.
        x, y: evaluation inputs and labels.
        layer_sizes: nodes per layer, from the last prunable layer backwards
            (layer 0 maps to ``weights[-4]`` / ``weights[-3]``).
        tol: threshold described above.
        k: class used for candidate selection.
        ignore_cutoff: loss change at or below which a candidate is discarded.

    Returns:
        ``(best_weights, amounts, places)``: the pruned weight list, the number of
        nodes pruned per layer and the pruned node indices per layer. Both lists
        have one entry per element of ``layer_sizes``.
    """
    loss, _ = model.evaluate(x, y, verbose=0, batch_size=256)
    best_weights = model.get_weights()
    weight_len = len(best_weights) - 3
    ol = loss
    amounts = []
    places = []
    to_consider = estimate_node_to_consider_pruning(model, tester_model, layer_sizes, tol, x, y, k)

    for layer in range(len(layer_sizes)):
        kernel_idx = weight_len - (2*layer + 1)
        bias_idx = weight_len - 2*layer
        nodes_removed = []
        nodes_to_estimate = list(to_consider[layer])
        idx = 0
        # The loop condition also covers a layer without any candidate.
        while idx < len(nodes_to_estimate):
            current_pos = nodes_to_estimate[idx]
            w = copy.deepcopy(best_weights)
            w[kernel_idx][..., current_pos] = 0
            w[bias_idx][current_pos] = 0
            tester_model.set_weights(w)
            nl, _ = tester_model.evaluate(x, y, verbose=0, batch_size=256)

            change = ol - nl
            print(f'Node {current_pos} Importance:', change)

            if change >= tol:
                ol = nl
                nodes_removed.append(current_pos)
                nodes_to_estimate.pop(idx)
                best_weights[kernel_idx][..., current_pos] = 0
                best_weights[bias_idx][current_pos] = 0
                # Removing a node changes the network, so re-test earlier candidates.
                idx = 0
            elif change <= ignore_cutoff:
                nodes_to_estimate.pop(idx)
            else:
                idx += 1

        # Record the outcome of *this* layer (one entry per layer).
        amounts.append(len(nodes_removed))
        places.append(nodes_removed)

    return best_weights, amounts, places

In [205]:
def node_pruning_class_loss(model, tester_model, x, y, layer_sizes, tol, k, ignore_cutoff=-1e-2):
    """Greedily prune nodes using the one-vs-rest binary cross-entropy of class ``k``.

    The loss being tracked is ``BinaryCrossentropy`` between the indicator
    ``y == k`` and column ``k`` of the model's predictions. Every node of every
    layer is tried on top of the nodes already pruned:

    * ``old_loss - new_loss >= tol``: the node is pruned for good, the accuracy and
      loss are appended to the history and the scan of the layer restarts;
    * ``old_loss - new_loss <= ignore_cutoff``: the node is not tried again for
      this layer;
    * otherwise the node stays a candidate and the scan moves on.

    Args:
        model: trained model (``evaluate``, ``predict``, ``get_weights``); never
            modified.
        tester_model: model with the same weight layout; its weights are overwritten.
        x, y: evaluation inputs and integer labels.
        layer_sizes: nodes per layer, from the last prunable layer backwards
            (layer 0 maps to ``weights[-4]`` / ``weights[-3]``).
        tol: minimum loss reduction for a node to be pruned.
        k: class whose binary cross-entropy is minimised.
        ignore_cutoff: loss change at or below which a candidate is discarded.

    Returns:
        ``(best_weights, bas, bls, amounts, places)``: pruned weights, history of
        the overall accuracy (second value of ``evaluate``) and of the class loss
        (first entry is the unpruned model), number of nodes pruned per layer and
        the pruned node indices per layer.
    """
    _, acc = model.evaluate(x, y, verbose=0, batch_size=256)
    best_weights = model.get_weights()
    weight_len = len(best_weights) - 3

    # 1 where the true label is k, 0 elsewhere.
    y_class = (np.asarray(y).reshape(-1) == k).astype(int)
    bce = tf.keras.losses.BinaryCrossentropy()

    oa = acc
    ol = bce(y_class, model.predict(x)[:, k]).numpy()
    bas = [oa]
    bls = [ol]
    amounts = []
    places = []

    for layer, size in enumerate(layer_sizes):
        kernel_idx = weight_len - (2*layer + 1)
        bias_idx = weight_len - 2*layer
        nodes_removed = []
        nodes_to_estimate = list(range(size))
        idx = 0
        # The loop condition also covers a layer of size 0.
        while idx < len(nodes_to_estimate):
            current_pos = nodes_to_estimate[idx]
            w = copy.deepcopy(best_weights)
            w[kernel_idx][..., current_pos] = 0
            w[bias_idx][current_pos] = 0
            tester_model.set_weights(w)
            _, na = tester_model.evaluate(x, y, verbose=0, batch_size=256)
            nl = bce(y_class, tester_model.predict(x)[:, k]).numpy()

            change = ol - nl
            print(f'Node {current_pos} Importance:', change)

            if change >= tol:
                oa = na
                ol = nl
                nodes_removed.append(current_pos)
                nodes_to_estimate.pop(idx)
                best_weights[kernel_idx][..., current_pos] = 0
                best_weights[bias_idx][current_pos] = 0
                bas.append(oa)
                bls.append(ol)
                # Removing a node changes the network, so re-test earlier candidates.
                idx = 0
            elif change <= ignore_cutoff:
                nodes_to_estimate.pop(idx)
            else:
                idx += 1

        # Record the outcome of *this* layer (one entry per layer).
        amounts.append(len(nodes_removed))
        places.append(nodes_removed)

    return best_weights, bas, bls, amounts, places

In [133]:
def _per_class_accuracy(net, x, y, K):
    """Return a length-``K`` array with the accuracy of ``net`` on each class label."""
    predicted = np.argmax(net.predict(x), axis=1)
    labels = np.asarray(y).reshape(-1)
    accuracy = np.zeros(K)
    for c in range(K):
        accuracy[c] = np.mean(predicted[labels == c] == c)
    return accuracy


def _signed_class_changes(new_acc, old_acc):
    """List ``c`` for every class whose accuracy rose and ``-c`` for every class whose accuracy fell."""
    changes = []
    for c, (new, old) in enumerate(zip(new_acc, old_acc)):
        if new - old > 0:
            changes.append(c)
        elif new - old < 0:
            changes.append(-c)
    return changes


def _evaluate_without_node(tester_model, weights, kernel_idx, bias_idx, pos, x, y):
    """Return ``(loss, accuracy)`` of ``tester_model`` loaded with ``weights`` minus node ``pos``."""
    trial = copy.deepcopy(weights)
    trial[kernel_idx][..., pos] = 0
    trial[bias_idx][pos] = 0
    tester_model.set_weights(trial)
    return tester_model.evaluate(x, y, verbose=0, batch_size=256)


def node_pruning(model, tester_model, x, y, layer_sizes, tol, ignore_cutoff=-1e-2, method='exhaustive', K=10):
    """Prune nodes layer by layer as long as the loss on ``(x, y)`` improves by at least ``tol``.

    A node is "removed" by zeroing its kernel slice ``[..., node]`` and its bias
    entry. After every removal the per-class accuracy is compared with the one
    before the removal (starting from the accuracy of the unpruned ``model``).

    Methods:
        ``'exhaustive'``: scan every remaining node of the layer, remove the single
            node with the largest loss reduction (ties go to the higher index),
            repeat until no node reaches ``tol``. ``ignore_cutoff`` is not used.
        ``'greedy'``: remove a node as soon as its loss reduction reaches ``tol``
            and restart the scan; nodes whose loss change is at or below
            ``ignore_cutoff`` are not tried again for the layer.

    Args:
        model: trained model (``evaluate``, ``predict``, ``get_weights``); never
            modified.
        tester_model: model with the same weight layout; its weights are overwritten.
        x, y: evaluation inputs and integer labels.
        layer_sizes: nodes per layer, from the last prunable layer backwards
            (layer 0 maps to ``weights[-4]`` / ``weights[-3]``).
        tol: minimum loss reduction for a node to be pruned.
        ignore_cutoff: greedy only, see above.
        method: ``'exhaustive'`` or ``'greedy'``.
        K: number of classes.

    Returns:
        ``(best_weights, bas, bls, amounts, places, class_importance)``: pruned
        weights; accuracy and loss history (first entry is the unpruned model, one
        more entry per removal); nodes removed per layer (count and indices); and,
        per layer, one list per removed node holding ``c`` for each class whose
        accuracy rose and ``-c`` for each class whose accuracy fell. Because
        ``-0 == 0``, a rise and a fall of class 0 are indistinguishable in that
        encoding.

    Raises:
        ValueError: if ``method`` is not one of the two supported values.
    """
    if method not in ('exhaustive', 'greedy'):
        raise ValueError(f"method must be 'exhaustive' or 'greedy', got {method!r}")

    loss, acc = model.evaluate(x, y, verbose=0, batch_size=256)
    best_weights = model.get_weights()
    weight_len = len(best_weights) - 3
    bas = [acc]
    bls = [loss]
    ol = loss
    oa = acc
    amounts = []
    places = []
    class_importance = []

    # Baseline taken from the model being pruned, not from whatever weights
    # tester_model happens to hold when this function is called.
    class_acc = _per_class_accuracy(model, x, y, K)

    for layer, size in enumerate(layer_sizes):
        kernel_idx = weight_len - (2*layer + 1)
        bias_idx = weight_len - 2*layer
        nodes_removed = []
        c_imp = []

        if method == 'exhaustive':
            while True:
                best_change = tol
                best_pos = -1
                best_loss = ol
                best_acc = oa
                for current_pos in range(size):
                    if current_pos in nodes_removed:
                        continue
                    nl, na = _evaluate_without_node(
                        tester_model, best_weights, kernel_idx, bias_idx, current_pos, x, y)
                    change = ol - nl
                    if change >= best_change:
                        best_change = change
                        best_pos = current_pos
                        best_loss = nl
                        best_acc = na
                if best_pos < 0:
                    # No remaining node reaches the tolerance: the layer is done.
                    break

                best_weights[kernel_idx][..., best_pos] = 0
                best_weights[bias_idx][best_pos] = 0
                nodes_removed.append(best_pos)

                tester_model.set_weights(best_weights)
                new_class_acc = _per_class_accuracy(tester_model, x, y, K)
                c_imp.append(_signed_class_changes(new_class_acc, class_acc))
                class_acc = new_class_acc

                ol = best_loss
                oa = best_acc
                bas.append(oa)
                bls.append(ol)
        else:  # 'greedy'
            nodes_to_estimate = list(range(size))
            idx = 0
            # The loop condition also covers a layer of size 0.
            while idx < len(nodes_to_estimate):
                current_pos = nodes_to_estimate[idx]
                nl, na = _evaluate_without_node(
                    tester_model, best_weights, kernel_idx, bias_idx, current_pos, x, y)
                change = ol - nl
                print(f'Node {current_pos} Importance:', change)

                if change >= tol:
                    oa = na
                    ol = nl
                    nodes_removed.append(current_pos)
                    nodes_to_estimate.pop(idx)
                    best_weights[kernel_idx][..., current_pos] = 0
                    best_weights[bias_idx][current_pos] = 0

                    tester_model.set_weights(best_weights)
                    new_class_acc = _per_class_accuracy(tester_model, x, y, K)
                    c_imp.append(_signed_class_changes(new_class_acc, class_acc))
                    class_acc = new_class_acc

                    bas.append(oa)
                    bls.append(ol)
                    # Removing a node changes the network, so re-test earlier candidates.
                    idx = 0
                elif change <= ignore_cutoff:
                    nodes_to_estimate.pop(idx)
                else:
                    idx += 1

        amounts.append(len(nodes_removed))
        places.append(nodes_removed)
        class_importance.append(c_imp)

    return best_weights, bas, bls, amounts, places, class_importance

## Convolutional Neural Network

### CIFAR 10

In [125]:
# Seed NumPy's global random generator.
# NOTE(review): only NumPy is seeded here; TensorFlow's own generator is not seeded
# in this cell, so weight initialisation may differ between runs - confirm whether
# that is intended.
np.random.seed(2020)

In [126]:
# Load CIFAR-10 and carve a validation set out of the training data.
# NOTE: this cell overwrites x_train / y_train, so running it twice without
# reloading would rescale and split the already processed arrays again.
cifar = tf.keras.datasets.cifar10
(x_train, y_train), (x_test, y_test) = cifar.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0  # Convert integer pixel values to floats in [0, 1]
# 85% of the original training set stays for training, the rest becomes validation (no stratification).
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, train_size=0.85, stratify=None)

In [127]:
# Scratch network with the same architecture as `model` (defined in a later cell).
# It is never trained: the pruning functions load candidate weight sets into it
# with set_weights() and evaluate them, so `model` itself stays untouched.
tester_model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu', input_shape=(32, 32, 3)),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu'),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(128, 3, padding='same', activation='relu'),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(256, 3, padding='same', activation='relu'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])
# Compiled with the same loss/metric as `model` so evaluate() returns (loss, accuracy).
tester_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

In [151]:
# Pruning hyper-parameters.
# tol is negative, so a node is still pruned when the tracked quantity gets worse by up to 1e-5.
tol = -1e-5
# Symmetric band [tol_low, tol_high] inside which an accuracy change counts as "zero".
tol_low = tol
tol_high = -tol
# Nodes whose removal changes the loss by ig_cut or less are not tried again.
ig_cut = -1e-3
# Layer widths ordered from the last hidden layer back to the first
# (Dense 64, then Conv 256, 128, 64, 32), matching how the pruning functions
# index the weight list from its end.
layer_sizes = [64, 256, 128, 64, 32]
# Number of CIFAR-10 classes.
K = 10

In [129]:
# Reference CNN: four 3x3 convolutions (32, 64, 128, 256 filters) with max-pooling
# after the first three, a 64-unit dense layer and a 10-way softmax output.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu', input_shape=(32, 32, 3)),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu'),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(128, 3, padding='same', activation='relu'),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(256, 3, padding='same', activation='relu'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Train for 5 epochs on the training split, then record the unpruned test loss/accuracy
# that later cells compare against.
model.fit(x_train, y_train, epochs=5)
or_loss, or_acc = model.evaluate(x_test, y_test)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [152]:
# Classify every node by the effect of its removal on the validation accuracy of class 2.
# One line is printed per node, so the output of this cell is long.
amounts, places, avg_imp, avg_zero, avg_wr = estimate_node_importance_for_class(model, tester_model, layer_sizes, tol_low, tol_high, x_val, y_val, 2)

Node 0 class 2 importance: -0.08659217877094971
Node 1 class 2 importance: -0.06983240223463688
Node 2 class 2 importance: 0.0
Node 3 class 2 importance: 0.0
Node 4 class 2 importance: 0.06145251396648044
Node 5 class 2 importance: 0.12988826815642462
Node 6 class 2 importance: 0.0
Node 7 class 2 importance: 0.0
Node 8 class 2 importance: 0.0
Node 9 class 2 importance: 0.0
Node 10 class 2 importance: 0.0
Node 11 class 2 importance: 0.0
Node 12 class 2 importance: 0.0
Node 13 class 2 importance: -0.2094972067039106
Node 14 class 2 importance: 0.03910614525139666
Node 15 class 2 importance: -0.1005586592178771
Node 16 class 2 importance: 0.0
Node 17 class 2 importance: 0.0
Node 18 class 2 importance: 0.0
Node 19 class 2 importance: 0.0
Node 20 class 2 importance: 0.0
Node 21 class 2 importance: 0.0
Node 22 class 2 importance: 0.0
Node 23 class 2 importance: 0.0
Node 24 class 2 importance: 0.0572625698324023
Node 25 class 2 importance: 0.0
Node 26 class 2 importance: 0.0
Node 27 class 2 i

In [165]:
places[0]

([2,
  3,
  6,
  7,
  8,
  9,
  10,
  11,
  12,
  16,
  17,
  18,
  19,
  20,
  21,
  22,
  23,
  25,
  26,
  27,
  28,
  29,
  31,
  32,
  33,
  36,
  37,
  38,
  39,
  40,
  41,
  42,
  44,
  46,
  47,
  49,
  50,
  51,
  54,
  55,
  56,
  58,
  59,
  60,
  61,
  62],
 [4, 5, 14, 24, 34, 43, 45, 53],
 [0, 1, 13, 15, 30, 35, 48, 52, 57, 63])

In [172]:
# Greedy pruning restricted to the nodes pre-selected for class 2, judged on the validation loss.
# NOTE: this overwrites the `amounts` / `places` produced by the importance-estimation cell.
best_weights, amounts, places = node_class_pruning(model, tester_model, x_val, y_val, layer_sizes, tol, 2, ig_cut)

Node 2 Importance: 0.0
Node 3 Importance: 0.0
Node 4 Importance: -0.1262328028678894
Node 5 Importance: -0.21309494972229004
Node 6 Importance: 0.0
Node 7 Importance: 0.0
Node 8 Importance: 0.0
Node 9 Importance: 0.0
Node 10 Importance: 0.0
Node 11 Importance: 0.0
Node 12 Importance: 3.361701965332031e-05
Node 14 Importance: -0.05073815584182739
Node 16 Importance: 0.0
Node 17 Importance: 0.0
Node 18 Importance: 0.0
Node 19 Importance: 0.0
Node 20 Importance: 0.0
Node 21 Importance: 0.0
Node 22 Importance: 0.0
Node 23 Importance: 0.0
Node 24 Importance: -0.08917170763015747
Node 25 Importance: 0.0
Node 26 Importance: 0.0
Node 27 Importance: 0.0
Node 28 Importance: 0.0
Node 29 Importance: 0.0
Node 31 Importance: 0.0
Node 32 Importance: 0.0
Node 33 Importance: 0.0
Node 34 Importance: -0.09444326162338257
Node 36 Importance: 0.0
Node 37 Importance: 0.0
Node 38 Importance: 0.0
Node 39 Importance: 0.0
Node 40 Importance: 0.0
Node 41 Importance: 0.0
Node 42 Importance: 0.0
Node 43 Importance

In [None]:
tester_model.set_weights(best_weights)

In [174]:
# Test-set loss/accuracy of whatever weights tester_model currently holds.
loss, acc = tester_model.evaluate(x_test, y_test)

# Per-class test accuracy: for each class i, the share of the samples labelled i
# whose arg-max prediction is i.
y_pred = tester_model.predict(x_test)
class_acc = []
yp = np.argmax(y_pred, axis=1)
for i in range(K):
    a = np.mean((yp[y_test.reshape(-1) == i] == y_test[y_test == i]))
    class_acc.append(a)



In [175]:
loss

0.7773805856704712

In [176]:
acc

0.7353000044822693

In [177]:
class_acc

[0.772, 0.863, 0.606, 0.544, 0.657, 0.666, 0.823, 0.79, 0.852, 0.78]

In [179]:
# Per-class test accuracy of the unpruned `model`, for comparison with the pruned weights.
y_pred = model.predict(x_test)
acc2 = []
yp = np.argmax(y_pred, axis=1)
for i in range(K):
    a = np.mean((yp[y_test.reshape(-1) == i] == y_test[y_test == i]))
    acc2.append(a)
print(acc2)

[0.845, 0.891, 0.47, 0.486, 0.627, 0.714, 0.861, 0.798, 0.864, 0.729]


In [188]:
# Tighter ignore cutoff for the class-loss experiments (overrides the earlier ig_cut).
ig_cut = -1e-5
# Prune on the one-vs-rest binary cross-entropy of class 2; tol=0 prunes a node
# whenever that loss does not increase.
bw, bas, bls, am, pl = node_pruning_class_loss(model, tester_model, x_val, y_val, layer_sizes, 0, 2, ig_cut)

Node 0 Importance: -0.019522727
Node 1 Importance: -0.018445969
Node 2 Importance: 0.0
Node 3 Importance: 0.0
Node 4 Importance: 0.011559606
Node 5 Importance: -0.013043433
Node 6 Importance: 0.0
Node 7 Importance: 0.0
Node 8 Importance: 0.0
Node 9 Importance: 0.0
Node 10 Importance: 0.0
Node 11 Importance: 0.0
Node 12 Importance: 0.0
Node 13 Importance: -0.049770966
Node 14 Importance: 0.0010525137
Node 15 Importance: -0.011515871
Node 16 Importance: 0.0
Node 17 Importance: 0.0
Node 18 Importance: 0.0
Node 19 Importance: 0.0
Node 20 Importance: 0.0
Node 21 Importance: 0.0
Node 22 Importance: 0.0
Node 23 Importance: 0.0
Node 24 Importance: -0.02328159
Node 25 Importance: 0.0
Node 26 Importance: 0.0
Node 27 Importance: 0.0
Node 28 Importance: 0.0
Node 29 Importance: 0.0
Node 30 Importance: 0.00065380335
Node 31 Importance: 0.0
Node 32 Importance: 0.0
Node 33 Importance: 0.0
Node 34 Importance: -0.0115106255
Node 35 Importance: -0.0009249747
Node 36 Importance: 0.0
Node 35 Importance: -0

In [193]:
# Load the weights returned by the latest node_pruning_class_loss run (`bw`) and
# measure them on the test set.
tester_model.set_weights(bw)

loss, acc = tester_model.evaluate(x_test, y_test)

# Per-class test accuracy of the pruned weights.
y_pred = tester_model.predict(x_test)
class_acc = []
yp = np.argmax(y_pred, axis=1)
for i in range(K):
    a = np.mean((yp[y_test.reshape(-1) == i] == y_test[y_test == i]))
    class_acc.append(a)



In [194]:
loss

1.0509511232376099

In [195]:
acc

0.6388000249862671

In [196]:
class_acc

[0.779, 0.433, 0.582, 0.077, 0.594, 0.788, 0.877, 0.767, 0.745, 0.746]

In [206]:
# Same experiment as for class 2, now pruning on the one-vs-rest binary
# cross-entropy of class 0 (tol=0). Overwrites bw, bas, bls, am and pl.
ig_cut = -1e-5
bw, bas, bls, am, pl = node_pruning_class_loss(model, tester_model, x_val, y_val, layer_sizes, 0, 0, ig_cut)

Node 0 Importance: -0.010325789
Node 1 Importance: -0.023939714
Node 2 Importance: 0.0
Node 3 Importance: 0.0
Node 4 Importance: 0.020928875
Node 5 Importance: -0.03393334
Node 6 Importance: 0.0
Node 7 Importance: 0.0
Node 8 Importance: 0.0
Node 9 Importance: 0.0
Node 10 Importance: 0.0
Node 11 Importance: 0.0
Node 12 Importance: 0.0
Node 13 Importance: -0.021112204
Node 14 Importance: -0.0060062483
Node 15 Importance: -0.008146167
Node 16 Importance: 0.0
Node 17 Importance: 0.0
Node 18 Importance: 0.0
Node 19 Importance: 0.0
Node 20 Importance: 0.0
Node 21 Importance: 0.0
Node 22 Importance: 0.0
Node 23 Importance: 0.0
Node 24 Importance: -0.023126602
Node 25 Importance: 0.0
Node 26 Importance: 0.0
Node 27 Importance: 0.0
Node 28 Importance: 0.0
Node 29 Importance: 0.0
Node 30 Importance: 2.3424625e-05
Node 31 Importance: 0.0
Node 32 Importance: 0.0
Node 33 Importance: 0.0
Node 34 Importance: -0.0031661838
Node 35 Importance: -0.028169692
Node 36 Importance: 0.0
Node 37 Importance: 0.

In [207]:
# Load the weights returned by the latest node_pruning_class_loss run (`bw`) and
# measure them on the test set.
tester_model.set_weights(bw)

loss, acc = tester_model.evaluate(x_test, y_test)

# Per-class test accuracy of the pruned weights.
y_pred = tester_model.predict(x_test)
class_acc = []
yp = np.argmax(y_pred, axis=1)
for i in range(K):
    a = np.mean((yp[y_test.reshape(-1) == i] == y_test[y_test == i]))
    class_acc.append(a)



In [208]:
loss

0.947386622428894

In [209]:
acc

0.6721000075340271

In [210]:
class_acc

[0.766, 0.796, 0.546, 0.287, 0.495, 0.876, 0.802, 0.693, 0.802, 0.658]

In [211]:
bls

[0.13522112,
 0.13522112,
 0.13522112,
 0.11429225,
 0.11429225,
 0.11429225,
 0.11429225,
 0.11429225,
 0.11429225,
 0.11429225,
 0.11429225,
 0.11429225,
 0.11429225,
 0.11429225,
 0.11429225,
 0.11429225,
 0.11429225,
 0.11429225,
 0.11429225,
 0.11429225,
 0.11429225,
 0.11429225,
 0.11429225,
 0.11429225,
 0.114268824,
 0.114268824,
 0.114268824,
 0.114268824,
 0.114268824,
 0.114268824,
 0.114268824,
 0.114268824,
 0.114268824,
 0.114268824,
 0.114268824,
 0.114268824,
 0.114268824,
 0.114268824,
 0.114268824,
 0.114268824,
 0.11350553,
 0.11350553,
 0.11350553,
 0.11350553,
 0.11350553,
 0.11350553,
 0.11350553,
 0.11350553,
 0.11350553,
 0.112827785,
 0.112827785,
 0.11269963,
 0.11200803,
 0.111953236,
 0.11178929,
 0.11178929,
 0.11178929,
 0.11178929,
 0.11164742,
 0.11154707,
 0.11142693,
 0.111186706,
 0.111186706,
 0.11111382,
 0.111062,
 0.111062,
 0.111062,
 0.11086433,
 0.11086433,
 0.11086433,
 0.11086433,
 0.11086433,
 0.11086433,
 0.110816814,
 0.109826125,
 0.10982

In [134]:
# Greedy loss-based pruning on the validation set. The number of classes is passed
# by keyword: a positional argument after `method='greedy'` is a SyntaxError.
best_weights, _, _, amounts, places, class_importances = node_pruning(
    model, tester_model, x_val, y_val, layer_sizes, tol, ig_cut, method='greedy', K=K
)
tester_model.set_weights(best_weights)

# Test-set loss/accuracy of the pruned network.
pruned_loss, pruned_acc = tester_model.evaluate(x_test, y_test)

# Per-class test accuracy of the pruned network.
y_pred = tester_model.predict(x_test)
pruned_class_acc = []
yp = np.argmax(y_pred, axis=1)
for i in range(K):
    a = np.mean((yp[y_test.reshape(-1) == i] == y_test[y_test == i]))
    pruned_class_acc.append(a)

Node 0 Importance: -0.0789225697517395
Node 1 Importance: -0.13867038488388062
Node 2 Importance: 0.0
Node 3 Importance: 0.0
Node 4 Importance: -0.1262328028678894
Node 5 Importance: -0.21309494972229004
Node 6 Importance: 0.0
Node 7 Importance: 0.0
Node 8 Importance: 0.0
Node 9 Importance: 0.0
Node 10 Importance: 0.0
Node 11 Importance: 0.0
Node 12 Importance: 3.361701965332031e-05
Node 13 Importance: -0.05307281017303467
Node 14 Importance: -0.05073815584182739
Node 15 Importance: -0.10720211267471313
Node 16 Importance: 0.0
Node 17 Importance: 0.0
Node 18 Importance: 0.0
Node 19 Importance: 0.0
Node 20 Importance: 0.0
Node 21 Importance: 0.0
Node 22 Importance: 0.0
Node 23 Importance: 0.0
Node 24 Importance: -0.08917170763015747
Node 25 Importance: 0.0
Node 26 Importance: 0.0
Node 27 Importance: 0.0
Node 28 Importance: 0.0
Node 29 Importance: 0.0
Node 30 Importance: 0.0019381046295166016
Node 31 Importance: 0.0
Node 32 Importance: 0.0
Node 33 Importance: 0.0
Node 34 Importance: -0.0

In [135]:
print(f'Original loss: {or_loss}')
print(f'Loss after pruning based on the loss: {pruned_loss}')

Original loss: 0.8323136568069458
Loss after pruning based on the loss: 0.7854679226875305


In [136]:
print(f'Original accuracy: {or_acc}')
print(f'Accuracy after pruning based on the loss: {pruned_acc}')

Original accuracy: 0.7285000085830688
Accuracy after pruning based on the loss: 0.7337999939918518


In [141]:
# For every pruned node, list the classes whose accuracy changed when it was removed
# (k: accuracy of class k rose, -k: it fell). Layers are numbered from the deepest
# one down, so the label is derived from the number of layers instead of a literal 5.
print('######## Who removed each node ########')
for l in range(len(layer_sizes)):
    print(f'####### LAYER {len(layer_sizes)-l} #######')
    for i, node in enumerate(places[l]):
        print(f"Node {node}: {class_importances[l][i]}")

######## Who removed each node ########
####### LAYER 5 #######
Node 2: []
Node 3: []
Node 6: []
Node 7: []
Node 8: []
Node 9: []
Node 10: []
Node 11: []
Node 12: []
Node 16: []
Node 17: []
Node 18: []
Node 19: []
Node 20: []
Node 21: []
Node 22: []
Node 23: []
Node 25: []
Node 26: []
Node 27: []
Node 28: []
Node 29: []
Node 30: [0, -1, -2, 3, 4, 5, -7, 8, 9]
Node 31: []
Node 32: []
Node 33: []
Node 36: []
Node 37: []
Node 38: []
Node 39: []
Node 40: []
Node 41: []
Node 42: []
Node 44: []
Node 46: []
Node 47: []
Node 49: []
Node 50: [-4]
Node 51: []
Node 54: []
Node 55: []
Node 56: []
Node 58: []
Node 59: []
Node 60: []
Node 61: []
Node 62: []
####### LAYER 4 #######
Node 0: [0, -1, 2, -3, 4, -5, -6, -7, -8, 9]
Node 1: []
Node 3: [-1, 2, 3, -4, 5, 6, -7, -8, 9]
Node 4: [0, -1, 2, 3, -4, -5, -6, -7, 8, -9]
Node 6: [0, 1, -2, 3, 4, -5, -6, 7, -8, 9]
Node 7: []
Node 8: []
Node 9: [0, 2, -3, -4, 5, 6, -8, 9]
Node 10: [2, -3, -4, -5, 6, 7, -8, 9]
Node 11: []
Node 15: [0, 2, -8, 9]
Node 17: 

In [144]:
# Starting from the unpruned weights, zero only those pruned nodes whose removal
# was recorded as *raising* the accuracy of class 2 (a positive 2 in the node's list).
# NOTE(review): `to_rem` is never used in this cell.
to_rem = []
w_t = model.get_weights()
# Same indexing convention as the pruning functions: layer l uses the kernel at
# weight_len - (2*l+1) and the bias at weight_len - 2*l.
weight_len = len(w_t) - 3
for l in range(len(layer_sizes)):
    for i, imp in enumerate(class_importances[l]):
        if 2 in imp:
            w_t[weight_len - (2*l+1)][...,places[l][i]] = 0
            w_t[weight_len - 2*l][places[l][i]] = 0

In [145]:
tester_model.set_weights(w_t)

In [146]:
print(pruned_class_acc)

[0.802, 0.832, 0.534, 0.534, 0.708, 0.695, 0.809, 0.784, 0.851, 0.789]


In [147]:
# Per-class *validation* accuracy of the weights currently loaded in tester_model.
# NOTE: `acc` is rebound to a list here; earlier cells use `acc` for the scalar
# accuracy returned by evaluate().
y_pred = tester_model.predict(x_val)
acc = []
yp = np.argmax(y_pred, axis=1)
for i in range(K):
    a = np.mean((yp[y_val.reshape(-1) == i] == y_val[y_val == i]))
    acc.append(a)
print(acc)

[0.7973333333333333, 0.8753213367609255, 0.6145251396648045, 0.5043370508054523, 0.6675824175824175, 0.7059620596205962, 0.8322056833558863, 0.7802631578947369, 0.8576972833117723, 0.8073136427566807]


In [118]:
sum(yp[y_val.reshape(-1) == 9] == y_val[y_val == 9])

366

In [119]:
len(y_val[y_val == 0])

750

In [120]:
len(y_val[yp == 0])

578

In [121]:
# One-vs-rest accuracy for class 0: both the labels and the arg-max predictions
# are reduced to "is class 0" indicators before being compared.
# The comprehension variable `yp` is local to each comprehension and does not
# rebind the global `yp`.
# NOTE(review): assumes each element of y_val holds a single label - confirm the shape.
y_class = np.array([1 if yp == 0 else 0 for yp in y_val])
y_pred = tester_model.predict(x_val)
y_pred_flat = np.argmax(y_pred, axis=1)
y_pred_class = np.array([1 if yp == 0 else 0 for yp in y_pred_flat])
na = (y_pred_class == y_class).mean()
print(na)

0.944


In [77]:
y_pred_class

array([1, 1, 1, ..., 1, 1, 1])

In [78]:
y_pred_class == y_class

array([False, False, False, ..., False,  True, False])

In [46]:
sum(y_pred_class)

666

In [47]:
sum(y_class)

711

In [122]:
class_acc = np.array([1,2,3,1,2,3])
temp = np.array([1,5,7,6,43,56])
print(class_acc)
class_acc = temp
print(class_acc)
temp = np.zeros(6)
print(class_acc)

[1 2 3 1 2 3]
[ 1  5  7  6 43 56]
[ 1  5  7  6 43 56]


In [123]:
print(temp)

[0. 0. 0. 0. 0. 0.]


In [181]:
y_pred[:,2]

array([2.8329187e-03, 7.6660450e-07, 2.2183869e-03, ..., 4.5126255e-04,
       1.1181285e-03, 2.6334104e-05], dtype=float32)