# Experiments for Section 2.4: SMI Behaviour in DNNs

In [None]:
import numpy as np
import pandas as pd
import keras
import tensorflow as tf
import os
import json
import pickle

from utils import models
from utils import datasets
from utils import entropy_estimators as ee

In [None]:
# Show whether TensorFlow can see at least one GPU device.
# tf.test.is_gpu_available is deprecated; its deprecation notice points to
# tf.config.list_physical_devices('GPU'). The list is wrapped in bool() so the
# cell still displays True/False as before.
bool(tf.config.list_physical_devices('GPU'))

## MLP

Hyperparameters:

In [None]:
# Configuration shared by every MLP cell below. Key order is kept as originally
# written because the dict is serialised verbatim to config.json for each run.
cfg = {
    'dataset': 'Fashion_MNIST',
    'model': 'MLP',
    'width': [1024, 1024, 1024, 1024],  # joined with '-' to form the experiment path
    'optimizer': 'SGD',
    'learning_rate': 0.01,
    'batch_size': 32,
    'n_epochs': 50,
    'patience': 20,
    # Regularisation-related options (empty / zero here); they are only handed to
    # the project helpers in utils, so their exact meaning is defined there.
    'dropout': [],
    'noise_ratio': 0,
    'batch_norm': [],
    'weight_decay': [],
}

Compute the SMI of each layer prior to training (epoch 0).

In [None]:
# Epoch-0 baseline for the MLP. For each of five runs: create the run directory,
# record the configuration, build a model and save it before any call to fit(),
# then estimate SI(T;Y) between every layer's output and the labels.
all_smi = []

for n_run in range(5):
    run = n_run + 1
    print('Run:', run)
    arch = '-'.join(str(w) for w in cfg['width'])
    exp_name = f"dpi/{cfg['model']}_{arch}_{cfg['dataset']}_{cfg['optimizer']}"
    cfg['exp_name'] = f"{exp_name}/run_{run}"

    if not os.path.exists(cfg['exp_name']):
        print("Making directory", cfg['exp_name'])
        os.makedirs(cfg['exp_name'])
    # Dumped after cfg['exp_name'] is set, so the run path is stored as well.
    fname = cfg['exp_name'] + '/config.json'
    with open(fname, 'w') as f:
        json.dump(cfg, f)

    trn, tst = datasets.get_dataset(cfg)
    model = models.get_model(cfg, trn)
    models_dir = cfg['exp_name'] + '/saved_models'
    if not os.path.exists(models_dir):
        print("Making directory", models_dir)
        os.makedirs(models_dir)
    # model-00 is the starting point that the training cell reloads.
    model.save(models_dir + "/model-00")

    # Every layer is probed with the same first 10000 training samples.
    probe_x = trn.X[:10000]
    probe_y = trn.y[:10000]
    smi_all_layers = []
    for l, layer in enumerate(model.layers):
        # Sub-model mapping the network input to this layer's output.
        int_model = keras.Model(inputs=model.inputs, outputs=layer.output)
        activity = int_model.predict(probe_x)
        smi = ee.compute_smi(x=activity, y=probe_y, m=500)
        smi_all_layers.append(smi)
        print(f'Layer {l}: SI(T;Y) = {smi:.3f}')

    # Per-run results.
    fname = cfg['exp_name'] + '/smi_0'
    print("Saving", fname)
    with open(fname, 'wb') as f:
        pickle.dump(smi_all_layers, f, pickle.HIGHEST_PROTOCOL)
    all_smi.append(smi_all_layers)

# Results of all runs together, stored at the experiment root
# (exp_name keeps its value from the last loop iteration).
fname = exp_name + '/smi_0'
print("Saving", fname)
with open(fname, 'wb') as f:
    pickle.dump(all_smi, f, pickle.HIGHEST_PROTOCOL)

Train the networks for 50 epochs and save the models at epoch 5 and epoch 50.

In [None]:
class CustomSaver(keras.callbacks.Callback):
    """Save an intermediate checkpoint once five training epochs have completed.

    The target path is read from the global ``cfg['exp_name']`` when the callback
    fires, so the checkpoint lands in the directory of the run being trained.
    """

    def on_epoch_end(self, epoch, logs=None):
        # Keras passes a zero-based epoch index: the fifth epoch ends with
        # epoch == 4. Comparing `epoch == 5` would save after the sixth epoch.
        if epoch + 1 == 5:
            self.model.save(cfg['exp_name']+"/saved_models/model-05")


# Train each of the five runs from its saved epoch-0 model and store the final model.
for n_run in range(5):
    # Drop Keras global state left over from the previous run.
    tf.keras.backend.clear_session()
    run = n_run+1
    print('Run:',run)
    arch =  '-'.join(map(str,cfg['width']))
    exp_name = 'dpi/'+cfg['model']+'_'+arch+'_'+cfg['dataset']+'_'+cfg['optimizer']
    cfg['exp_name'] = exp_name + '/run_%d'%(run)

    trn, tst = datasets.get_dataset(cfg)
    # Resume from the model-00 saved by the epoch-0 cell.
    model = keras.models.load_model(cfg['exp_name']+"/saved_models/model-00")

    # Note: the fit target is trn.Y, whereas the SMI cells use trn.y.
    # cfg['n_epochs'] is 50 in this notebook; the "model-50" file name below
    # assumes that value.
    r = model.fit(x=trn.X, y=trn.Y,
              verbose    = 1,
              batch_size = cfg['batch_size'],
              epochs = cfg['n_epochs'],
              callbacks=[CustomSaver()])
    model.save(cfg['exp_name']+"/saved_models/model-50")

Compute the SMI for each layer at epoch 5.

In [None]:
# SI(T;Y) for every layer of the MLP checkpoints written as model-05.
# NOTE: `trn` is not loaded here; this cell relies on the `trn` left in the
# kernel by an earlier cell.
all_smi = []

for n_run in range(5):
    tf.keras.backend.clear_session()
    run = n_run + 1
    print('Run:', run)
    arch = '-'.join(str(w) for w in cfg['width'])
    exp_name = f"dpi/{cfg['model']}_{arch}_{cfg['dataset']}_{cfg['optimizer']}"
    cfg['exp_name'] = f"{exp_name}/run_{run}"

    model = keras.models.load_model(cfg['exp_name'] + "/saved_models/model-05")

    # Every layer is probed with the same first 10000 training samples.
    probe_x = trn.X[:10000]
    probe_y = trn.y[:10000]
    smi_all_layers = []
    for l, layer in enumerate(model.layers):
        # Sub-model mapping the network input to this layer's output.
        int_model = keras.Model(inputs=model.inputs, outputs=layer.output)
        activity = int_model.predict(probe_x)
        smi = ee.compute_smi(x=activity, y=probe_y, m=500)
        smi_all_layers.append(smi)
        print('Layer %d: SI(T;Y) = %0.3f' % (l, smi))

    # Per-run results.
    fname = cfg['exp_name'] + '/smi_5'
    print("Saving", fname)
    with open(fname, 'wb') as f:
        pickle.dump(smi_all_layers, f, pickle.HIGHEST_PROTOCOL)
    all_smi.append(smi_all_layers)

# Results of all runs together, stored at the experiment root.
fname = exp_name + '/smi_5'
print("Saving", fname)
with open(fname, 'wb') as f:
    pickle.dump(all_smi, f, pickle.HIGHEST_PROTOCOL)

Compute the SMI for each layer at epoch 50.

In [None]:
# SI(T;Y) for every layer of the final MLP models (model-50).
# NOTE: `trn` is not loaded here; this cell relies on the `trn` left in the
# kernel by an earlier cell.
all_smi = []

# All five trained runs are evaluated, matching the epoch-0 and epoch-5 cells.
# The loop previously covered only range(2), leaving runs 3-5 out of the
# aggregate smi_50 file.
for n_run in range(5):
    # Drop Keras global state left over from the previous run.
    tf.keras.backend.clear_session()
    run = n_run+1
    print('Run:',run)
    arch =  '-'.join(map(str,cfg['width']))
    exp_name = 'dpi/'+cfg['model']+'_'+arch+'_'+cfg['dataset']+'_'+cfg['optimizer']
    cfg['exp_name'] = exp_name + '/run_%d'%(run)

    model = keras.models.load_model(cfg['exp_name']+"/saved_models/model-50")

    smi_all_layers = []
    for l,layer in enumerate(model.layers):
        # Sub-model mapping the network input to this layer's output.
        int_model = keras.Model(inputs=model.inputs, outputs=model.layers[l].output)
        # Probe with the first 10000 training samples.
        activity = int_model.predict(trn.X[:10000])
        smi = ee.compute_smi(x=activity, y=trn.y[:10000], m=500)
        smi_all_layers.append(smi)
        print('Layer %d: SI(T;Y) = %0.3f'%(l,smi))
    # Per-run results.
    fname = cfg['exp_name']+'/smi_50'
    print("Saving", fname)
    with open(fname, 'wb') as f:
        pickle.dump(smi_all_layers, f, pickle.HIGHEST_PROTOCOL)
    all_smi.append(smi_all_layers)

# Results of all runs together, stored at the experiment root.
fname = exp_name+'/smi_50'
print("Saving", fname)
with open(fname, 'wb') as f:
    pickle.dump(all_smi, f, pickle.HIGHEST_PROTOCOL)

## CNN

Hyperparameters:

In [None]:
# Configuration shared by every CNN cell below; it replaces the MLP cfg.
# Key order is kept as originally written because the dict is serialised
# verbatim to config.json for each run.
cfg = {
    'dataset': 'Fashion_MNIST',
    'model': 'CNN_Global',
    'width': [32, 32, 32, 32, 32],  # joined with '-' to form the experiment path
    'optimizer': 'SGD',
    'learning_rate': 0.01,
    'batch_size': 32,
    'n_epochs': 50,
    'patience': 20,
    # Regularisation-related options (empty / zero here); they are only handed to
    # the project helpers in utils, so their exact meaning is defined there.
    'dropout': [],
    'noise_ratio': 0,
    'batch_norm': [],
}

Compute the SMI of each layer prior to training (epoch 0).

In [None]:
# Epoch-0 baseline for the CNN. For each of five runs: create the run directory,
# record the configuration, build a model and save it before any call to fit(),
# then estimate SI(T;Y) at the input, Activation and GlobalAveragePooling2D
# layers only.
all_smi = []

for n_run in range(5):
    run = n_run + 1
    print('Run:', run)
    arch = '-'.join(str(w) for w in cfg['width'])
    exp_name = f"dpi/{cfg['model']}_{arch}_{cfg['dataset']}_{cfg['optimizer']}"
    cfg['exp_name'] = f"{exp_name}/run_{run}"

    if not os.path.exists(cfg['exp_name']):
        print("Making directory", cfg['exp_name'])
        os.makedirs(cfg['exp_name'])
    # Dumped after cfg['exp_name'] is set, so the run path is stored as well.
    fname = cfg['exp_name'] + '/config.json'
    with open(fname, 'w') as f:
        json.dump(cfg, f)

    trn, tst = datasets.get_dataset(cfg)
    model = models.get_model(cfg, trn)
    models_dir = cfg['exp_name'] + '/saved_models'
    if not os.path.exists(models_dir):
        print("Making directory", models_dir)
        os.makedirs(models_dir)
    # model-00 is the starting point that the training cell reloads.
    model.save(models_dir + "/model-00")

    # Layer types whose outputs are analysed; every other layer is skipped.
    probed_types = (
        keras.layers.InputLayer,
        keras.layers.Activation,
        keras.layers.GlobalAveragePooling2D,
    )
    smi_all_layers = []
    count = 0  # index among the probed layers, used only for printing
    for l, layer in enumerate(model.layers):
        if not isinstance(layer, probed_types):
            continue
        int_model = keras.Model(inputs=model.inputs, outputs=layer.output)
        activity = int_model.predict(trn.X[:10000])
        # Flatten feature maps to one row per sample.
        if activity.ndim > 2:
            activity = activity.reshape(len(activity), -1)
        smi = ee.compute_smi(x=activity, y=trn.y[:10000], m=500)
        smi_all_layers.append(smi)
        print(f'Layer {count}: SI(T;Y) = {smi:.3f}')
        count += 1

    # Per-run results.
    fname = cfg['exp_name'] + '/smi_0'
    print("Saving", fname)
    with open(fname, 'wb') as f:
        pickle.dump(smi_all_layers, f, pickle.HIGHEST_PROTOCOL)
    all_smi.append(smi_all_layers)

# Results of all runs together, stored at the experiment root
# (exp_name keeps its value from the last loop iteration).
fname = exp_name + '/smi_0'
print("Saving", fname)
with open(fname, 'wb') as f:
    pickle.dump(all_smi, f, pickle.HIGHEST_PROTOCOL)

Train the networks for 50 epochs and save the models at epoch 5 and epoch 50.

In [None]:
class CustomSaver(keras.callbacks.Callback):
    """Save an intermediate checkpoint once five training epochs have completed.

    The target path is read from the global ``cfg['exp_name']`` when the callback
    fires, so the checkpoint lands in the directory of the run being trained.
    """

    # The method must be indented under the class; the unindented version made
    # this cell fail with an IndentationError.
    def on_epoch_end(self, epoch, logs=None):
        # Keras passes a zero-based epoch index: the fifth epoch ends with
        # epoch == 4. Comparing `epoch == 5` would save after the sixth epoch.
        if epoch + 1 == 5:
            self.model.save(cfg['exp_name']+"/saved_models/model-05")


# Train each of the five runs from its saved epoch-0 model and store the final model.
for n_run in range(5):
    # Drop Keras global state left over from the previous run.
    tf.keras.backend.clear_session()
    run = n_run+1
    print('Run:',run)
    arch =  '-'.join(map(str,cfg['width']))
    exp_name = 'dpi/'+cfg['model']+'_'+arch+'_'+cfg['dataset']+'_'+cfg['optimizer']
    cfg['exp_name'] = exp_name + '/run_%d'%(run)

    trn, tst = datasets.get_dataset(cfg)
    # Resume from the model-00 saved by the epoch-0 cell.
    model = keras.models.load_model(cfg['exp_name']+"/saved_models/model-00")

    # Note: the fit target is trn.Y, whereas the SMI cells use trn.y.
    # cfg['n_epochs'] is 50 in this notebook; the "model-50" file name below
    # assumes that value.
    r = model.fit(x=trn.X, y=trn.Y,
              verbose    = 1,
              batch_size = cfg['batch_size'],
              epochs = cfg['n_epochs'],
              callbacks=[CustomSaver()])
    model.save(cfg['exp_name']+"/saved_models/model-50")

Compute the SMI for each layer at epoch 5.

In [None]:
# SI(T;Y) at the input, Activation and GlobalAveragePooling2D layers of the CNN
# checkpoints written as model-05.
# NOTE: `trn` is not loaded here; this cell relies on the `trn` left in the
# kernel by an earlier cell.
all_smi = []

for n_run in range(5):
    tf.keras.backend.clear_session()
    run = n_run + 1
    print('Run:', run)
    arch = '-'.join(str(w) for w in cfg['width'])
    exp_name = f"dpi/{cfg['model']}_{arch}_{cfg['dataset']}_{cfg['optimizer']}"
    cfg['exp_name'] = f"{exp_name}/run_{run}"

    model = keras.models.load_model(cfg['exp_name'] + "/saved_models/model-05")

    # Layer types whose outputs are analysed; every other layer is skipped.
    probed_types = (
        keras.layers.InputLayer,
        keras.layers.Activation,
        keras.layers.GlobalAveragePooling2D,
    )
    smi_all_layers = []
    count = 0  # index among the probed layers, used only for printing
    for l, layer in enumerate(model.layers):
        if not isinstance(layer, probed_types):
            continue
        int_model = keras.Model(inputs=model.inputs, outputs=layer.output)
        activity = int_model.predict(trn.X[:10000])
        # Flatten feature maps to one row per sample.
        if activity.ndim > 2:
            activity = activity.reshape(len(activity), -1)
        smi = ee.compute_smi(x=activity, y=trn.y[:10000], m=500)
        smi_all_layers.append(smi)
        print(f'Layer {count}: SI(T;Y) = {smi:.3f}')
        count += 1

    # Per-run results.
    fname = cfg['exp_name'] + '/smi_5'
    print("Saving", fname)
    with open(fname, 'wb') as f:
        pickle.dump(smi_all_layers, f, pickle.HIGHEST_PROTOCOL)
    all_smi.append(smi_all_layers)

# Results of all runs together, stored at the experiment root.
fname = exp_name + '/smi_5'
print("Saving", fname)
with open(fname, 'wb') as f:
    pickle.dump(all_smi, f, pickle.HIGHEST_PROTOCOL)

Compute the SMI for each layer at epoch 50.

In [None]:
# SI(T;Y) at the input, Activation and GlobalAveragePooling2D layers of the
# final CNN models (model-50).
# NOTE: `trn` is not loaded here; this cell relies on the `trn` left in the
# kernel by an earlier cell.
all_smi = []

for n_run in range(5):
    tf.keras.backend.clear_session()
    run = n_run + 1
    print('Run:', run)
    arch = '-'.join(str(w) for w in cfg['width'])
    exp_name = f"dpi/{cfg['model']}_{arch}_{cfg['dataset']}_{cfg['optimizer']}"
    cfg['exp_name'] = f"{exp_name}/run_{run}"

    model = keras.models.load_model(cfg['exp_name'] + "/saved_models/model-50")

    # Layer types whose outputs are analysed; every other layer is skipped.
    probed_types = (
        keras.layers.InputLayer,
        keras.layers.Activation,
        keras.layers.GlobalAveragePooling2D,
    )
    smi_all_layers = []
    count = 0  # index among the probed layers, used only for printing
    for l, layer in enumerate(model.layers):
        if not isinstance(layer, probed_types):
            continue
        int_model = keras.Model(inputs=model.inputs, outputs=layer.output)
        activity = int_model.predict(trn.X[:10000])
        # Flatten feature maps to one row per sample.
        if activity.ndim > 2:
            activity = activity.reshape(len(activity), -1)
        smi = ee.compute_smi(x=activity, y=trn.y[:10000], m=500)
        smi_all_layers.append(smi)
        print(f'Layer {count}: SI(T;Y) = {smi:.3f}')
        count += 1

    # Per-run results.
    fname = cfg['exp_name'] + '/smi_50'
    print("Saving", fname)
    with open(fname, 'wb') as f:
        pickle.dump(smi_all_layers, f, pickle.HIGHEST_PROTOCOL)
    all_smi.append(smi_all_layers)

# Results of all runs together, stored at the experiment root.
fname = exp_name + '/smi_50'
print("Saving", fname)
with open(fname, 'wb') as f:
    pickle.dump(all_smi, f, pickle.HIGHEST_PROTOCOL)

## VGG

Hyperparameters:

In [None]:
# Configuration shared by every VGG cell below; it replaces the CNN cfg.
# There is no 'width' entry: the VGG experiment path is built from model,
# dataset and optimizer only. Key order is kept as originally written because
# the dict is serialised verbatim to config.json for each run.
cfg = {
    'dataset': 'CIFAR10',
    'model': 'VGG16',
    'optimizer': 'SGD',
    'learning_rate': 0.001,
    'batch_size': 32,
    'n_epochs': 50,
    'noise_ratio': 0,
}

Compute the SMI of each layer prior to training (epoch 0).

In [None]:
# Epoch-0 baseline for VGG. For each of five runs: create the run directory,
# record the configuration, build a model and save it before any call to fit(),
# then estimate SI(T;Y) for every layer in model.layers, reported by layer name.
all_smi = []

for n_run in range(5):
    run = n_run + 1
    print('Run:', run)
    exp_name = f"dpi/{cfg['model']}_{cfg['dataset']}_{cfg['optimizer']}"
    cfg['exp_name'] = f"{exp_name}/run_{run}"

    if not os.path.exists(cfg['exp_name']):
        print("Making directory", cfg['exp_name'])
        os.makedirs(cfg['exp_name'])
    # Dumped after cfg['exp_name'] is set, so the run path is stored as well.
    fname = cfg['exp_name'] + '/config.json'
    with open(fname, 'w') as f:
        json.dump(cfg, f)

    trn, tst = datasets.get_dataset(cfg)
    model = models.get_model(cfg, trn)
    models_dir = cfg['exp_name'] + '/saved_models'
    if not os.path.exists(models_dir):
        print("Making directory", models_dir)
        os.makedirs(models_dir)
    # model-00 is the starting point that the training cell reloads.
    model.save(models_dir + "/model-00")

    smi_all_layers = []
    for l, layer in enumerate(model.layers):
        # Sub-model mapping the network input to this layer's output.
        int_model = keras.Model(inputs=model.inputs, outputs=layer.output)
        activity = int_model.predict(trn.X[:10000])
        # Flatten feature maps to one row per sample.
        if activity.ndim > 2:
            activity = activity.reshape(len(activity), -1)
        smi = ee.compute_smi(x=activity, y=trn.y[:10000], m=500)
        smi_all_layers.append(smi)
        print(f'Layer {layer.name}: SI(T;Y) = {smi:.3f}')

    # Per-run results.
    fname = cfg['exp_name'] + '/smi_0'
    print("Saving", fname)
    with open(fname, 'wb') as f:
        pickle.dump(smi_all_layers, f, pickle.HIGHEST_PROTOCOL)
    all_smi.append(smi_all_layers)

# Results of all runs together, stored at the experiment root
# (exp_name keeps its value from the last loop iteration).
fname = exp_name + '/smi_0'
print("Saving", fname)
with open(fname, 'wb') as f:
    pickle.dump(all_smi, f, pickle.HIGHEST_PROTOCOL)

Train the networks for 50 epochs and save the models at epoch 5 and epoch 50.

In [None]:
class CustomSaver(keras.callbacks.Callback):
    """Save an intermediate checkpoint once five training epochs have completed.

    The target path is read from the global ``cfg['exp_name']`` when the callback
    fires, so the checkpoint lands in the directory of the run being trained.
    """

    def on_epoch_end(self, epoch, logs=None):
        # Keras passes a zero-based epoch index: the fifth epoch ends with
        # epoch == 4. Comparing `epoch == 5` would save after the sixth epoch.
        if epoch + 1 == 5:
            self.model.save(cfg['exp_name']+"/saved_models/model-05")


# Train each of the five runs from its saved epoch-0 model and store the final model.
for n_run in range(5):
    # Drop Keras global state left over from the previous run.
    tf.keras.backend.clear_session()
    run = n_run+1
    print('Run:',run)
    exp_name = 'dpi/'+cfg['model']+'_'+cfg['dataset']+'_'+cfg['optimizer']
    cfg['exp_name'] = exp_name + '/run_%d'%(run)

    trn, tst = datasets.get_dataset(cfg)
    # Resume from the model-00 saved by the epoch-0 cell.
    model = keras.models.load_model(cfg['exp_name']+"/saved_models/model-00")

    # Note: the fit target is trn.Y, whereas the SMI cells use trn.y.
    # The test split is passed as validation data here.
    # cfg['n_epochs'] is 50 in this notebook; the "model-50" file name below
    # assumes that value.
    r = model.fit(x=trn.X, y=trn.Y,
              verbose    = 1,
              batch_size = cfg['batch_size'],
              epochs = cfg['n_epochs'],
              validation_data=(tst.X, tst.Y),
              callbacks=[CustomSaver()])
    model.save(cfg['exp_name']+"/saved_models/model-50")

Compute the SMI for each layer at epoch 5.

In [None]:
# SI(T;Y) for every layer of the VGG checkpoints written as model-05,
# reported by layer name.
# NOTE: `trn` is not loaded here; this cell relies on the `trn` left in the
# kernel by an earlier cell.
all_smi = []

for n_run in range(5):
    tf.keras.backend.clear_session()
    run = n_run + 1
    print('Run:', run)
    exp_name = f"dpi/{cfg['model']}_{cfg['dataset']}_{cfg['optimizer']}"
    cfg['exp_name'] = f"{exp_name}/run_{run}"

    model = keras.models.load_model(cfg['exp_name'] + "/saved_models/model-05")

    smi_all_layers = []
    for l, layer in enumerate(model.layers):
        # Sub-model mapping the network input to this layer's output.
        int_model = keras.Model(inputs=model.inputs, outputs=layer.output)
        activity = int_model.predict(trn.X[:10000])
        # Flatten feature maps to one row per sample.
        if activity.ndim > 2:
            activity = activity.reshape(len(activity), -1)
        smi = ee.compute_smi(x=activity, y=trn.y[:10000], m=500)
        smi_all_layers.append(smi)
        print(f'Layer {layer.name}: SI(T;Y) = {smi:.3f}')

    # Per-run results.
    fname = cfg['exp_name'] + '/smi_5'
    print("Saving", fname)
    with open(fname, 'wb') as f:
        pickle.dump(smi_all_layers, f, pickle.HIGHEST_PROTOCOL)
    all_smi.append(smi_all_layers)

# Results of all runs together, stored at the experiment root.
fname = exp_name + '/smi_5'
print("Saving", fname)
with open(fname, 'wb') as f:
    pickle.dump(all_smi, f, pickle.HIGHEST_PROTOCOL)

Compute the SMI for each layer at epoch 50.

In [None]:
# SI(T;Y) for every layer of the final VGG models (model-50),
# reported by layer name.
# NOTE: `trn` is not loaded here; this cell relies on the `trn` left in the
# kernel by an earlier cell.
all_smi = []

for n_run in range(5):
    tf.keras.backend.clear_session()
    run = n_run + 1
    print('Run:', run)
    exp_name = f"dpi/{cfg['model']}_{cfg['dataset']}_{cfg['optimizer']}"
    cfg['exp_name'] = f"{exp_name}/run_{run}"

    model = keras.models.load_model(cfg['exp_name'] + "/saved_models/model-50")

    smi_all_layers = []
    for l, layer in enumerate(model.layers):
        # Sub-model mapping the network input to this layer's output.
        int_model = keras.Model(inputs=model.inputs, outputs=layer.output)
        activity = int_model.predict(trn.X[:10000])
        # Flatten feature maps to one row per sample.
        if activity.ndim > 2:
            activity = activity.reshape(len(activity), -1)
        smi = ee.compute_smi(x=activity, y=trn.y[:10000], m=500)
        smi_all_layers.append(smi)
        print(f'Layer {layer.name}: SI(T;Y) = {smi:.3f}')

    # Per-run results.
    fname = cfg['exp_name'] + '/smi_50'
    print("Saving", fname)
    with open(fname, 'wb') as f:
        pickle.dump(smi_all_layers, f, pickle.HIGHEST_PROTOCOL)
    all_smi.append(smi_all_layers)

# Results of all runs together, stored at the experiment root.
fname = exp_name + '/smi_50'
print("Saving", fname)
with open(fname, 'wb') as f:
    pickle.dump(all_smi, f, pickle.HIGHEST_PROTOCOL)