# Visualization of Layer Activation

In [1]:
import torch
from utils import data_generator
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import pandas as pd
import os
import matplotlib.gridspec as gridspec
from utils import get_batch, make_experiment_dir

In [2]:
# When True the pgf backend is selected and figures are written as .pgf files;
# otherwise the plotting functions fall back to .svg output.
use_pgf = True
# Selects which model file and which synthetic data file are loaded ("A" or "B").
# dataset_selection = "B"
dataset_selection = "A"

## Configuration and global variables

In [3]:
# Defaults applied to every figure: font size 12 and svg text kept as text
# ('svg.fonttype' = 'none') rather than converted to paths.
base_rc_settings = {'font.size': 12, 'svg.fonttype': 'none'}
matplotlib.rcParams.update(base_rc_settings)

if use_pgf:
    # Select the pgf backend before applying the pgf-specific font settings.
    matplotlib.use("pgf")
    pgf_with_rc_fonts = {
        "font.family": "serif",
        "font.serif": [],                   # use latex default serif font
        "font.sans-serif": ["DejaVu Sans"], # use a specific sans-serif font
    }
    matplotlib.rcParams.update({**pgf_with_rc_fonts, 'pgf.rcfonts': False})

In [4]:
# Serialized model to analyse for each supported dataset selection.
_MODEL_PATHS = {
    "B": "./Activation_Visualization/BnA__activation_visu__models/BnA/Iteration_0/0_base_B/model.pt",
    "A": "./Activation_Visualization/AnB__activation_visu__models/BnA/Iteration_0/0_base_B/model.pt",
}
if dataset_selection not in _MODEL_PATHS:
    # Fail here with a clear message instead of a NameError on model_path later.
    raise ValueError(
        "dataset_selection must be one of {}, got {!r}".format(sorted(_MODEL_PATHS), dataset_selection))
model_path = _MODEL_PATHS[dataset_selection]

experiments_path = "./Activation_Visualization/Experiments/"

# Shared state filled by the forward hooks and the sequence-generation cell.
inputs = {}                       # hook name -> list of recorded module inputs
activations = {}                  # hook name -> list of recorded module outputs
forward_hooks = []                # every hook function created so far
input_sequences = []              # tensors fed into the model
input_sequence_name_mapping = {}  # index in input_sequences -> display name

In [5]:
# Settings handed to data_generator(). The 'data_file_name' given here is the
# fallback; it is replaced below for the known dataset selections.
# Other data files that were used with this notebook:
#   r'./Transferability_Investigation/synthetic_data.p'
#   r'./data/Synthetic/synthetic_data_reverse.p'
#   r'./data/household_power_consumption.p'
#   r'./data/household_power_consumption_diff.p'
# Target variables used with the household data:
#   ['Global_active_power [Soll]', 'Global_reactive_power [Soll]', 'Voltage [Soll]',
#    'Global_intensity [Soll]', 'Sub_metering_1 [Soll]', 'Sub_metering_2 [Soll]',
#    'Sub_metering_3 [Soll]']
dataset_config = dict(
    test_size=0.2,
    train_valid_split=[1, 0],
    sampling_size=1,
    data_file_name=r'./data/Synthetic/synthetic_data.p',
    sample_frequency=1,
    max_data=9999999,
    target_variables=['Soll'],
    seed=1,
    series_x='11',
    series_y='11',
    pca=False,
    batch_size=1,
    cuda=True,
)

# Data file belonging to each dataset selection; any other selection keeps
# the fallback file configured above.
_data_file_by_selection = {
    "B": r'./data/Synthetic/synthetic_data.p',
    "A": r'./data/Synthetic/synthetic_data_reverse.p',
}
if dataset_selection in _data_file_by_selection:
    dataset_config['data_file_name'] = _data_file_by_selection[dataset_selection]

## Functions to register hooks behind ReLU and Blocks

In [6]:
def add_forward_hook_function(name):
    """Create a forward hook that records a module's input and output under ``name``.

    Resets ``activations[name]`` and ``inputs[name]`` to empty lists, appends
    the new hook to ``forward_hooks`` and returns it so that the caller can
    pass it to ``register_forward_hook``.

    Args:
        name: Key under which recordings are stored; the hook function itself
            is named ``"hook_" + name``.

    Returns:
        The hook callable ``hook_function(module, hook_input, output)``.
    """
    activations[name] = []
    inputs[name] = []

    def hook_function(module, hook_input, output):
        # output[0] drops the leading dimension of the module output, while
        # hook_input[0] picks the first element of the input passed to the hook
        # (kept whole). detach().cpu() replaces the legacy `.data` access and
        # also works when the tensors do not live on the CPU.
        activations[name].append(output[0].detach().cpu().numpy())
        inputs[name].append(hook_input[0].detach().cpu().numpy())

    hook_function.__name__ = "hook_" + name
    forward_hooks.append(hook_function)
    return hook_function

In [7]:
def register_hooks(model):
    """Attach recording hooks to every block in ``model.tcn.network``.

    For block ``i`` the recordings are stored under ``relu_<i>_1`` and
    ``relu_<i>_2`` (the block's ``relu1`` / ``relu2`` modules), ``out_<i>``
    (the block itself) and, only when the block's ``downsample`` attribute is
    not None, ``downsample_<i>``.

    Args:
        model: Object exposing an indexable ``model.tcn.network`` whose items
            have ``relu1``, ``relu2`` and ``downsample`` attributes.
    """
    blocks = model.tcn.network
    for block_idx in range(len(blocks)):
        block = blocks[block_idx]
        tag = str(block_idx)

        # Create the hooks first (this also resets their recording lists) ...
        relu1_hook = add_forward_hook_function("relu_" + tag + "_1")
        relu2_hook = add_forward_hook_function("relu_" + tag + "_2")
        block_hook = add_forward_hook_function("out_" + tag)

        # ... then attach them to the corresponding modules.
        block.relu1.register_forward_hook(relu1_hook)
        block.relu2.register_forward_hook(relu2_hook)
        block.register_forward_hook(block_hook)

        # Earlier experiments hooked conv1/conv2 directly and zeroed the
        # conv/downsample biases; that variant is intentionally not active.
        if block.downsample is None:
            continue
        downsample_hook = add_forward_hook_function("downsample_" + tag)
        block.downsample.register_forward_hook(downsample_hook)

## Plot Functions

### Layer Activation Plots

In [8]:
def plot_layer_activations(output_path=None):
    """Plot one "module influence" curve per input sequence.

    For every entry of ``input_sequences`` and every residual block, the summed
    ``relu_<block>_2`` recording is divided by (summed output of the last
    block - 50 * summed input sequence). One line per input sequence is drawn
    over the residual blocks.

    Args:
        output_path: Directory the figure is written to as
            ``module_influence.pgf`` (when ``use_pgf`` is set) or
            ``module_influence.svg``. When None the figure is shown instead.
    """
    x = np.arange(num_resblocks)
    # One label per tick; building this inside the per-sequence loop would
    # yield more labels than ticks.
    x_ticks = ["Residual Block" + str(resblock + 1) for resblock in range(num_resblocks)]

    y = []
    for sequence_idx, sequence in enumerate(input_sequences):
        output_activation = activations["out_" + str(num_resblocks - 1)][sequence_idx].sum()
        # NOTE(review): the factor 50 presumably matches the number of channels
        # per block -- confirm before using a model with a different width.
        input_activation = torch.sum(sequence) * 50
        normaliser = output_activation - input_activation
        y.append([
            # float() turns the 0-d result into a plain Python number.
            float(activations["relu_" + str(resblock) + "_2"][sequence_idx].sum() / normaliser)
            for resblock in range(num_resblocks)
        ])
    y = np.array(y)

    fig, ax = plt.subplots(figsize=(10, 6.8))
    for sequence_idx, influence in enumerate(y):
        ax.plot(influence, label=input_sequence_name_mapping[sequence_idx])
    ax.set_xticks(x)
    ax.set_xticklabels(x_ticks)
    ax.set_ylabel("Module Influence")
    ax.set_xlabel("Network Module")
    ax.grid()
    ax.legend()
    plt.tight_layout()

    if output_path is None:
        # No target directory: display instead of failing in os.path.join.
        plt.show()
    elif use_pgf:
        fig.savefig(os.path.join(output_path, "module_influence.pgf"), transparent=True)
    else:
        fig.savefig(os.path.join(output_path, "module_influence.svg"))

### Residual Block Activation

In [9]:
def plot_res_block_activations(input_sequence_idx, num_columns=10, neurons_to_plot=None, sequence_clip=50, plot_path=None):
    """Draw one small plot per channel of every block's ``relu_<block>_2`` recording.

    The figure has the input sequence in the top row, followed by one group of
    rows per residual block (separated by removed spacer rows). Each cell shows
    the recorded activation of one channel together with its sum; channels
    whose sum is exactly zero get a dark gray background instead of a curve.

    NOTE(review): the row arithmetic uses integer truncation, so it presumably
    expects the same number of plotted channels in every block and that number
    to be a multiple of ``num_columns`` -- confirm before changing either.

    Args:
        input_sequence_idx: Index into ``input_sequences`` / the recordings.
        num_columns: Number of channel plots per row.
        neurons_to_plot: None (all channels of each block's second
            convolution), an int ``n`` (channels ``0..n-1`` in every block) or
            an iterable with one index collection per residual block.
        sequence_clip: If positive, show only the first ``sequence_clip`` time
            steps; if negative, only the last ones; 0 leaves the axis as is.
        plot_path: Directory for ``activation_maps__<name>.pgf`` / ``.svg``.
            When None the figure is shown instead.

    Raises:
        ValueError: If ``neurons_to_plot`` is an iterable whose length differs
            from ``num_resblocks`` or is of an unsupported type.
    """
    if neurons_to_plot is None:
        neurons_to_plot = [range(num_neurons[2*resblock + 1]) for resblock in range(num_resblocks)]
    elif isinstance(neurons_to_plot, (int, np.integer)) and not isinstance(neurons_to_plot, bool):
        # Also accepts numpy integers, which `type(x) is int` rejected.
        neurons_to_plot = [range(neurons_to_plot) for _ in range(num_resblocks)]
    elif hasattr(neurons_to_plot, '__iter__'):
        if len(neurons_to_plot) != num_resblocks:
            raise ValueError('List neurons_to_plot must be of length num_resblocks!')
    else:
        raise ValueError("Parameter neurons_to_plot must be either None or int or iterable")

    num_rows = int(np.sum([len(neurons_to_plot[resblock]) for resblock in range(num_resblocks)]) / num_columns)
    # Input row, one spacer row, then per block its channel rows plus a thin
    # spacer; the trailing spacer is dropped.
    height_ratios = ([1.5] + [1] + ([1] * int(num_rows/num_resblocks) + [0.5]) * num_resblocks)[:-1]
    # Narrow first column for the "Input" / "Block n" labels.
    width_ratios = [0.6] + [1] * num_columns

    print("Num rows: {}, Num cols: {}".format(num_rows, num_columns))
    print("Height_ratios: ", height_ratios)

    figure, axs = plt.subplots(num_rows + 1 + num_resblocks, num_columns + 1, sharex=True, sharey=True,
                               gridspec_kw={'height_ratios': height_ratios, 'width_ratios': width_ratios},
                               figsize=(20, 13))
    gs = axs[0, 0].get_gridspec()
    # Free the top row (replaced by the input plot) and the spacer rows.
    for ax in axs[0, :]:
        ax.remove()
    for resblock in range(num_resblocks):
        for ax in axs[1 + int(resblock*(num_rows/num_resblocks+1)), :]:
            ax.remove()

    input_sequence = input_sequences[input_sequence_idx][0, 0, :].numpy()
    input_len = len(input_sequence)

    input_ax = figure.add_subplot(gs[0, 1:])
    # 'ro' already selects red; repeating it via c='r' only triggers a warning.
    input_ax.plot(input_sequence, 'ro', ms=2)
    label_ax_inp = figure.add_subplot(gs[0, 0])
    label_ax_inp.axis('off')
    label_ax_inp.text(0, 0.5, "Input")

    neuron_counter = 0
    for resblock in range(num_resblocks):
        start_row = 2 + int(resblock * (num_rows / num_resblocks + 1))
        end_row = start_row + int(len(neurons_to_plot[resblock]) / num_columns)
        # Merge the first-column cells of this block into one label axis.
        for ax in axs[start_row:end_row, 0]:
            ax.remove()
        label_ax_resblock = figure.add_subplot(gs[start_row:end_row, 0])
        label_ax_resblock.axis('off')
        label_ax_resblock.text(0, 0.5, "Block {}".format(resblock+1), fontsize = 14)
        for neuron_idx in neurons_to_plot[resblock]:
            row_coordinate = int(neuron_counter / num_columns) + resblock + 2
            col_coordinate = 1 + neuron_counter % num_columns
            cell_ax = axs[row_coordinate, col_coordinate]
            channel_activation = activations["relu_"+str(resblock)+"_2"][input_sequence_idx][neuron_idx, :]
            activation_sum = np.sum(channel_activation)
            if activation_sum != 0:
                cell_ax.plot(channel_activation, linewidth=1)
            else:
                cell_ax.set_facecolor('darkgray')
            cell_ax.text(
                0.5, 0.78, '{0:.2f}'.format(activation_sum), horizontalalignment='center',
                verticalalignment='center', transform=cell_ax.transAxes, fontsize = 9)
            if col_coordinate == 1:
                # Shared y axes hide tick labels; re-enable them on the leftmost plot.
                cell_ax.yaxis.set_tick_params(which='both', labelleft=True)
            neuron_counter = neuron_counter + 1

    # The channel axes share their x axis, so one call clips all of them.
    if sequence_clip > 0:
        axs[2, 1].set_xlim(0, sequence_clip-1)
    elif sequence_clip < 0:
        axs[2, 1].set_xlim(input_len+sequence_clip-1, input_len-1)

    if plot_path is None:
        plt.show()
    else:
        file_stem = "activation_maps__"+input_sequence_name_mapping[input_sequence_idx].replace(' ', '_')
        if use_pgf:
            # savefig's keyword is `transparent`, not `transparency`.
            figure.savefig(os.path.join(plot_path, file_stem+".pgf"), transparent=True, bbox_inches='tight')
        else:
            figure.savefig(os.path.join(plot_path, file_stem+".svg"), format='svg')

### Relative Neuron Activation over Network Modules

In [10]:
def plot_relative_neuron_activation(input_sequence_idx=0, output_path=None, num_neurons_to_plot=None):
    """Stack-plot each channel's share of the total activation across the network.

    For every channel the following values are collected: a constant 1
    ("Init"), the absolute summed ``downsample_0`` recording, the summed
    ``out_<block>`` recording of every residual block, and the absolute summed
    output of the last block multiplied by the channel's weight in
    ``model.linear``. Every column is then divided by its sum so that the
    channels add up to 1 at each module.

    Args:
        input_sequence_idx: Index into ``input_sequences`` / the recordings.
        output_path: Directory for ``<sequence name>.pgf`` / ``.svg``. When
            None the figure is shown instead.
        num_neurons_to_plot: When given, the labels are taken from the most
            activated channels instead of all channels.
            NOTE(review): only the labels change; all channels are still
            stacked, and the label source is a 2-D array (one row per block),
            so the intended behaviour of this option should be confirmed.
    """
    x = np.arange(num_resblocks + 3)

    plot_neurons = np.arange(num_neurons[0])
    if num_neurons_to_plot is not None:
        # Use the function argument, not the notebook-global loop variable `idx`.
        plot_neurons = get_max_activated_channel_manipulations(
            input_sequence_idx)[:, :num_neurons_to_plot, 0].astype(int)
    labels = ["Channel {}".format(i+1) for i in plot_neurons]

    # Output of the last residual block, matching plot_layer_activations,
    # instead of a hard-coded 'out_3'.
    last_block_key = 'out_' + str(num_resblocks - 1)

    y = []
    for neuron_idx in range(num_neurons[0]):
        neuron_activations = [1, abs(activations['downsample_0'][input_sequence_idx][neuron_idx].sum())]
        for resblock in range(num_resblocks):
            neuron_activations.append(activations['out_'+str(resblock)][input_sequence_idx][neuron_idx].sum())
        neuron_activations.append(abs(
            activations[last_block_key][input_sequence_idx][neuron_idx].sum()
            * model.linear.weight[0, neuron_idx].item()))
        y.append(neuron_activations)
    y = np.array(y)

    # Normalise every module (column) so the channel shares sum to 1.
    for col_idx in range(y.shape[1]):
        y[:, col_idx] = y[:, col_idx] / np.sum(y[:, col_idx])

    x_ticks = ["Init", "Conv1D Downsample"]
    x_ticks.extend("ResBlock {}".format(resblock) for resblock in range(num_resblocks))
    x_ticks.append("Linear Layer")

    fig, ax = plt.subplots(figsize=(12,9))
    ax.stackplot(x, y, labels=labels)
    # Pin the ticks to the module positions so each label lands on its module;
    # this replaces the former "__" placeholder for an auto-located tick.
    ax.set_xticks(x)
    ax.set_xticklabels(x_ticks)
    ax.set_title("Channel Activation Plot: {}".format(input_sequence_name_mapping[input_sequence_idx]))
    ax.set_xlabel("Network Module")
    ax.set_ylabel("Fraction of the total Activation")
    if output_path is not None:
        file_stem = os.path.join(output_path, input_sequence_name_mapping[input_sequence_idx])
        if use_pgf:
            # savefig's keywords are `transparent` and `format`.
            fig.savefig(file_stem+".pgf", transparent=True)
        else:
            fig.savefig(file_stem+".svg", format='svg')
    else:
        plt.show()

## Helper Functions

### List of maximally activated Neurons

In [11]:
def get_max_activated_channel_manipulations(input_sequence_idx, reverse=True):
    """Rank the channels of every residual block by their summed activation.

    Uses the ``relu_<block>_2`` recording of the given input sequence.

    Args:
        input_sequence_idx: Index of the recorded forward pass to evaluate.
        reverse: True (default) sorts from highest to lowest sum, False from
            lowest to highest.

    Returns:
        ``np.array`` built from one list per block of ``[channel index, sum]``
        pairs in sorted order.
    """
    result = []
    for resblock in range(num_resblocks):
        block_activation = activations["relu_" + str(resblock) + "_2"][input_sequence_idx]
        # Take the channel count from the recording itself: num_neurons holds
        # two entries per block (conv1, conv2), so num_neurons[resblock] would
        # read the wrong entry.
        layer_result = [
            [neuron_idx, block_activation[neuron_idx].sum()]
            for neuron_idx in range(len(block_activation))
        ]
        result.append(sorted(layer_result, key=lambda entry: entry[1], reverse=reverse))
    return np.array(result)

## Load the model

In [12]:
# Load the pickled model onto the CPU. The context manager closes the file
# handle, which the bare open() call left open.
# NOTE: torch.load unpickles the file -- only load model files you trust.
with open(model_path, 'rb') as model_file:
    model = torch.load(model_file, map_location='cpu')
model.eval()

num_resblocks = len(model.tcn.network)
num_layers = num_resblocks*2
# Output channels per convolution, stored as [conv1, conv2] for each block in
# order, i.e. block b's second convolution is at index 2*b + 1.
num_neurons = []
for resblock in range(num_resblocks):
    num_neurons.append(model.tcn.network[resblock].conv1.bias.shape[0])
    num_neurons.append(model.tcn.network[resblock].conv2.bias.shape[0])

# Attach the recording hooks before any sequence is fed through the model.
register_hooks(model)

## Generate input sequences

In [13]:
# Parameters for the signal generation:
# signal_len is the number of time steps of every generated sequence,
# period_len the period (in time steps) of the periodic signals.
signal_len = 200
period_len = 10

### Load Training Data

In [14]:
# Build the dataset from the configuration assembled above.
data, x_labels, y_labels, pca_scaler = data_generator(dataset_config)

# Unpack the splits pairwise; the tuning set is the first entry of the
# validation split.
x_train, x_test, x_tune = data.x_train, data.x_test, data.x_valid[0]
y_train, y_test, y_tune = data.y_train, data.y_test, data.y_valid[0]

### Generate Sequences

In [15]:
def _add_input_sequence(name, sequence):
    """Append ``sequence`` to ``input_sequences`` and store ``name`` under its index."""
    input_sequences.append(sequence)
    input_sequence_name_mapping[len(input_sequences) - 1] = name


# Every synthetic signal has the shape (1, 1, signal_len).
_sequence_shape = (1, 1, signal_len)

# Single impulse at time step 4 (requires signal_len > 4).
_add_input_sequence(
    'Dirac Function',
    torch.zeros(_sequence_shape).index_fill(2, torch.LongTensor([4]), 1))

_add_input_sequence(
    'Sawtooth',
    torch.Tensor([(i % period_len) / period_len for i in range(signal_len)]).reshape(_sequence_shape))

# Sine shifted by +1 so that it stays non-negative.
_add_input_sequence(
    'Sine',
    torch.Tensor(np.sin(np.arange(signal_len) * 2 * np.pi / period_len) + 1).reshape(_sequence_shape))

_add_input_sequence(
    'Square',
    torch.Tensor([1 if i % period_len < (period_len / 2) else 0 for i in range(signal_len)]).reshape(_sequence_shape))

_add_input_sequence('Constant', torch.zeros(_sequence_shape).fill_(1))

# Ramp with slope 0.1 per step. An integer arange scaled afterwards always has
# exactly signal_len elements, whereas arange with a float step and float stop
# can be off by one depending on rounding.
_add_input_sequence(
    'Linear',
    torch.Tensor(np.arange(signal_len) * 0.1).reshape(_sequence_shape))

_add_input_sequence(
    'Triangular',
    torch.Tensor([abs((i % period_len) / period_len - 0.5) for i in range(signal_len)]).reshape(_sequence_shape))

# Window of the training data starting at a random offset.
# NOTE(review): the offset comes from np.random.rand(), so the window differs
# between runs unless numpy's RNG is seeded elsewhere -- confirm.
_add_input_sequence(
    'Training Sequence',
    get_batch(x_train, y_train, int(np.random.rand() * (x_train.shape[2] - signal_len)), signal_len)[0][0:1, 1:2, :])

## Feed sequences into the network

In [16]:
# Run every prepared sequence through the model once. Only the hook recordings
# are used afterwards, so gradient tracking is switched off to avoid building
# autograd graphs.
with torch.no_grad():
    for idx in range(len(input_sequences)):
        model(input_sequences[idx])

## Generate Plots

In [17]:
# Directory that receives all generated figures.
# Either create a fresh experiment folder (commented out below) or reuse the
# hard-coded folder of an earlier run.
# experiment_dir = make_experiment_dir(experiments_path)
experiment_dir = './Activation_Visualization/Experiments/2019-07-19 15-01-48.156239__A__final/'

### ResBlock Activations

In [18]:
# Set params for clipping the output sequences and set number of columns in the plot
# (both are passed on to plot_res_block_activations).
sequence_clip = 50
num_columns = 10

# only plot the num_neurons_to_plot <= 50 neurons with the highest activation
# (only takes effect if the selection line in the plotting loop is enabled;
# neurons_to_plot = None plots every channel).
num_neurons_to_plot = 1
neurons_to_plot = None

In [19]:
# Options shared by all activation-map figures.
res_block_plot_options = dict(
    num_columns=num_columns,
    neurons_to_plot=neurons_to_plot,
    sequence_clip=sequence_clip,
    plot_path=experiment_dir,
)
for idx, _ in enumerate(input_sequences):
    # To restrict the plot to the most activated channels, set per sequence:
    # neurons_to_plot = get_max_activated_channel_manipulations(idx)[:, :num_neurons_to_plot, 0].astype(int)
    plot_res_block_activations(input_sequence_idx=idx, **res_block_plot_options)

Num rows: 20, Num cols: 10
Height_ratios:  [1.5, 1, 1, 1, 1, 1, 1, 0.5, 1, 1, 1, 1, 1, 0.5, 1, 1, 1, 1, 1, 0.5, 1, 1, 1, 1, 1]
Num rows: 20, Num cols: 10
Height_ratios:  [1.5, 1, 1, 1, 1, 1, 1, 0.5, 1, 1, 1, 1, 1, 0.5, 1, 1, 1, 1, 1, 0.5, 1, 1, 1, 1, 1]
Num rows: 20, Num cols: 10
Height_ratios:  [1.5, 1, 1, 1, 1, 1, 1, 0.5, 1, 1, 1, 1, 1, 0.5, 1, 1, 1, 1, 1, 0.5, 1, 1, 1, 1, 1]
Num rows: 20, Num cols: 10
Height_ratios:  [1.5, 1, 1, 1, 1, 1, 1, 0.5, 1, 1, 1, 1, 1, 0.5, 1, 1, 1, 1, 1, 0.5, 1, 1, 1, 1, 1]
Num rows: 20, Num cols: 10
Height_ratios:  [1.5, 1, 1, 1, 1, 1, 1, 0.5, 1, 1, 1, 1, 1, 0.5, 1, 1, 1, 1, 1, 0.5, 1, 1, 1, 1, 1]
Num rows: 20, Num cols: 10
Height_ratios:  [1.5, 1, 1, 1, 1, 1, 1, 0.5, 1, 1, 1, 1, 1, 0.5, 1, 1, 1, 1, 1, 0.5, 1, 1, 1, 1, 1]
Num rows: 20, Num cols: 10
Height_ratios:  [1.5, 1, 1, 1, 1, 1, 1, 0.5, 1, 1, 1, 1, 1, 0.5, 1, 1, 1, 1, 1, 0.5, 1, 1, 1, 1, 1]
Num rows: 20, Num cols: 10
Height_ratios:  [1.5, 1, 1, 1, 1, 1, 1, 0.5, 1, 1, 1, 1, 1, 0.5, 1, 1, 1, 1, 1, 0.5,

In [20]:
# One relative-activation stack plot per input sequence; output_path=None
# means the figures are shown rather than saved.
for idx, _ in enumerate(input_sequences):
    plot_relative_neuron_activation(idx, None)

  % get_backend())


In [21]:
# Write the module-influence figure into the experiment folder.
plot_layer_activations(output_path=experiment_dir)

In [22]:
# Discard the recordings of the synthetic sequences; from here on the hooks
# record the test-data windows instead.
for key in list(activations):
    inputs[key] = []
    activations[key] = []
# NOTE: after this reset the recordings no longer line up with the named input
# sequences, so the sequence-based plot functions must not be called any more.
input_sequences = []

# One forward pass per window start position (to limit the run, replace
# num_windows by a fixed number, e.g. 500).
num_windows = x_test.shape[2] - signal_len
# Only the hook recordings are needed, so skip autograd bookkeeping for these
# forward passes.
with torch.no_grad():
    for batch_idx in range(num_windows):
        if batch_idx % 10 == 0:
            print(f"Predicting batch nr. {batch_idx} out of {num_windows}")
        batch = get_batch(x_test, y_test, batch_idx, signal_len)[0][0:1, 1:2, :]
        model(batch)

Predicting batch nr. 0 out of 1800
Predicting batch nr. 10 out of 1800
Predicting batch nr. 20 out of 1800
Predicting batch nr. 30 out of 1800
Predicting batch nr. 40 out of 1800
Predicting batch nr. 50 out of 1800
Predicting batch nr. 60 out of 1800
Predicting batch nr. 70 out of 1800
Predicting batch nr. 80 out of 1800
Predicting batch nr. 90 out of 1800
Predicting batch nr. 100 out of 1800
Predicting batch nr. 110 out of 1800
Predicting batch nr. 120 out of 1800
Predicting batch nr. 130 out of 1800
Predicting batch nr. 140 out of 1800
Predicting batch nr. 150 out of 1800
Predicting batch nr. 160 out of 1800
Predicting batch nr. 170 out of 1800
Predicting batch nr. 180 out of 1800
Predicting batch nr. 190 out of 1800
Predicting batch nr. 200 out of 1800
Predicting batch nr. 210 out of 1800
Predicting batch nr. 220 out of 1800
Predicting batch nr. 230 out of 1800
Predicting batch nr. 240 out of 1800
Predicting batch nr. 250 out of 1800
Predicting batch nr. 260 out of 1800
Predicting b

In [23]:
# List holding four references to one zero vector (not used further below).
activation_sums = [np.zeros(50)] * 4

# For each block's second ReLU: stack all recordings, sum over the last axis
# and average over the recordings, giving one value per channel.
second_relu_modules = ('relu_0_2', 'relu_1_2', 'relu_2_2', 'relu_3_2')
activation_means = {}
for module_name in second_relu_modules:
    stacked_recordings = np.array(activations[module_name])
    per_recording_sums = stacked_recordings.sum(axis=2)
    activation_means[module_name] = per_recording_sums.mean(axis=0)

In [24]:
# One row per channel, one column per recorded module.
activation_mean_df = pd.DataFrame(data=activation_means)
print("The average activation of all manipulations for ResBlocks 1 through 4:")
# To print a single rounded column instead, iterate over
# activation_mean_df.round(2)['relu_3_2'].values.
activation_mean_df

The average activation of all manipulations for ResBlocks 1 through 4:


Unnamed: 0,relu_0_2,relu_1_2,relu_2_2,relu_3_2
0,492.303009,111.747643,127.880501,181.656265
1,353.012512,172.139587,87.859955,132.849289
2,149.799637,12.566501,0.0,22.105091
3,100.375992,14.659655,0.0,20.687305
4,0.0,158.685471,174.721634,229.592102
5,268.133636,89.65873,44.153465,0.0
6,228.321686,43.974369,4.009449,0.0
7,0.0,147.253433,133.082718,244.868179
8,214.698242,61.678093,63.911331,5.199739
9,139.492279,29.947077,64.393631,7.700817


In [25]:
# Pairwise correlation between the columns (modules) of the per-channel means.
activation_mean_df.corr()

Unnamed: 0,relu_0_2,relu_1_2,relu_2_2,relu_3_2
relu_0_2,1.0,0.092376,0.037263,0.018511
relu_1_2,0.092376,1.0,0.668465,0.789486
relu_2_2,0.037263,0.668465,1.0,0.754535
relu_3_2,0.018511,0.789486,0.754535,1.0


In [26]:
# Ten channels with the largest mean activation in relu_0_2.
print("The channels with the highest manipulation activation for ResBlocks 1:")
activation_mean_df.sort_values(by='relu_0_2', ascending=False).head(n=10)

The channels with the highest manipulation activation for ResBlocks 1:


Unnamed: 0,relu_0_2,relu_1_2,relu_2_2,relu_3_2
40,500.176056,1.401301,61.390606,0.000303
0,492.303009,111.747643,127.880501,181.656265
45,382.675903,146.768372,104.251625,87.193909
38,382.4823,130.437668,132.755768,119.150574
1,353.012512,172.139587,87.859955,132.849289
34,310.587494,93.553352,33.32185,0.0
35,288.470276,10.050556,66.402199,0.996154
13,285.970642,140.188019,199.46463,291.506653
21,278.284241,195.177338,2.099226,138.684143
16,274.848206,33.7822,7.552365,0.0


In [27]:
# Ten channels with the largest mean activation in relu_1_2.
print("The channels with the highest manipulation activation for ResBlocks 2:")
activation_mean_df.sort_values(by='relu_1_2', ascending=False).head(n=10)

The channels with the highest manipulation activation for ResBlocks 2:


Unnamed: 0,relu_0_2,relu_1_2,relu_2_2,relu_3_2
21,278.284241,195.177338,2.099226,138.684143
1,353.012512,172.139587,87.859955,132.849289
4,0.0,158.685471,174.721634,229.592102
24,222.088989,153.667923,164.420822,240.150635
7,0.0,147.253433,133.082718,244.868179
45,382.675903,146.768372,104.251625,87.193909
13,285.970642,140.188019,199.46463,291.506653
31,0.0,137.193405,223.404388,155.016678
38,382.4823,130.437668,132.755768,119.150574
32,221.743668,117.960732,49.70557,116.644386


In [28]:
# Ten channels with the largest mean activation in relu_2_2.
print("The channels with the highest manipulation activation for ResBlocks 3:")
activation_mean_df.sort_values(by='relu_2_2', ascending=False).head(n=10)

The channels with the highest manipulation activation for ResBlocks 3:


Unnamed: 0,relu_0_2,relu_1_2,relu_2_2,relu_3_2
31,0.0,137.193405,223.404388,155.016678
13,285.970642,140.188019,199.46463,291.506653
10,205.968246,89.079926,184.811157,8.867789
4,0.0,158.685471,174.721634,229.592102
24,222.088989,153.667923,164.420822,240.150635
7,0.0,147.253433,133.082718,244.868179
38,382.4823,130.437668,132.755768,119.150574
0,492.303009,111.747643,127.880501,181.656265
45,382.675903,146.768372,104.251625,87.193909
1,353.012512,172.139587,87.859955,132.849289


In [29]:
# Ten channels with the largest mean activation in relu_3_2.
print("The channels with the highest manipulation activation for ResBlocks 4:")
activation_mean_df.sort_values(by='relu_3_2', ascending=False).head(n=10)

The channels with the highest manipulation activation for ResBlocks 4:


Unnamed: 0,relu_0_2,relu_1_2,relu_2_2,relu_3_2
13,285.970642,140.188019,199.46463,291.506653
7,0.0,147.253433,133.082718,244.868179
24,222.088989,153.667923,164.420822,240.150635
4,0.0,158.685471,174.721634,229.592102
0,492.303009,111.747643,127.880501,181.656265
31,0.0,137.193405,223.404388,155.016678
21,278.284241,195.177338,2.099226,138.684143
1,353.012512,172.139587,87.859955,132.849289
38,382.4823,130.437668,132.755768,119.150574
32,221.743668,117.960732,49.70557,116.644386


In [30]:
# A channel counts as dead in a module when its mean activation is exactly 0.
for col in activation_mean_df.columns:
    dead_count = int((activation_mean_df[col] == 0).sum())
    print(f'{dead_count} dead neuron(s) in {col}')

4 dead neuron(s) in relu_0_2
2 dead neuron(s) in relu_1_2
7 dead neuron(s) in relu_2_2
13 dead neuron(s) in relu_3_2


In [31]:
# Running total of the mean activations from relu_0_2 up to each later module.
activation_mean_cum_df = pd.DataFrame()
running_total = activation_mean_df['relu_0_2']
activation_mean_cum_df['relu_0_2'] = running_total
for block in range(1, 4):
    column_name = 'relu_' + str(block) + '_2'
    running_total = running_total + activation_mean_df[column_name]
    activation_mean_cum_df[column_name] = running_total

In [32]:
activation_mean_cum_df
# Stack the (non-cumulative) per-channel means: one layer per channel, one x
# position per module; stackplot does the accumulation itself.
x, y = activation_mean_df.columns, activation_mean_df.values

fig, ax = plt.subplots(figsize=(12, 15))
ax.stackplot(x, y)
# Tick labels and a title are intentionally left at their defaults here.
ax.set_xlabel("Network Module")
ax.set_ylabel("Total Activation")

Text(0, 0.5, 'Total Activation')

In [33]:
# Per channel: standard deviation, over all recordings, of the relu_3_2
# activation summed along the last axis.
np.std(np.array(activations['relu_3_2']).sum(axis=2), axis=0)

array([3.7096603e+01, 4.2919971e+01, 1.2004096e+01, 7.1927881e+00,
       5.1311390e+01, 0.0000000e+00, 0.0000000e+00, 5.0395615e+01,
       3.2524462e+00, 9.8824539e+00, 2.6324799e+00, 8.1413887e-02,
       1.6931885e+01, 7.5089684e+01, 3.1778183e+00, 0.0000000e+00,
       0.0000000e+00, 1.7011354e+00, 2.8357171e-02, 0.0000000e+00,
       0.0000000e+00, 3.9822254e+01, 6.6588593e+00, 6.2066932e+00,
       6.1474373e+01, 2.5121663e+00, 5.2011957e+00, 0.0000000e+00,
       2.4294126e+00, 0.0000000e+00, 6.4955544e+00, 5.8914665e+01,
       2.6994286e+01, 0.0000000e+00, 0.0000000e+00, 5.7051313e-01,
       4.1525860e+00, 2.0616915e+01, 1.6706116e+01, 2.0507543e+00,
       1.5261322e-03, 1.8715228e+00, 0.0000000e+00, 1.5115102e+01,
       2.6871278e+00, 2.2702076e+01, 0.0000000e+00, 1.9646498e+01,
       0.0000000e+00, 6.4080958e+00], dtype=float32)

In [34]:
# Per channel: mean, over all recordings, of the relu_3_2 activation summed
# along the last axis.
np.mean(np.array(activations['relu_3_2']).sum(axis=2), axis=0)

array([1.8165627e+02, 1.3284929e+02, 2.2105091e+01, 2.0687305e+01,
       2.2959210e+02, 0.0000000e+00, 0.0000000e+00, 2.4486818e+02,
       5.1997395e+00, 7.7008171e+00, 8.8677893e+00, 4.6569478e-02,
       6.6467110e+01, 2.9150665e+02, 2.1287351e+00, 0.0000000e+00,
       0.0000000e+00, 5.1694221e+00, 8.6347619e-03, 0.0000000e+00,
       0.0000000e+00, 1.3868414e+02, 7.9492259e+00, 3.0789349e+01,
       2.4015063e+02, 2.1194975e+00, 2.1530846e+01, 0.0000000e+00,
       1.5235422e+00, 0.0000000e+00, 1.1791686e+01, 1.5501668e+02,
       1.1664439e+02, 0.0000000e+00, 0.0000000e+00, 9.9615359e-01,
       9.1794186e+00, 4.3738323e+01, 1.1915057e+02, 1.2593509e+00,
       3.0318619e-04, 9.3776321e-01, 0.0000000e+00, 8.5476913e+00,
       1.6754950e+01, 8.7193909e+01, 0.0000000e+00, 4.0190002e+01,
       0.0000000e+00, 6.2034407e+00], dtype=float32)

In [35]:
# Element-wise average of the out_3 recordings over all recorded forward passes.
final_activation = np.array(activations['out_3']).mean(axis=0)
final_activation.shape

(50, 200)

In [36]:
## only to verify how the bias and linear weights are applied to the final resBlock output
#
#manual_result = []
#for time_step in range(200):
#    time_step_result = 0
#    for channel in range(50):
#        time_step_result += final_activation[channel, time_step] * model.linear.weight[0, channel].item()
#    time_step_result += model.linear.bias.item()
#    manual_result.append(time_step_result)
#manual_result = np.array(manual_result)
#true_result = model.linear(torch.Tensor(final_activation).view((1,50,200)).transpose(1,2)).transpose(1,2)[0, 0, :].detach().numpy()
#print(manual_result, "\n", true_result)

In [37]:
# Collapse the averaged out_3 activation to one value per channel.
# NOTE: despite the variable name this is a sum over axis 1, not a mean.
final_activation_mean = final_activation.sum(axis=1)
final_activation_mean.shape

(50,)

In [38]:
# Multiply every channel value element-wise by the first row of the linear
# layer's weight matrix.
final_activation_mean_scaled = final_activation_mean * model.linear.weight[0].detach().numpy()
final_activation_mean_scaled

array([-25.791216 , -29.770973 ,   6.339761 ,   4.9586186, -12.819422 ,
         2.202875 ,   4.037536 , -17.158644 ,   6.8134437,   6.012519 ,
        -3.4208786,   7.0462146,   8.65969  , -19.969051 ,   2.9791844,
         3.0739796,   3.485335 ,   5.4982734,   6.2807584,   4.5516543,
         4.680796 , -17.39781  ,   4.780748 ,   6.6658893, -21.025614 ,
         5.6688247,   6.7976456,   0.2760217,   6.1693234,   5.441935 ,
         6.8244224, -10.218779 , -17.764706 ,   2.8205185,   0.8942052,
         4.26103  ,   5.4359984,   6.8160124, -29.810795 ,   5.1186614,
       -16.313679 ,   5.8977733,   2.509643 ,   5.0556064,   7.614464 ,
       -27.394215 ,   4.817239 ,   7.8838835,   5.5701647,   6.2224965],
      dtype=float32)

In [39]:
# Per-channel summary: the out_3 value and the magnitude of its weighted
# counterpart (absolute value, so that sorting ranks by size).
final_activation_means_df = pd.DataFrame({
    'mean_out_3': final_activation_mean,
    'mean_out_3_scaled': np.abs(final_activation_mean_scaled),
})

In [40]:
# Slightly smaller font for the channel distribution figures that follow.
matplotlib.rcParams.update({'font.size': 11})

In [41]:
# Channels ordered from highest to lowest out_3 value; the x tick labels show
# the original channel indices in that order.
final_activation_means_df_sorted_out3 = final_activation_means_df.sort_values('mean_out_3', axis=0, ascending=False)
labels = final_activation_means_df_sorted_out3.index.values
x = np.arange(final_activation_means_df_sorted_out3['mean_out_3'].values.shape[0])
y = final_activation_means_df_sorted_out3['mean_out_3'].values
fig, ax = plt.subplots(figsize=(15,6))
ax.plot(y)
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.set_xlabel('Channel', fontsize=18*0.8)
ax.set_ylabel('Activation', fontsize=18*0.8)
ax.set_title('Channel Activation after Residual Block 3')
plt.tight_layout()
# Follow the notebook-wide use_pgf switch, as the plotting functions do,
# instead of always writing a .pgf file.
figure_extension = 'pgf' if use_pgf else 'svg'
fig.savefig(os.path.join(experiment_dir, 'channel_activation_distribution_out_3.' + figure_extension))
plt.show()

In [42]:
# Channels ordered from highest to lowest weighted magnitude; the x tick labels
# show the original channel indices in that order.
final_activation_means_df_sorted_out3_scaled = final_activation_means_df.sort_values('mean_out_3_scaled', axis=0, ascending=False)
labels = final_activation_means_df_sorted_out3_scaled.index.values
x = np.arange(final_activation_means_df_sorted_out3_scaled['mean_out_3_scaled'].values.shape[0])
y = final_activation_means_df_sorted_out3_scaled['mean_out_3_scaled'].values
fig, ax = plt.subplots(figsize=(10,4))
ax.plot(y)
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.set_xlabel('Channel', fontsize=18*0.8)
ax.set_ylabel('Activation', fontsize=18*0.8)
# No title on this figure (previously: 'Channel Activation after the Linear Layer').
plt.tight_layout()
# Follow the notebook-wide use_pgf switch, as the plotting functions do,
# instead of always writing a .pgf file.
figure_extension = 'pgf' if use_pgf else 'svg'
fig.savefig(os.path.join(experiment_dir, 'channel_activation_distribution_out_linear.' + figure_extension))
plt.show()

In [43]:
# Show where the figures of this run were written.
experiment_dir

'./Activation_Visualization/Experiments/2019-07-19 15-01-48.156239__A__final/'