# Generate graph for multiple benign and attacked imgs

## Import packages

In [1]:
# Set auto reload for python functions
%reload_ext autoreload
%autoreload 2

# Helper libraries
import os
import cv2
import glob
import tqdm
import json
import numpy as np
from time import time
import tensorflow as tf
import matplotlib.pyplot as plt
from networkx.readwrite import json_graph

# GoogLeNet
import lucid.modelzoo.vision_models as models

# Libraries provided by Massif project
import constant
import A_matrix
import I_matrix
import model_helper
import gen_graph

print(tf.VERSION)

1.15.0


Using TensorFlow backend.


## Import GoogLeNet (InceptionV1) model

In [2]:
# Instantiate the InceptionV1 (GoogLeNet) model object from Lucid's model zoo
# and call load_graphdef() on it before it is handed to the helper functions below.
googlenet = models.InceptionV1()
googlenet.load_graphdef()



















## Get constants and hyperparameters

In [3]:
# constant.Args carries the constants and hyperparameters used by the rest of the notebook
args = constant.Args

# List its attributes, dropping every name that contains a double underscore
arg_keys = list(filter(lambda key: '__' not in key, vars(args)))

print('Constants and hyperparameters in args:')
print(arg_keys)

Constants and hyperparameters in args:
['gpu', 'batch_A', 'batch_I', 'layer', 'k_A', 'layer_sizes', 'layers', 'layer_blk_sizes', 'blk_headers', 'num_classes', 'img_width', 'img_height']


## Read benign images

In [4]:
benign_dirpath = '../data/sample-images/sample-benign'
correct_classname = 'badger'

# Load every directory entry whose name contains 'jpg' as an image array
benign_imgs = [
    plt.imread('{}/{}'.format(benign_dirpath, name))
    for name in os.listdir(benign_dirpath)
    if 'jpg' in name
]

## Read attacked images with different attack strengths

In [6]:
# Attack strengths (eps); each one has its own directory of attacked images
epss = [0.5, 1, 1.5, 2, 2.5, 3, 3.5]
attacked_imgs = {}

for eps in epss:
    attacked_dirpath = '../data/sample-images/attacked-{}'.format(eps)
    # Attacked images are stored as .npy arrays; pick entries whose name contains 'npy'
    attacked_imgs[eps] = [
        np.load('{}/{}'.format(attacked_dirpath, name))
        for name in os.listdir(attacked_dirpath)
        if 'npy' in name
    ]

## A_matrix: generate nodes

### A_matrix of the **benign** images

In [5]:
# Check whether a cached benign A-matrix is already on disk.
# exist_A is True when the cache directory contains at least one file whose
# name includes 'benign'; a missing or unreadable directory counts as "no cache".
A_matrix_dirpath = '/Users/haekyu/data/massif/aggregated/badger-weasel/A-mat'
try:
    file_list = os.listdir(A_matrix_dirpath)
except OSError:
    # Only filesystem errors mean "nothing cached"; anything else
    # (e.g. KeyboardInterrupt) should still propagate.
    file_list = []
exist_A = any('benign' in file for file in file_list)
print('exist_A:', exist_A)

exist_A: True


In [6]:
# Load the cached benign A-matrix when one exists
if exist_A:
    benign_A_path = '{}/{}'.format(A_matrix_dirpath, 'A-benign-badger-weasel.json')
    with open(benign_A_path) as f:
        A_benign = json.load(f)

    # Scores were serialized as strings; turn them back into floats
    for layer in args.layers:
        A_benign[layer] = [float(score) for score in A_benign[layer]]

In [7]:
# Build the benign A-matrix from scratch when no cached copy exists,
# then save it as JSON so later runs can take the read path instead.
if not exist_A:
    # Initialize A-matrix
    A_benign = A_matrix.init_A_matrix_single_class(args)

    # Activation scores of all neurons in all layers for all benign images
    act_scores_benign = model_helper.get_all_layers_activation_score(googlenet, benign_imgs, args.layers)

    # Each neuron's entry is its median activation score across the images
    for layer in args.layers:
        A_benign[layer] = np.median(act_scores_benign[layer], axis=0)

    # JSON cannot hold numpy arrays, so store the scores as lists of strings
    str_A_benign = {layer: A_benign[layer].astype(str).tolist() for layer in args.layers}

    # Save the string-converted A-matrix into a json file
    targeted_classname = 'weasel'
    A_matrix_benign_dirpath = '/Users/haekyu/data/massif/aggregated/badger-weasel/A-mat'
    # This branch also runs when the cache directory is missing, so create it first
    os.makedirs(A_matrix_benign_dirpath, exist_ok=True)
    filename = '{}/A-benign-{}-{}.json'.format(A_matrix_benign_dirpath, correct_classname, targeted_classname)
    with open(filename, 'w') as f:
        json.dump(str_A_benign, f)

### A_matrix of the **attacked** image with different strength

In [11]:
# Load the cached attacked A-matrices (one json file per eps) when they exist
if exist_A:
    A_attacked_dict = {}
    for eps in epss:
        A_attacked_dict[eps] = {}
        attacked_A_path = '{}/{}-{}.json'.format(A_matrix_dirpath, 'A-attacked-badger-weasel', eps)
        with open(attacked_A_path) as f:
            A_attacked = json.load(f)

        # Scores were serialized as strings; turn them back into floats
        for layer in args.layers:
            A_attacked_dict[eps][layer] = [float(score) for score in A_attacked[layer]]

In [33]:
# Build one A-matrix per attack strength when no cached copies exist,
# then save each of them as JSON.
if not exist_A:
    # Initialize A_attacked_dict with an empty A-matrix for every eps
    A_attacked_dict = {}
    for eps in epss:
        A_attacked_dict[eps] = A_matrix.init_A_matrix_single_class(args)

    # Generate the A-matrix for attacked images with each eps
    for eps in epss:
        # Activation scores of all neurons in all layers for this eps
        act_scores_attacked = model_helper.get_all_layers_activation_score(googlenet, attacked_imgs[eps], args.layers)

        # Each neuron's entry is its median activation score across the images
        for layer in args.layers:
            A_attacked_dict[eps][layer] = np.median(act_scores_attacked[layer], axis=0)

    # JSON cannot hold numpy arrays, so store the scores as lists of strings
    str_A_attacked_dict = {
        eps: {layer: A_attacked_dict[eps][layer].astype(str).tolist() for layer in args.layers}
        for eps in epss
    }

    # Save the string-converted A-matrices, one file per eps
    A_matrix_attacked_dirpath = '/Users/haekyu/data/massif/aggregated/badger-weasel/A-mat'
    # This branch also runs when the cache directory is missing, so create it first
    os.makedirs(A_matrix_attacked_dirpath, exist_ok=True)
    for eps in epss:
        filename = '{}/A-attacked-{}-{}-{}.json'.format(A_matrix_attacked_dirpath, correct_classname, targeted_classname, eps)
        with open(filename, 'w') as f:
            json.dump(str_A_attacked_dict[eps], f)

## Top neurons

### Get top neurons from **Benign** images

In [8]:
# Check whether top neurons have already been computed and cached.
# exist_top_neurons is True when the cache directory contains at least one file
# whose name includes 'benign'; a missing or unreadable directory counts as "no cache".
top_neuron_dirpath = '/Users/haekyu/data/massif/aggregated/badger-weasel/top-neurons'
try:
    file_list = os.listdir(top_neuron_dirpath)
except OSError:
    # Only filesystem errors mean "nothing cached"; anything else
    # (e.g. KeyboardInterrupt) should still propagate.
    file_list = []
exist_top_neurons = any('benign' in file for file in file_list)
print('exist_top_neurons:', exist_top_neurons)

exist_top_neurons: True


In [9]:
# Load the cached benign top neurons when they exist
if exist_top_neurons:
    file_path = '{}/{}'.format(top_neuron_dirpath, 'top-neurons-benign.json')
    with open(file_path, 'r') as f:
        top_neurons_benign = json.load(f)

    # Entries were serialized as strings; restore int neuron ids and float weights
    for layer in args.layers:
        top_neurons_benign[layer] = [
            {'neuron': int(entry['neuron']), 'weight': float(entry['weight'])}
            for entry in top_neurons_benign[layer]
        ]

In [34]:
# Compute the benign top neurons when no cached copy exists.
# For every layer, the top `top_p` fraction of neurons (by A-matrix score,
# descending) is kept, saved to JSON as strings, and then parsed back so that
# the in-memory result has the same types as the read path.
if not exist_top_neurons:
    top_p = 0.2
    str_top_neurons_benign = {}
    for layer in A_benign:
        # A_benign holds plain lists when it was loaded from the json cache;
        # unary minus needs an ndarray.
        scores = np.asarray(A_benign[layer])
        num = int(len(scores) * top_p)
        sorted_neurons = np.argsort(-scores)[:num]
        str_top_neurons_benign[layer] = [
            {'neuron': str(neuron), 'weight': str(scores[neuron])}
            for neuron in sorted_neurons
        ]

    # This branch also runs when the cache directory is missing, so create it first
    os.makedirs(top_neuron_dirpath, exist_ok=True)
    file_path = '{}/{}.json'.format(top_neuron_dirpath, 'top-neurons-benign')
    with open(file_path, 'w') as f:
        json.dump(str_top_neurons_benign, f)

    # Downstream cells compare integer neuron ids against these entries, so keep
    # int/float values in memory exactly as reading the file back would produce.
    top_neurons_benign = {
        layer: [
            {'neuron': int(entry['neuron']), 'weight': float(entry['weight'])}
            for entry in entries
        ]
        for layer, entries in str_top_neurons_benign.items()
    }

### Get top neurons from **Attacked** images

In [36]:
# Read the cached top neurons of the attacked images, one json file per eps.
# The guard on exist_top_neurons is disabled, so this cell always reads the
# files and fails if they have not been written yet.
top_neurons_attacked_all_eps = {}

for eps in epss:
    top_neurons_attacked_all_eps[eps] = {}

    file_path = '{}/{}-{}.json'.format(top_neuron_dirpath, 'top-neurons-attacked', eps)
    with open(file_path, 'r') as f:
        top_neurons_attacked = json.load(f)

    # Entries were serialized as strings; restore int neuron ids and float weights
    for layer in args.layers:
        top_neurons_attacked_all_eps[eps][layer] = [
            {'neuron': int(entry['neuron']), 'weight': float(entry['weight'])}
            for entry in top_neurons_attacked[layer]
        ]

In [35]:
# Compute the top neurons of the attacked images for every eps and save them.
# The guard on exist_top_neurons is disabled, so this cell always recomputes
# and overwrites the 'top-neurons-attacked-<eps>.json' files.
top_p = 0.2
graph_dir_path = '/Users/haekyu/data/massif/aggregated/badger-weasel/top-neurons'
# Make sure the output directory exists before writing into it
os.makedirs(graph_dir_path, exist_ok=True)

for eps in A_attacked_dict:
    top_neurons_attacked = {}
    A_attacked = A_attacked_dict[eps]
    for layer in A_attacked:
        # The A-matrix holds plain lists when it was loaded from the json cache;
        # unary minus needs an ndarray.
        scores = np.asarray(A_attacked[layer])
        num = int(len(scores) * top_p)
        # Indices of the `num` highest-scoring neurons, best first
        sorted_neurons = np.argsort(-scores)[:num]
        top_neurons_attacked[layer] = [
            {'neuron': str(neuron), 'weight': str(scores[neuron])}
            for neuron in sorted_neurons
        ]

    file_path = '{}/{}-{}.json'.format(graph_dir_path, 'top-neurons-attacked', eps)
    with open(file_path, 'w') as f:
        json.dump(top_neurons_attacked, f)

## I_matrix: generate edges

### I_matrix of the **benign** images

In [10]:
# Check whether a cached benign I-matrix is already on disk.
# exist_I is True when the cache directory contains at least one file whose
# name includes 'benign'; a missing or unreadable directory counts as "no cache".
I_matrix_dirpath = '/Users/haekyu/data/massif/aggregated/badger-weasel/I-mat'
try:
    file_list = os.listdir(I_matrix_dirpath)
except OSError:
    # Only filesystem errors mean "nothing cached"; anything else
    # (e.g. KeyboardInterrupt) should still propagate.
    file_list = []
exist_I = any('benign' in file for file in file_list)
print('exist_I:', exist_I)

exist_I: True


In [11]:
# Load the cached benign I-matrix when one exists
if exist_I:
    benign_I_path = '{}/{}'.format(I_matrix_dirpath, 'I-benign-badger-weasel.json')
    with open(benign_I_path) as f:
        I_benign = json.load(f)

    # Every block was serialized as nested lists of strings; rebuild float arrays
    I_benign = {blk: np.array(matrix, dtype=float) for blk, matrix in I_benign.items()}

In [37]:
# Build the benign I-matrix when no cached copy exists, then save it as JSON.
if not exist_I:
    # Generate I matrix for benign images
    I_benign = I_matrix.gen_aggregated_I_matrix(args, benign_imgs, googlenet)

    # JSON cannot hold numpy arrays, so store every block as nested lists of strings
    blks = list(I_benign.keys())
    str_I_benign = {blk: I_benign[blk].astype(str).tolist() for blk in blks}

    # The A-matrix cell only assigns targeted_classname when it regenerates the
    # A-matrix, so set it here as well; otherwise a cached A-matrix combined with
    # a missing I-matrix ends in a NameError.
    targeted_classname = 'weasel'

    # Save the string-converted I-matrix into a json file
    I_matrix_benign_dirpath = '/Users/haekyu/data/massif/aggregated/badger-weasel/I-mat'
    # This branch also runs when the cache directory is missing, so create it first
    os.makedirs(I_matrix_benign_dirpath, exist_ok=True)
    filename = '{}/I-benign-{}-{}.json'.format(I_matrix_benign_dirpath, correct_classname, targeted_classname)
    with open(filename, 'w') as f:
        json.dump(str_I_benign, f)

### I_matrix of the **attacked** images with different strength

In [21]:
# Load the cached attacked I-matrices (one json file per eps) when they exist
if exist_I:
    Is_attacked = {}
    for eps in epss:
        file_path = '{}/{}-{}.json'.format(I_matrix_dirpath, 'I-attacked-badger-weasel', eps)
        with open(file_path) as f:
            I_Attacked = json.load(f)

        # Every block was serialized as nested lists of strings; rebuild float arrays
        for blk in list(I_Attacked):
            I_Attacked[blk] = np.array(I_Attacked[blk], dtype=float)

        Is_attacked[eps] = I_Attacked

In [38]:
# Build one I-matrix per attack strength when no cached copies exist,
# then save each of them as JSON.
if not exist_I:
    # Generate the I-matrix of the attacked images for every eps
    Is_attacked = {}
    for eps in epss:
        Is_attacked[eps] = I_matrix.gen_aggregated_I_matrix(args, attacked_imgs[eps], googlenet)

    # The A-matrix cell only assigns targeted_classname when it regenerates the
    # A-matrix, so set it here as well to avoid a NameError when A is cached.
    targeted_classname = 'weasel'

    # Save the I-matrices for attacked images, one file per eps
    I_matrix_attacked_dirpath = '/Users/haekyu/data/massif/aggregated/badger-weasel/I-mat'
    # This branch also runs when the cache directory is missing, so create it first
    os.makedirs(I_matrix_attacked_dirpath, exist_ok=True)

    for eps in epss:
        filename = '{}/I-attacked-{}-{}-{}.json'.format(I_matrix_attacked_dirpath, correct_classname, targeted_classname, eps)

        # JSON cannot hold numpy arrays, so store every block as nested lists of
        # strings. The block names come from this matrix itself rather than from
        # a variable left behind by the benign I-matrix cell.
        str_I_attacked = {blk: matrix.astype(str).tolist() for blk, matrix in Is_attacked[eps].items()}

        # Save the string-converted I-matrix into a json file
        with open(filename, 'w') as f:
            json.dump(str_I_attacked, f)

## Graph

### [No Need] Generate **full** graph of the **Benign** images 

In [39]:
# Build the full benign graph from the A-matrix (nodes) and I-matrix (edges)
# and save it in node-link JSON form. Set no_need to True to skip this cell.
no_need = False
if not no_need:
    G_benign = gen_graph.gen_full_graph(args, A_benign, I_benign)
    G_benign_json = json_graph.node_link_data(G_benign)

    # Weights are written as strings so that json.dump can serialize them
    benign_str_parsed_nodes = [
        {'weight': str(node['weight']), 'id': node['id']}
        for node in G_benign_json['nodes']
    ]
    benign_str_parsed_links = [
        {'source': link['source'], 'target': link['target'], 'weight': str(link['weight'])}
        for link in G_benign_json['links']
    ]
    G_benign_str_json = {'nodes': benign_str_parsed_nodes, 'links': benign_str_parsed_links}

    graph_dir_path = '/Users/haekyu/data/massif/aggregated/badger-weasel/full-graph'
    # Make sure the output directory exists before writing into it
    os.makedirs(graph_dir_path, exist_ok=True)
    file_path = '{}/{}.json'.format(graph_dir_path, 'G-benign')
    with open(file_path, 'w') as f:
        json.dump(G_benign_str_json, f)

### [No Need] Generate **full** graph of the **Attacked** images with different strength

In [40]:
# Build the full graph of the attacked images for every eps and save each one
# in node-link JSON form. Set no_need to True to skip this cell.
no_need = False
if not no_need:
    graph_dir_path = '/Users/haekyu/data/massif/aggregated/badger-weasel/full-graph'
    # Make sure the output directory exists before writing into it
    os.makedirs(graph_dir_path, exist_ok=True)

    for eps in epss:
        G_attacked = gen_graph.gen_full_graph(args, A_attacked_dict[eps], Is_attacked[eps])
        G_attacked_json = json_graph.node_link_data(G_attacked)

        # Weights are written as strings so that json.dump can serialize them
        attacked_str_parsed_nodes = [
            {'weight': str(node['weight']), 'id': node['id']}
            for node in G_attacked_json['nodes']
        ]
        attacked_str_parsed_links = [
            {'source': link['source'], 'target': link['target'], 'weight': str(link['weight'])}
            for link in G_attacked_json['links']
        ]
        G_attacked_str_json = {'nodes': attacked_str_parsed_nodes, 'links': attacked_str_parsed_links}

        file_path = '{}/{}-{}.json'.format(graph_dir_path, 'G-attacked', eps)
        with open(file_path, 'w') as f:
            json.dump(G_attacked_str_json, f)

### Generate **graph** of only the **top neurons** of the **Benign** images

In [12]:
# Reduce the saved full benign graph to the sub-graph induced by the top neurons.
# Requires the full graph json and top_neurons_benign to be available already.
graph_dir = '/Users/haekyu/data/massif/aggregated/badger-weasel/full-graph'
file_path = '{}/{}'.format(graph_dir, 'G-benign.json')

# Read full graph
with open(file_path, 'r') as f:
    G_benign_raw = json.load(f)

# Keep only the nodes that are top neurons of their layer.
# Node ids have the form '<layer>-<neuron>'. The set of top neuron ids is built
# once per layer instead of rebuilding a list for every node.
G_benign = {'nodes': [], 'links': []}
node_dict = {}
top_ids_by_layer = {}
for node in G_benign_raw['nodes']:
    layer, neuron = node['id'].split('-')
    if layer not in top_ids_by_layer:
        top_ids_by_layer[layer] = {entry['neuron'] for entry in top_neurons_benign[layer]}
    if int(neuron) in top_ids_by_layer[layer]:
        G_benign['nodes'].append({'id': node['id'], 'weight': node['weight']})
        node_dict[node['id']] = True

# Keep only the edges whose two endpoints both survived the node filter
for edge in G_benign_raw['links']:
    if (edge['source'] in node_dict) and (edge['target'] in node_dict):
        G_benign['links'].append(edge)

# Save the graph
top_graph_dir = '/Users/haekyu/data/massif/aggregated/badger-weasel/top-graph'
# Make sure the output directory exists before writing into it
os.makedirs(top_graph_dir, exist_ok=True)
file_path = '{}/{}'.format(top_graph_dir, 'G-top-benign.json')
with open(file_path, 'w') as f:
    json.dump(G_benign, f)

### Generate **graph** of only the **top neurons** of the **Attacked** images

In [42]:
# Reduce every saved full attacked graph to the sub-graph induced by the top
# neurons of the matching eps. graph_dir is the full-graph directory set by the
# benign top-graph cell.
top_graph_dir = '/Users/haekyu/data/massif/aggregated/badger-weasel/top-graph'
# Make sure the output directory exists before writing into it
os.makedirs(top_graph_dir, exist_ok=True)

for eps in epss:
    # Read full graph
    file_path = '{}/{}-{}.json'.format(graph_dir, 'G-attacked', eps)
    with open(file_path, 'r') as f:
        G_attacked_raw = json.load(f)

    # Keep only the nodes that are top neurons of their layer.
    # Node ids have the form '<layer>-<neuron>'. The set of top neuron ids is
    # built once per layer instead of rebuilding a list for every node.
    G_attacked = {'nodes': [], 'links': []}
    node_dict = {}
    top_ids_by_layer = {}
    for node in G_attacked_raw['nodes']:
        layer, neuron = node['id'].split('-')
        if layer not in top_ids_by_layer:
            top_ids_by_layer[layer] = {entry['neuron'] for entry in top_neurons_attacked_all_eps[eps][layer]}
        if int(neuron) in top_ids_by_layer[layer]:
            G_attacked['nodes'].append({'id': node['id'], 'weight': node['weight']})
            node_dict[node['id']] = True

    # Keep only the edges whose two endpoints both survived the node filter
    for edge in G_attacked_raw['links']:
        if (edge['source'] in node_dict) and (edge['target'] in node_dict):
            G_attacked['links'].append(edge)

    # Save the graph
    file_path = '{}/{}-{}.json'.format(top_graph_dir, 'G-top-attacked', eps)
    with open(file_path, 'w') as f:
        json.dump(G_attacked, f)