## Import packages

In [1]:
import os
import json
import shutil
import numpy as np
from sklearn.cluster import DBSCAN

from utils import TopKKeeper

## Alignability of embeddings

### Load embeddings

In [2]:
def load_json(file_path):
    """Parse the JSON file at ``file_path`` and return the decoded object.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's content.
    """
    with open(file_path, 'r') as handle:
        return json.load(handle)

In [3]:
# Load every embedding file found in `input_dir` into `emb`.
#   * pretrained models:  emb[model_code]        -> content of the file
#   * trained models:     emb[model_name][epoch] -> content of the file
# The model code is the second '-'-separated token of the file name; for
# trained models its last '_'-separated token is the epoch and the remaining
# tokens, re-joined with '-', form the model name.
input_dir = '../../data/embedding/emb-1'
emb = {}
for input_name in os.listdir(input_dir):
    model_code = input_name.split('-')[1]
    input_path = os.path.join(input_dir, input_name)

    if 'pretrained' in model_code:
        emb[model_code] = load_json(input_path)
        continue

    *name_tokens, epoch_token = model_code.split('_')
    epoch = int(epoch_token)
    embeddings_by_epoch = emb.setdefault('-'.join(name_tokens), {})
    embeddings_by_epoch[epoch] = load_json(input_path)

In [4]:
# Display the top-level keys of `emb`, i.e. the model names that were loaded.
list(emb.keys())

['vgg16-512-0.05-0.9',
 'inception-v3-512-1.5-0.9',
 'inception-v3-512-0.5-0.9',
 'vgg16-512-0.01-0.9',
 'inception_v3_pretrained',
 'vgg16_pretrained']

### Find clusters - setting

In [265]:
# Root directory under which the example sets are written
# (one sub-directory per experiment type, then one `set-<n>` directory per set).
alignability_output_dir = '../../data/human-experiment/alignability-example'
# Root directory of the example patches; images of a model are read from
# `<ex_patch_dir>/<model>/data`.
ex_patch_dir = '../../data/neuron_feature'

### Find Clusters - (1) Same model, different training stages

In [27]:
def find_clusters_in_the_same_model(emb, model_name, dim=30, eps=0.5):
    """Build small groups of nearby neurons of one model across its epochs.

    Steps:
      1. Stack the embeddings of every neuron of every epoch of ``model_name``
         into one matrix and cluster it with DBSCAN (``min_samples=2``).
      2. Keep only clusters whose members come from more than one epoch and
         draw one random seed neuron from each of them.
      3. For each seed, collect its nearest neurons (squared Euclidean
         distance) with a ``TopKKeeper(15)``.
      4. From those neighbours pick the first-listed neuron of every epoch,
         then top the selection up in neighbour order until it holds 5 ids.

    Args:
        emb: Mapping ``emb[model_name][epoch][neuron] -> embedding vector``.
        model_name: Key of the model in ``emb``.
        dim: Length of each embedding vector.
        eps: DBSCAN ``eps`` parameter.

    Returns:
        A list with one entry per seed neuron; each entry is a list of neuron
        ids formatted as ``'<epoch>-<layer>-<index>'``.
    """
    embeddings_by_epoch = emb[model_name]

    # Give every (epoch, neuron) pair a row index and a string id.
    ordered_ids = [
        '{}-{}'.format(epoch, neuron)
        for epoch in embeddings_by_epoch
        for neuron in embeddings_by_epoch[epoch]
    ]
    idx2id = dict(enumerate(ordered_ids))
    id2idx = {neuron_id: row for row, neuron_id in idx2id.items()}

    # Embedding matrix, one row per neuron id.
    X = np.zeros((len(ordered_ids), dim))
    for epoch in embeddings_by_epoch:
        for neuron, vector in embeddings_by_epoch[epoch].items():
            X[id2idx['{}-{}'.format(epoch, neuron)]] = vector

    # Cluster and group the ids by DBSCAN label.
    clustering = DBSCAN(eps=eps, min_samples=2).fit(X)
    members_by_label = {}
    for row, label in enumerate(clustering.labels_):
        members_by_label.setdefault(label, []).append(idx2id[row])

    # Drop the noise label (-1) and clusters confined to a single epoch.
    multi_epoch_clusters = {
        label: members
        for label, members in members_by_label.items()
        if label != -1
        and len({int(member.split('-')[0]) for member in members}) > 1
    }

    # One random seed neuron per remaining cluster.
    seeds = [
        np.random.choice(members)
        for members in multi_epoch_clusters.values()
    ]

    # Nearest neighbours of every seed. Distances are negated before
    # insertion; presumably TopKKeeper retains the largest values, so this
    # keeps the closest neurons -- confirm against utils.TopKKeeper.
    neighbours_of_seed = {}
    for seed in seeds:
        seed_vector = X[id2idx[seed]]
        keeper = TopKKeeper(15)
        for row, vector in enumerate(X):
            keeper.insert(-np.sum((vector - seed_vector) ** 2), key=idx2id[row])
        neighbours_of_seed[seed] = keeper

    # Turn each neighbour list into a selection of (at least) one neuron per
    # epoch, filled up to `k` ids in neighbour order.
    k = 5
    selected_clusters = []
    for keeper in neighbours_of_seed.values():
        ranked_ids = keeper.keys

        first_of_epoch = {}
        for candidate in ranked_ids:
            epoch_token, _layer, _unit = candidate.split('-')
            first_of_epoch.setdefault(int(epoch_token), candidate)
        selection = list(first_of_epoch.values())

        for candidate in ranked_ids:
            if len(selection) >= k:
                break
            if candidate not in selection:
                selection.append(candidate)

        selected_clusters.append(selection)

    return selected_clusters

In [37]:
def sample_neuron(model, emb):
    """Pick one neuron name at random from the first epoch stored for ``model``.

    Args:
        model: Key of a trained model in ``emb``.
        emb: Mapping ``emb[model][epoch][neuron] -> embedding``.

    Returns:
        A neuron name drawn with ``np.random.choice`` from the neurons of the
        first epoch in iteration order of ``emb[model]``.
    """
    epoch_keys = list(emb[model])
    candidates = [name for name in emb[model][epoch_keys[0]]]
    drawn = np.random.choice(candidates, 1)
    return drawn[0]
    
# Quick check: draw one random neuron name for a trained model.
sample_neuron('inception-v3-512-0.5-0.9', emb)

'InceptionC_13-536'

In [158]:
# (model name, DBSCAN eps) pairs to cluster.
models = [
    ['inception-v3-512-0.5-0.9', 0.5],
    ['inception-v3-512-1.5-0.9', 0.5],
    ['vgg16-512-0.01-0.9', 0.47],
    ['vgg16-512-0.05-0.9', 0.47]
]

# clusters[model_name] -> list of clusters; each cluster is a list of
# {'model': '<model_name>-<epoch>', 'neuron': '<layer>-<index>'} records.
clusters = {}
for model_name, eps in models:
    selected_clusters = find_clusters_in_the_same_model(emb, model_name, dim=30, eps=eps)

    # A bare `model_name.replace('inception-v3', 'inception_v3')` used to sit
    # here. str.replace returns a new string, so the call had no effect and was
    # removed. `model_name` must stay un-normalised: it is the key into `emb`
    # and `clusters`, and the cell that copies the patches applies the
    # 'inception_v3' normalisation itself.
    parsed_clusters = []
    for cluster in selected_clusters:
        parsed_cluster = []
        for neuron in cluster:
            # Neuron ids are '<epoch>-<layer>-<index>'.
            epoch, layer, neuron_idx = neuron.split('-')
            parsed_cluster.append({
                'model': '{}-{}'.format(model_name, epoch),
                'neuron': '{}-{}'.format(layer, neuron_idx),
            })
        parsed_clusters.append(parsed_cluster)

    clusters[model_name] = parsed_clusters

In [159]:
# For every model, turn 4 randomly chosen clusters into example sets:
# the cluster's neurons plus one random "intruder" neuron, shuffled, saved as
# `neuron.json` together with the example patches of every neuron.
num_sets_per_model = 4
p_dir_path = os.path.join(alignability_output_dir, 'NeuEvo-same_model-diff_epoch')

for i, model in enumerate(clusters):

    # Pick the clusters used for this model.
    model_clusters = clusters[model]
    sample_idxs = np.random.choice(len(model_clusters), num_sets_per_model, replace=False)

    for j in range(num_sets_per_model):
        set_idx = i * num_sets_per_model + j
        cluster_neurons = model_clusters[sample_idxs[j]]

        # Sample an intruder whose name differs from every neuron of the
        # cluster. It is labelled with the model code of the cluster's first
        # neuron.
        rand_model = cluster_neurons[0]['model']
        cluster_neuron_ids = {entry['neuron'] for entry in cluster_neurons}
        while True:
            rand_neuron = sample_neuron(model, emb)
            if rand_neuron not in cluster_neuron_ids:
                break

        print(set_idx)
        print('rand_neuron:', rand_neuron)

        # Work on copies: the original cell appended the intruder to, and set
        # 'random' on, the objects stored in `clusters`, so re-running the cell
        # kept piling intruders onto the same clusters.
        neuron_info = [dict(entry, random='no') for entry in cluster_neurons]
        neuron_info.append({'model': rand_model, 'neuron': rand_neuron, 'random': 'yes'})
        np.random.shuffle(neuron_info)

        # Create <root>/NeuEvo-same_model-diff_epoch/set-<n>/img, including
        # any missing parent directory.
        dir_path = os.path.join(p_dir_path, 'set-{}'.format(set_idx))
        img_dir_path = os.path.join(dir_path, 'img')
        os.makedirs(img_dir_path, exist_ok=True)

        # Save neuron_info
        out_path = os.path.join(dir_path, 'neuron.json')
        neuron_output = {
            'neuron-{}'.format(neuron_i): entry
            for neuron_i, entry in enumerate(neuron_info)
        }
        with open(out_path, 'w') as f:
            json.dump(neuron_output, f, indent=4)

        # Save example patches: every image of the model's patch directory
        # whose file name contains '<neuron id>-' is copied as img-<n>.jpg.
        for neuron_key, entry in neuron_output.items():
            neuron_id = entry['neuron']
            model_name = entry['model'].replace('inception-v3', 'inception_v3')
            ex_patch_dir_path = os.path.join(ex_patch_dir, model_name, 'data')
            sub_img_dir_path = os.path.join(img_dir_path, neuron_key)

            img_name_lst = [
                img_name
                for img_name in os.listdir(ex_patch_dir_path)
                if neuron_id + '-' in img_name
            ]

            os.makedirs(sub_img_dir_path, exist_ok=True)
            for img_i, img_name in enumerate(img_name_lst):
                shutil.copy2(
                    os.path.join(ex_patch_dir_path, img_name),
                    os.path.join(sub_img_dir_path, 'img-{}.jpg'.format(img_i))
                )

0
rand_neuron: InceptionD_16-1019
1
rand_neuron: InceptionC_14-227
2
rand_neuron: InceptionE_18-1837
3
rand_neuron: BasicConv2d_5-65
4
rand_neuron: InceptionE_18-520
5
rand_neuron: InceptionB_10-93
6
rand_neuron: InceptionD_16-22
7
rand_neuron: InceptionB_10-361
8
rand_neuron: Sequential_0_Conv2d_17-77
9
rand_neuron: Sequential_0_Conv2d_17-6
10
rand_neuron: Sequential_0_Conv2d_24-342
11
rand_neuron: Sequential_0_Conv2d_12-52
12
rand_neuron: Sequential_0_Conv2d_21-301
13
rand_neuron: Sequential_0_Conv2d_10-191
14
rand_neuron: Sequential_0_Conv2d_24-185
15
rand_neuron: Sequential_0_Conv2d_28-120


### Find Clusters - (1) Same model, different training stages - random seed neuron, random intruder

In [132]:
# Example sets built around a randomly chosen target neuron: the target's
# nearest neighbours across all epochs of the model, plus a second randomly
# chosen neuron appended as the intruder.
# Each entry is (model name, epochs from which target/intruder epochs are drawn).
models = [
    ['inception-v3-512-0.5-0.9', [3, 11, 121]],
    ['inception-v3-512-1.5-0.9', [4, 69, 71]],
    ['vgg16-512-0.01-0.9', [5, 21, 207]],
    ['vgg16-512-0.05-0.9', [3, 11, 13]]
]

k = 5
num_sets_per_model = 4

# NOTE(review): this is the same output directory (and the same set-<n> names)
# as the DBSCAN-based "same model" cell, so whichever cell runs last
# overwrites the other's neuron.json -- confirm which one is intended.
p_dir_path = os.path.join(alignability_output_dir, 'NeuEvo-same_model-diff_epoch')

for i, model_info in enumerate(models):

    # Get model name and epochs
    model = model_info[0]
    epochs = model_info[1]

    # Collect (id, embedding) of all neurons of all epochs once per model;
    # this does not depend on the set being generated.
    all_neurons = [
        ('{}-{}'.format(epoch, neuron), np.array(vector))
        for epoch, neurons_of_epoch in emb[model].items()
        for neuron, vector in neurons_of_epoch.items()
    ]

    # Generate example sets
    for j in range(num_sets_per_model):
        set_idx = i * num_sets_per_model + j
        print(set_idx)

        # Randomly sample two neurons with different names, each with a
        # random epoch.
        while True:
            rand_neuron1 = sample_neuron(model, emb)
            rand_neuron2 = sample_neuron(model, emb)
            if rand_neuron1 != rand_neuron2:
                rand_epoch1 = np.random.choice(epochs)
                rand_epoch2 = np.random.choice(epochs)
                rand_neuron_id_1 = '{}-{}'.format(rand_epoch1, rand_neuron1)
                rand_neuron_id_2 = '{}-{}'.format(rand_epoch2, rand_neuron2)
                break
        print('rand_neuron_id_1:', rand_neuron_id_1)
        print('rand_neuron_id_2:', rand_neuron_id_2)

        # Find rand_neuron_id_1's 100 closest neurons (squared Euclidean
        # distance, negated before insertion into the keeper).
        target_emb = np.array(emb[model][rand_epoch1][rand_neuron1])
        cluster = TopKKeeper(100)
        for neuron_id, neuron_emb in all_neurons:
            d2 = -np.sum((neuron_emb - target_emb) ** 2)
            cluster.insert(d2, key=neuron_id)
        nearest_ids = cluster.keys
        nearest_d2 = [-v for v in cluster.vals]

        # Collect neurons
        selected_neurons = list(nearest_ids[:k])
        print(nearest_d2[:k])

        # If all selected neurons share the target's epoch, swap the last one
        # for the closest remaining neuron of a different epoch (if any).
        same_epoch = all(
            int(neuron_id.split('-')[0]) == rand_epoch1
            for neuron_id in selected_neurons
        )
        if same_epoch:
            for neuron_id in nearest_ids[k:]:
                if int(neuron_id.split('-')[0]) != rand_epoch1:
                    selected_neurons[-1] = neuron_id
                    break
        print(same_epoch)

        # Shuffle neurons
        # NOTE(review): rand_neuron_id_2 is not checked against
        # selected_neurons, so the intruder may duplicate a neighbour; index()
        # then reports the first occurrence.
        selected_neurons.append(rand_neuron_id_2)
        np.random.shuffle(selected_neurons)
        rand_neuron_idx = selected_neurons.index(rand_neuron_id_2)

        # Generate output directories (parents included).
        dir_path = os.path.join(p_dir_path, 'set-{}'.format(set_idx))
        img_dir_path = os.path.join(dir_path, 'img')
        os.makedirs(img_dir_path, exist_ok=True)

        # Save selected_neurons
        out_path = os.path.join(dir_path, 'neuron.json')
        neuron_output = {}
        for neuron_i, neuron_id in enumerate(selected_neurons):
            # Ids are '<epoch>-<layer>-<index>'.
            epoch, _, neuron = neuron_id.partition('-')
            neuron_output['neuron-{}'.format(neuron_i)] = {
                'model': '{}-{}'.format(model, epoch),
                'neuron': neuron,
                'random': 'yes' if neuron_i == rand_neuron_idx else 'no'
            }
        with open(out_path, 'w') as f:
            json.dump(neuron_output, f, indent=4)

        # Save example patches: every image of the model's patch directory
        # whose file name contains '<neuron id>-' is copied as img-<n>.jpg.
        for neuron_key, entry in neuron_output.items():
            neuron_id = entry['neuron']
            model_name = entry['model'].replace('inception-v3', 'inception_v3')
            ex_patch_dir_path = os.path.join(ex_patch_dir, model_name, 'data')
            sub_img_dir_path = os.path.join(img_dir_path, neuron_key)

            img_name_lst = [
                img_name
                for img_name in os.listdir(ex_patch_dir_path)
                if neuron_id + '-' in img_name
            ]

            os.makedirs(sub_img_dir_path, exist_ok=True)
            for img_i, img_name in enumerate(img_name_lst):
                shutil.copy2(
                    os.path.join(ex_patch_dir_path, img_name),
                    os.path.join(sub_img_dir_path, 'img-{}.jpg'.format(img_i))
                )

        print()

rand_neuron_id_1: 11-InceptionE_17-1313
rand_neuron_id_2: 11-InceptionA_7-79
[0.0, 0.18873100000000007, 0.20379100000000003, 0.209142, 0.209142]
False

rand_neuron_id_1: 11-InceptionA_7-56
rand_neuron_id_2: 11-InceptionC_14-656
[0.0, 0.16598700000000002, 0.167827, 0.21836899999999998, 0.225968]
True

rand_neuron_id_1: 3-InceptionE_17-443
rand_neuron_id_2: 3-InceptionE_17-52
[0.0, 0.19486900000000001, 0.19486900000000001, 0.202764, 0.215451]
True

rand_neuron_id_1: 3-InceptionE_18-1679
rand_neuron_id_2: 3-InceptionC_14-761
[0.0, 0.13601599999999997, 0.13601599999999997, 0.15477600000000002, 0.158598]
False

rand_neuron_id_1: 69-InceptionE_18-1932
rand_neuron_id_2: 71-InceptionE_18-606
[0.0, 0.144467, 0.15775899999999998, 0.159425, 0.161312]
False

rand_neuron_id_1: 71-InceptionD_16-424
rand_neuron_id_2: 4-InceptionC_12-379
[0.0, 0.0, 0.0, 0.0, 0.0]
True

rand_neuron_id_1: 71-InceptionE_17-482
rand_neuron_id_2: 71-InceptionC_13-629
[0.0, 0.2601540000000001, 0.281659, 0.308499, 0.31243899

### Find Clusters - (2) Different models, different training stages

In [157]:
# Setting
dim = 30  # length of each embedding vector (number of columns of X)
eps = 0.27  # DBSCAN eps used for the cross-model clustering
k = 5  # number of neighbours kept per example set, before the intruder is added
num_rand_samples = 16  # number of clusters from which a seed neuron is sampled

In [158]:
# Index the neurons of every model (and every epoch of the trained models)
# and stack their embeddings into X.
#   pretrained model ids: '<model_name>-<layer>-<index>'
#   trained model ids:    '<epoch>-<model_name>-<layer>-<index>'
#
# NOTE(review): the original cell tried to skip a model with
# `'vgg16_pretraind' in model_name` (note the spelling). That test never
# matched, so every model -- including 'vgg16_pretrained' -- has always been
# part of X. The behaviour is kept; add a model name to `excluded_models` if
# it really should be left out.
excluded_models = set()

idx2id, id2idx, rows = {}, {}, []
for model_name in emb:
    if model_name in excluded_models:
        continue

    if 'pretrained' in model_name:
        named_embeddings = [
            ('{}-{}'.format(model_name, neuron), vector)
            for neuron, vector in emb[model_name].items()
        ]
    else:
        named_embeddings = [
            ('{}-{}-{}'.format(epoch, model_name, neuron), vector)
            for epoch in emb[model_name]
            for neuron, vector in emb[model_name][epoch].items()
        ]

    for neuron_id, vector in named_embeddings:
        idx = len(rows)
        idx2id[idx] = neuron_id
        id2idx[neuron_id] = idx
        rows.append(vector)

# Generate X for all embeddings (row i belongs to idx2id[i]).
num_instances = len(rows)
X = np.zeros((num_instances, dim))
for idx, vector in enumerate(rows):
    X[idx] = vector

# Run DBSCAN
clustering = DBSCAN(eps=eps, min_samples=2).fit(X)

In [159]:
def get_model_name(neuron_id):
    """Extract the model name from a neuron id.

    Ids containing 'pretrained' are '<model>-<layer>-<index>': the first
    '-'-separated token is returned. All other ids are
    '<epoch>-<model>-<layer>-<index>': the tokens between the first and the
    last two are re-joined with '-'.
    """
    tokens = neuron_id.split('-')
    if 'pretrained' in neuron_id:
        return tokens[0]
    return '-'.join(tokens[1:-2])

In [160]:
# Group the neuron ids by DBSCAN label, ignoring noise points (label -1).
raw_clusters = {}
for idx, cluster_label in enumerate(clustering.labels_):
    if cluster_label != -1:
        raw_clusters.setdefault(cluster_label, []).append(idx2id[idx])

# Keep the clusters in which at least one member belongs to a different model
# than the cluster's first member.
clusters = []
for members in raw_clusters.values():
    reference_model = get_model_name(members[0])
    if any(get_model_name(other) != reference_model for other in members[1:]):
        clusters.append(members)

In [161]:
# Number of clusters that mix neurons of different models.
len(clusters)

23

In [162]:
# Sample neurons: one random seed neuron from each of `num_rand_samples`
# distinct clusters.
sampled_idxs = np.random.choice(len(clusters), num_rand_samples, replace=False)
sampled_neurons = [np.random.choice(clusters[sampled_idx], 1)[0] for sampled_idx in sampled_idxs]

# Intruder: always drawn from the first stored epoch of this model.
rand_model = 'inception-v3-512-0.5-0.9'
rand_epoch = list(emb[rand_model])[0]

p_dir_path = os.path.join(alignability_output_dir, 'NeuEvo-diff_model-diff_epoch')

# Generate example sets. Seeds whose neighbours all belong to the seed's own
# model are skipped, so fewer than `num_rand_samples` sets may be written.
success = 0
for sampled_neuron in sampled_neurons:

    if success >= num_rand_samples:
        break

    # Find neighbors (squared Euclidean distance, negated before insertion).
    sampled_emb = X[id2idx[sampled_neuron]]
    neighbors = TopKKeeper(15)
    for idx, neuron_emb in enumerate(X):
        d2 = -np.sum((neuron_emb - sampled_emb) ** 2)
        neighbors.insert(d2, key=idx2id[idx])
    neighbors = [neighbors.keys, [-v for v in neighbors.vals]]
    print(neighbors)

    # Require at least one neighbour from a different model than the seed.
    sampled_neuron_model = get_model_name(sampled_neuron)
    other_model_neighbors = [
        nei_neuron_id
        for nei_neuron_id in neighbors[0]
        if get_model_name(nei_neuron_id) != sampled_neuron_model
    ]
    if not other_model_neighbors:
        continue

    # Seed first, then the first-listed neighbour of a different model, then
    # the remaining neighbours in order until `k` ids are selected.
    selected_neighbors = [sampled_neuron, other_model_neighbors[0]]
    for nei_neuron_id in neighbors[0]:
        if len(selected_neighbors) >= k:
            break
        if nei_neuron_id in selected_neighbors:
            continue
        selected_neighbors.append(nei_neuron_id)

    # Sample intruder. The id is built in the same
    # '<epoch>-<model>-<layer>-<index>' format as the ids in idx2id; the
    # original used '<epoch>-<layer>-<index>', which could never equal a
    # selected id, so the duplicate check below was ineffective.
    while True:
        rand_neuron = sample_neuron(rand_model, emb)
        rand_neuron_id = '{}-{}-{}'.format(rand_epoch, rand_model, rand_neuron)
        if rand_neuron_id not in selected_neighbors:
            selected_neighbors.append(rand_neuron_id)
            break

    # Shuffle neurons
    np.random.shuffle(selected_neighbors)
    rand_neuron_idx = selected_neighbors.index(rand_neuron_id)

    # Generate output directories (parents included).
    dir_path = os.path.join(p_dir_path, 'set-{}'.format(success))
    img_dir_path = os.path.join(dir_path, 'img')
    os.makedirs(img_dir_path, exist_ok=True)

    # Save selected_neighbors
    out_path = os.path.join(dir_path, 'neuron.json')
    neuron_output = {}
    for neuron_i, neuron_id in enumerate(selected_neighbors):
        id_tokens = neuron_id.split('-')
        model_name = get_model_name(neuron_id)
        if 'pretrained' not in model_name:
            # Trained models are identified as '<model>-<epoch>'.
            model_code = '{}-{}'.format(model_name, id_tokens[0])
        else:
            model_code = model_name
        neuron_output['neuron-{}'.format(neuron_i)] = {
            'model': model_code,
            'neuron': '{}-{}'.format(id_tokens[-2], id_tokens[-1]),
            'random': 'yes' if neuron_i == rand_neuron_idx else 'no'
        }
    with open(out_path, 'w') as f:
        json.dump(neuron_output, f, indent=4)

    # Save example patches: every image of the model's patch directory whose
    # file name contains '<neuron id>-' is copied as img-<n>.jpg.
    for neuron_key, entry in neuron_output.items():
        neuron_id = entry['neuron']
        model_name = entry['model'].replace('inception-v3', 'inception_v3')

        sub_img_dir_path = os.path.join(img_dir_path, neuron_key)
        ex_patch_dir_path = os.path.join(ex_patch_dir, model_name, 'data')
        # For non-pretrained models every '_' in the path becomes '-', after
        # which 'inception_v3' and 'neuron_feature' are restored.
        if 'pretrained' not in ex_patch_dir_path:
            ex_patch_dir_path = ex_patch_dir_path.replace('_', '-').replace('inception-v3', 'inception_v3')
        ex_patch_dir_path = ex_patch_dir_path.replace('neuron-feature', 'neuron_feature')

        img_name_lst = [
            img_name
            for img_name in os.listdir(ex_patch_dir_path)
            if neuron_id + '-' in img_name
        ]

        os.makedirs(sub_img_dir_path, exist_ok=True)
        for img_i, img_name in enumerate(img_name_lst):
            shutil.copy2(
                os.path.join(ex_patch_dir_path, img_name),
                os.path.join(sub_img_dir_path, 'img-{}.jpg'.format(img_i))
            )

    success += 1
    

[['3-vgg16-512-0.05-0.9-Sequential_0_Conv2d_0-63', '3-vgg16-512-0.05-0.9-Sequential_0_Conv2d_0-44', '11-vgg16-512-0.05-0.9-Sequential_0_Conv2d_0-44', '21-vgg16-512-0.01-0.9-Sequential_0_Conv2d_0-51', '3-vgg16-512-0.05-0.9-Sequential_0_Conv2d_0-12', '207-vgg16-512-0.01-0.9-Sequential_0_Conv2d_0-51', '11-vgg16-512-0.05-0.9-Sequential_0_Conv2d_0-12', '21-vgg16-512-0.01-0.9-Sequential_0_Conv2d_0-60', '4-inception-v3-512-1.5-0.9-InceptionC_14-213', '4-inception-v3-512-1.5-0.9-InceptionD_16-725', '71-inception-v3-512-1.5-0.9-InceptionC_11-241', 'vgg16_pretrained-Sequential_0_Conv2d_0-20', '69-inception-v3-512-1.5-0.9-InceptionC_14-612', '69-inception-v3-512-1.5-0.9-InceptionD_16-1124', '71-inception-v3-512-1.5-0.9-InceptionE_18-654'], [0.0, 1.0000000000000019e-06, 0.052313000000000005, 0.052339, 0.060419999999999995, 0.08314400000000001, 0.08708099999999999, 0.19397200000000003, 0.22527000000000003, 0.22527000000000003, 0.22988700000000004, 0.24198899999999998, 0.24799000000000002, 0.2479900

### Find Clusters - (3) Base model

In [220]:
# Setting
dim = 30  # length of each embedding vector (number of columns of X)
eps = 0.35  # DBSCAN eps used for the base-model clustering
k = 5  # number of nearest neighbours kept per example set, before the intruder is added
num_rand_samples = 16  # number of clusters from which a seed neuron is sampled
model = 'inception_v3_pretrained'  # key of the base model in `emb`

In [221]:
# Index the neurons of the base model and stack their embeddings into X
# (row i belongs to idx2id[i]).
neuron_names = list(emb[model])
num_instances = len(neuron_names)
idx2id = dict(enumerate(neuron_names))
id2idx = {name: row for row, name in idx2id.items()}

X = np.zeros((num_instances, dim))
for row, name in idx2id.items():
    X[row] = emb[model][name]

# Cluster the base model's neurons.
clustering = DBSCAN(eps=eps, min_samples=2).fit(X)

In [222]:
# Group the neuron ids by DBSCAN label, ignoring noise points (label -1).
raw_clusters = {}
for idx, cluster_label in enumerate(clustering.labels_):
    if cluster_label != -1:
        raw_clusters.setdefault(cluster_label, []).append(idx2id[idx])

# Keep the clusters that hold more than one neuron.
clusters = [members for members in raw_clusters.values() if len(members) > 1]

len(clusters)

34

In [223]:
# Choose `num_rand_samples` distinct clusters, then one random seed neuron
# from each of them.
sampled_idxs = np.random.choice(len(clusters), num_rand_samples, replace=False)
sampled_neurons = []
for cluster_pos in sampled_idxs:
    seed = np.random.choice(clusters[cluster_pos], 1)[0]
    sampled_neurons.append(seed)

In [232]:
# Generate example sets for the base model: the `k` nearest neurons of each
# seed plus one random intruder neuron of the same model.
neurons = list(id2idx.keys())
p_dir_path = os.path.join(alignability_output_dir, 'NeuEvo-base_model')

for sample_neuron_idx, sampled_neuron in enumerate(sampled_neurons):

    # Find neighbors (squared Euclidean distance, negated before insertion).
    sampled_emb = X[id2idx[sampled_neuron]]
    neighbors = TopKKeeper(15)
    for idx, neuron_emb in enumerate(X):
        d2 = -np.sum((neuron_emb - sampled_emb) ** 2)
        neighbors.insert(d2, key=idx2id[idx])
    neighbors = [neighbors.keys, [-v for v in neighbors.vals]]
    selected_neighbors = list(neighbors[0][:k])

    # Sample intruder: a random neuron that is not already selected.
    while True:
        rand_neuron = np.random.choice(neurons, 1)[0]
        if rand_neuron not in selected_neighbors:
            selected_neighbors.append(rand_neuron)
            break

    # Shuffle neurons
    np.random.shuffle(selected_neighbors)
    rand_neuron_idx = selected_neighbors.index(rand_neuron)

    # Generate output directories (parents included).
    dir_path = os.path.join(p_dir_path, 'set-{}'.format(sample_neuron_idx))
    img_dir_path = os.path.join(dir_path, 'img')
    os.makedirs(img_dir_path, exist_ok=True)

    # Save selected_neighbors
    out_path = os.path.join(dir_path, 'neuron.json')
    neuron_output = {}
    for neuron_i, neuron_id in enumerate(selected_neighbors):
        id_tokens = neuron_id.split('-')
        neuron_output['neuron-{}'.format(neuron_i)] = {
            'model': model,
            'neuron': '{}-{}'.format(id_tokens[-2], id_tokens[-1]),
            'random': 'yes' if neuron_i == rand_neuron_idx else 'no'
        }
    with open(out_path, 'w') as f:
        json.dump(neuron_output, f, indent=4)

    # Save example patches: every image of the model's patch directory whose
    # file name contains '<neuron id>-' is copied as img-<n>.jpg.
    for neuron_key, entry in neuron_output.items():
        neuron_id = entry['neuron']
        model_name = entry['model'].replace('inception-v3', 'inception_v3')

        sub_img_dir_path = os.path.join(img_dir_path, neuron_key)
        ex_patch_dir_path = os.path.join(ex_patch_dir, model_name, 'data')
        # For non-pretrained models every '_' in the path becomes '-', after
        # which 'inception_v3' and 'neuron_feature' are restored.
        if 'pretrained' not in ex_patch_dir_path:
            ex_patch_dir_path = ex_patch_dir_path.replace('_', '-').replace('inception-v3', 'inception_v3')
        ex_patch_dir_path = ex_patch_dir_path.replace('neuron-feature', 'neuron_feature')

        img_name_lst = [
            img_name
            for img_name in os.listdir(ex_patch_dir_path)
            if neuron_id + '-' in img_name
        ]

        os.makedirs(sub_img_dir_path, exist_ok=True)
        for img_i, img_name in enumerate(img_name_lst):
            shutil.copy2(
                os.path.join(ex_patch_dir_path, img_name),
                os.path.join(sub_img_dir_path, 'img-{}.jpg'.format(img_i))
            )

### Find Clusters - (4) Randomly selected

In [237]:
# Setting
dim = 30  # length of each embedding vector
k = 6  # number of random neurons per example set
num_rand_samples = 20  # number of example sets to generate
model = 'inception_v3_pretrained'  # key of the model in `emb`

In [240]:
# Generate `num_rand_samples` sets of `k` distinct, randomly chosen neurons of
# `model`; every neuron of such a set is marked as random.
# The candidate pool is taken from `emb[model]` here instead of relying on a
# `neurons` variable left behind by an earlier cell.
neurons = list(emb[model])
p_dir_path = os.path.join(alignability_output_dir, 'Random')

for i in range(num_rand_samples):
    rand_neurons = np.random.choice(neurons, k, replace=False)

    # Generate output directories (parents included).
    dir_path = os.path.join(p_dir_path, 'set-{}'.format(i))
    img_dir_path = os.path.join(dir_path, 'img')
    os.makedirs(img_dir_path, exist_ok=True)

    # Save neurons
    out_path = os.path.join(dir_path, 'neuron.json')
    neuron_output = {
        'neuron-{}'.format(neuron_i): {'model': model, 'neuron': neuron_id, 'random': 'yes'}
        for neuron_i, neuron_id in enumerate(rand_neurons)
    }
    with open(out_path, 'w') as f:
        json.dump(neuron_output, f, indent=4)

    # Save example patches: every image of the model's patch directory whose
    # file name contains '<neuron id>-' is copied as img-<n>.jpg.
    for neuron_key, entry in neuron_output.items():
        neuron_id = entry['neuron']
        model_name = entry['model'].replace('inception-v3', 'inception_v3')

        sub_img_dir_path = os.path.join(img_dir_path, neuron_key)
        ex_patch_dir_path = os.path.join(ex_patch_dir, model_name, 'data')
        # For non-pretrained models every '_' in the path becomes '-', after
        # which 'inception_v3' and 'neuron_feature' are restored.
        if 'pretrained' not in ex_patch_dir_path:
            ex_patch_dir_path = ex_patch_dir_path.replace('_', '-').replace('inception-v3', 'inception_v3')
        ex_patch_dir_path = ex_patch_dir_path.replace('neuron-feature', 'neuron_feature')

        img_name_lst = [
            img_name
            for img_name in os.listdir(ex_patch_dir_path)
            if neuron_id + '-' in img_name
        ]

        os.makedirs(sub_img_dir_path, exist_ok=True)
        for img_i, img_name in enumerate(img_name_lst):
            shutil.copy2(
                os.path.join(ex_patch_dir_path, img_name),
                os.path.join(sub_img_dir_path, 'img-{}.jpg'.format(img_i))
            )

### Find Clusters - (5) Hand-picked

In [257]:
# Hand-picked neuron groups, keyed by a short concept label. Each value lists
# five neuron ids; the export loop further down copies the list, adds one
# randomly sampled intruder and looks the ids up by substring match
# (`neuron_id + '-' in img_name`) in the example-patch directory.
#
# NOTE(review): almost every id has the form '<Layer>_<n>-<index>', but
# 'InceptionC_12-136-6' (under 'nose') and 'InceptionC_11-249-7' (under 'human')
# carry an extra '-<k>' suffix. They look like pasted image names rather than
# neuron ids and may match no patch files -- confirm.
# NOTE(review): 'blue' and 'blue-point' share their first three ids, and
# 'BasicConv2d_5-125' appears in both 'cross' and 'x-shape' -- confirm intended.
hand_picked = {
    'cyan': ['BasicConv2d_0-0', 'BasicConv2d_0-2', 'BasicConv2d_0-17', 'InceptionA_9-19', 'InceptionB_10-760'],
    'blue': ['BasicConv2d_0-4', 'BasicConv2d_0-14', 'BasicConv2d_1-20', 'InceptionA_7-131', 'InceptionA_9-280'],
    'bright-red-yellow': ['BasicConv2d_0-30', 'BasicConv2d_2-3', 'InceptionA_8-248', 'InceptionA_9-226', 'InceptionB_10-706'],
    'cross': ['BasicConv2d_5-54', 'BasicConv2d_5-125', 'InceptionA_7-21', 'InceptionA_7-52', 'InceptionA_7-60'],
    'green': ['InceptionA_7-113', 'InceptionA_7-251', 'InceptionA_8-27', 'InceptionA_8-228', 'InceptionA_8-236'],
    'corner': ['InceptionA_7-194', 'InceptionA_7-204', 'InceptionA_7-217', 'InceptionA_8-60', 'InceptionA_8-121'],
    'eyes': ['InceptionB_10-192', 'InceptionB_10-251', 'InceptionB_10-576', 'InceptionC_11-55', 'InceptionC_11-201'],
    'letter': ['InceptionB_10-241', 'InceptionB_10-491', 'InceptionB_10-707', 'InceptionC_11-68', 'InceptionC_11-411'],
    'nose': ['InceptionB_10-478', 'InceptionC_11-263', 'InceptionC_11-623', 'InceptionC_11-638', 'InceptionC_12-136-6'],
    'human': ['InceptionC_11-195', 'InceptionC_11-249-7', 'InceptionC_11-680', 'InceptionC_12-110', 'InceptionC_12-325'],
    'diagonal': ['InceptionC_11-578', 'InceptionC_11-614', 'InceptionC_11-632', 'InceptionC_11-641', 'InceptionC_11-646'],
    'spreading': ['InceptionC_11-262', 'InceptionC_11-593', 'InceptionC_12-47', 'InceptionC_12-264', 'InceptionC_12-279'],
    'stripe-check': ['InceptionC_11-605', 'InceptionC_11-651', 'InceptionC_11-677', 'InceptionC_11-706', 'InceptionC_11-731'],
    'hair': ['InceptionC_12-217', 'InceptionC_12-211', 'InceptionC_12-249', 'InceptionC_12-359', 'InceptionC_12-361'],
    'head': ['InceptionC_12-301', 'InceptionC_12-305', 'InceptionC_12-313', 'InceptionC_12-327', 'InceptionC_12-388'],
    'red': ['InceptionC_12-416', 'InceptionC_12-424', 'InceptionC_12-432', 'InceptionC_12-441', 'InceptionC_12-447'],
    'circle': ['InceptionC_11-568', 'InceptionC_11-732', 'InceptionC_11-750', 'InceptionC_12-645', 'InceptionC_12-661'],
    'roof': ['InceptionC_11-288', 'InceptionC_12-276', 'InceptionC_12-493', 'InceptionC_12-658', 'InceptionC_12-731'],
    'dog': ['InceptionC_12-506', 'InceptionC_12-525', 'InceptionC_12-529', 'InceptionC_12-744', 'InceptionC_13-264'],
    'leaves': ['InceptionC_12-667', 'InceptionC_12-699', 'InceptionC_12-721', 'InceptionC_13-602', 'InceptionC_13-667'],
    'sky': ['InceptionC_11-594', 'InceptionC_12-396', 'InceptionC_12-403', 'InceptionC_13-642', 'InceptionC_14-340'],
    'watermark': ['InceptionA_7-223', 'InceptionA_8-31', 'InceptionA_8-115', 'InceptionC_13-208', 'InceptionC_13-380'],
    'candle': ['InceptionA_7-97', 'InceptionA_7-111', 'InceptionA_8-286', 'InceptionC_14-650', 'InceptionD_16-1251'],
    'grass': ['InceptionC_12-395', 'InceptionC_12-433', 'InceptionC_12-482', 'InceptionC_14-592', 'InceptionC_12-62'],
    'fingers': ['InceptionC_12-619', 'InceptionC_12-669', 'InceptionC_13-491', 'InceptionC_13-561', 'InceptionC_13-738'],
    'ball': ['InceptionE_18-535', 'InceptionE_18-537', 'InceptionE_18-567', 'InceptionE_18-612', 'InceptionE_18-640'],
    'vertical': ['InceptionC_11-619', 'InceptionC_11-624', 'InceptionC_12-156', 'InceptionC_11-747', 'InceptionE_17-1374'],
    'x-shape': ['BasicConv2d_5-125', 'InceptionC_11-259', 'InceptionC_11-305', 'InceptionC_12-335', 'InceptionE_17-1347'],
    'yellow': ['BasicConv2d_0-22', 'BasicConv2d_0-28', 'InceptionA_9-160', 'InceptionA_7-119', 'InceptionE_18-1049'],
    'frame': ['BasicConv2d_4-33', 'BasicConv2d_4-35', 'BasicConv2d_4-39', 'InceptionA_9-118', 'InceptionA_9-119'],
    'bird': ['InceptionC_12-444', 'InceptionC_13-213', 'InceptionC_13-403', 'InceptionC_13-564', 'InceptionE_17-1967'],
    'blue-point': ['BasicConv2d_0-4', 'BasicConv2d_0-14', 'BasicConv2d_1-20', 'InceptionC_12-504', 'InceptionC_12-562']
}

In [258]:
# Model recorded for every neuron of the hand-picked sets (the 'model' field of
# neuron.json); its name also selects the example-patch directory to copy from.
model = 'inception_v3_pretrained'

In [269]:
# Build one intruder-detection set per hand-picked group. Each set holds the
# group's neurons plus one randomly sampled intruder, shuffled together, and is
# written to <alignability_output_dir>/Hand-picked/set-<i>/ as
#   neuron.json        -- model / neuron id / whether the neuron is the intruder
#   img/neuron-<k>/    -- example patches of each neuron, renamed img-<n>.jpg
#
# NOTE(review): `neurons` is not defined in this cell. It is assumed to hold the
# neuron ids of `model` from an earlier cell -- confirm it is not stale state.
# NOTE(review): sampling is unseeded, so the intruders differ on every run.

# Resolve the example-patch directory once; `model` is the same for every set.
# Only the model directory name is normalized, so `ex_patch_dir` is used as-is
# instead of being rewritten and patched back.
model_dir_name = model.replace('inception-v3', 'inception_v3')
if 'pretrained' not in model_dir_name:
    model_dir_name = model_dir_name.replace('_', '-').replace('inception-v3', 'inception_v3')
ex_patch_dir_path = os.path.join(ex_patch_dir, model_dir_name, 'data')

# List the directory a single time. Sorted because os.listdir returns entries
# in arbitrary order, which would make the img-<n>.jpg numbering unstable.
ex_patch_names = sorted(os.listdir(ex_patch_dir_path))

for i, key in enumerate(hand_picked):

    # Copy so the lists stored in `hand_picked` are never mutated
    picked_neurons = hand_picked[key][:]

    # Sample one intruder that is not already in the group. Drawing from the
    # explicit candidate list raises ValueError when no candidate exists,
    # rather than retrying forever.
    candidates = [n for n in neurons if n not in picked_neurons]
    rand_neuron = str(np.random.choice(candidates))
    picked_neurons.append(rand_neuron)

    # Shuffle neurons and remember where the intruder ended up
    np.random.shuffle(picked_neurons)
    rand_neuron_idx = picked_neurons.index(rand_neuron)

    # Generate output directories (parents are created as needed)
    dir_path = os.path.join(alignability_output_dir, 'Hand-picked', 'set-{}'.format(i))
    img_dir_path = os.path.join(dir_path, 'img')
    os.makedirs(img_dir_path, exist_ok=True)

    # Save neurons
    neuron_output = {
        'neuron-{}'.format(neuron_i): {
            'model': model,
            'neuron': neuron_id,
            'random': 'yes' if neuron_i == rand_neuron_idx else 'no'
        }
        for neuron_i, neuron_id in enumerate(picked_neurons)
    }
    with open(os.path.join(dir_path, 'neuron.json'), 'w') as f:
        json.dump(neuron_output, f, indent=4)

    # Save example patches
    for neuron, neuron_info in neuron_output.items():
        neuron_id = neuron_info['neuron']

        # The folder is created even when no patch matches
        sub_img_dir_path = os.path.join(img_dir_path, neuron)
        os.makedirs(sub_img_dir_path, exist_ok=True)

        # A patch belongs to the neuron when its file name contains '<neuron id>-'
        img_name_lst = [img_name for img_name in ex_patch_names if neuron_id + '-' in img_name]
        if not img_name_lst:
            # Report it; otherwise the only symptom is an empty folder
            print('Warning: no example patches found for {} in {}'.format(neuron_id, ex_patch_dir_path))

        for img_i, img_name in enumerate(img_name_lst):
            shutil.copy2(
                os.path.join(ex_patch_dir_path, img_name),
                os.path.join(sub_img_dir_path, 'img-{}.jpg'.format(img_i))
            )

### Find Evolution

In [276]:
# Root directory under which the evolution-labeling sets are written
# (one sub-directory per model).
label_output_dir = '../../data/human-experiment/evolution-label-example'
# Root directory of the example patches; images are read from
# <ex_patch_dir>/<model code>/data.
ex_patch_dir = '../../data/neuron_feature'

In [273]:
# Number of neurons sampled (without replacement) for each model.
num_samples_per_model = 25
# One entry per model: [model name, epochs]. The epochs are exported in list
# order as 'stage-0', 'stage-1', ...; neurons are sampled from the first epoch.
models = [
    ['inception-v3-512-0.5-0.9', [3, 11, 121]],
    ['inception-v3-512-1.5-0.9', [4, 69, 71]],
    ['vgg16-512-0.01-0.9', [5, 21, 207]],
    ['vgg16-512-0.05-0.9', [3, 11, 13]]
]

In [286]:
# For every model, sample `num_samples_per_model` neurons and export, for each
# sampled neuron, its example patches at each listed epoch:
#   <label_output_dir>/<model>/set-<i>/neuron.json
#   <label_output_dir>/<model>/set-<i>/stage-<k>/img-<n>.jpg
#
# NOTE(review): sampling is unseeded, so a re-run picks different neurons and
# overwrites set-<i>; images left over from a previous run are not removed.
for model_name, epochs in models:

    # Generate output path (parents are created as needed)
    model_dir_path = os.path.join(label_output_dir, model_name)
    os.makedirs(model_dir_path, exist_ok=True)

    # Resolve and list the example-patch directory of every epoch once per
    # model instead of once per sampled neuron. Only the model code is
    # normalized, so `ex_patch_dir` is used as-is. Listings are sorted because
    # os.listdir returns entries in arbitrary order, which would make the
    # img-<n>.jpg numbering unstable.
    ex_patch_dir_paths, ex_patch_names = {}, {}
    for epoch in epochs:
        model_code = '{}-{}'.format(model_name, epoch)
        model_code = model_code.replace('_', '-').replace('inception-v3', 'inception_v3')
        ex_patch_dir_paths[epoch] = os.path.join(ex_patch_dir, model_code, 'data')
        ex_patch_names[epoch] = sorted(os.listdir(ex_patch_dir_paths[epoch]))

    # Sample neurons from the first listed epoch
    all_neurons = list(emb[model_name][epochs[0]].keys())
    sampled_neurons = np.random.choice(all_neurons, num_samples_per_model, replace=False)

    # Generate data
    for i, sampled_neuron in enumerate(sampled_neurons):

        # np.random.choice yields numpy strings; use a plain str from here on
        sampled_neuron = str(sampled_neuron)

        # Generate subdir path
        sub_dir_path = os.path.join(model_dir_path, 'set-{}'.format(i))
        os.makedirs(sub_dir_path, exist_ok=True)

        # Write neuron info
        with open(os.path.join(sub_dir_path, 'neuron.json'), 'w') as f:
            neuron_info = {
                'model': model_name,
                'epochs': epochs,
                'neuron': sampled_neuron
            }
            json.dump(neuron_info, f, indent=4)

        # Save image
        for epoch_i, epoch in enumerate(epochs):

            # Directory to save images
            img_dir_path = os.path.join(
                sub_dir_path,
                'stage-{}'.format(epoch_i)
            )
            os.makedirs(img_dir_path, exist_ok=True)

            # A patch belongs to the neuron when its file name contains '<neuron id>-'
            ex_patch_dir_path = ex_patch_dir_paths[epoch]
            img_name_lst = [
                img_name for img_name in ex_patch_names[epoch]
                if sampled_neuron + '-' in img_name
            ]

            # Save images
            for img_i, img_name in enumerate(img_name_lst):
                shutil.copy2(
                    os.path.join(ex_patch_dir_path, img_name),
                    os.path.join(img_dir_path, 'img-{}.jpg'.format(img_i))
                )
        