## Compute the features from all layers and check.

1. https://keras.io/api/applications/#vgg19
2. Make the computation a bit modular.
3. Plot the TCAV scores with features from all layers.

### Layers in VGG19

1. Conv3x3 (64), Conv3x3 (64), MaxPool
2. Conv3x3 (128), Conv3x3 (128), MaxPool
3. Conv3x3 (256), Conv3x3 (256), Conv3x3 (256), Conv3x3 (256), MaxPool
4. Conv3x3 (512), Conv3x3 (512), Conv3x3 (512), Conv3x3 (512), MaxPool
5. Conv3x3 (512), Conv3x3 (512), Conv3x3 (512), Conv3x3 (512), MaxPool
6. FC + Softmax (we remove this, since include_top = False)

In [1]:
import cv2
import numpy as np
from glob import glob
from tqdm import tqdm
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input, decode_predictions
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.vgg19 import preprocess_input, decode_predictions
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import concurrent.futures
from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
class ModelInstantiate:
    def __init__(self, layer, mode):
        self.layer = layer
        self.mode = mode
        
    def get_model(self):
        if self.mode == 'VGG19':
            base_model = VGG19(weights='imagenet', include_top=False)
        else:
            base_model = VGG16(weights='imagenet', include_top=False)
        model = Model(inputs=base_model.input, outputs=base_model.get_layer(self.layer).output)
        return model
    
    def _get_features(self, img):
        model = self.get_model()
        img_data = image.img_to_array(img)
        img_data = np.expand_dims(img_data, axis=0)
        img_data = preprocess_input(img_data)
        fts = model.predict(img_data)
        fts_np = np.array(fts)
        return np.squeeze(fts_np)

In [3]:
class ImageReader:
    def __init__(self, classes, folderpath, label_dict):
        self.classes = classes
        self.folderpath = folderpath
        self.label_dict = label_dict
        self.images = []
        self.labels = []
        
    def read_images(self):
        for base in self.classes:
            for ix, imgp in tqdm(enumerate(glob(self.folderpath + base + '/*.jpg'))):
                label = imgp.split('/')[-2]
                img = cv2.imread(imgp)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img = cv2.resize(img, (224,224))
                self.images.append(img)
                self.labels.append(self.label_dict[label])
    
    def get_images_and_labels(self):
        self.read_images()
        return self.images, self.labels

In [4]:
layers = ["block1_pool", "block2_pool", "block3_pool", "block4_pool", "block5_pool"]
modes = ["VGG16", "VGG19"]
classes = ["zebra", "tiger"]
classes_dict = {"zebra": 1, "tiger": 0}
concepts = ["zebra_stripes", "tiger_stripes"]
concepts_dict = {"zebra_stripes": 1, "tiger_stripes": 0}
folderpath = "/Users/Janjua/Desktop/QCRI/Work/zebra_stripes_data/"

reader_base = ImageReader(classes, folderpath, classes_dict)
reader_concept = ImageReader(concepts, folderpath, concepts_dict)

In [5]:
concept_imgs, concept_labels = reader_concept.get_images_and_labels()

79it [00:00, 646.43it/s]
79it [00:00, 486.24it/s]


In [6]:
print('Reading data for both - Zebra and Tiger!')
zebra_reader_base = ImageReader([classes[0]], folderpath, classes_dict)
tiger_reader_base = ImageReader([classes[1]], folderpath, classes_dict)

zebra_imgs, zebra_labels = zebra_reader_base.get_images_and_labels()
tiger_imgs, tiger_labels = tiger_reader_base.get_images_and_labels()

79it [00:00, 613.28it/s]
0it [00:00, ?it/s]

Reading data for both - Zebra and Tiger!


79it [00:00, 519.57it/s]


In [7]:
print(len(concept_imgs), len(zebra_imgs))
print(len(tiger_imgs))

158 79
79


In [None]:
concept_fts_dict_VGG16 = {
                    "block1_pool": [],
                    "block2_pool": [],
                    "block3_pool": [],
                    "block4_pool": [],
                    "block5_pool": []
                 }

zebra_fts_dict_VGG16 = {
                    "block1_pool": [],
                    "block2_pool": [],
                    "block3_pool": [],
                    "block4_pool": [],
                    "block5_pool": []
                 }

print('Go get coffee - this takes some time.')

with concurrent.futures.ProcessPoolExecutor() as executor:    
    for layer in layers:
        model_instance =  ModelInstantiate(layer, modes[0])
        print('Running for layer # ', layer)

        for ix, val in tqdm(enumerate(concept_imgs)):
            fts_1 = executor.submit(model_instance._get_features, val)
            concept_fts_dict_VGG16[layer].append(fts_1.result())

        for ix, val in tqdm(enumerate(zebra_imgs)):
            fts_2 = executor.submit(model_instance._get_features, val)
            zebra_fts_dict_VGG16[layer].append(fts_2.result())

0it [00:00, ?it/s]

Go get coffee - this takes some time.
Running for layer #  block1_pool
Instructions for updating:
If using Keras pass *_constraint arguments to layers.


1it [00:00,  1.09it/s]

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


2it [00:01,  1.10it/s]

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


3it [00:02,  1.07it/s]

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


4it [00:03,  1.10it/s]

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


5it [00:04,  1.11it/s]

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


6it [00:06,  1.07s/it]

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


7it [00:07,  1.15s/it]

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


158it [03:27,  1.32s/it]
79it [03:31,  2.68s/it]
0it [00:00, ?it/s]

Running for layer #  block2_pool


158it [09:22,  3.56s/it]
79it [05:29,  4.17s/it]
0it [00:00, ?it/s]

Running for layer #  block3_pool


158it [14:37,  5.55s/it]
79it [08:32,  6.48s/it]
0it [00:00, ?it/s]

Running for layer #  block4_pool


158it [19:40,  7.47s/it]
40it [05:32,  8.71s/it]

In [None]:
no_concepts = 158
no_imgs = 79
lbls_np = np.array(concept_labels)
clf = LogisticRegression(random_state=0)
concept_cavs = {
                    "block1_pool": [],
                    "block2_pool": [],
                    "block3_pool": [],
                    "block4_pool": [],
                    "block5_pool": []
                 }

for key, value in concept_fts_dict_VGG16.items():
    print("For layer # - ", key)
    fts_np = np.array(value)
    fts_np = fts_np.reshape(no_concepts, -1)
    clf.fit(fts_np, lbls_np)
    # compute CAVs
    if len(clf.coef_) == 1:
        cavs = [-1 * clf.coef_[0], clf.coef_[0]]
    else:
        cavs = [c for c in clf.coef_]
    concept_cavs[key].append(cavs)

In [None]:
# since we have CAVs all computed, now we have to compute TCAV.
def get_direction(layer, concept):
    cavs = concept_cavs[layer][0] # since it is a list of lists
    return cavs[concepts.index(concept)]

def compute_direc_derivative(x, y):
    a = np.dot(x, y)
    return a < 0

In [None]:
# for each base image (zebra) feature, compute the directional derivative and get the TCAV score.

tcavs = {}
for key, value in zebra_fts_dict_VGG16.items():
    count = 0
    print("For layer # - ", key)
    cav = get_direction(key, 'zebra_stripes')
    for ft in value: # loop over every feature
        dydx = compute_direc_derivative(ft, cav)
        if dydx: count += 1
    tcav = float(count) / float(len(value))
    tcavs[key] = tcav

In [None]:
print('Printing TCAV scores for each block!')
print(tcavs)