### Preparing concept data for Testing with Concept Activation Vectors (TCAV) on ImageNet

###### The following code is adapted from the TensorFlow TCAV implementation

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive; force_remount=True asks Colab to
# remount even when a mount is already active.
drive.mount("/content/drive", force_remount=True)
# Switch the working directory to the project folder on Drive so that the
# relative paths used by the following cells resolve inside it.
%cd '/content/drive/My Drive/Colab Notebooks/Labs/Computer Vision/CAPTUM_methods'

Mounted at /content/drive
/content/drive/My Drive/Colab Notebooks/Labs/Computer Vision/CAPTUM_methods


In [None]:
"""
Copyright 2018 Google LLC

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
""" Downloads models and datasets for imagenet

    Content downloaded:
        - Imagenet images for the zebra class.
        - Full Broden dataset(http://netdissect.csail.mit.edu/)
        - Inception 5h model(https://github.com/Hvass-Labs/TensorFlow-Tutorials/blob/master/inception5h.py)
        - Mobilenet V2 model(https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.md)

    Functionality:
        - Downloads open source models(Inception and Mobilenet)
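        - Downloads the target classes from imagenet (the original script fetched only the zebra class; this notebook fetches the classes listed in imagenet_classes)
        - Extracts texture concepts from the Broden dataset (the original script used striped, dotted, zigzagged; this notebook uses the broden_concepts list)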
        - Structures the data in a format that can be readily used by TCAV
        - Creates random folders with examples from Imagenet. Those are used by TCAV.

    Example usage:

    python download_and_make_datasets.py --source_dir=YOUR_FOLDER --number_of_images_per_folder=50 --number_of_random_folders=10
"""
import subprocess
import os
import argparse
from tensorflow.io import gfile
from imagenet import imagenet_and_broden_fetcher as fetcher


def make_concepts_targets_and_randoms(source_dir, number_of_images_per_folder, number_of_random_folders,
                                      imagenet_classes=None, broden_concepts=None):
    """Download the raw data (if missing) and build the target, concept and random folders for TCAV.

    Args:
        source_dir: Directory that receives the downloads and the generated
            folders. It is created when it does not exist yet.
        number_of_images_per_folder: Number of images requested for every
            target class, every concept and every random folder.
        number_of_random_folders: Number of random experiments planned. One
            extra folder is generated, because N experiments need N+1 folders.
        imagenet_classes: Optional iterable of ImageNet class names to fetch
            as targets. Defaults to the ten animal classes this notebook uses.
        broden_concepts: Optional iterable of Broden texture names to extract
            as concepts. Defaults to the nine textures this notebook uses.
    """
    import warnings

    # None (rather than a list literal) as default avoids sharing one mutable
    # list between calls.
    if imagenet_classes is None:
        imagenet_classes = ['bobcat', 'cat', 'coydog', 'suricate', 'wildcat',
                            'bulldog', 'coondog', 'dog', 'housedog', 'watchdog']
    if broden_concepts is None:
        broden_concepts = ['blotchy', 'braided', 'dotted', 'fibrous', 'flecked',
                           'matted', 'striped', 'woven', 'wrinkled']

    # Run the download script only when the Broden data or the Inception model
    # is not already present under source_dir.
    if not gfile.exists(source_dir):
        gfile.makedirs(source_dir)
    broden_missing = not gfile.exists(os.path.join(source_dir, 'broden1_224/'))
    if broden_missing or not gfile.exists(os.path.join(source_dir, 'inception5h')):
        exit_code = subprocess.call(['bash', 'FetchDataAndModels.sh', source_dir])
        if exit_code != 0:
            # Surface a failed download instead of discarding the exit status;
            # a warning (not an exception) keeps the previous best-effort flow.
            warnings.warn(
                "FetchDataAndModels.sh exited with status %d; the data under %r may be incomplete"
                % (exit_code, source_dir),
                RuntimeWarning,
            )

    # Make targets from imagenet.
    imagenet_dataframe = fetcher.make_imagenet_dataframe(os.path.join("imagenet", 'imagenet_url_map.csv'))
    for class_name in imagenet_classes:
        fetcher.fetch_imagenet_class(source_dir, class_name, number_of_images_per_folder, imagenet_dataframe)

    # Make concepts from broden.
    for concept in broden_concepts:
        fetcher.download_texture_to_working_folder(broden_path=os.path.join(source_dir, 'broden1_224'),
                                                   saving_path=source_dir,
                                                   texture_name=concept,
                                                   number_of_images=number_of_images_per_folder)

    # Make random folders. If we want to run N random experiments with tcav, we need N+1 folders.
    fetcher.generate_random_folders(
        working_directory=source_dir,
        random_folder_prefix="random500",
        number_of_random_folders=number_of_random_folders + 1,
        number_of_examples_per_folder=number_of_images_per_folder,
        imagenet_dataframe=imagenet_dataframe
    )

# Root folder that receives the downloads and the generated TCAV folders.
source_dir = 'tcav_data'

# Sizes used for this run: images requested per folder and number of random experiments.
IMAGES_PER_FOLDER = 300
RANDOM_EXPERIMENTS = 4

# Make sure the root folder is there before anything is written into it.
if not gfile.exists(source_dir):
    gfile.makedirs(source_dir)
    print("Created source directory at " + source_dir)

# Build the target, concept and random folders.
make_concepts_targets_and_randoms(
    source_dir=source_dir,
    number_of_images_per_folder=IMAGES_PER_FOLDER,
    number_of_random_folders=RANDOM_EXPERIMENTS,
)
print("Successfully created data at " + source_dir)

You requested 300 but we were only able to find 88 good images from imageNet for concept bobcat
You requested 300 but we were only able to find 106 good images from imageNet for concept cat
You requested 300 but we were only able to find 18 good images from imageNet for concept coydog
You requested 300 but we were only able to find 144 good images from imageNet for concept suricate
You requested 300 but we were only able to find 44 good images from imageNet for concept wildcat
You requested 300 but we were only able to find 75 good images from imageNet for concept bulldog
You requested 300 but we were only able to find 57 good images from imageNet for concept coondog
You requested 300 but we were only able to find 227 good images from imageNet for concept dog
You requested 300 but we were only able to find 170 good images from imageNet for concept housedog
You requested 300 but we were only able to find 154 good images from imageNet for concept watchdog




Successfully created data at tcav_data


In [None]:
import os
from pathlib import Path
import shutil

def sort_images_by_concept(source_root, destination_root):
    """Move every file found under ``source_root`` into ``destination_root/<concept>/``.

    The concept name is the part of the file name before the first underscore,
    so a file named ``striped_0001.jpg`` ends up in ``<destination_root>/striped/``.
    A file name without an underscore is used as the folder name unchanged.

    Args:
        source_root: Directory that is walked recursively for files to move.
            A missing directory yields no files and therefore moves nothing.
        destination_root: Directory under which one sub-folder per concept is
            created. It is created (with parents) when it does not exist.

    Returns:
        int: Number of files that were moved.
    """
    # Default permissions are used on purpose: the former mode=0o007 removed
    # all owner permissions, so a non-root user could not write into the
    # folders that had just been created.
    Path(destination_root).mkdir(parents=True, exist_ok=True)

    moved = 0
    for folder_name, _subfolders, filenames in os.walk(source_root):
        for filename in filenames:
            concept = filename.split('_')[0]
            concept_dir = os.path.join(destination_root, concept)
            Path(concept_dir).mkdir(parents=True, exist_ok=True)

            # os.walk recurses, so the file lives in folder_name, which is not
            # necessarily source_root itself.
            shutil.move(
                os.path.join(folder_name, filename),
                os.path.join(concept_dir, filename)
            )
            moved += 1
    return moved


to_sort_path = os.path.join("broden1_224", "images", "dtd")
path_concepts = os.path.join("data", "tcav", "image", "concepts")

# Assigned (not left as a bare expression) so the cell produces no output.
moved_count = sort_images_by_concept(to_sort_path, path_concepts)