<a href="https://colab.research.google.com/github/siddsuresh97/prep_tutorial/blob/main/tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
Size
# Download data
# Generate activation of avg_pool and pool1
# Perform experiment

In [2]:
BASE_DIR = os.getcwd()

In [10]:
#@title imports

import logging
import os
import numpy as np
import time
import pickle
from datetime import datetime
import deepdish as dd

from tensorflow.keras.backend import clear_session
from tensorflow.keras.applications.resnet50 import preprocess_input as preprocess_input_resnet50
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.resnet50 import ResNet50
import tensorflow as tf

In [6]:
#@title helper functions - intermediate layer features


def store_dataset_fnames(intermediate_layer_names, dataset_dir, batch_size, stim_type, features_dir):
    # Stores filenames of stimuli in a pickle file
    # Fnames of stimuli contain metadata about labels
    # Only one intermediate layer is used because the fnames are same regardless of the intermediate layer
    datagen = ImageDataGenerator(preprocessing_function=preprocess_input_resnet50)
    if not os.path.exists(features_dir):
        os.makedirs(features_dir)
    for layer in intermediate_layer_names[0]:
        generator = datagen.flow_from_directory(dataset_dir, shuffle = False, batch_size = batch_size)
        filenames = generator.filenames
        fname_dict = {'fnames':filenames}
        pickle.dump( fname_dict, open(os.path.join(features_dir,"filenames_{}.p".format(stim_type)), "wb" ))  
    logging.info(datetime.now().strftime("%H:%M:%S"))


def store_intermediate_layer_features(model_name, intermediate_layer_names, dataset_dir, batch_size, stim_type, features_dir):
    for layer in intermediate_layer_names:
        logging.info('------------------------------- {} ----------------------------'.format(layer))
        if model_name == 'resnet50':
            datagen = ImageDataGenerator(preprocessing_function=preprocess_input_resnet50)
        else:
            logging.error('Models apart from resnet50 not implemented')
        generator = datagen.flow_from_directory(dataset_dir, shuffle = False, batch_size = batch_size, target_size = (224, 224))
        len = generator.n
        batches = np.ceil(len/batch_size)
        extract_and_store(model_name, 1, layer, generator, features_dir, stim_type, batches)
        extract_and_store(model_name, 2, layer, generator, features_dir, stim_type, batches)
    return


def extract_intermediate_layer_representations(model_name, intermediate_layer_names, dataset_base_dir, batch_size, stim_type, features_base_dir, conditions, exp_name):
    """
    This function extracts and stores intermediate layer representations given a model
    and a dataset   
    """
    if exp_name == '1a':
        stim_name = 'random_stim'
    elif exp_name == '1b':
        stim_name = 'test_stim' 
    else:
        logging.error('Only Exp 1a, 1b implemented')

    # condition is set size if exp is 1a or 1b, otherwise it is color diveristy
    for condition in conditions:
        if exp_name in ['1a' or '1b']:
            dataset_dir = os.path.join(dataset_base_dir, '{}_generated_stimuli'.format(condition), stim_name)
            features_dir = os.path.join(features_base_dir, 'set_size_{}'.format(condition))
        elif exp_name in ['2a']:
            stim_type = condition
            dataset_dir = os.path.join(dataset_base_dir, condition)
            features_dir = os.path.join(features_base_dir)
        else:
            logging.error('Only 1a, 1b, 2a activation extraction implemented')
        store_dataset_fnames(intermediate_layer_names, dataset_dir, batch_size, stim_type, features_dir)
        start = time.time()
        store_intermediate_layer_features(model_name, intermediate_layer_names, dataset_dir, batch_size, stim_type, features_dir)
        logging.info('Total time to extract intermediate layer reprsentations (in seconds): {}'.format(time.time()-start))


def extract_and_store(model_name, part, layer, generator, features_dir, stim_type, batches):
    '''
    Extracts intermediate layer features and stores them in two h5 files
    '''
    if model_name == 'resnet50':
        model = ResNet50(weights='imagenet', include_top=True)
    else:
        logging.error('Models apart from resnet not implementde')
    extractor = tf.keras.Model(inputs=model.inputs,
                                outputs=[model.get_layer(layer).output])
    features_dict = {'fnames':[],'features':[]}
    if part == 1:
        min_range = 0
        max_range = int(batches)//2
    elif part == 2:
        min_range = int(batches)//2
        max_range = int(batches)
    for batch in range(min_range, max_range):
        time_for_generator_operation = time.time()
        x,y = generator.next()
        # logging.info('Time for generator %f' % time.time()-time_for_generator_operation)
        time_for_prediction = time.time()
        generator_features = extractor.predict(x)
        features_dict['features'].append(generator_features)
        # logging.info('Prediction Time = %f'%time.time()-time_for_prediction)
        time_for_deletion = time.time()
        del generator_features
        # logging.info('Time_for_deletion = ', time.time()-time_for_deletion)
        idx = (generator.batch_index - 1) * generator.batch_size
        features_dict['fnames'].append(generator.filenames[idx : idx + generator.batch_size])
    del extractor
    del model
    if not os.path.exists(features_dir):
        os.makedirs(features_dir)
    dd.io.save(os.path.join(features_dir, 'resnet_50_features_{}_{}_part_{}.h5'.format(stim_type, layer, part)), features_dict)
    del features_dict
    gc.collect()
    clear_session()
    logging.info("Saved {} part {}".format(layer, part))
    return 



In [1]:
#@title link to drive 

# Average Size

## Extract intermediate layer representations

In [11]:
model_name = 'resnet50'
intermediate_layer_names = ['avg_pool', 'pool1_pool']
dataset_dir = os.path.join(BASE_DIR, "data/average_size")
batch_size = 256
stim_type = 'avg_size'
features_dir = os.path.join(dataset_dir, "features")
conditions = [4, 8]
exp_name = '1a'
extract_intermediate_layer_representations(model_name=model_name,
                                            intermediate_layer_names = intermediate_layer_names,
                                            dataset_base_dir = dataset_dir, 
                                            batch_size = batch_size, 
                                            stim_type = stim_type, 
                                            features_base_dir = features_dir, 
                                            conditions = conditions, 
                                            exp_name = exp_name)

Found 3000 images belonging to 1 classes.
Found 3000 images belonging to 1 classes.
Found 3000 images belonging to 1 classes.
Found 3000 images belonging to 1 classes.
Found 3000 images belonging to 1 classes.
Found 3000 images belonging to 1 classes.
Found 3000 images belonging to 1 classes.
Found 3000 images belonging to 1 classes.
Found 3000 images belonging to 1 classes.
Metal device set to: Apple M1 Pro

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



2022-07-06 10:04:19.526108: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-07-06 10:04:19.526436: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2022-07-06 10:04:23.645300: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-07-06 10:04:24.022264: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


> [0;32m/var/folders/qt/rt9bv7zs62b05gjym3zzqgyw0000gn/T/ipykernel_59931/4258867819.py[0m(93)[0;36mextract_and_store[0;34m()[0m
[0;32m     92 [0;31m        [0;32mimport[0m [0mipdb[0m[0;34m;[0m[0mipdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 93 [0;31m        [0mfeatures_dict[0m[0;34m[[0m[0;34m'fnames'[0m[0;34m][0m[0;34m.[0m[0mappend[0m[0;34m([0m[0mgenerator[0m[0;34m.[0m[0mfilenames[0m[0;34m[[0m[0midx[0m [0;34m:[0m [0midx[0m [0;34m+[0m [0mgenerator[0m[0;34m.[0m[0mbatch_size[0m[0;34m][0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     94 [0;31m    [0;32mdel[0m [0mextractor[0m[0;34m[0m[0;34m[0m[0m
[0m
