<a href="https://colab.research.google.com/github/shivangibithel/ACMR_demo/blob/master/Nuswide_ACMR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!python --version

Python 2.7.17


In [2]:
from google.colab import drive
drive.mount('/content/gdrive')
# %cd gdrive/
%tensorflow_version 1.x
!ls

Mounted at /content/gdrive
TensorFlow 1.x selected.
gdrive	sample_data


In [20]:
%cd ..
!ls

/
bin	 datalab  home	 lib64	opt   run   sys		       tools
boot	 dev	  lib	 media	proc  sbin  tensorflow-1.15.2  usr
content  etc	  lib32  mnt	root  srv   tmp		       var


In [3]:
%cd gdrive/MyDrive/ACMR_demo

!ls

/content/gdrive/MyDrive/ACMR_demo
acmr_wikipedia_stats.npy  train_adv_crossmodal_simple_nuswide.py
data			  train_adv_crossmodal_simple_wiki.py
models			  train_adv_crossmodal_triplet_wiki.py
README.md


In [None]:
# !python train_adv_crossmodal_simple_wiki.py

In [None]:
# !python train_adv_crossmodal_triplet_wiki.py

In [None]:
!python train_adv_crossmodal_simple_nuswide.py

# Framework

In [4]:
import os
import os.path as path
import numpy as np
import random
from sys import getsizeof



class Dataset:
    """
        Dataset is the superclass which contains high level variables:
        x_{train,val,test}, y_{train,val,test}
        dir_{train,val,test}
        loader : function to read the dataset
        preprocess : function to preprocess the datasets
        normalize : normalize train, val and test datasets
        Note: while normalizing:
            1. stats obtained from train can be used to normalize val and test
            2. val and test can be normalized independently
            This needs to be fixed.... #LOOK
        Example code is provided for NUS-wide
    """
    def __init__(self, directories,
                         loader, 
                         preprocess=None,
                         preprocess_params=None,
                         normalize=None,
                         normalization_params=None,
                         read_directories=(True,True,True),
                         summarize=None
                         ):
        self.dir_train, self.dir_val, self.dir_test = directories
        self.read_train, self.read_val, self.read_test = read_directories
        self.x_train = None
        self.x_val = None
        self.x_test = None
        self.y_train = None
        self.y_val = None
        self.y_test = None
        self.loader = loader
        self.preprocess = preprocess
        self.preprocess_params = preprocess_params
        self.normalize = normalize
        self.normalization_params = normalization_params
        self.summarize = summarize
        self.stats = None

    def get_train_labels(self):
        return self.y_train

    def get_test_labels(self):
        return self.y_test

    def check_directory(self, dir, msg='{} Directory does not exist!!'):
        assert path.exists(dir), msg.format(dir)

    def load_data(self):
        if self.read_train:
            self.check_directory(self.dir_train)
            self.x_train, self.y_train = self.loader(self.dir_train, "train")
        if self.read_val:
            self.check_directory(self.dir_val)
            self.x_val, self.y_val = self.loader(self.dir_val, "val")
        if self.read_test:
            self.check_directory(self.dir_test)
            self.x_test, self.y_test = self.loader(self.dir_test, "test")

    def preprocess_data(self):
        if not self.preprocess:
            def func(x,y, params=None):
                return x,y
            self.preprocess = func

        if not self.normalize:
            def func(x,y, params=None):
                return x,y
            self.normalize = func

        if self.read_train:
            self.x_train, self.y_train = self.preprocess(self.x_train, self.y_train, self.preprocess_params)
            self.x_train, self.y_train = self.normalize(self.x_train, self.y_train, self.normalization_params)
        if self.read_val:
            self.x_val, self.y_val = self.preprocess(self.x_val, self.y_val, self.preprocess_params)
            self.x_val, self.y_val = self.preprocess(self.x_val, self.y_val, self.normalization_params)
        if self.read_test:
            self.x_test, self.y_test = self.preprocess(self.x_test, self.y_test, self.preprocess_params)
            self.x_test, self.y_test = self.preprocess(self.x_test, self.y_test, self.normalization_params)

    def get_stats(self):
        if (self.summarize == None):
            return None

        if self.stats:
            return self.stats

        stats = {}
        if self.read_train:
            stats['train'] = self.summarize(self.x_train, self.y_train, 'train')
        if self.read_test:
            stats['test'] = self.summarize(self.x_test, self.y_test, 'test')
        if self.read_val:
            stats['val'] = self.summarize(self.x_val, self.y_val, 'val')

        self.stats = stats
        return self.stats

In [5]:
import os
import os.path as path
import numpy as np
import random
import sys
import time
import json


class Parameters:
    def __init__(self, values):
        self.values = values
        # This should ideally be stored in a dictionary format,
        # to allow for easy access and referencing
        self.size = sys.getsizeof(values)


def map_rank(traingnd, testgnd, hamming_rank):
    """ 
        This funtion returns map@all metric score.
        hamming_rank : numtrain x numtest
        *gnd : numsamples x labelsize
    """
    numtrain, numtest = hamming_rank.shape
    apall = np.zeros((numtrain, numtest))
    patk = np.zeros((numtrain, numtest))
    ratk = np.zeros((numtrain, numtest))
    aa = np.array([i+1 for i in range(numtrain)])
    for i in range(numtest):
        y = hamming_rank[:, i]
        new_label = np.array([0 for j in range(numtrain)])
        relevant_indices = (np.matmul(traingnd, testgnd[i, :].reshape((-1, 1))) > 0).reshape(-1)
        new_label[relevant_indices] = 1
        total_relevant = np.sum(new_label)
        xx = np.cumsum(new_label[y]) #retrieved relevant
        ratk[:, i] = xx / total_relevant
        patk[:, i] = xx / aa
        x = xx * new_label[y]
        p = x / aa #precision@k
        p = np.cumsum(p)
        mask = (p != 0)
        p[mask] = p[mask]/xx[mask]
        apall[:, i] = p.copy()
    pre = np.mean(patk, axis=1)
    recall = np.mean(ratk, axis=1)
    mAP = np.mean(apall, axis=1)
    return mAP, pre, recall

class Model:
    '''
    Model class serves as a wrapper for dataset objects, training and evaluation functions, datasets, etc
    The aim is to abstract low-level details and provide simple function calls for experimentation
    Model Inputs::
    training_function -> function that acts on the input data and returns a Parameters object
    The subsequent inputs should be consistent with the inputs required by the training function
    dataset_obj -> of type Dataset, containing training, validation and test files
    hyperparams -> Dictionary of iterations, learning rate or any other vars.
    params -> an object of class Parameters, which contains the matrices, weights and any other parameters,
                in the correct shape and data type, as per the training function
    params_verification -> ensures consistency b/w params and output of training function
    prediction_function -> takes params and datapoint(s) as input to generate output(s)
    evaluation_metrics -> list of evaluation metrics to be calculated
                            Output from prediction function should be in the required format
                            Implementation for those not provided by the framework
    '''
    def __init__(self, 
                    training_function,
                    hyperparams,
                    dataset_obj,
                    params=None,
                    params_verification=None,
                    prediction_function=None,
                    evaluation_metrics=None,
                    is_neural=None
                    ):
        '''
        Note: if you are using a neural model, then:
        1. pass is_neural = True
        2. params can be the saved object of the neural model
        This is to avoid redundancy, as TF, PyTorch, etc already have optimized saving and loading routines
        '''
        self.train = training_function
        self.hyperparams = hyperparams
        # dataset_object has to be of type Dataset
        self.dataset_obj = dataset_obj
        self.params = params
        self.params_verification = params_verification
        self.prediction_function = prediction_function
        self.evaluation_metrics = evaluation_metrics
        self.is_neural = is_neural
        self.stats = self.initialize_stats()
        self.logs = []
        self.results ={}

    def initialize_stats(self):
        stats = {
            'data_stats' : self.dataset_obj.get_stats(),
            'params_size' : self.params.size if self.is_neural else sys.getsizeof(self.params),
            'epochs' : 0,
            'training_time' : 0,
            'loss_history' : {},
            # Dictionary, where multiple lists are stored, for each kind of loss being monitored
            'metrics' : {}
        }

        return stats

    def train_model(self):
        '''
        trains the model, using the training function, params and hyperparams, dataset_obj provided by the user
        the train() function must return updated params, list of losses at each epoch, and logs
        This is used for further prediction and analysis.
        '''
        start = time.time()
        params, losses, logs = self.train(
                                    self.dataset_obj,
                                    self.params,
                                    self.hyperparams
                                )
        end = time.time()

        self.params = params
        self.stats['loss_history'] = losses[1:]
        self.logs.append(logs)
        self.stats['training_time'] = (end-start)

    # tag = train, val, test. Or anything else, but then custom dataset and provisions in prediction_function are needed
    def predict(self, tag):
        """ calls user-defined prediction_function
            user must return the results in the following form"
            type(results) = dict
            results.keys() : ['itot_ranked_results', 'ttoi_ranked_results']
            and matrices corresponding to the each key.
        """
        start = time.time()
        n_samples, results, logs = self.prediction_function(self.dataset_obj, self.params, tag)
        end = time.time()

        self.results[tag] = results #TODO: ''.format(tag)
        self.stats['prediction_time'] = (end-start)/n_samples
        self.logs.append(logs)

    def evaluate(self, train_labels, test_labels):
        """ calculate mAP, recall, pecision
            This funtion will make use of the "results" stored in
            the predict part above.
            args:
                train_labels : #samples x #classes matrix. 
                                It should be a binary matrix saying 
                                given sample belongs what labels.
                test_labels :  #samples x #classes matrix
                                It should be a binary matrix saying 
                                given sample belongs what labels.
        """
        mAP_itot, pre_itot, recall_itot = map_rank(train_labels,test_labels, self.results['test']['itot_ranked_results'].T)
        mAP_ttoi, pre_ttoi, recall_ttoi = map_rank(train_labels, test_labels, self.results['test']['ttoi_ranked_results'].T)
        self.stats['metrics']['map_itot'] = mAP_itot
        self.stats['metrics']['map_ttoi'] = mAP_ttoi
        self.stats['metrics']['pre_itot'] = pre_itot
        self.stats['metrics']['pre_ttoi'] = pre_ttoi
        self.stats['metrics']['recall_itot'] = recall_itot
        self.stats['metrics']['recall_ttoi'] = recall_ttoi
        print('image to text mAP@max: ', np.max(mAP_itot), np.argmax(mAP_itot))
        print('image to text P@max: ', np.max(pre_itot), np.argmax(pre_itot))
        print('image to text recall@p_max: ', recall_itot[np.argmax(pre_itot)])
        print('text to image mAP@max: ', np.max(mAP_ttoi), np.argmax(mAP_ttoi))
        print('text to image P@max: ', np.max(pre_ttoi), np.argmax(pre_ttoi))
        print('text to image recall@p_max: ', recall_ttoi[np.argmax(pre_ttoi)])
        

    def get_stats(self):
        return self.stats

    def save_stats(self, filename):
        np.save(filename, self.stats)

In [6]:
import tensorflow as tf
from tensorflow.python.framework import ops


class FlipGradientBuilder(object):
    def __init__(self):
        self.num_calls = 0

    def __call__(self, x, l=1.0):
        grad_name = "FlipGradient%d" % self.num_calls
        @ops.RegisterGradient(grad_name)
        def _flip_gradients(op, grad):
            return [tf.negative(grad) * l]
        
        g = tf.get_default_graph()
        with g.gradient_override_map({"Identity": grad_name}):
            y = tf.identity(x)
            
        self.num_calls += 1
        return y
    
flip_gradient = FlipGradientBuilder()


In [7]:
import os
import os.path as path
import numpy as np
import random
import os.path as path
import argparse
import matplotlib.pyplot as plt

class Comparator:
    '''
        Comparator class reads stats stored across multiple files.
        Has functions to plot bar graphs and line graphs, as per the labels provided by the user
        The main function takes 3 arguments -> filepaths, bar_labels and line_labels
        Get more info about input format : python compare.py --help
    '''
    def __init__(self, files):
        self.files = files
        assert len(files) > 0 and files[0] != '', 'No file/empty filename provided!!'
        self.stats = [self.load_stats(f) for f in files]
        self.labels = [path.basename(f).split('.')[0] for f in files]
        self.comparisions = {}
        self.outdir = '_V_'.join(self.labels)
        if not path.exists(self.outdir):
            os.makedirs(self.outdir)

    def load_stats(self, file):
        encoding = 'ASCII'
        if 'py2' in file:
            # This is because files dumped in python2 have different encoding
            encoding = 'latin1'
        return np.load(file, allow_pickle=True, encoding=encoding)[()]

    def createBarPlot(self, tag, subtag1='', subtag2=''):
        '''
            creates a bar plot
            supported tags - params_size, training_time, inference_time
            other tags can also be used, but consistency has to be ensured by the user
            Here upto 3 levels can be handled. For example : stats['data_stats']['train']['num_samples']
        ''' 
        labels = self.labels
        values = [s[tag] for s in self.stats]
        if subtag1 != '':
            values = [s[subtag1] for s in values]
            if subtag2 != '':
                values = [s[subtag2] for s in values]
        plt.title('{} {} {}'.format(tag, subtag1, subtag2))
        plt.bar(labels, values)
        outfile = path.join(self.outdir, '{} {} {}.jpeg'.format(tag, subtag1, subtag2))
        plt.savefig(outfile)
        plt.close()
    
    def createLinePlot(self, tag, subtag1='', subtag2=''):
        '''
            supported tags: loss_histry, metrics
            If the metric object is 2-level, then a subtag can be provided as well
            For example -> self.stats1[tag][subtag]
            Note subtag2 != '' only if subtag1 != ''
            This function does not handle 3-level tags, unlike createBarPlot()
            Here, when 2nd tag is provided then 1st tag serves as y-axis and 2nd as x-axis
            For example -> precision-recall curves
        '''
        is_biaxial = False
        subtag = subtag1
        if subtag1 != '' and subtag2 != '':
            is_biaxial = True
            subtag = subtag1 + ' ' + subtag2
        
        y_label = tag + ' ' + subtag1
        x_label = 'i' if subtag2 == '' else (tag + ' ' + subtag2)

        for i in range(len(self.labels)):
            label = self.labels[i]
            line = self.stats[i][tag]
            if subtag1 != '':
                line = line[subtag1]

            # ######## UNCOMMENT THIS SECTION IF USING THE STATS PROVIDED IN THE GIT-REPO
            if tag=='loss_history':
                line = line[1:]
            # ###########
            indices = [j for j in range(1,len(line)+1)]
            # In this case we replace indices by subtag2 readings
            if subtag2 != '':
                indices = self.stats[i][tag][subtag2]
            plt.plot(indices, line, label=label)       

        plt.title(tag+ ' ' + subtag)
        plt.ylabel(y_label)
        plt.xlabel(x_label)
        plt.legend()
        outfile = path.join(self.outdir, '{}.jpeg'.format(tag+' '+subtag))
        plt.savefig(outfile)
        plt.close()


def get_arguments():
    parser = argparse.ArgumentParser()
    a = parser.add_argument
    metrics = ['pre_ttoi', 'map_ttoi', 'recall_ttoi', 'pre_itot', 'map_itot', 'recall_itot']
    metrics_attributes = ','.join(['metrics:{}'.format(m) for m in metrics])
    metrics_dual_attributes = 'metrics:pre_ttoi:recall_ttoi, metrics:pre_itot:recall_itot'
    default_line_tags = ','.join([metrics_attributes, metrics_dual_attributes, 'loss_history'])

    default_bar_tags = 'training_time, params_size, prediction_time'
    # default_bar_tags = 'training_time, params_size, prediction_time, data_stats:train:num_samples, data_stats:train:num_classes'


    a('--filepaths', type=str, 
            default='', 
            help='comma separated relative filepaths for stats files of algorithms')
    a('--bar_tags', type=str, default=default_bar_tags, help='Same as the help for line_tags')
    a('--line_tags', type=str, 
            default=default_line_tags,
            help='comma separated attributes to compare. For subtags following tags, use :\
                \n Supports only 2-level access. Value after 1st colon is treated as subtag1\
                \n For example - metrics:pre_ttoi will plot values of pre_ttoi metric\
                \n If a 2nd colon is used, then it is assigned subtag2\
                \n For example - metrics:pre_ttoi:recall_ttoi plots precision vs recall values\
                \n ,i.e. if 2 subtags are given then they are assumed to share the same parent tag and the graph is plotted as subtag1 vs subtag2\
                \n Note: for some tags, it is advisable to plot for only 1 file. Example: loss_history')

    return parser.parse_args()


if __name__ == '__main__':
    arguments = get_arguments()
    filepaths = arguments.filepaths
    files = filepaths.split(',')
    files = [f.strip() for f in files]

    bar_tags_raw = arguments.bar_tags
    line_tags_raw = arguments.line_tags

    bar_tags = bar_tags_raw.split(',')
    bar_tags = [t.strip() for t in bar_tags]
    bar_tags = [ [val.strip() for val in t.split(':')] for t in bar_tags]

    line_tags = line_tags_raw.split(',')
    line_tags = [t.strip() for t in line_tags]
    line_tags = [ [val.strip() for val in t.split(':')] for t in line_tags]

    comparisions = Comparator(files)
    for bt in bar_tags:
        if len(bt) == 1:
            comparisions.createBarPlot(bt[0])
        elif len(bt) == 2:
            comparisions.createBarPlot(bt[0], bt[1])
        else:
            comparisions.createBarPlot(bt[0], bt[1], bt[2])

    
    for lt in line_tags:
        if len(lt) == 1:
            comparisions.createLinePlot(lt[0])
        elif len(lt) == 2:
            comparisions.createLinePlot(lt[0], lt[1])
        else:
            comparisions.createLinePlot(lt[0], lt[1], lt[2])

    print (bar_tags)
    print (line_tags)

usage: ipykernel_launcher.py [-h] [--filepaths FILEPATHS]
                             [--bar_tags BAR_TAGS] [--line_tags LINE_TAGS]
ipykernel_launcher.py: error: unrecognized arguments: -f /root/.local/share/jupyter/runtime/kernel-d6f77781-4b08-419b-ae2d-7b4bfcfb2a83.json


SystemExit: ignored

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


# Base Model

In [8]:
import os
import json
import tensorflow as tf

class BaseDataIter(object):
    def __init__(self, batch_size):
        self.batch_size = batch_size

    def train_data(self):
        raise NotImplemented

    def test_data(self):
        raise NotImplemented
    

class BaseModelParams(object):
    """
    Base class for model parameters
    Any model that takes parameters should derive this class to provide parameters
    """
    def __init__(self):
        """
        Common parameters
        Derived classes should override these parameters
        """
        # Checkpoint root directory; it may contain multiple directories for
        # different models
        self.checkpoint_dir = None

        # Sample directory
        self.sample_dir = None

        # Log directory
        self.log_dir = None

        # Dataset directory; this is the root directory of all datasets.
        # E.g., if dataset coco is located at /mnt/data/coco, then this
        # value should be /mnt/data
        self.dataset_dir = None

        # Name of the dataset; it should be the same as the directory
        # name containing this dataset.
        # E.g., if dataset coco is located at /mnt/data/coco, then this
        # value should be coco
        self.dataset_name = None

        # Name of this model; it is used as the base name for checkpoint files
        self.model_name = None

        # Name of the directory containing the checkpoint files.
        # This can be the same as the model name; however, it can also be encoded
        # to contain certain details of a particular model.
        # This directory will be a subdirectory under checkpoint directory.
        self.model_dir = None

        # Checkpoint file to load
        self.ckpt_file = None

    def load(self, f):
        """
        Load parameters from specified json file.
        The loaded parameters override those with the same name defined in this subclasses
        :param f:
        :return:
        """
        self.__dict__ = json.load(f)

    def loads(self, s):
        """
        Load parameters from json string
        The loaded parameters override those with the same name defined in this subclasses
        :param s:
        :return:
        """
        self.__dict__ = json.loads(s)

    def update(self):
        """
        Update the params
        :return:
        """
        raise Exception('Not implemented')


class BaseModel(object):
    """
    Base class for models
    """
    def __init__(self, model_params=None):
        """

        """
        self.model_params = model_params
        # self.dataIter = dataIter
        self.saver = None

    def get_checkpoint_dir(self):
        """
        Get the dir for all checkpoints.
        Implemented by the derived classes.
        :return:
        """
        if self.model_params is not None and self.model_params.checkpoint_dir is not None:
            return self.model_params.checkpoint_dir
        else:
            raise Exception('get_checkpoint_dir must be implemented by derived classes')

    def get_model_dir(self):
        """
        Get the model dir for the checkpoint
        :return:
        """
        if self.model_params is not None and self.model_params.model_dir is not None:
            return self.model_params.model_dir
        else:
            raise Exception('get_model_dir must be implemented by derived classes')

    def get_model_name(self):
        """
        Get the base model name.
        Implemented by the derived classes.
        :return:
        """
        if self.model_params is not None and self.model_params.model_name is not None:
            return self.model_params.model_name
        else:
            raise Exception('get_model_name must be implemented by derived classes')

    def get_sample_dir(self):
        """
        Get the dir for samples.
        Implemented by the derived classes.
        :return:
        """
        if self.model_params is not None and self.model_params.sample_dir is not None:
            return self.model_params.sample_dir
        else:
            raise Exception('get_sample_dir must be implemented by derived classes')

    def get_dataset_dir(self):
        """
        Get the dataset dir.
        Implemented by the derived classes.
        :return:
        """
        if self.model_params is not None and self.model_params.dataset_dir is not None:
            return self.model_params.dataset_dir
        else:
            raise Exception('get_dataset_dir must be implemented by derived classes')

    def check_dirs(self):
        if not os.path.exists(self.get_sample_dir()):
            os.mkdir(self.get_sample_dir())

        # sanity check for dataset
        if not os.path.exists(self.get_dataset_dir()):
            raise Exception('Dataset dir %s does not exist' % self.get_dataset_dir())

    def save(self, step, sess):
        checkpoint_dir = os.path.join(self.get_checkpoint_dir(), self.get_model_dir())

        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)

        self.saver.save(sess,
                        os.path.join(checkpoint_dir, self.get_model_name()),
                        global_step=step)

    def load(self, sess):
        """
        Load from a specified directory.
        This is for resuming training from a previous snapshot and is called from train(),
        therefore, a saver is created in train()

        Args:
            sess: tf session
        """
        print(' [*] Reading checkpoints...')

        checkpoint_dir = os.path.join(self.get_checkpoint_dir(), self.get_model_dir())

        ckpt_path = tf.train.latest_checkpoint(checkpoint_dir)
        if ckpt_path is not None:
            self.saver.restore(sess, ckpt_path)
            return True
        else:
            return False

    def load_for_testing(self, ckpt_path, sess):
        """
        Load from specified checkpoint file.
        This is for testing the model, a saver will be created here to restore the variables

        Args:
            ckpt_path: path to the checkpoint file
            sess: tf session
        """
        print(' [*] Reading checkpoints...')

        if not os.path.exists(ckpt_path):
            return False

        self.saver = tf.train.Saver()
        self.saver.restore(sess, ckpt_path)
        return True


# Adversarial class


In [11]:
from __future__ import print_function
import os, time, cPickle
import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
from random import shuffle
import sklearn.preprocessing
# from base_model import BaseModel, BaseModelParams, BaseDataIter
#import utils
# from flip_gradient import flip_gradient

class DataIter(BaseDataIter):
    def __init__(self, batch_size,data):
        BaseDataIter.__init__(self, batch_size)
        self.num_train_batch = 0
        self.num_test_batch = 0
        self.train_img_feats=data.x_train['img_train']
        self.train_txt_vecs=data.x_train['txt_train']
        self.train_labels=data.y_train['train_labels']
        self.test_img_feats=data.x_test['img_test']
        self.test_txt_vecs=data.x_test['txt_test']
        self.test_labels=data.y_test['test_labels']
        self.train_ids=data.y_train['train_ids']
        self.test_ids=data.y_test['test_ids']
        self.train_labels_single=data.y_train['train_labels_single']
        self.test_labels_single=data.y_test['test_labels_single']

        np.random.shuffle(self.train_ids)
        # np.random.shuffle(self.test_ids)          
        self.num_train_batch = len(self.train_ids) / self.batch_size
        self.num_test_batch = len(self.test_ids) / self.batch_size


    def train_data(self):
        for i in range(self.num_train_batch):
            batch_img_ids = self.train_ids[i*self.batch_size : (i+1)*self.batch_size]
            batch_img_feats = [self.train_img_feats[n] for n in batch_img_ids]
            batch_txt_vecs = [self.train_txt_vecs[n] for n in batch_img_ids]
            batch_labels = [self.train_labels[n] for n in batch_img_ids]
            batch_labels_single = np.array([self.train_labels_single[n] for n in batch_img_ids])
            yield batch_img_feats, batch_txt_vecs, batch_labels, batch_labels_single, i

    def test_data(self):
        for i in range(self.num_test_batch):
            batch_img_ids = self.test_ids[i*self.batch_size : (i+1)*self.batch_size]
            batch_img_feats = [self.test_img_feats[n] for n in batch_img_ids]
            batch_txt_vecs = [self.test_txt_vecs[n] for n in batch_img_ids]
            batch_labels = [self.test_labels[n] for n in batch_img_ids]
            batch_labels_single = [self.test_labels_single[n] for n in batch_img_ids]
            yield batch_img_feats, batch_txt_vecs, batch_labels, batch_labels_single, i


class ModelParams(BaseModelParams):
    def __init__(self):
        BaseModelParams.__init__(self)

        self.batch_size = 64
       
        self.visual_feat_dim = 4096
        #self.word_vec_dim = 200
        self.word_vec_dim = 1000
        self.lr_emb = 0.0001
        self.lr_domain = 0.0001
        self.top_k = 50
        self.semantic_emb_dim = 40
        self.dataset_name = 'nuswide'
        self.model_name = 'adv_semantic_zsl'
        self.model_dir = 'adv_semantic_zsl_%d_%d_%d' % (self.visual_feat_dim, self.word_vec_dim, self.semantic_emb_dim)

        self.checkpoint_dir = 'checkpoint'
        self.sample_dir = 'samples'
        self.dataset_dir = './data'
        self.log_dir = 'logs'

    def update(self):
        self.checkpoint_dir = os.path.join(self.checkpoint_dir, self.model_dir)
        self.sample_dir = os.path.join(self.sample_dir, self.model_dir)
        self.log_dir = os.path.join(self.log_dir, self.model_dir)
        self.dataset_dir = os.path.join(self.dataset_dir, self.dataset_name)


class AdvCrossModalSimple(BaseModel):
    def __init__(self, model_params,dataIter):
        BaseModel.__init__(self, model_params) 
        # self.data_iter = DataIter(self.model_params.batch_size)
        self.data_iter =dataIter
        self.visual_feats = tf.placeholder(tf.float32, [None, self.model_params.visual_feat_dim])
        self.word_vecs = tf.placeholder(tf.float32, [None, self.model_params.word_vec_dim])
        self.y = tf.placeholder(tf.int32, [self.model_params.batch_size,10])
        self.y_single = tf.placeholder(tf.int32, [self.model_params.batch_size,1])
        self.l = tf.placeholder(tf.float32, [])
        self.emb_v = self.visual_feature_embed(self.visual_feats)
        self.emb_w = self.label_embed(self.word_vecs)
        #self.corr_loss = tf.sqrt(2 * tf.nn.l2_loss(self.emb_v - self.emb_w))
        #self.corr_loss = tf.reduce_mean(self.corr_loss)
        # dissimilar loss
        emb_v_ = tf.reduce_sum(self.emb_v, axis=1, keep_dims=True)
        emb_w_ = tf.reduce_sum(self.emb_w, axis=1, keep_dims=True)
        distance_map = tf.matmul(emb_v_,tf.ones([1,self.model_params.batch_size])) - tf.matmul(self.emb_v,tf.transpose(self.emb_w))+ \
            tf.matmul(tf.ones([self.model_params.batch_size,1]),tf.transpose(emb_w_))
        mask_initial = tf.to_float(tf.matmul(self.y_single,tf.ones([1,self.model_params.batch_size],dtype=tf.int32)) - \
            tf.matmul(tf.ones([self.model_params.batch_size,1],dtype=tf.int32),tf.transpose(self.y_single)))
        mask = tf.to_float(tf.not_equal(mask_initial, tf.zeros_like(mask_initial)))
        masked_dissimilar_loss = tf.multiply(distance_map,mask)
        self.dissimilar_loss = tf.reduce_mean(tf.maximum(0., 0.1*tf.ones_like(mask)-masked_dissimilar_loss))
        self.similar_loss = tf.sqrt(2 * tf.nn.l2_loss(self.emb_v - self.emb_w))
        self.similar_loss = tf.reduce_mean(self.similar_loss)
        logits_v = self.label_classifier(self.emb_v)
        logits_w = self.label_classifier(self.emb_w, reuse=True)
        self.label_loss = tf.nn.softmax_cross_entropy_with_logits(labels=self.y, logits=logits_v) + \
            tf.nn.softmax_cross_entropy_with_logits(labels=self.y, logits=logits_w)
        self.label_loss = tf.reduce_mean(self.label_loss)
        self.emb_loss = 50*self.label_loss + self.similar_loss + 0.2*self.dissimilar_loss
        self.emb_v_class = self.domain_classifier(self.emb_v, self.l)
        self.emb_w_class = self.domain_classifier(self.emb_w, self.l, reuse=True)

        all_emb_v = tf.concat([tf.ones([self.model_params.batch_size, 1]),
                                   tf.zeros([self.model_params.batch_size, 1])], 1)
        all_emb_w = tf.concat([tf.zeros([self.model_params.batch_size, 1]),
                                   tf.ones([self.model_params.batch_size, 1])], 1)
        self.domain_class_loss = tf.nn.softmax_cross_entropy_with_logits(logits=self.emb_v_class, labels=all_emb_w) + \
            tf.nn.softmax_cross_entropy_with_logits(logits=self.emb_w_class, labels=all_emb_v)
        self.domain_class_loss = tf.reduce_mean(self.domain_class_loss)
        self.t_vars = tf.trainable_variables()
        self.vf_vars = [v for v in self.t_vars if 'vf_' in v.name]
        self.le_vars = [v for v in self.t_vars if 'le_' in v.name]
        self.dc_vars = [v for v in self.t_vars if 'dc_' in v.name]
        self.lc_vars = [v for v in self.t_vars if 'lc_' in v.name]
        

    def visual_feature_embed(self, X, is_training=True, reuse=False):
        with slim.arg_scope([slim.fully_connected], activation_fn=None, reuse=reuse):
            net = tf.nn.tanh(slim.fully_connected(X, 4096, scope='vf_fc_0'))
            net = tf.nn.tanh(slim.fully_connected(net, 1000, scope='vf_fc_1'))
            net = tf.nn.tanh(slim.fully_connected(net, self.model_params.semantic_emb_dim, scope='vf_fc_2'))
        return net

    def label_embed(self, L, is_training=True, reuse=False):
        with slim.arg_scope([slim.fully_connected], activation_fn=None, reuse=reuse):
            net = tf.nn.tanh(slim.fully_connected(L, 1000, scope='le_fc_0'))
            net = tf.nn.tanh(slim.fully_connected(net, 300, scope='le_fc_1'))
            net = tf.nn.tanh(slim.fully_connected(net, self.model_params.semantic_emb_dim, scope='le_fc_2'))
        return net 
    def label_classifier(self, X, reuse=False):
        with slim.arg_scope([slim.fully_connected], activation_fn=None, reuse=reuse):
            net = slim.fully_connected(X, 10, scope='lc_fc_0')
        return net         
    def domain_classifier(self, E, l, is_training=True, reuse=False): 
        with slim.arg_scope([slim.fully_connected], activation_fn=None, reuse=reuse):
            E = flip_gradient(E, l)
            net = slim.fully_connected(E, self.model_params.semantic_emb_dim/2, scope='dc_fc_0')
            net = slim.fully_connected(net, self.model_params.semantic_emb_dim/4, scope='dc_fc_1')
            net = slim.fully_connected(net, 2, scope='dc_fc_2')
        return net

    def train_(self, sess):        
        emb_train_op = tf.train.AdamOptimizer(
            learning_rate=self.model_params.lr_emb,
            beta1=0.5).minimize(self.emb_loss, var_list=self.le_vars+self.vf_vars)
        domain_train_op = tf.train.AdamOptimizer(
            learning_rate=self.model_params.lr_domain,
            beta1=0.5).minimize(self.domain_class_loss, var_list=self.dc_vars)
        tf.initialize_all_variables().run()
        self.saver = tf.train.Saver()
        self.model_params.epoch = 50
        start_time = time.time()
        losses=[]
        for epoch in range(self.model_params.epoch):        
            p = float(epoch) / self.model_params.epoch
            l = 2. / (1. + np.exp(-10. * p)) - 1
            for batch_feat, batch_vec, batch_labels, batch_labels_single, idx in self.data_iter.train_data():
            
                #sess.run([total_train_op], feed_dict={self.visual_feats: batch_feat, self.word_vecs: batch_vec, self.y: b,self.l: l})
                sess.run([emb_train_op, domain_train_op],
                         feed_dict={
                             self.visual_feats: batch_feat,
                             self.word_vecs: batch_vec,
                             self.y: batch_labels,
                             self.y_single: batch_labels_single[:,np.newaxis],
                             self.l: l})
              
                label_loss_val, similar_loss_val, emb_loss_val, domain_loss_val, dissimilar_loss_val = sess.run([self.label_loss, self.similar_loss, self.emb_loss, self.domain_class_loss, self.dissimilar_loss],
                                                         feed_dict={self.visual_feats: batch_feat,
                                                                    self.word_vecs: batch_vec,
                                                                    self.y: batch_labels,
                                                                    self.y_single: batch_labels_single[:,np.newaxis],
                                                                    self.l: l})
                print('Epoch: [%2d][%4d/%4d] time: %4.4f, emb_loss: %.8f, domain_loss: %.8f, label_loss: %.8f, similar_loss: %.8f, disimilar_loss: %.8f' %(
                    epoch, idx, self.data_iter.num_train_batch, time.time() - start_time, emb_loss_val, domain_loss_val, label_loss_val, similar_loss_val, dissimilar_loss_val
                ))
                sum_loss = emb_loss_val + domain_loss_val +label_loss_val +similar_loss_val +dissimilar_loss_val
                losses.append(sum_loss)
        params = {'emb_v':self.emb_v, 'dataIter': self.data_iter, 'emb_w': self.emb_w, 'visual_feats': self.visual_feats, 'word_vecs': self.word_vecs }
        logs = None
        losses=losses
        return params, losses, logs

#     def eval(self, sess):
#         start = time.time()

#         test_img_feats_trans = []
#         test_txt_vecs_trans = []
#         test_labels = []
#         for feats, vecs, _, labels, i in self.data_iter.test_data():
#             feats_trans = sess.run(self.emb_v, feed_dict={self.visual_feats: feats})
#             vecs_trans = sess.run(self.emb_w, feed_dict={self.word_vecs: vecs})
#             #print("{0}".format(np.shape(labels)))            
#             test_labels += list(labels)
#             for ii in range(len(feats)):
#                 test_img_feats_trans.append(feats_trans[ii])
#                 test_txt_vecs_trans.append(vecs_trans[ii])
#         test_img_feats_trans = np.asarray(test_img_feats_trans)
#         test_txt_vecs_trans = np.asarray(test_txt_vecs_trans)
#         #print("{0}".format(np.shape(test_labels)))
#         print('[Eval] transformed test features in %4.4f' % (time.time() - start))

#         avg_precs = []
#         all_precs = []
#         all_k = [1984]
#         for k in all_k: 
#             for i in range(len(test_txt_vecs_trans)):
#                 query_label = test_labels[i]

#                 # distances and sort by distances
#                 wv = test_txt_vecs_trans[i]
#                 diffs = test_img_feats_trans - wv
#                 dists = np.linalg.norm(diffs, axis=1)
#                 sorted_idx = np.argsort(dists)
# # ------------------------------------------------------------------------------------
#                 #for each k do top-k
#                 precs = []
#                 for topk in range(1, k + 1):
#                     hits = 0
#                     top_k = sorted_idx[0 : topk]                    
#                     if query_label != test_labels[top_k[-1]]:
#                         continue
#                     for ii in top_k:
#                         retrieved_label = test_labels[ii]
#                         if retrieved_label == query_label:
#                             hits += 1
#                     precs.append(float(hits) / float(topk))
#                 if len(precs) == 0:
#                     precs.append(0)
#                 avg_precs.append(np.average(precs))
#             mean_avg_prec = np.mean(avg_precs)
#             all_precs.append(mean_avg_prec)
#         print('[Eval - txt2img] mAP: %f in %4.4fs' % (all_precs[0], 0))

#         avg_precs = []
#         all_precs = []
#         all_k = [1984]
#         for k in all_k:        
#             for i in range(len(test_img_feats_trans)):
#                 query_img_feat = test_img_feats_trans[i]
#                 ground_truth_label = test_labels[i]

#                 # calculate distance and sort
#                 diffs = test_txt_vecs_trans - query_img_feat
#                 dists = np.linalg.norm(diffs, axis=1)
#                 sorted_idx = np.argsort(dists)
# # ------------------------------------------------------------------------
#                 # for each k in top-k
#                 precs = []
#                 for topk in range(1, k + 1):
#                     hits = 0
#                     top_k = sorted_idx[0 : topk]
#                     if ground_truth_label != test_labels[top_k[-1]]:
#                         continue
#                     for ii in top_k:
#                         retrieved_label = test_labels[ii]
#                         if ground_truth_label == retrieved_label:
#                             hits += 1
#                     precs.append(float(hits) / float(topk))
#                 if len(precs) == 0:
#                     precs.append(0)
#                 avg_precs.append(np.average(precs))
#             mean_avg_prec = np.mean(avg_precs)
#             all_precs.append(mean_avg_prec)           
#         print('[Eval - img2txt] mAP: %f in %4.4fs' % (all_precs[0], 0))


W0520 11:29:47.563647 140423871608704 lazy_loader.py:50] 
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [12]:
import pickle
import numpy as np

def data_loader(dirpath, tag):
  if tag=='train':
    with open(dirpath + 'img_train_id_feats.pkl') as f:
        train_img_feats = pickle.load(f)
    with open(dirpath + 'train_id_bow.pkl', 'rb') as f:
        train_txt_vecs = pickle.load(f)
    with open(dirpath + 'train_id_label_map.pkl', 'rb') as f:
        train_labels = pickle.load(f)
    with open(dirpath + 'train_ids.pkl', 'rb') as f:
        train_ids = pickle.load(f)
    with open(dirpath + 'train_id_label_single.pkl', 'rb') as f:
        train_labels_single = pickle.load(f)
    # print("Train data Loaded")
    # print(len(train_img_feats))
    # print(len(train_img_feats),len(train_txt_vecs),len(train_labels),len(train_labels_single),len(train_ids))
    return {'img_train': train_img_feats, 'txt_train': train_txt_vecs}, {'train_labels': train_labels,
                                                                                 'train_labels_single': train_labels_single,
                                                                                 'train_ids': train_ids}
    
  if tag =='test':
    with open(dirpath + 'img_test_id_feats.pkl', 'rb') as f:
        test_img_feats = pickle.load(f)
    with open(dirpath + 'test_id_bow.pkl', 'rb') as f:
        test_txt_vecs = pickle.load(f)
    with open(dirpath + 'test_id_label_map.pkl', 'rb') as f:
        test_labels = pickle.load(f)
    with open(dirpath + 'test_ids.pkl', 'rb') as f:
        test_ids = pickle.load(f)      
    with open(dirpath + 'test_id_label_single.pkl', 'rb') as f:
        test_labels_single = pickle.load(f)
    # print("Test data Loaded")
    # print(type(test_labels_single))
    # print(len(test_img_feats),len(test_txt_vecs), len(test_labels), len(test_ids),len(test_labels_single))
    return {'img_test': test_img_feats, 'txt_test': test_txt_vecs}, {'test_labels': test_labels,
                                                                               'test_labels_single': test_labels_single,
                                                                               'test_ids': test_ids}



In [13]:
def predict_(dataset_obj, params, tag,sess):  
  if tag =='test':
    data_iter = params['dataIter']
    emb_v = params['emb_v']
    emb_w = params['emb_w']
    # sess = params['sess']
    visual_feats = params['visual_feats']
    word_vecs = params['word_vecs']
    test_img_feats_trans = []
    test_txt_vecs_trans = []
    test_labels = []
    for feats, vecs, _, labels, i in data_iter.test_data():
        feats_trans = sess.run(emb_v, feed_dict={visual_feats: feats})
        vecs_trans = sess.run(emb_w, feed_dict={word_vecs: vecs})
        #print("{0}".format(np.shape(labels)))            
        test_labels += list(labels)
        for ii in range(len(feats)):
            test_img_feats_trans.append(feats_trans[ii])
            test_txt_vecs_trans.append(vecs_trans[ii])
    test_img_feats_trans = np.asarray(test_img_feats_trans)
    test_txt_vecs_trans = np.asarray(test_txt_vecs_trans)
    #print("{0}".format(np.shape(test_labels)))
    # print('[Eval] transformed test features in %4.4f')

    retrieval_t_i = []
    number_of_queries = len(test_txt_vecs_trans)
    for i in range(len(test_txt_vecs_trans)):
        query_label = test_labels[i]
        # distances and sort by distances
        wv = test_txt_vecs_trans[i]
        diffs = test_img_feats_trans - wv
        dists = np.linalg.norm(diffs, axis=1)
        sorted_idx = np.argsort(dists)
        retrieval_t_i.append(sorted_idx)

    retrieval_i_t = []
    for i in range(len(test_img_feats_trans)):
        query_img_feat = test_img_feats_trans[i]
        ground_truth_label = test_labels[i]

        # calculate distance and sort
        diffs = test_txt_vecs_trans - query_img_feat
        dists = np.linalg.norm(diffs, axis=1)
        sorted_idx = np.argsort(dists)
        retrieval_i_t.append(sorted_idx)
    # print(number_of_queries)
    # print(np.array(retrieval_i_t).shape)
    # print(np.array(retrieval_t_i).shape)
    retrieval = {'itot_ranked_results':np.array(retrieval_i_t), 'ttoi_ranked_results':np.array(retrieval_t_i), 'number_of_queries': number_of_queries }
  return number_of_queries, retrieval, None

# Final

In [9]:
def dummy_predict(dataset_obj, params, tag):
  n_samples = params['number_of_queries']
  results = {'itot_ranked_results':params['itot_ranked_results'].copy(), 'ttoi_ranked_results':params['ttoi_ranked_results'].copy()}
  return n_samples, results, None

In [15]:
import tensorflow as tf
# from models.adv_crossmodal_simple_nuswide import AdvCrossModalSimple, ModelParams, DataIter
# from Framework.dataset import Dataset
# from Framework.model import Parameters, Model
def train(dataset_obj, parameters, hyperparams):
    
    print("Inside Model -- train ( basically main of ACMR)")
    graph = tf.Graph()
    model_params = hyperparams
    model_params.update()
    with graph.as_default():     
        dataIter = DataIter(hyperparams.batch_size,dataset_obj)   
        model = AdvCrossModalSimple(model_params,dataIter)
    with tf.Session(graph=graph) as sess:
      print("Train of Adversarial class")
      parameters_, losses, logs = model.train_(sess)      
      # model.eval(sess)
      number_of_queries, retrieval, logs =predict_(dataset_obj, parameters_, 'test',sess)
      return retrieval, losses, logs

def main(_):  
    nuswide_filepath_train = "./data/data/nuswide_train/"
    nuswide_filepath_valid ="."
    nuswide_filepath_test="./data/data/nuswide_test/"
    data = Dataset((nuswide_filepath_train, nuswide_filepath_valid, nuswide_filepath_test), data_loader,
                   read_directories=(True, False, True))
    data.load_data()
    print("Data loaded successfully")    
    hyperparams = ModelParams()
    model = Model(train,hyperparams,data,prediction_function=dummy_predict)
    model.train_model()
    model.predict('test')
    label_ =data.y_test['test_labels']  
    mat_label=[]  
    for key in label_:
      mat_label.append(label_[key])
    y_mat_label = np.array(mat_label)[0:1984, :]
    model.evaluate(y_mat_label,y_mat_label)
    model.save_stats("acmr_nuswide_stats_p2.npy")


if __name__ == '__main__':
    tf.app.run()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch: [10][   6/ 125] time: 29.4188, emb_loss: 33.34014893, domain_loss: 1.42227900, label_loss: 0.35352629, similar_loss: 15.51534271, disimilar_loss: 0.74245721
Epoch: [10][   7/ 125] time: 29.4484, emb_loss: 45.46492767, domain_loss: 1.39768112, label_loss: 0.57784474, similar_loss: 16.24436378, disimilar_loss: 1.64163232
Epoch: [10][   8/ 125] time: 29.4707, emb_loss: 40.43709564, domain_loss: 1.39992154, label_loss: 0.47745579, similar_loss: 16.22308159, disimilar_loss: 1.70613253
Epoch: [10][   9/ 125] time: 29.4935, emb_loss: 45.45787048, domain_loss: 1.37891793, label_loss: 0.56497204, similar_loss: 16.77066612, disimilar_loss: 2.19300556
Epoch: [10][  10/ 125] time: 29.5172, emb_loss: 40.71911621, domain_loss: 1.38624549, label_loss: 0.46945953, similar_loss: 16.89020538, disimilar_loss: 1.77966237
Epoch: [10][  11/ 125] time: 29.5413, emb_loss: 63.77724838, domain_loss: 1.46881986, label_loss: 0.79279852, simil

SystemExit: ignored