SEE README.pdf FOR DESCRIPTION

In [1]:
!pip install git+https://github.com/deepmind/PGMax.git

Collecting git+https://github.com/deepmind/PGMax.git
  Cloning https://github.com/deepmind/PGMax.git to /tmp/pip-req-build-5msqg84g
  Running command git clone --filter=blob:none --quiet https://github.com/deepmind/PGMax.git /tmp/pip-req-build-5msqg84g
  Resolved https://github.com/deepmind/PGMax.git to commit 054efda42cbb199c064f41dd444c845f80cc5987
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting jupyter>=1.0.0 (from pgmax==0.6.1)
  Downloading jupyter-1.0.0-py2.py3-none-any.whl (2.7 kB)
Collecting jupytext>=1.11.3 (from pgmax==0.6.1)
  Downloading jupytext-1.15.1-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m301.6/301.6 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
Collecting pytest-env>=0.6.2 (from pgmax==0.6.1)
  Downloading pytest_env-1.0.1-py3-none-any.whl (5.3 kB)
Collecting pre-commit>=2.13.0 (from pgmax==0.6.1)
  Downloading pre_commit-3.3.3-py2.py3-none-any.whl (202 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import functools
import glob
import itertools
import json
import math
import os
import pickle as pkl
import random
import time
from os import listdir

import cv2
import IPython.display
import jax
import matplotlib.pyplot as plt
import numpy as np
import numpy.matlib
import pandas as pd
import seaborn as sns
from matplotlib.patches import Rectangle
from pgmax import fgraph, fgroup, infer, vgroup, factor
from scipy.special import softmax
from skimage import io
from tqdm import tqdm

In [4]:
# Make Google Drive visible to this Colab runtime; the dataset paths used
# further down live underneath this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)


Mounted at /content/drive


In [5]:
# UTILS

# index by scene
def get_cooccurrence_mat(dataset,data_scene):
    """Collect, per image, the object classes seen in one scene category.

    The scene category is the last entry of
    ``data_scene.loc['scene']['annotation']``; every image in ``dataset`` whose
    ``'scene'`` label equals it is scanned.

    Despite its name this function does not return a co-occurrence matrix. The
    pairwise counts it used to accumulate were never returned or read, so they
    are no longer computed; the per-image object lists it returns carry the
    same information.

    Args:
        dataset: mapping with the keys ``'scene'`` (one label per image),
            ``'objectPresence'`` (2-D array indexed ``[class, image]``) and
            ``'objectnames'`` (one name per class).
        data_scene: DataFrame whose ``.loc['scene']['annotation']`` is an
            indexable sequence of scene labels.

    Returns:
        classes: sorted unique class indices found in the matching images.
        objs_by_img: DataFrame with one row per matching image and a single
            ``'objs'`` column holding that image's array of class indices.
        nfiles: number of matching images.
        class_labels: DataFrame (column ``'objs'``) with the names of
            ``classes``, indexed by class index.
    """
    scene_annotation = data_scene.loc['scene']['annotation']
    scene = scene_annotation[len(scene_annotation) - 1]
    idx_scene = np.where(np.asarray(dataset['scene']) == scene)[0]
    nfiles = len(idx_scene)

    presence = dataset['objectPresence']

    # A 1-D object array keeps each image's class list as a single cell, which
    # avoids growing the DataFrame one row at a time.
    per_image = np.empty(nfiles, dtype=object)
    obj_classes = []

    for row, img_idx in enumerate(idx_scene):
        # NOTE(review): the ADE20K index describes objectPresence as per-image
        # instance counts; if that holds here, `== 1` skips classes that occur
        # more than once in an image -- confirm whether `> 0` was intended.
        objs = np.where(presence[:, img_idx] == 1)[0]
        per_image[row] = objs
        obj_classes.extend(objs)

    objs_by_img = pd.DataFrame({'objs': per_image})

    classes = np.unique(np.asarray(obj_classes))
    # Names come from the `dataset` argument rather than a notebook-level global.
    class_labels = pd.DataFrame(dataset['objectnames'],columns=['objs']).loc[classes]

    return classes, objs_by_img, nfiles, class_labels

# phi based on Kollar & Roy 2009
def compatibility(edge,objs_by_img,n,classes=None):
    """Log joint presence/absence frequencies for a pair of object classes.

    Args:
        edge: pair of positions into ``classes`` identifying the two objects.
        objs_by_img: DataFrame with an ``'objs'`` column; each cell holds the
            class indices present in one image.
        n: total number of images used as the denominator.
        classes: sequence mapping node position to class index. When omitted,
            the notebook-level ``classes`` variable is used, which is what
            existing three-argument callers rely on.

    Returns:
        A 2x2 array ``phi`` where ``phi[a, b]`` is the log of the fraction of
        images in which the first object has state ``a`` and the second has
        state ``b`` (0 = absent, 1 = present). Combinations that never occur
        yield ``-inf`` and NumPy emits a divide-by-zero RuntimeWarning.

    Raises:
        NameError: if ``classes`` is omitted and no notebook-level ``classes``
            exists.
    """
    if classes is None:
        try:
            classes = globals()['classes']
        except KeyError:
            raise NameError("name 'classes' is not defined; pass classes=... explicitly") from None

    first_class = classes[edge[0]]
    second_class = classes[edge[1]]

    # row labels of the images that contain each object
    n1 = (objs_by_img[objs_by_img.objs.apply(lambda row: first_class in row)]).index.values
    n2 = (objs_by_img[objs_by_img.objs.apply(lambda row: second_class in row)]).index.values

    tt = len(np.intersect1d(n1,n2))   # both present
    tf = len(n1) - tt                 # first present, second absent
    ft = len(n2) - tt                 # first absent, second present
    ff = n - tt - tf - ft             # both absent

    phi = np.reshape(np.log(np.divide([ff, ft, tf, tt],n)),(2,2))

    return phi

# create graph
def create_graph(classes, objs_by_img, nfiles):
    """Build a fully connected pairwise factor graph over the object classes.

    Each class becomes one two-state variable, and every unordered pair of
    classes is joined by a pairwise factor whose 2x2 log-potential table comes
    from ``compatibility``.

    Args:
        classes: sequence of object classes; only its length is used here.
        objs_by_img: per-image object lists, forwarded to ``compatibility``.
        nfiles: number of images, forwarded to ``compatibility``.

    Returns:
        (fg, nodes): the factor graph and its variable array.
    """
    object_ids = np.arange(len(classes))

    # one two-state variable per object class
    nodes = vgroup.NDVarArray(num_states=2, shape=object_ids.shape)
    fg = fgraph.FactorGraph(variable_groups=nodes)

    # every unordered pair of classes gets its own table of log potentials
    edges = list(itertools.combinations(object_ids, r=2))
    log_potential_matrix = np.zeros((len(edges), 2, 2))
    variables_for_factors = []

    for k, edge in enumerate(edges):
        log_potential_matrix[k, :, :] = compatibility(edge, objs_by_img, nfiles)
        variables_for_factors.append([nodes[edge[0]], nodes[edge[1]]])

    # register all pairwise factors with the graph in one group
    fg.add_factors(
        fgroup.PairwiseFactorGroup(
            variables_for_factors=variables_for_factors,
            log_potential_matrix=log_potential_matrix,
        )
    )

    return fg, nodes

def set_evidence(objects,n_states=2,certainty=10**4.05,present=[],absent=[]):
    """Build the evidence array for a set of known-present / known-absent objects.

    Args:
        objects: DataFrame with an ``'objs'`` column of object names; row order
            defines the row order of the result.
        n_states: number of columns in the evidence array.
        certainty: value written for every piece of evidence.
        present: names of objects known to be present (column 1 is set).
        absent: names of objects known to be absent (column 0 is set).

    Returns:
        Array of shape ``(len(objects), n_states)`` that is zero everywhere
        except at the entries selected by ``present`` and ``absent``.
    """
    lookup = objects.reset_index()
    evidence = np.zeros((objects.shape[0], n_states))

    def _mark(names, state_column):
        # write `certainty` into `state_column` for every row whose name matches
        for name in names:
            hits = lookup.index[lookup['objs'] == name].tolist()
            evidence[hits, state_column] = certainty

    if present:
        _mark(present, 1)

    if absent:
        _mark(absent, 0)

    return evidence

def certainty(fixation,coords,sigma):
    """Weight for evidence at ``coords`` given the current ``fixation``.

    The weight is ``exp(-(d / sigma) ** 2)`` where ``d`` is the Euclidean
    distance between the two points, so it equals 1 at the fixated location and
    decreases with distance.
    """
    distance = math.dist(coords, fixation)
    scaled_sq = (distance / sigma) ** 2

    return np.exp(-np.sum(scaled_sq, axis=0))

def get_centroids(data_scene, class_labels):
    """Compute the polygon centroid of every annotated object of a known class.

    Args:
        data_scene: DataFrame whose ``.loc['object']['annotation']`` is a
            sequence of dicts with a ``'name'`` and a ``'polygon'`` holding
            ``'x'`` and ``'y'`` coordinate lists.
        class_labels: DataFrame with an ``'objs'`` column of accepted names.

    Returns:
        centroid: list of length-2 arrays, the mean ``(x, y)`` of each kept
            polygon's vertices (vertices are cast to int32 before averaging).
        labels2D: list of the corresponding object names, in the same order.
    """
    annotations = data_scene.loc['object']['annotation']
    # built once instead of once per annotation
    known_names = set(class_labels['objs'])

    centroid = []
    labels2D = []

    for i in range(0, len(annotations)):
        entry = annotations[i]

        if entry['name'] not in known_names:
            continue

        x = entry['polygon']['x']
        y = entry['polygon']['y']

        # skip annotations that carry no polygon vertices
        if (x) and (y):
            pts2 = np.transpose(np.array([x,y], np.int32))
            centroid.append(np.mean(pts2,axis=0))
            labels2D.append(entry['name'])

    return centroid, labels2D

# create the latent scene (ground-truth object presence per patch) and the observed scene (per-patch likelihoods and marginals)
def create_scene(image,pixels_in_patch,centroid,labels2D,n_states=2,class_labels=None):
    """Divide an image into square patches and record which objects fall in each.

    Args:
        image: array whose first two dimensions are (height, width).
        pixels_in_patch: side length of a patch in pixels. The last row and
            column of patches are extended to the image border.
        centroid: sequence of ``(x, y)`` object centroids; may be empty.
        labels2D: object name for each centroid, in the same order.
        n_states: number of states per object in the stored marginals.
        class_labels: sized collection with one entry per object class; only
            its length is used. When omitted, the notebook-level
            ``class_labels`` variable is used, which is what existing callers
            rely on.

    Returns:
        latent_scene: DataFrame (rows x cols); each cell is a dict mapping every
            object name in ``labels2D`` to 1.0 if one of its centroids lies in
            the patch and 0.0 otherwise. Patch bounds are inclusive on both
            sides, so a centroid exactly on a shared edge counts for both
            neighbouring patches.
        observed_scene: DataFrame of the same shape; each cell is a dict with
            ``'lkhd'`` set to 0 and ``'margs'`` set to a zero array of shape
            ``(len(class_labels), n_states)``.
        objs: sorted unique object names from ``labels2D``.

    Raises:
        NameError: if ``class_labels`` is omitted and no notebook-level
            ``class_labels`` exists.
    """
    if class_labels is None:
        try:
            class_labels = globals()['class_labels']
        except KeyError:
            raise NameError("name 'class_labels' is not defined; pass class_labels=... explicitly") from None

    rows = int(image.shape[0]/pixels_in_patch)
    cols = int(image.shape[1]/pixels_in_patch)

    objs  = list(np.unique(labels2D))
    # reshape so that an empty centroid list still supports column indexing
    centroid = np.asarray(centroid, dtype=float).reshape(-1, 2)

    latent_scene = pd.DataFrame(index=range(rows),columns=range(cols))

    # set up latent (ground truth) scene
    for i in range(rows):
        for j in range(cols):

            # pixel bounds of this patch
            xlim = [j*pixels_in_patch,j*pixels_in_patch+pixels_in_patch]
            ylim = [i*pixels_in_patch,i*pixels_in_patch+pixels_in_patch]

            # the last row/column absorbs the pixels left over by the integer division
            if i == rows-1:
                ylim = [i*pixels_in_patch,image.shape[0]]
            if j == cols-1:
                xlim = [j*pixels_in_patch,image.shape[1]]

            idx = np.where((centroid[:,0] >= xlim[0]) & (centroid[:,0] <= xlim[1]) & (centroid[:,1] >= ylim[0]) & (centroid[:,1] <= ylim[1]))[0]
            values = np.zeros(len(objs))

            for hit in idx:
                values[objs.index(labels2D[hit])] = 1

            latent_scene.at[i,j] = dict(zip(objs, values))

    # create dataframe of dicts which will hold the observed likelihoods and marginals for objects in each patch
    observed_scene = pd.DataFrame(index=range(rows),columns=range(cols))

    for i in range(observed_scene.shape[0]):
        for j in range(observed_scene.shape[1]):
            observed_scene.at[i,j] = {'lkhd': 0, 'margs': np.zeros((len(class_labels),n_states))}

    return latent_scene, observed_scene, objs

# get baseline mrf for given set of objects and remove those objects from target list
def choose_target(fg,class_labels,nodes,objs):
    """Split the scene's objects into baseline objects and candidate targets.

    Belief propagation is run on ``fg`` without any evidence. Objects whose
    decoded MAP state is 1 are the baseline objects; every entry of ``objs``
    that is not a baseline object remains a possible search target.

    Args:
        fg: factor graph to run inference on.
        class_labels: DataFrame with an ``'objs'`` column, one row per node.
        nodes: variable array of ``fg``, used as key into the decoded states.
        objs: object names present in the scene.

    Returns:
        (baseline_objs, target_options): two lists of object names.
    """
    inferer = infer.build_inferer(fg.bp_state, backend="bp")
    final_arrays = inferer.run(
        inferer.init(),
        num_iters=1000,
        damping=0.5,
        temperature=0.0)

    # MAP state of every node when nothing has been observed
    map_states = infer.decode_map_states(inferer.get_beliefs(final_arrays))

    # objects that are on in the unconditioned MAP assignment
    on_positions = np.where(map_states[nodes] == 1)[0]
    baseline_objs = list(class_labels['objs'].iloc[on_positions])

    target_options = []
    for name in objs:
        if name not in baseline_objs:
            target_options.append(name)

    return baseline_objs, target_options

# simulate a visual search for a randomly chosen target and return the fixation sequence
def get_fixation_seq(latent_scene,observed_scene,nodes,fg,target_options,max_fixations=20,max_certainty=10**4.05,sigma=2,class_labels=None,n_states=2):
    """Simulate a visual search for one randomly chosen target object.

    Starting at the central patch, each step:
      1. runs belief propagation with full-certainty evidence for the objects
         in the fixated patch and decodes the MAP states;
      2. stops if the target's MAP state is 1 or the fixation budget is spent;
      3. otherwise refreshes the stored marginals of every patch whose
         distance-based likelihood (see ``certainty``) increased, and moves to
         the not-yet-fixated patch with the highest target marginal.

    Args:
        latent_scene: DataFrame of dicts mapping object name -> 0/1 per patch.
        observed_scene: DataFrame of dicts with ``'lkhd'`` and ``'margs'``.
            The dicts are updated in place, so pass a fresh frame for each
            independent search.
        nodes: variable array of ``fg``.
        fg: factor graph used for inference.
        target_options: non-empty sequence of candidate target names.
        max_fixations: maximum number of fixations.
        max_certainty: evidence strength at the fixated patch.
        sigma: width of the likelihood fall-off around the fixation.
        class_labels: DataFrame with an ``'objs'`` column, one row per node.
            When omitted, the notebook-level ``class_labels`` variable is used,
            which is what existing callers rely on.
        n_states: number of states per node passed to ``set_evidence``.

    Returns:
        (fixation_seq, target): the visited ``(row, col)`` patches in order and
        the name of the target that was searched for.

    Raises:
        NameError: if ``class_labels`` is omitted and not defined globally.
        ValueError: if ``target_options`` is empty or the chosen target is not
            listed in ``class_labels``.
    """
    if class_labels is None:
        try:
            class_labels = globals()['class_labels']
        except KeyError:
            raise NameError("name 'class_labels' is not defined; pass class_labels=... explicitly") from None

    if len(target_options) == 0:
        raise ValueError('target_options is empty: there is no object to search for.')
    target = target_options[np.random.randint(len(target_options))]

    # row position of the target among the graph nodes
    target_rows = np.where(class_labels == target)[0]
    if len(target_rows) == 0:
        raise ValueError('target {!r} is not listed in class_labels.'.format(target))
    target_idx = target_rows[0]

    rows = latent_scene.shape[0]
    cols = latent_scene.shape[1]

    # center first fixation
    fixation = np.asarray([int(np.median(range(rows))), int(np.median(range(cols)))])
    fixation_seq = []

    # The inferer depends only on the graph, so it is built once and reused for
    # every evidence setting instead of being rebuilt for each patch.
    bp = infer.build_inferer(fg.bp_state, backend="bp")

    def _beliefs_given(present, strength):
        # run loopy BP with evidence that the objects in `present` are there
        evidence = set_evidence(objects=class_labels,n_states=n_states,certainty=strength,present=present)
        bp_arrays = bp.run(
            bp.init(evidence_updates={nodes: evidence}),
            num_iters=1000,
            damping=0.5,
            temperature=0.0)
        return bp.get_beliefs(bp_arrays)

    step = 0
    target_found = False

    while (step < max_fixations) and not target_found:
        fixation_seq.append(fixation)
        keys = [k for k, v in latent_scene.at[fixation[0],fixation[1]].items() if v == 1]

        # run lbp at the fixation point and check the MAP state of the target
        map_states = infer.decode_map_states(_beliefs_given(keys, max_certainty))
        target_found = bool(map_states[nodes][target_idx])
        budget_spent = step == (max_fixations-1)

        if budget_spent or target_found:
            if budget_spent:
                print('you have reached the fixation limit.')
            if target_found:
                # if map state of target is equal to 1 then the target is found and search
                # can terminate
                print('target found! terminate search.')
            break

        # refresh the marginals of every patch seen more clearly than before
        for i in range(rows):
            for j in range(cols):
                current_lkhd = certainty(fixation,tuple([i,j]),sigma)
                cell = observed_scene.at[i,j]

                # if current likelihood is greater than the previous, recalculate marginals
                if current_lkhd > cell['lkhd']:
                    cell['lkhd'] = current_lkhd
                    keys = [k for k, v in latent_scene.at[i,j].items() if v == 1]

                    beliefs = _beliefs_given(keys, max_certainty*current_lkhd)
                    cell['margs'] = infer.get_marginals(beliefs)[nodes]

        # decide next fixation based on highest marginals for target with inhibition
        # of return: a likelihood of 1 marks a patch that has already been fixated.
        # This runs once, after all patches have been refreshed.
        next_fixation = []
        target_margs = []

        for i in range(rows):
            for j in range(cols):
                if observed_scene.at[i,j]['lkhd'] < 1:
                    next_fixation.append((i,j))
                    target_margs.append(observed_scene.at[i,j]['margs'][target_idx][1])

        if not next_fixation:
            # nothing left to look at, so stop instead of failing in argmax
            print('every patch has been fixated without finding the target.')
            break

        fixation = next_fixation[np.argmax(target_margs)]

        step += 1

    return fixation_seq, target

def visualize_search(image,pixels_in_patch,fixation_seq,target,location='/content/drive/My Drive/Colab Notebooks/images/',filename='test.png'):
    """Draw the patch grid and the fixation sequence on top of the image.

    Each fixated patch is shaded with a 'plasma' colour that encodes its
    position in ``fixation_seq``; the colourbar maps those colours back to the
    fixation number. The figure is written to ``location + filename`` and then
    shown.

    Args:
        image: array whose first two dimensions are (height, width).
        pixels_in_patch: side length of a patch in pixels.
        fixation_seq: sequence of ``(row, col)`` patch coordinates.
        target: name of the searched object, used in the title.
        location: output directory prefix; joined to ``filename`` by plain
            concatenation, so it must end with a path separator.
        filename: output file name.
    """
    cmap = plt.colormaps['plasma']
    plt.imshow(image, cmap=cmap)
    ax = plt.gca()

    rows = int(image.shape[0]/pixels_in_patch)
    cols = int(image.shape[1]/pixels_in_patch)

    # set xticks & yticks & title
    plt.title('target object: ' + target)
    plt.xticks(ticks=np.arange(0, image.shape[1], pixels_in_patch)[0:cols]+pixels_in_patch/2,labels=list(range(cols)),ha='center')
    plt.yticks(ticks=np.arange(0, image.shape[0], pixels_in_patch)[0:rows]+pixels_in_patch/2,labels=list(range(rows)),ha='center')

    # draw grid
    for loc in range(0, image.shape[1], pixels_in_patch)[0:cols]:
        plt.axvline(loc, color='black', linestyle='-', linewidth=1)

    for loc in range(0, image.shape[0], pixels_in_patch)[0:rows]:
        plt.axhline(loc, color='black', linestyle='-', linewidth=1)

    n_fixations = len(fixation_seq)
    cs = cmap(np.linspace(0, 1, n_fixations))

    for f in range(n_fixations):
        # fixations are (row, col); Rectangle expects the (x, y) corner, hence the flip
        ax.add_patch(Rectangle(np.flip(np.array(fixation_seq[f])*pixels_in_patch),pixels_in_patch,pixels_in_patch,color=cs[f,0:3], alpha=0.5))

    # Colourbar over fixation order itself, using the same colours as the
    # rectangles, so the legend does not depend on the image's value range.
    order_norm = plt.Normalize(vmin=0, vmax=max(n_fixations - 1, 1))
    order_scale = plt.cm.ScalarMappable(norm=order_norm, cmap=cmap)
    plt.colorbar(order_scale, ax=ax, ticks=list(range(n_fixations)), label='fixation')

    plt.savefig(location + filename)

    plt.show()

    return

In [6]:
# Load index with global information about ADE20K
DATASET_PATH = '/content/drive/My Drive/Colab Notebooks'
index_file = 'index_ade20k.pkl'
# NOTE: unpickling runs code embedded in the file, so only load an index file
# obtained from a trusted source.
with open('{}/{}'.format(DATASET_PATH, index_file), 'rb') as f:
    index_ade20k = pkl.load(f)

# choose scene
# Annotation and image files are looked up in the same directory as the index
# and are later paired by their position in these two sorted lists.
jsons = sorted(glob.glob('{}/*.json'.format(DATASET_PATH)))
jpgs = sorted(glob.glob('{}/*.jpg'.format(DATASET_PATH)))


In [11]:
# create cooccurrence matrix from online images (by scene)
# The names assigned in this cell are notebook-level variables; helper functions
# defined earlier may look some of them up as globals (e.g. `classes`,
# `class_labels`, `n_states`), so they should not be renamed here.
n_states = 2
pixels_in_patch = 80
result = np.zeros(len(jsons))

# position of the scene to analyse within the sorted `jsons` / `jpgs` lists
scene_idx = 0

# annotation file and image of the selected scene
data_scene = pd.read_json(jsons[scene_idx])
image = io.imread(jpgs[scene_idx])

# per-image object lists for all indexed images that share this scene's label
classes, objs_by_img, nfiles, class_labels = get_cooccurrence_mat(index_ade20k, data_scene)

# pairwise factor graph over the object classes found above
fg, nodes = create_graph(classes, objs_by_img, nfiles)

# centroids and names of the annotated objects in the selected image
centroid, labels2D = get_centroids(data_scene, class_labels)

# patch grid: ground-truth object presence plus an empty observation state
latent_scene, observed_scene, objs = create_scene(image, pixels_in_patch, centroid, labels2D)

# objects predicted without any evidence, and the remaining candidate targets
baseline_objs, target_options = choose_target(fg, class_labels, nodes, objs)



  phi = np.reshape(np.log(np.divide([ff, ft, tf, tt],n)),(2,2))


In [None]:
trials = 1

# File name of the scene image, reused in every output name. Taking the base
# name works for any file-name length, unlike a fixed-width slice of the path.
image_name = os.path.basename(jpgs[scene_idx])

for t in range(trials):

    # get_fixation_seq updates the dicts stored in the observed scene in place,
    # so each trial starts from a freshly initialised observation state.
    _, trial_observed, _ = create_scene(image, pixels_in_patch, centroid, labels2D)

    fixation_seq, target = get_fixation_seq(latent_scene, trial_observed, nodes, fg, target_options, sigma=2)

    last_fix = fixation_seq[-1]

    # mark the output as correct when the target really is in the last fixated patch
    if latent_scene.at[last_fix[0],last_fix[1]][target] == 1:
        filename = str(t) + '_correct_' + image_name
    else:
        filename = str(t) + '_' + image_name

    visualize_search(image, pixels_in_patch, fixation_seq, target, filename=filename)
