In [1]:
%matplotlib inline
import os
import numpy as np
import agent
import env
import skimage
import skimage.io
import skimage.transform
from matplotlib import pyplot as plt
import random
import tensorflow as tf
import sys
sys.path.append('tensorflow-vgg/')
import vgg16
from collections import deque, namedtuple

# Root folder of the image data; it is handed to env.Environment when the game is created.
# NOTE(review): hard-coded absolute path -- only valid on the machine this was written on.
data_dir = '/home/vagrant/ocm/language-learning/data'

# Make the code folder the working directory for the rest of the notebook.
# NOTE(review): the relative sys.path entry 'tensorflow-vgg/' and the local imports above
# are resolved before this call runs, so they depend on the kernel's starting directory --
# presumably the notebook is launched from this folder; confirm.
os.chdir('/home/vagrant/ocm/language-learning/code')

In [2]:
def load_image(path):
    """Read an image file and return a centre-cropped 224x224 float32 array.

    The pixel data returned by ``skimage.io.imread`` is divided by 255 and
    asserted to lie in [0, 1] (so 8-bit input is assumed), cropped to the
    largest centred square, resized to 224x224 with
    ``skimage.transform.resize`` and cast to ``np.float32``.

    Args:
        path: location of the image, passed straight to ``skimage.io.imread``.

    Returns:
        The resized image as a ``np.float32`` array.
    """
    pixels = skimage.io.imread(path) / 255.0
    assert (0 <= pixels).all() and (pixels <= 1.0).all()

    # Largest square that fits, centred along the longer edge.
    height = pixels.shape[0]
    width = pixels.shape[1]
    side = min(height, width)
    top = (height - side) // 2
    left = (width - side) // 2
    square = pixels[top:top + side, left:left + side]

    return skimage.transform.resize(square, (224, 224)).astype(np.float32)

In [3]:
# Leftover call from an earlier experiment, kept disabled:
#sender.show_images(target, distractor)

# NOTE(review): the training cell reassigns `iterations` (to 20000) before its loop reads
# it, so this value appears to have no effect -- confirm and drop if so.
iterations = 1000

# Image class names handed to env.Environment; the training cell also derives
# num_classes from this list. Presumably these are sub-folders of data_dir -- confirm.
img_dirs = ['cat', 'dog']

# Last expression of the cell: shows the current working directory as the cell output.
os.getcwd()

'/home/vagrant/ocm/language-learning/code'

In [4]:
def get_image_activations(sess, vgg, image, placeholder):
    """Run a single image through the VGG graph and return its ``fc8`` output.

    Args:
        sess: session object; its ``run(fetches, feed_dict=...)`` evaluates the graph.
        vgg: model object exposing the ``fc8`` tensor to fetch.
        image: array holding exactly one 224x224 three-channel image, i.e.
            anything that can be reshaped to ``(1, 224, 224, 3)``.
        placeholder: input placeholder that is fed with the one-image batch
            (presumably the one ``vgg`` was built on -- confirm at the call site).

    Returns:
        Whatever ``sess.run`` returns for ``vgg.fc8`` on that batch.

    Raises:
        ValueError: from ``reshape`` when ``image`` does not contain
            224 * 224 * 3 values.
    """
    batch = image.reshape((1, 224, 224, 3))
    # Deliberately no tf.name_scope around the fetch: sess.run creates no ops,
    # so a scope has no effect on it, and entering one on every call only
    # makes the graph reserve another unique scope name each time.
    return sess.run(vgg.fc8, feed_dict={placeholder: batch})


In [5]:
def make_epsilon_greedy_policy(estimator, nA):
    """
    Builds an epsilon-greedy action-probability function on top of a Q-value estimator.

    Args:
        estimator: Object whose predict(sess, states) returns Q-values for a batch of states.
        nA: Number of actions in the environment.

    Returns:
        A function policy_fn(sess, observation, epsilon) that returns a numpy array of
        length nA holding the probability of choosing each action.

    """
    def policy_fn(sess, observation, epsilon):
        # Exploration mass: every action starts with an equal epsilon / nA share.
        action_probs = np.full(nA, epsilon, dtype=float) / nA
        # The estimator is queried with a batch of one; keep only that row of Q-values.
        single_state_batch = np.expand_dims(observation, 0)
        greedy_action = np.argmax(estimator.predict(sess, single_state_batch)[0])
        # The highest-valued action additionally receives the remaining 1 - epsilon.
        action_probs[greedy_action] += (1.0 - epsilon)
        return action_probs
    return policy_fn

In [6]:
def shuffle_image_activations(im_acts):
    """Randomly reorder activations and report where the first one ended up.

    Args:
        im_acts: numpy array, or a list/tuple of equally shaped arrays, indexed
            along its first axis. Sequences are converted with ``np.asarray``
            so that they can be fancy-indexed.

    Returns:
        A tuple ``(shuffled, target_ind)``: ``shuffled`` is the input reordered
        along its first axis, and ``target_ind`` is the position in ``shuffled``
        now holding the element that was originally at index 0 (presumably the
        target image -- confirm with callers).

    Raises:
        ValueError: if ``im_acts`` is empty.
    """
    acts = np.asarray(im_acts)
    if len(acts) == 0:
        raise ValueError("im_acts must contain at least one activation")

    reordering = np.arange(len(acts))
    # The stdlib RNG is used, so random.seed() controls the ordering.
    random.shuffle(reordering)
    # reordering[k] is the original index placed at slot k, so the slot holding
    # the value 0 is where the original first element landed.
    target_ind = np.argmin(reordering)
    return (acts[reordering], target_ind)

In [None]:
# Vocabulary size, plus an embedding width that the visible cells never read.
num_words, embed_dim = 2, 2

# Placeholder word labels 'W0', 'W1', ... -- one per vocabulary slot
# (an earlier variant used the fixed labels 'Catword' / 'Dogword').
vocab = list('W' + str(word_id) for word_id in range(num_words))

print(vocab)

['W0', 'W1']


In [None]:
# Training cell: plays `iterations` rounds of the two-image referential game.
# Each round the VGG fc8 activations of a target and a distractor image are
# shown to the agents in random order; the agents' weights are updated from the
# most recent `mini_batch_size` rounds every `mini_batch_size` iterations.
tf.reset_default_graph()

# Project-local agents and image environment (their internals are not visible here).
agents = agent.Agents(vocab, image_embedding_dim = 10, embedding_dim = 10, temperature=10)
game = env.Environment(data_dir, img_dirs, 2)

# TensorBoard event file for this run.
logs_path = os.path.join('..','logs/run3')
writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())

## Run the iterations of the game
iterations = 20000
mini_batch_size = 4

# NOTE(review): num_classes, wins, losses and update_estimators_every are not read
# anywhere in this cell; kept in case other cells rely on them.
num_classes = len(img_dirs)

wins = 0
losses = 0

update_estimators_every = 50

# Cap this process at 70% of the GPU memory.
session_config = tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.7))

try:
    with tf.Session(config=session_config) as sess:
        vgg = vgg16.Vgg16()

        # The VGG graph is built for exactly one 224x224 RGB image per run.
        image_pl = tf.placeholder("float32", [1, 224, 224, 3])
        vgg.build(image_pl)
        sess.run(tf.global_variables_initializer())

        # Sliding window holding the most recent rounds, used for weight updates.
        batch = []
        Game = namedtuple("Game", ["im_acts", "target_acts", "distractor_acts", "word_probs", "image_probs", "target", "word", "selection", "reward"])
        tot_reward = 0
        for i in range(iterations):

            print("\rEpisode {}/{}".format(i, iterations), end="")
            sys.stdout.flush()

            # Every 10th iteration: report the reward summed since the last report.
            if i % 10 == 0:
                print('last 10 interations performance ', tot_reward)
                tot_reward = 0

            target_image, distractor_image = game.get_images()
            target_class = game.target_class
            target_acts = get_image_activations(sess, vgg, target_image, image_pl)
            distractor_acts = get_image_activations(sess, vgg, distractor_image, image_pl)

            # Random presentation order. `target` is a one-element array holding the
            # slot in which index 0 (the target activations) ends up.
            reordering = np.array([0,1])
            random.shuffle(reordering)
            target = np.where(reordering==0)[0]

            img_array = [target_acts, distractor_acts]
            # Iterate the permutation directly; this avoids reusing the outer loop's
            # variable name `i` inside the comprehension.
            i1, i2 = [img_array[idx] for idx in reordering]

            # Both activations joined along axis 1 in presentation order.
            shuffled_acts = np.concatenate([i1, i2], axis=1)

            ## for Sender - take action in reinforcement learning terms

            reward, word, selection, word_probs, image_probs = agents.show_images(sess, shuffled_acts, target_acts, distractor_acts, target, target_class)

            batch.append(Game(shuffled_acts, target_acts, distractor_acts, word_probs, image_probs, target, word, selection, reward))

            # Keep only the latest `mini_batch_size` rounds.
            if len(batch) > mini_batch_size:
                batch.pop(0)

            if (i+1) % mini_batch_size == 0:
                print('updating the agent weights')
                summary = agents.update(sess, batch)
                writer.add_summary(summary, i)

            #reward, word_text = agents.test_sender(sess, shuffled_acts, target, target_class)
            print(target_class, reward)
            #reward = agents.test_receiver(sess, shuffled_acts, word, target_ind, target_class)
            tot_reward += reward
            # NOTE(review): overwritten by the next show_images call; looks like a leftover.
            selection = 0
            #agents.call_trial(sess, img_array, target_ind)
finally:
    # Flush and close the event file even if the long-running loop is interrupted
    # or raises, so that summaries written so far are not lost.
    writer.close()


now building the learning graph
<class 'tensorflow.python.framework.ops.Tensor'> Tensor("sender/Shape_5:0", shape=(2,), dtype=int32)
word Tensor("word:0", shape=(1, ?), dtype=int32)
selected word probs Tensor("sender/Print_2:0", shape=(1, ?), dtype=float32)
reward Tensor("sender/Print_3:0", shape=(?, 1), dtype=float32)
sender loss Tensor("sender/mul_1:0", shape=(?, 1), dtype=float32)


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


created the scalar for tensorboard
finished building the learning graph
/home/vagrant/ocm/language-learning/code/tensorflow-vgg/vgg16.npy
npy file loaded
build model started
build model finished: 0s
Episode 0/20000last 10 interations performance  0
word probs [ 0.50055498  0.49944499]
image probs [ 0.5  0.5]
dog -1.0
Episode 1/20000word probs [ 0.50036162  0.49963832]
image probs [ 0.50000006  0.49999997]
cat -1.0
Episode 2/20000word probs [ 0.50053233  0.49946764]
image probs [ 0.49999991  0.50000006]
dog 1.0
Episode 3/20000word probs [ 0.50053543  0.49946463]
image probs [ 0.50000006  0.49999994]
updating the agent weights
shape of the rewards  (4, 1)
shape of the selected word  (1, 4)
dog 1.0
Episode 4/20000word probs [ 0.50979716  0.49020284]
image probs [ 0.49614486  0.50385511]
cat -1.0
Episode 5/20000word probs [ 0.50204158  0.49795842]
image probs [ 0.49857455  0.50142539]
dog -1.0
Episode 6/20000word probs [ 0.50204158  0.49795842]
image probs [ 0.50150192  0.49849808]
dog 1.0