In [178]:
import numpy as np
import pickle
np.set_printoptions(precision=3)
np.set_printoptions(suppress=True)
# import torch
from scipy.spatial.distance import cosine
from scipy.stats import multivariate_normal
from scipy.special import logsumexp
import csv

In [4]:
# x: [BATCH w, DIM VECS]: vectors to be projected
# m: [DIM VECS, DIM PROJECTION SUBSPACE]. m is a single projection. each column vector in m is a line onto which one projects
def get_projection(m):
  covariance_matrix = np.dot(np.transpose(m),m)
  inverse_covariance_matrix = np.linalg.inv(covariance_matrix)
  #inverse_covariance_matrix = [DIM PROJECTION SUBSPACE, DIM PROJECTION SUBSPACE]
  
  def projection(x):
    #x: [DIM VECS, BATCH SIZE]
    #[DIM PROJECTION SUBSPACE, BATCH SIZE]
    uncorrected_projection_weights = np.dot(np.transpose(m),x.T)
    #[DIM PROJECTION SUBSPACE, BATCH SIZE]    
    projection_weights = np.dot(inverse_covariance_matrix,uncorrected_projection_weights)
    return np.dot(m,projection_weights).T
  
  return projection

In [5]:
# def get_literal_listener(prior_mean, sigma1, sigma2):
#   #prior_mean: [DIM VECS, 1]
#   prior_mean = np.expand_dims(prior_mean,axis=1)
#   def literal_listener(u):
#     #u: [DIM VECS, UTTERANCE BATCH SIZE]
#     posterior_mean = (sigma1*prior_mean + sigma2*u)*(sigma1^2*sigma2^2)/(sigma1^2+sigma2^2)
#     return posterior_mean
#   return literal_listener

In [6]:
# commenting scheme:
#   list(A) is a list of variables of type A
#   [x,y,z] is an array of shape (x,y,z)
  
#   DIM VECS = dimension of word embedding space
#   BATCH w = number of states w that are batched
#   NUM utts = number of utterances

In [124]:
# class contains several Bayesian pragmatic models:
# L0_Unbatched
# S1_Batched: fast S1, batched over w, and u (batch of u passed to L0): L0 is implicit in this S1
# S1_Unbatched: no batching: useful for understanding the code semantics clearly and for testing the batched version
# L1_Unbatched: no batched equivalent written
class Pragmatic_Model:
  # utterances : list(str). the utterance set U
  # projections : list(str). the projection set Q
  # vectors : dictionary: key:str,val:[dim vecs] of word embeddings
  # sigma1: hyperparameter for L0: variance of L0 prior. See paper
  # sigma2: hyperparameter for L0: variance of gaussian used in the semantics
  # mu1: hyperparameter for L0: the mean of the L0 prior: e.g. "man" in man is a shark. 
  def __init__(self,
              utterances,
              projections,
              vectors,
              sigma1,
              sigma2,
              mu1):
    
    self.utterances=utterances
    self.projections=projections
    # [NUM U, DIM VECS]
    self.utterance_vectors = np.asarray([vectors[u] for u in utterances]) 
    # [NUM Q, DIM VECS, DIM PROJECTION SUBSPACE]
    self.projection_vectors = (np.asarray([np.asarray([vectors[word]/np.linalg.norm(vectors[word]) for word in words]).T for words in self.projections]))
    
    self.vectors=vectors
    self.sigma1 = sigma1
    self.sigma2 = sigma2
    self.mu1 = mu1
    
    self.dimvecs = vectors["the"].shape[0]
    
  # u : [DIM VECS]
  def L0_Unbatched(self,u):
    sigma1sq, sigma2sq = self.sigma1 ** 2, self.sigma2 ** 2
    mu = np.divide(np.add(self.mu1/sigma1sq, u/sigma2sq),  ((1/sigma1sq) + (1/sigma2sq)))
    sigma_base = ((1/sigma1sq) + (1/sigma2sq))**-1
    sigma = np.diag([sigma_base] * self.dimvecs)
    return mu,sigma
  
  # w : [1,DIM VECS]
  # q : [DIM VECS, DIM PROJECTION SUBSPACE]
  def S1_Unbatched(self,w,q):
    
    projection = get_projection(q)
    # calculates a term that increases with the euclidean distance of q(w) to q(u) where q(x) is x projected onto the subspace spanned by q
    def utility(w,projection,u):
      l0_posterior_mu, l0_posterior_sigma = self.L0_Unbatched(u=u)
      projected_w = projection(w)
      projected_l0_posterior_mu = projection(l0_posterior_mu)
      log_score = multivariate_normal(projected_l0_posterior_mu,l0_posterior_sigma).logpdf(projected_w)
      return log_score
    
    unnormed_log_probs = [utility(w=w,projection=projection,u=u) for u in self.utterance_vectors]
    norm = logsumexp(unnormed_log_probs)
    return unnormed_log_probs - norm
  
  # ws: [BATCH w, DIM VECS]
  # q: [DIM VECS, DIM PROJECTION SUBSPACE]
  def S1_Batched(self, ws,q):

    projection = get_projection(q)

    # obtain L0 posterior MU and SIGMA
    sigma1sq, sigma2sq = self.sigma1 ** 2, self.sigma2 ** 2
    inverse_sd = (1/sigma1sq) + (1/sigma2sq)
    sigma = np.diag([1/inverse_sd] * self.dimvecs)
    inverse_sigma = np.linalg.inv(sigma)
    l0_posterior_mu = np.divide(np.add(self.mu1/sigma1sq, self.utterance_vectors/sigma2sq),inverse_sd)

    # projections
    # [NUM UTTS, DIM VECS]
    projected_l0_posterior_mu = projection(l0_posterior_mu)
#     print("BATCHED",projected_l0_posterior_mu)
    # [BATCH w, DIM VECS]
    projected_ws = projection(ws)
    
    # compute logprob of gaussian
    # [BATCH w, NUM UTTS, DIM VECS]
    distances = np.expand_dims(projected_ws,1)-np.expand_dims(projected_l0_posterior_mu,0)
    # [BATCH w, NUM UTTS, DIM VECS]
    rescaled_distances = np.einsum('ij,abi->abj',np.sqrt(inverse_sigma),distances)
    # [BATCH w, NUM UTTS]
    unnormed_logprobs = -0.5*np.sum(np.square(rescaled_distances),axis=2)
    # [BATCH w,1]
    norm = np.expand_dims(logsumexp(unnormed_logprobs,axis=-1),-1)
    # [BATCH w, NUM UTTS]
    return unnormed_logprobs-norm
  
  def L1(self,u):
    #u: [DIM VECS, 1]
    #listener_mean: [DIM VECS, 1]
    start = -2
    stop = 2
    step = 0.02
    # intervals: [NUM INTERVALS]    
    intervals = np.arange(start=start,stop=stop,step=step)
    num_intervals = (stop-start)/step
    
    marginal_projection_probs = np.zeros((len(self.projections)))
    for i in range(len(self.projections)):
        
      q = self.projection_vectors[i]
      # assume q is a vector of unit length
      #q: [DIM VECS, 1]
      projection = get_projection(q)
      # [DIM VECS]
      projected_mu1 = projection(self.mu1)
      
      # [DIM VECS, NUM INTERVALS]
      projected_worlds = np.expand_dims(projected_mu1,0) + np.dot(q,np.expand_dims(intervals,0)).T

      # [NUM INTERVALS]
      speaker_log_likelihood = self.S1_Batched(projected_worlds,q)[:,u]
      
      # this right? seems unlikely
      # [NUM INTERVALS]
      world_log_priors = -1/(self.sigma1**2)*np.abs(intervals)
      
      # [NUM INTERVALS]
      joint_probs = world_log_priors + speaker_log_likelihood
      # SCALAR
      marginal_projection_prob = logsumexp(joint_probs)      
      marginal_projection_probs[i]=marginal_projection_prob
     
    norm = logsumexp(marginal_projection_probs)
    return marginal_projection_probs - norm

In [170]:
utterances = ["shark","swimmer"]
projections = [["swims"],["vicious"]]
ws = np.asarray([[0,3.4,4],[4,5,7],[6,7,3],[6,6,6]])

simple_vecs = {}
simple_vecs["swimmer"]=np.asarray([4.0,5.0,6.0])
simple_vecs["shark"]=np.asarray([3.0,2.0,6.0])
simple_vecs["man"]=np.asarray([4.0,5.0,9.0])
simple_vecs["vicious"]=np.asarray([20.0,4.0,2.0])
simple_vecs["swims"]=np.asarray([2.0,3.0,8.0])
simple_vecs["child"]=np.asarray([0.7,-2.0,8])
simple_vecs["nightmare"]=np.asarray([5.0,5.0,7.0])
simple_vecs["wonder"]=np.asarray([5.0,8.0,-9.0])
simple_vecs["the"]=np.asarray([5.0,4.0,-9.0])

In [217]:
test_utterances = ["shark","swimmer","wonder","child"]
test_projections = [["swims"],["vicious"],["man"]]
test_mu1 = np.random.rand(5)
test_ws = np.random.rand(10,5)

test_vecs = {}
test_vecs["swimmer"]=np.random.rand(5)
test_vecs["shark"]=np.random.rand(5)
test_vecs["man"]=np.random.rand(5)
test_vecs["vicious"]=np.random.rand(5)
test_vecs["swims"]=np.random.rand(5)
test_vecs["child"]=np.random.rand(5)
test_vecs["nightmare"]=np.random.rand(5)
test_vecs["wonder"]=np.random.rand(5)
test_vecs["the"]=np.random.rand(5)

test_model = Pragmatic_Model(utterances=test_utterances,
                                  projections=test_projections,
                                  vectors=test_vecs,
                                 sigma1=1.0,
                                 sigma2=2.0,
                                 mu1=test_mu1)

In [218]:
test_model.L1(u=0)

array([-1.097, -1.1  , -1.098])

In [96]:
#test that S1_Batched and S1_Unbatched are equivalent, up to numerical precision
q = test_model.projection_vectors[0]
batched = test_model.S1_Batched(ws=test_ws,q=q)

for i,w in enumerate(test_ws):
  unbatched = test_model.S1_Unbatched(w=test_ws[i],q=q)
  b = batched[i]
  ub = unbatched
  print(b,ub)
  assert(np.allclose(b,ub))


[-1.393 -1.379 -1.384 -1.389] [-1.393 -1.379 -1.384 -1.389]
[-1.387 -1.385 -1.386 -1.387] [-1.387 -1.385 -1.386 -1.387]
[-1.391 -1.381 -1.385 -1.388] [-1.391 -1.381 -1.385 -1.388]
[-1.379 -1.394 -1.389 -1.383] [-1.379 -1.394 -1.389 -1.383]
[-1.383 -1.391 -1.388 -1.384] [-1.383 -1.391 -1.388 -1.384]
[-1.398 -1.373 -1.382 -1.392] [-1.398 -1.373 -1.382 -1.392]
[-1.395 -1.376 -1.383 -1.39 ] [-1.395 -1.376 -1.383 -1.39 ]
[-1.379 -1.395 -1.389 -1.383] [-1.379 -1.395 -1.389 -1.383]
[-1.392 -1.381 -1.384 -1.389] [-1.392 -1.381 -1.384 -1.389]
[-1.393 -1.379 -1.384 -1.389] [-1.393 -1.379 -1.384 -1.389]


In [47]:
# test that batched projection is equivalent to unbatched projection
m = np.random.rand(10,3)
x = np.random.rand(2,10)

projection = get_projection(m)
projected_x = projection(x)
assert(np.allclose(projection(x)[0],projection(x[0])))
assert(np.allclose(projection(x)[1],projection(x[1])))

In [None]:
# pickled version of GLoVE
vecs = pickle.load(open("../dist_rsa/data/wordVectors50",'rb'))

def get_words(with_freqs=False):
	nouns, adjs, words = {},{},set()
	with open('../dist_rsa/data/concreteness.csv', newline='') as csvfile:
		r = csv.reader(csvfile, delimiter=',', quotechar='|')
		for i,row in enumerate(r):
			if i>0:
				is_bigram = float(row[1])!=0
				is_noun = (row[8])=='Noun'		
				is_adj = (row[8])=='Adjective'
				is_adv = (row[8])=='Adverb'
				freq = row[7]
				if not is_bigram:
					if is_noun:
						if with_freqs:nouns[row[0]]=float(row[2]),freq
						else: nouns[row[0]]=float(row[2])
					if is_adj:
						if with_freqs: adjs[row[0]]=float(row[2]),freq
						else: adjs[row[0]]=float(row[2])
		return nouns,adjs

In [223]:
abstract_threshold = 2.5
concrete_threshold = 3.0

target = "woman"
source = "rose"

nouns,adjs = get_words(with_freqs=False)
adjectives = [a for a in adjs if adjs[a] > concrete_threshold and a in vecs]
utterances = sorted(adjectives,key=lambda x: cosine(vecs[x],np.mean([vecs[target],vecs[target]],axis=0)))
projections = [a for a in adjs if adjs[a] < abstract_threshold and a in vecs]
projections = sorted(projections,key=lambda x:cosine(vecs[x],vecs[target]))
projections = [[x] for x in projections]

utterances = utterances[:100]
projections = projections[:100]

pragmatic_model = Pragmatic_Model(utterances=utterances,
                                  projections=projections,
                                  vectors=vecs,
                                 sigma1=1.0,
                                 sigma2=0.1,
                                 mu1=vecs[target])

In [224]:
raw_output = pragmatic_model.L1(u=1)
print("baseline",projections[:10])
print(sorted(list(zip(np.exp(raw_output),projections)),key=lambda x: x[0],reverse=True))


baseline [['mysterious'], ['lonely'], ['beautiful'], ['unknown'], ['beloved'], ['obsessed'], ['true'], ['innocent'], ['good'], ['unidentified']]
[(0.014377575986033826, ['honorary']), (0.010997174743390901, ['distinguished']), (0.010715698537749894, ['talented']), (0.010053371854597544, ['prospective']), (0.009775275921039842, ['demographic']), (0.007144730677189736, ['serial']), (0.006040182115427575, ['aspiring']), (0.005853470874881882, ['masculine']), (0.005089439538519004, ['feminine']), (0.004839542539967402, ['gifted']), (0.00458161356666161, ['indigenous']), (0.004580325638273583, ['naturalized']), (0.004410241142155174, ['privileged']), (0.004407508421318066, ['fictional']), (0.004277192350831755, ['submissive']), (0.004188347001306236, ['unidentified']), (0.004073650378485406, ['aristocratic']), (0.004054483072525496, ['renowned']), (0.004026340258661248, ['willowy']), (0.004007390069324095, ['avid']), (0.003903436728636945, ['devout']), (0.0038930243895501085, ['illustrious'