# Experiments with Dense Vectors

In [2]:
import os
import sys
import numpy as np
from numba import jit
sys.path.append(os.path.abspath('../')) # needed to import from permutations sibling directory
import permutations.dense_permutations as p

In [3]:
# Generate some vectors
a = p.get_random_vector(100)
b = p.get_random_vector(100)
c = p.get_random_vector(100)

# Generate some predicates
p_a = p.get_random_permutation(100)
p_b = p.get_random_permutation(100)
p_c = p.get_random_permutation(100)

In [3]:
# In theory, this should give us the ability to deduce a permutation from two source vectors; untested
def deduceperm(permuted, unpermuted): 
     return np.argsort(np.argsort(permuted)[np.argsort(np.argsort(unpermuted))]) 
    

# Changes a permutation to be like another permutation, where howmuch is the number of dimensions the input perm
# should have in common with tobelike. In theory it should never become disjoint with range(dimension), but I haven't
# rigorously tested it. Assuming it's fed with a permutation derived from range(dimension), that is.
def trainpermutation(perm, tobelike, howmuch):
    likeness = np.sum(perm == tobelike)
    perm1 = np.copy(perm) 
    while likeness < howmuch: 
        tochange = np.random.choice(np.where(perm1 != tobelike)[0]) 
        perm1[np.where(perm1 == tobelike[tochange])] = perm1[tochange] 
        perm1[tochange] = tobelike[tochange] 
        likeness = np.sum(perm1 == tobelike) 
    return perm1        

def trainpermutation(perm, tobelike, howmuch):
    perm1 = np.copy(perm)
    for i in np.where(perm1 == p.get_random_permutation(100))[0]: #move away from noise
        idx = np.random.randint(0,perm1.shape[0])
        val = perm1[i]
        perm1[i] = perm[idx]
        perm1[idx] = val
    canchange = np.clip(howmuch, 0, len(np.where(perm1 != tobelike)[0])) # how many spaces can we actually change?
    for i in np.argsort(np.abs(perm1 - tobelike))[::-1][:canchange]: # move towards the thing you want to be like
        perm1[np.where(perm1 == tobelike[i])] = perm1[i] 
        perm1[i] = tobelike[i] 
    return perm1

In [4]:
p_ta = trainpermutation(p.get_random_permutation(100), p_a, 50) # make a permutation with 50 dimensions in common with p_a
print(np.setdiff1d(np.arange(100), p_ta)) # should be an empty list
print(np.sum(p_ta == p_a)) # should be precisely 50

[]
52


In [5]:
print(p.cosine_similarity(a[p_a], a[p_b])) # basically random
print(p.cosine_similarity(a[p_ta], a[p_a])) # same vector, more similar permutations, should be more similar
print(p.cosine_similarity(a,b)) #should be identical to below permutation
print(p.cosine_similarity(a[p_a], b[p_a])) # dissimilar vectors, same permutation, should be (identically to unpermuted) dissimilar

0.46643516
0.8107682
0.45108235
0.45108235


In [6]:
np.sum(deduceperm(a[p_a], a) == p_a) # should be 100 (e.g. every dimension should match)

100

## Training Things
If we want to train, we should go for something like `sigmoid(vec[permutation].dot(vec2))`
Ideally, we want the sigmoid to be very close to one for vectors in context, and far from one for negative samples.

In [7]:
def sigmoid(x):
    return np.exp(-np.logaddexp(0, -x))

Let's make a dataset:

In [8]:
dataset = {
    'orange': {
        'isa': 'fruit',
        'color': 'orange',
        'colour': 'orange',
        'shape': 'round',
        'origin': 'plant',
        'growson': 'tree'
    },
    'apple': {
        'isa': 'fruit',
        'colour':'red',
        'color': 'red',
        'shape': 'round',
        'origin': 'plant',
        'growson': 'tree'
    },
    'broccoli': {
        'isa': 'vegetable',
        'color': 'green',
        'shape': 'floret',
        'origin': 'plant',
        'growson': 'bush'
    },
    'chicken': {
        'isa': 'animal',
        'makesnoise': 'cuccoo',
        'has': 'feathers,',
        'eats': 'grain',
        'drinks': 'water'
    },
    'bicycle': {
        'typeof':'transportation',
        'origin':'man-made',
        'color':'varies',
        'has':'wheels',
        'used-by':'humans'
    }
}

svecs = {x:p.get_random_vector(100) for x in dataset.keys()} #generate subject vectors
preds = {x:p.get_random_permutation(100) for y in dataset.keys() for x in dataset[y].keys()} # generate permutation vectors
cvecs = {x:p.get_random_vector(100) for y in dataset.keys() for x in dataset[y].values()} # generate context vectors

Let's train based on how the attributes are related to each subject in the dataset:

In [31]:
alpha = 0.1
for i in range(100): # epochs
    alpha = alpha - 0.001 # monotonic decrease in alpha
    for subject in dataset.keys():
        for predicate in dataset[subject]:
            context = dataset[subject][predicate]
            subjectvector = svecs[subject]
            svectorcopy = svecs[subject].copy() # copy for context to move to
            predicatevector = preds[predicate]
            contextvector = cvecs[dataset[subject][predicate]]
            cvectorcopy = cvecs[dataset[subject][predicate]].copy()
            #print(f"{subject}-{predicate}-{context} similarity prior to training: {p.cosine_similarity(p.permute_vector(predicatevector, subjectvector), contextvector):.2f}")
            shiftoward = 1-sigmoid(p.permute_vector(predicatevector, subjectvector).dot(contextvector))
            shiftaway = -sigmoid(p.permute_vector(predicatevector, subjectvector).dot(p.get_random_vector(100))) # generate a random noise vector to move away from
            subjectvector += alpha*shiftoward*contextvector[p.inverse_permutation(predicatevector)]
            contextvector += alpha*shiftoward*p.permute_vector(predicatevector, svectorcopy)
            subjectvector -= alpha*shiftaway*contextvector[p.inverse_permutation(predicatevector)]
            contextvector -= alpha*shiftaway*p.permute_vector(predicatevector, svectorcopy)
            predicatevector = trainpermutation(predicatevector, deduceperm(cvectorcopy, svectorcopy), 5)
            #print(f"{subject}-{predicate}-{context} similarity after training: {p.cosine_similarity(p.permute_vector(predicatevector, subjectvector), contextvector):.2f}")
    #normalize vectors at end of every epoch
    for vec in svecs:
        svecs[vec] = p.normalize(svecs[vec])
    for vec in cvecs:
        cvecs[vec] = p.normalize(cvecs[vec])
    #don't normalize predicate vectors as that would break things
        
        

Let's do some querying to make sure that it worked:

In [32]:
def real_similarity_search(query, vectorstore):
    similarities = [(i, p.cosine_similarity(vectorstore[i], vectorstore[query])) for i in vectorstore]
    similarities.sort(key=lambda x: x[1], reverse=True) 
    for i in similarities[:5]:
        print(f"{i[0]+':':<10}\t{i[1]:.4f}")

In [33]:
real_similarity_search('orange', svecs) #should be very similar to apples, a little similar to broccoli, and random to others

orange:   	1.0000
apple:    	1.0000
broccoli: 	0.9727
chicken:  	0.5205
bicycle:  	0.5049


In [34]:
real_similarity_search('orange', cvecs) #should be similar to other colors, especially red

orange:   	1.0000
red:      	1.0000
green:    	0.9384
man-made: 	0.6255
feathers,:	0.5876


In [42]:
def permutation_similarity_search(query, permstore):
    similarities = [(i, np.sum(permstore[query] == permstore[i])) for i in permstore]
    similarities.sort(key=lambda x: x[1], reverse=True)
    for i in similarities[:5]:
        print(f"{i[0]+':':<10}\t{i[1]}")
              
              
#May want to do something like below
'''def permutation_similarity_search(query, permstore):
    comparevec = svecs['term that is probably related by query to svec']
    similarities = [(i, p.cosine_similarity(comparevec[permstore[query]], comparevec[permstore[i]])) for i in permstore]
    similarities.sort(key=lambda x: x[1], reverse=True)
    for i in similarities[:5]:
        print(f"{i[0]+':':<10}\t{i[1]:.4f}")''';

In [36]:
permutation_similarity_search('color', preds) #preds aren't trained, so should recover cue term and be random to other things

color:    	100
typeof:   	3
makesnoise:	2
drinks:   	2
used-by:  	1


In [37]:
def bound_permutation_search(contextquery, subjectquery, contextstore, subjectstore, permstore):
    similarities = [(i, np.sum(deduceperm(contextstore[contextquery], subjectstore[subjectquery]) == permstore[i])) for i in permstore]
    similarities.sort(key=lambda x: x[1], reverse=True)
    for i in similarities[:5]:
        print(f"{i[0]+':':<10}\t{i[1]}")

In [38]:
bound_permutation_search('orange','orange', cvecs, svecs, preds) # should return color
print()
bound_permutation_search('red','apple', cvecs, svecs, preds) # should return color

color:    	6
colour:   	5
has:      	3
used-by:  	2
shape:    	1

color:    	9
colour:   	6
has:      	6
isa:      	1
shape:    	1


In [39]:
bound_permutation_search('fruit','orange', cvecs, svecs, preds) # should return isa

isa:      	56
colour:   	4
drinks:   	3
shape:    	2
used-by:  	2


In [40]:
bound_permutation_search('wheels','bicycle', cvecs, svecs, preds) # should return has

has:      	100
used-by:  	3
origin:   	2
growson:  	2
eats:     	2


In [41]:
bound_permutation_search('fruit','bicycle', cvecs, svecs, preds) # should return random, especially without permutation training

colour:   	2
makesnoise:	2
isa:      	1
growson:  	1
has:      	1
