In [None]:
#run once - small sets of vectors for development (low-D, high minimum frequency threshold so relatively few concepts)
!wget https://www.dropbox.com/s/3f5qg9nfop2l45e/semanticvectors.bin
!wget https://www.dropbox.com/s/r8r8ffw9rnjmmo0/permutationvectors.bin
!wget https://www.dropbox.com/s/nf28cvp1ocvyh1s/elementalvectors.bin

In [1]:
import copy
import sys
sys.path.insert(0, '../')
import permutations.constants as c
import permutations.sparse_permutations as sp
import permutations.semvec_utils as sv

In [4]:
#run once 
semanticvectors = sv.readfile('semanticvectors.bin')
elementalvectors = sv.readfile('elementalvectors.bin')
permutation_cache = sv.readfile('permutationvectors.bin')


100   REAL
100   REAL
100   PERMUTATION


In [5]:
#nearest neighbor by term or by vector
print(sv.get_k_vec_neighbors(semanticvectors,'docetaxel',10))
print(sv.get_k_neighbors(semanticvectors,sv.getvector(semanticvectors,'docetaxel'),10))

['docetaxel', 'gemcitabine', 'etoposide', 'paclitaxel', 'carboplatin', 'erlotinib', 'gefitinib', 'capecitabine', 'cetuximab', 'fluorouracil']
['docetaxel', 'gemcitabine', 'etoposide', 'paclitaxel', 'carboplatin', 'erlotinib', 'gefitinib', 'capecitabine', 'cetuximab', 'fluorouracil']


In [6]:
#single predicate search
semvec=copy.copy(sv.getvector(semanticvectors,'prozac'))
permvec=sp.permute_vector(sv.getvector(permutation_cache,'_ISA'),semvec)
results=sv.get_k_neighbors(elementalvectors, permvec, 10)
print(results)

['dopamine_antagonists', 'antidepressive_agents', 'monoamine_oxidase_inhibitors', 'serotonin_uptake_inhibitors', 'reuptake_inhibitors', 'antipsychotic_agents', 'selective_serotonin_re-uptake_inhibitor', 'antidepressive_agents,_tricyclic', 'atypical_antipsychotic', 'serotonin_agents']


In [7]:
#double predicate search - not sure why plants and cattle show up, but the others make sense
semvec=copy.copy(sv.getvector(semanticvectors,'prozac'))
permvec=sp.permute_vector(sv.getvector(permutation_cache,'_ISA'),semvec)
permvec=sp.permute_vector(sv.getvector(permutation_cache,'TREATS-INV'),permvec)
results=sv.get_k_neighbors(semanticvectors, permvec, 10)
print(results)

['plants', 'obsessive-compulsive_disorder', 'phobia,_social', 'depression,_bipolar', 'schizoaffective_disorder', 'sleeplessness', 'relationships', 'cattle', 'chronic_schizophrenia', 'country']


In [None]:
#run once
!wget https://www.dropbox.com/s/53x384ca2ehhcnd/semanticvectors_1024.bin

In [8]:
vecs=sv.readfile('semanticvectors_1024.bin')


1024   BINARY


In [12]:
sv.getvector(vecs,'docetaxel')

array([239,  36, 110, 165,  62, 252,  18,  90, 122,  86,   6,  25,  69,
       200, 197,  81,  37, 193, 249,  67, 222, 192, 210, 251,  61,  61,
       151, 183,  23, 237, 127, 103, 253, 125,  14, 209,  28, 116,  95,
       240,  32,  69,  43, 211, 250, 206, 132, 111, 228,  92,  83, 115,
       214, 231, 167, 162, 244, 192, 159, 225,  98,  22, 241, 119, 238,
         4, 127, 221,  61, 220, 223, 202, 121,  81, 229, 235,  58,  24,
       186,  68, 228, 179, 129, 123, 237, 129, 117, 238, 131, 238,  44,
        79, 172,  33, 240, 197,  54, 171, 174, 111,  93,  55, 151, 243,
        27,  69,  36,  28, 212, 235, 218,  77,  48, 210, 212, 248, 156,
       166,  91, 100,  13, 245,  39, 214, 255,  76, 204,  55], dtype=uint8)

In [9]:
x=copy.copy(sv.getvector(vecs,'docetaxel'))
y=copy.copy(sv.getvector(vecs,'prozac'))
xs=[]
ys=[]
for i in range(10):
    xs.append(x)
    ys.append(y)

xs ^= ys

TypeError: unsupported operand type(s) for ^=: 'list' and 'list'

In [7]:
import numpy as np
def get_k_bvec_neighbors(bwordvectors, query_term, k):
    """Returns the nearest neighboring terms (binary vector reps) to query_term - a term"""
    if query_term in bwordvectors[0]:
        query_index = bwordvectors[0].index(query_term)
        query_vec = bwordvectors[1][query_index]
        return get_k_b_neighbors(bwordvectors, query_vec, k)
    else:
        return None


def get_k_b_neighbors(bwordvectors, query_vec, k):
    """Returns the nearest neighboring to terms to query_vec - a binary vector."""
    sims = []
    for vector in bwordvectors[1]:
        vec2 = copy.copy(vector)
        vec2 ^= query_vec
        sims.append(-vec2.bin.count("1"))
    indices = np.argpartition(sims, -k)[-k:]
    indices = sorted(indices, key=lambda i: sims[i], reverse=True)
    labels = []
    for index in indices:
        labels.append(bwordvectors[0][index])
    return labels

In [8]:
print(sv.get_k_bvec_neighbors(vecs,'paclitaxel',10))

['paclitaxel', 'methyl_oleate', 'topical_hemostatic_agent', 'adenovirus_vector', 'mcc_protocol', 'cisplatin/vinblastine_protocol', 'radiotherapy,_interstitial', 'zona_drilling', 'reproductive_technology,_assisted', 'iron_ion_homeostasis']


In [9]:
print(get_k_bvec_neighbors(vecs,'paclitaxel',10))

['paclitaxel', 'methyl_oleate', 'topical_hemostatic_agent', 'adenovirus_vector', 'mcc_protocol', 'cisplatin/vinblastine_protocol', 'radiotherapy,_interstitial', 'zona_drilling', 'reproductive_technology,_assisted', 'iron_ion_homeostasis']


In [None]:
!conda list