# SemVec Utilities Demo

In [24]:
import copy
import os
import re
import sys

import numpy as np
import wget

# Make the parent of the current working directory importable *before* the
# semvecpy imports below. Extending sys.path after those imports (as this cell
# originally did) cannot influence imports that have already been resolved.
basepath = os.path.abspath('..')
if basepath not in sys.path:  # keep the cell idempotent when it is re-run
    sys.path.insert(0, basepath)

import semvecpy.permutations.constants as c
import semvecpy.permutations.dense_permutations as dp
import semvecpy.vectors.semvec_utils as sv

In [31]:
def load_vectors_from_url(url):
    """Fetch a vector file into the working directory if needed, then load it.

    The file name is taken from the last segment of the URL path (query
    strings such as ``?dl=1`` are ignored). If a file with that name already
    exists in the current working directory it is reused; otherwise it is
    downloaded with ``wget``.

    Args:
        url: Address of the vector file to load.

    Returns:
        Whatever ``sv.readfile`` returns for the local copy of the file.

    Raises:
        ValueError: If no file name can be derived from ``url``.
    """
    # Imported locally so the helper does not rely on the module-level name
    # ``re``, which later cells in this notebook may rebind.
    from urllib.parse import urlparse

    filename = os.path.basename(urlparse(url).path)
    if not filename:
        raise ValueError("Cannot determine a file name from URL: {0}".format(url))

    path = os.path.join(os.getcwd(), filename)
    if not os.path.isfile(path):
        sys.stdout.write("Downloading {0} from {1} ... \n".format(filename, url))
        # Name the target explicitly so the download lands exactly where it is
        # read from below, regardless of the name wget would infer itself.
        wget.download(url, out=path)
    return sv.readfile(path)
    
# Load the three demo vector stores (each is downloaded on first use only).
semanticvectors = load_vectors_from_url("https://www.dropbox.com/s/3f5qg9nfop2l45e/semanticvectors.bin?dl=1")
elementalvectors = load_vectors_from_url("https://www.dropbox.com/s/nf28cvp1ocvyh1s/elementalvectors.bin?dl=1")
permutation_vectors = load_vectors_from_url("https://www.dropbox.com/s/r8r8ffw9rnjmmo0/permutationvectors.bin?dl=1")

# The predicate-search cells read the permutation store as `permutation_cache`,
# which was otherwise never defined in this notebook.
# NOTE(review): assumes `permutation_cache` is meant to be this store - confirm.
permutation_cache = permutation_vectors
# Keep the original (misspelled) name working for any code that still uses it.
permuattion_vectors = permutation_vectors

100   REAL
100   REAL
100   PERMUTATION


In [18]:
# TODO(review): placeholder cell labelled "norm" - add the intended example or remove this cell

## Nearest Neighbor Search

In [25]:
# Nearest neighbors can be requested by term or by an explicit query vector.
# The loop variable is deliberately NOT called `re`: that name would shadow the
# imported `re` module for every cell that runs afterwards.
res = sv.get_k_vec_neighbors(semanticvectors, 'docetaxel', 10)
print('Nearest neighbors of docetaxel')
for neighbor in res:
    # each result row is indexed as (score, term)
    print("{:0.2f}".format(neighbor[0]), neighbor[1])

print('Nearest neighbors of docetaxel vector')
res = sv.get_k_neighbors(semanticvectors, sv.getvector(semanticvectors, 'docetaxel'), 10)
for neighbor in res:
    print("{:0.2f}".format(neighbor[0]), neighbor[1])

Nearest neighbors of docetaxel
44.23 docetaxel
42.86 gemcitabine
42.68 etoposide
42.25 paclitaxel
41.89 carboplatin
41.73 erlotinib
41.35 gefitinib
40.80 capecitabine
40.26 cetuximab
39.85 fluorouracil
Nearest neighbors of docetaxel vector
44.23 docetaxel
42.86 gemcitabine
42.68 etoposide
42.25 paclitaxel
41.89 carboplatin
41.73 erlotinib
41.35 gefitinib
40.80 capecitabine
40.26 cetuximab
39.85 fluorouracil


In [26]:
# Single predicate search: permute the semantic vector for 'prozac' with the
# 'ISA' permutation, then look for the closest elemental vectors.
# NOTE(review): `permutation_cache` must already be defined when this cell runs
# (presumably the permutation vector store) - confirm.
semvec = copy.copy(sv.getvector(semanticvectors, 'prozac'))
permvec = dp.permute_vector(np.asarray(sv.getvector(permutation_cache, 'ISA')), np.asarray(semvec))
results = sv.get_k_neighbors(elementalvectors, permvec, 10)
# Avoid `re` as the loop variable: it would shadow the imported `re` module.
for neighbor in results:
    print("{:0.2f}".format(neighbor[0]), neighbor[1])

4.40 dopamine_antagonists
4.38 antidepressive_agents
4.31 monoamine_oxidase_inhibitors
4.27 serotonin_uptake_inhibitors
4.13 reuptake_inhibitors
4.07 antipsychotic_agents
3.70 selective_serotonin_re-uptake_inhibitor
3.70 antidepressive_agents,_tricyclic
3.66 atypical_antipsychotic
3.61 serotonin_agents


In [27]:
# Double predicate search: apply the 'ISA' permutation and then the
# '_TREATS-INV' permutation to the 'prozac' vector, and search the semantic
# vectors with the result.
# Not sure why plants and cattle show up, but the other results make sense.
# NOTE(review): `permutation_cache` must already be defined when this cell runs
# (presumably the permutation vector store) - confirm.
semvec = copy.copy(sv.getvector(semanticvectors, 'prozac'))
permvec = dp.permute_vector(np.asarray(sv.getvector(permutation_cache, 'ISA')), np.asarray(semvec))
permvec = dp.permute_vector(np.asarray(sv.getvector(permutation_cache, '_TREATS-INV')), np.asarray(permvec))
results = sv.get_k_neighbors(semanticvectors, permvec, 10)
# Avoid `re` as the loop variable: it would shadow the imported `re` module.
for neighbor in results:
    print("{:0.2f}".format(neighbor[0]), neighbor[1])

12.53 plants
12.46 obsessive-compulsive_disorder
12.32 phobia,_social
12.12 depression,_bipolar
12.02 schizoaffective_disorder
11.96 sleeplessness
11.85 relationships
11.79 cattle
11.78 chronic_schizophrenia
11.77 country


In [34]:
# Load the binary vector store used by the remaining cells.
BINARY_VECTORS_URL = "https://www.dropbox.com/s/53x384ca2ehhcnd/semanticvectors_1024.bin?dl=1"
vecs = load_vectors_from_url(BINARY_VECTORS_URL)

1024   BINARY


In [35]:
# Look up the stored vector for 'docetaxel'; as the last expression of the
# cell it is displayed as the cell's output.
docetaxel_vector = sv.getvector(vecs, 'docetaxel')
docetaxel_vector

bitarray('111011110010010001101110101001010011111011111100000100100101101001111010010101100000011000011001010001011100100011000101010100010010010111000001111110010100001111011110110000001101001011111011001111010011110110010111101101110001011111101101011111110110011111111101011111010000111011010001000111000111010001011111111100000010000001000101001010111101001111111010110011101000010001101111111001000101110001010011011100111101011011100111101001111010001011110100110000001001111111100001011000100001011011110001011101111110111000000100011111111101110100111101110111001101111111001010011110010101000111100101111010110011101000011000101110100100010011100100101100111000000101111011111011011000000101110101111011101000001111101110001011000100111110101100001000011111000011000101001101101010101110101110011011110101110100110111100101111111001100011011010001010010010000011100110101001110101111011010010011010011000011010010110101001111100010011100101001100101101101100100000011011111010100100111110101

In [36]:
# Ten nearest neighbors of 'paclitaxel' among the binary vectors.
paclitaxel_neighbors = sv.get_k_bvec_neighbors(vecs, 'paclitaxel', 10)
print(paclitaxel_neighbors)

[[1.0, 'paclitaxel'], [0.974609375, 'bax_gene|bax'], [0.97265625, 'caspase-3'], [0.96875, 'bortezomib'], [0.966796875, 'etoposide'], [0.96484375, 'caspase'], [0.96484375, 'curcumin'], [0.96484375, 'survivin'], [0.96484375, 'bcl2-related_protein_11|bcl2l11'], [0.962890625, 'proto-oncogene_proteins_c-myc|myc']]
