In [1]:
import numpy as np
import six
import sys
import os
import traceback
import re
import pickle
from copy import deepcopy

from chainer import cuda
from context2vec.common.context_models import Toks
from context2vec.common.model_reader import ModelReader
import sklearn
import pandas as pd
import logging
from scipy.stats import spearmanr
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
import gensim
import math
import collections



[nltk_data] Downloading package stopwords to /home/ql261/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [41]:
def produce_top_n_simwords(w_filter,context_embed,n_result,index2word,debug=False):
    """Return the n_result vocabulary entries scoring highest against a context vector.

    The rows of w_filter are assumed to be normalised already; context_embed
    is normalised here. Each score is (dot product + 1) / 2. Entries whose
    score is NaN are skipped and do not count towards n_result. Relies on the
    module-level array backend ``xp``.

    Returns a tuple (top_vec, similarity_scores, top_words): the selected rows
    of w_filter, their scores as an xp array, and the matching words looked up
    through index2word, all in descending score order.
    """
    unit_context = context_embed / xp.sqrt((context_embed * context_embed).sum())
    print('producing top {0} simwords'.format(n_result))
    similarity = (w_filter.dot(unit_context) + 1.0) / 2

    chosen_rows = []
    chosen_words = []
    chosen_scores = []
    # negate so that argsort walks from the highest score to the lowest
    for raw_idx in (-similarity).argsort():
        if xp.isnan(similarity[raw_idx]):
            continue
        idx = int(raw_idx)
        if debug==True:
            print('{0}: {1}'.format(str(index2word[idx]), str(similarity[idx])))
        chosen_rows.append(idx)
        chosen_words.append(index2word[idx])
        chosen_scores.append(float(similarity[idx]))
        if len(chosen_rows) == n_result:
            break

    top_vec = w_filter[chosen_rows, :]
    return top_vec, xp.array(chosen_scores), chosen_words
    
def top_mutual_sim(top_vec,similarity_scores):
    """Score a context by the weighted mutual similarity of its top substitutes.

    top_vec holds one substitute vector per row and similarity_scores the
    matching substitute-to-context scores, with the best score first. The
    pairwise cosine similarities between substitutes are weighted by the mean
    of the two substitutes' scores, the weights are normalised to sum to one,
    and the result is scaled by the best score.

    The caller's top_vec is left untouched: normalisation works on a copy.
    Relies on the module-level array backend ``xp``.
    """
    # normalise the rows into a new array instead of dividing in place, so the
    # argument is not modified (and integer input does not raise)
    norms = xp.sqrt((top_vec * top_vec).sum(1))
    norms[norms==0.] = 1.
    unit_vec = top_vec / norms.reshape((norms.shape[0], 1))

    # substitutes' similarity to the sentence, used as weights on the mutual
    # similarity; kept in the same array library as the vectors
    max_score = similarity_scores[0]
    scores = xp.asarray(similarity_scores)
    sim_weights = (scores + scores.reshape(len(scores),1)) / 2.0
    # scaled by the maximum score among the substitutes (a higher maximum means
    # the context is more certain about its substitutes)
    sim_weights = (sim_weights / float(sim_weights.sum())) * max_score
    # pairwise dot products weighted by sim_weights
    inf_score = (unit_vec.dot(unit_vec.T) * sim_weights).sum()
    return inf_score

def top_cluster_density(top_vec,similarity_scores):
    """Score a context by how tightly its top substitutes cluster.

    top_vec holds one substitute vector per row and similarity_scores the
    matching substitute-to-context scores, with the best score first. The
    rows are normalised, a centroid is built as their score-weighted sum
    (weights normalised to sum to one), and the mean dot product between the
    rows and that centroid is scaled by the best score.

    Relies on the module-level array backend ``xp``; the scores are kept in
    that same library so they can be combined with the vectors.
    """
    # normalise the rows (zero rows are left as they are)
    norms = xp.sqrt((top_vec * top_vec).sum(1))
    norms[norms==0.] = 1.
    unit_vec = top_vec / norms.reshape((norms.shape[0], 1))

    # score-weighted centroid of the substitutes
    max_score = similarity_scores[0]
    scores = xp.asarray(similarity_scores)
    weights = scores.reshape(len(scores),1) / scores.sum()
    centroid_vector = (unit_vec * weights).sum(0)
    # average dot product with the centroid, weighted by the best score
    inf_score = unit_vec.dot(centroid_vector).sum() / len(unit_vec) * max_score
    return inf_score

In [65]:
def load_w2salience(w2salience_f,weight_type,total_sents=84755431):
    """Read per-word salience weights from a tab-separated count file.

    Each non-blank line must contain exactly three tab-separated fields:
    word, word count, sentence count.

    weight_type selects the weighting:
      * INVERSE_W_FREQ: 1 / word count
      * INVERSE_S_FREQ: log(1 + total_sents / sentence count)
    For any other weight_type the lines are parsed but no weight is stored.

    total_sents defaults to the value that was previously hard-coded here
    (presumably the number of sentences in the training corpus -- confirm
    against the corpus the count file was built from).

    Returns a dict mapping word -> weight.
    """
    w2salience={}
    with open(w2salience_f) as f:
        for line in f:
            line=line.strip()
            if line=='':
                continue
            w,w_count,s_count=line.split('\t')
            if weight_type==INVERSE_W_FREQ:
                w2salience[w]=1/float(w_count)
            elif weight_type==INVERSE_S_FREQ:
                w2salience[w]=math.log(1+total_sents/float(s_count))
    return w2salience

def skipgram_context(model,words,pos,weight=None,w2entropy=None):
    """Sum the vectors of the words surrounding position pos, with per-word weights.

    Weighting per context word:
      * weight == LDA: 1 / (w2entropy[word] + 1.0)
      * weight in (INVERSE_W_FREQ, INVERSE_S_FREQ): w2entropy[word]
      * anything else: 1.0
    In the two weighted modes a word is used only when it is present in both
    w2entropy and model. A KeyError raised while handling a word is reported
    and that word is skipped.

    Returns (sum of the weights, weighted sum of the context vectors); the
    vector is not normalised here.
    """
    context_wvs = []
    weights = []
    for idx, token in enumerate(words):
        if idx == pos:
            # the target slot itself is not part of its context
            continue
        try:
            if weight ==LDA:
                if token in w2entropy and token in model:
                    print (token,w2entropy[token])
                    weights.append(1/(w2entropy[token]+1.0))
                    context_wvs.append(model[token])
            elif weight in [INVERSE_W_FREQ,INVERSE_S_FREQ]:
                if token in w2entropy and token in model:
                    print (token,w2entropy[token])
                    weights.append(w2entropy[token])
                    context_wvs.append(model[token])
            else:
                # equal weight for every word
                context_wvs.append(model[token])
                weights.append(1.0)
        except KeyError as e:
            print ('==warning==: key error in context {0}'.format(e))
    print ('per word weights',weights)
    vector_rows = np.array(context_wvs)
    weight_column = np.array(weights).reshape(len(weights),1)
    context_embed = sum(vector_rows*weight_column)
    return sum(weights),context_embed

def lg_model_out_w2v(top_words,w_target,word2index_target):
    """Look up the given words in a second embedding matrix.

    For every word of top_words that is a key of word2index_target, the
    matching row of w_target is collected. Words that are missing are printed
    and skipped.

    Returns (stacked rows as an xp array, positions in top_words of the words
    that were found). Relies on the module-level array backend ``xp``.
    """
    kept_vecs = []
    kept_positions = []
    for position, token in enumerate(top_words):
        try:
            row = w_target[word2index_target[token]]
        except KeyError as e:
            print (e)
            continue
        kept_vecs.append(row)
        kept_positions.append(position)
    return xp.stack(kept_vecs),kept_positions
    
def context_inform(test_s,test_w, model,model_type,n_result,w_filter,index2word,weight,w2entropy=None,w_target=None,word2index_target=None,index2word_target=None):
    """Produce the representation and informativeness score of one context.

    test_s is a sentence containing the placeholder test_w. The placeholder
    is padded with spaces, the sentence is split on whitespace, and the
    position of the first placeholder token is used as the target position.

    model_type selects how the context vector is built:
      * 'context2vec': model.context2vec(words, pos)
      * 'skipgram': weighted sum of the context word vectors
        (see skipgram_context); the score starts as the sum of word weights
      * 'context2vec-skipgram': the top n_result context2vec substitutes are
        looked up in w_target and combined, weighted by their scores
    Any other model_type prints a message and exits the process.

    weight selects the per-context score: TOP_MUTUAL_SIM and
    TOP_CLUSTER_DENSITY score the top n_result neighbours of the context
    vector in w_filter; SUBSTITUTE_PROB sums the substitute scores; False and
    the per-word modes keep the score computed so far.

    Returns (score, context_embed_out).
    """
    test_s=test_s.replace(test_w, ' '+test_w+' ')
    print(test_s)
    words=test_s.split()
    pos=words.index(test_w)

    score=1.0 #default score

    # Decide on the model
    if model_type=='context2vec':
        context_embed= model.context2vec(words, pos)
        # this branch used to leave context_embed_out unassigned, so the
        # print and the return below raised UnboundLocalError
        context_embed_out=xp.array(context_embed)

    elif model_type=='skipgram':
        score,context_embed=skipgram_context(model,words,pos,weight,w2entropy)
        context_embed_out=xp.array(context_embed)

    elif model_type=='context2vec-skipgram':
        # context2vec substitutes in skipgram space
        context_embed= model.context2vec(words, pos)
        top_vec,sim_scores,top_words=produce_top_n_simwords(w_filter,context_embed,n_result,index2word)
        top_vec,index_list=lg_model_out_w2v(top_words,w_target,word2index_target)
        sim_scores=sim_scores[index_list] #weighted by substitute probability
        print (type(top_vec),type(sim_scores))
        if weight==SUBSTITUTE_PROB:
            context_embed_out=xp.array(sum(top_vec*sim_scores.reshape(len(sim_scores),1)))
        else:
            context_embed_out=xp.array(sum(top_vec*((sim_scores/sum(sim_scores)).reshape(len(sim_scores),1))))

    else:
        print ('model type {0} not recognized'.format(model_type))
        sys.exit(1)

    print('context_embed original', context_embed[:10])
    print ('context_embed_out',context_embed_out[:10])
    print ('3',type(context_embed_out))

    #decide on weight per sentence
    print ('weight mode',weight)
    if weight==TOP_MUTUAL_SIM:
        # neighbours are taken in the w_filter space
        top_vec,sim_scores,top_words=produce_top_n_simwords(w_filter,context_embed,n_result,index2word)
        score=top_mutual_sim(top_vec,sim_scores)
    elif weight==TOP_CLUSTER_DENSITY:
        # neighbours are taken in the w_filter space
        top_vec,sim_scores,top_words=produce_top_n_simwords(w_filter,context_embed,n_result,index2word)
        score=top_cluster_density(top_vec,sim_scores)
    elif weight==SUBSTITUTE_PROB:
        # NOTE(review): sim_scores is only assigned by the
        # 'context2vec-skipgram' branch above; other model types fail here
        score=sum(sim_scores)
        print ('substitute prob score',score)
    elif weight=='learned':
        print ('learned not implemented')
    elif weight=='gaussian':
        print ('gaussian not implemented')
    elif weight ==False or weight in [LDA,INVERSE_S_FREQ,INVERSE_W_FREQ]:
        # keep the score computed above (1.0, or the skipgram weight sum)
        pass
    else:
        print ('weight mode {0} not recognized'.format(weight))

    return score,context_embed_out

def additive_model(test_ss,test_w, model_type,model,n_result,w_filter,index2word,weight=False,w2entropy=None,w_target=None,word2index_target=None,index2word_target=None,f_w=None):
    """Combine several contexts of one target into a single vector.

    test_ss holds the contexts separated by '@@'. Each context is turned into
    a (score, vector) pair by context_inform. For model_type 'skipgram' or
    weight SUBSTITUTE_PROB the vectors are summed and divided by the summed
    scores; otherwise the scores are normalised to sum to one and used as
    weights on the vectors. In the latter case the normalised weights are
    also written to f_w as one comma-separated line when f_w is given.

    The nearest neighbours of the combined vector are printed (in the target
    space when index2word_target is given, else in w_filter).

    Returns the combined context vector.
    """
    per_context_vecs = []
    context_weights = []
    for raw_sentence in test_ss.split('@@'):
        sentence = raw_sentence.strip()
        # representation and score of this single context
        score,context_embed = context_inform(sentence,test_w, model,model_type,n_result,w_filter,index2word,weight,w2entropy,w_target,word2index_target,index2word_target)
        per_context_vecs.append(context_embed)
        print ('context_embedtype',type(context_embed))
        context_weights.append(score)

    print ('context_weights',context_weights)
    stacked = xp.stack(per_context_vecs)

    if model_type=='skipgram' or weight==SUBSTITUTE_PROB:
        # weighted sum over all context words of all contexts
        context_avg = sum(stacked)/sum(context_weights)
    else:
        norm_weights = xp.array(context_weights).reshape(len(context_weights),1)/float(sum(context_weights))
        if f_w!=None:
            f_w.write(','.join([str(i[0]) for i in norm_weights])+'\n')
        print ('normalized weight: \n  {0}'.format(norm_weights))
        # weighted sum of the contexts
        context_avg = sum(norm_weights*stacked)

    # show the neighbours of the new embedding
    print('producing top {0} words for new embedding'.format(n_result))
    if index2word_target==None:
        neighbour_matrix, neighbour_index = w_filter, index2word
    else:
        # target-space neighbours for context2vec-skipgram
        neighbour_matrix, neighbour_index = w_target, index2word_target
    produce_top_n_simwords(neighbour_matrix,context_avg,n_result,neighbour_index,debug=True)

    return context_avg




In [4]:
def filter_w(w,word2index,index2word):
    """Drop English stopwords from an embedding matrix and rebuild its indices.

    w is indexed by the values of word2index. Every key of word2index that is
    not an NLTK English stopword keeps its row; the kept rows are renumbered
    from 0 in iteration order. The index2word argument is not read.

    Returns (w_filter, word2index_filter, index2word_filter).
    """
    stop_text=stopwords.words('english')
    # a set gives constant-time lookups for every vocabulary word; both the
    # plain and the utf-8 encoded forms are kept so the test matches whichever
    # string type the vocabulary keys use
    stopw=set(stop_text)
    stopw.update(word.encode('utf-8') for word in stop_text)
    index2word_filter={}
    word2index_filter={}
    index_filter2index=[]
    counter=0
    for word in word2index:
        if word in stopw:
            continue
        index_filter2index.append(word2index[word])
        word2index_filter[word]=counter
        index2word_filter[counter]=word
        counter+=1
    w_filter= w[index_filter2index,:]
    return w_filter,word2index_filter,index2word_filter

def rm_stopw_context(model):
    """Return a plain dict word -> vector for every non-stopword in model.wv.vocab.

    Stopwords are the NLTK English list.
    """
    stop_text=stopwords.words('english')
    # set lookup instead of scanning a list for every vocabulary word; both
    # the plain and the utf-8 encoded forms are kept so the test matches
    # whichever string type the vocabulary keys use
    stopw=set(stop_text)
    stopw.update(word.encode('utf-8') for word in stop_text)

    model2={word:model.wv[word] for word in model.wv.vocab if word not in stopw}
    return model2




In [94]:
def preprocess_nonce(sent):
    """Expand a sentence with several '___ ' slots into one variant per slot.

    For each occurrence of '___ ' a copy of the sentence is produced in which
    every other occurrence is removed, and ' .' is appended. The variants are
    joined with ' @@ '. A sentence without any occurrence yields ''.
    """
    spans = [(found.start(0), found.end(0)) for found in re.finditer('___ ',sent)]
    variants = []
    for keep in range(len(spans)):
        masked = sent
        # delete from right to left so the earlier offsets stay valid
        for slot in range(len(spans) - 1, -1, -1):
            if slot == keep:
                continue
            begin, finish = spans[slot]
            masked = masked[:begin] + masked[finish:]
        variants.append(masked + ' .')
    return ' @@ '.join(variants)

def eval_nonce(nonce_data_f,context_model,model_w2v,model_type,n_result,w,index2word,word2index,weight=False,w2entropy=None,w_target=None,word2index_target=None,index2word_target=None):
    """Evaluate on the nonce (definitional) data set and report MRR and median rank.

    nonce_data_f is a tab-separated file without header; lines starting with
    '#' are ignored. Column 0 holds the nonce word and column 1 its sentence
    with '___' placeholders. Rows whose nonce is not in model_w2v are skipped.

    For each remaining row a context vector is built with additive_model (for
    model_type 'context2vec-skipgram?skipgram' the two models' vectors are
    averaged; the tuple-valued arguments are then indexed with 0 and 1). The
    rank of the nonce among all neighbours of that vector in model_w2v gives
    the reciprocal rank.

    Prints the running and final MRR and the median rank.
    Returns the list of ranks of the nonces that were found.
    """
    ranks = []
    mrr = 0.0
    data=pd.read_csv(os.path.join(nonce_data_f),delimiter='\t',header=None,comment='#')
    c = 0
    for index, row in data.iterrows():
        if index>100 and index%100==0:
            print (index)
        sents=preprocess_nonce(row[1])
        nonce=row[0]
        if nonce not in model_w2v:
            print ('{0} not known'.format(nonce))
            continue
        #compute context representation
        if model_type=='context2vec-skipgram?skipgram':
            #context2vec
            context_avg_1=additive_model(sents.lower(),'___', model_type.split('?')[0],context_model[0],n_result,w[0],index2word[0],weight[0],w2entropy[0],w_target[0],word2index_target[0],index2word_target[0])
            print ('context2vec avg embed',context_avg_1[:10])
            context_avg_2=additive_model(sents.lower(),'___', model_type.split('?')[1],context_model[1],n_result,w[1],index2word[1],weight[1],w2entropy[1],w_target[1],word2index_target[1],index2word_target[1])
            print ('skipgram avg embed', context_avg_2[:10])
            context_avg=(context_avg_1+context_avg_2)/2
            print ('context2vec avg out', context_avg[:10])
        else:
            context_avg=additive_model(sents.lower(),'___', model_type,context_model,n_result,w,index2word,weight,w2entropy,w_target,word2index_target,index2word_target)
            print ('context avg out', context_avg[:10])

        # similar_by_vector is called on a numpy vector
        if xp==cuda.cupy:
            context_avg=xp.asnumpy(context_avg)
        print ('vector norm: {0}'.format(np.linalg.norm(context_avg)))
        # MRR Rank calculation
        nns=model_w2v.similar_by_vector(context_avg,topn=len(model_w2v.wv.vocab))

        rr = 0
        for n, nn in enumerate(nns, 1):
            word = nn[0]
            if word == nonce:
                print (word)
                rr = n
                ranks.append(rr)
                # the rank is known; no need to walk the rest of the vocabulary
                break

        if rr != 0:
            mrr+=float(1)/float(rr)
        # same "rr mrr" text as a two-item print statement, valid on Python 2 and 3
        print ('{0} {1}'.format(rr,mrr))
        c+=1
    # avoid a ZeroDivisionError when every row was skipped
    mean_rr=float(mrr)/float(c) if c else float('nan')
    print ("Final MRR: ",mrr,c,mean_rr)

    print ('mediam : {0}'.format(np.median(ranks)))
    return ranks
            


def eval_chimera(chimeras_data_f,context_model,model_type,n_result,w,index2word,word2index,weight=False,w2entropy=None,w_target=None,word2index_target=None,index2word_target=None):
    """Evaluate on the chimera data set and print the average Spearman rho.

    chimeras_data_f is a tab-separated file without header. Column 1 holds the
    contexts (with '___' placeholders, joined by '@@'), column 2 the
    comma-separated probe words and column 3 the comma-separated gold ratings.

    For each row a context vector is built with additive_model (for
    model_type 'context2vec-skipgram?skipgram' the two models' vectors are
    averaged; the tuple-valued arguments are then indexed with 0 and 1), its
    cosine with every probe vector is computed, and the Spearman correlation
    between those cosines and the gold ratings is collected.

    The per-context weights written by additive_model go to a file named
    'weights_<num_sent>_<model_type>_<weight>' next to the data file, where
    num_sent is the second character of the second dot-separated field of the
    data file name.
    """
    chimeras_data_dir='/'.join(chimeras_data_f.split('/')[:-1])
    num_sent=chimeras_data_f.split('/')[-1].split('.')[1][1]
    print (chimeras_data_dir)
    print (num_sent)
    with open(chimeras_data_dir+'/weights_{0}_{1}_{2}'.format(num_sent,model_type,str(weight)),'w') as f_w:
        spearmans=[]
        data=pd.read_csv(os.path.join(chimeras_data_f),delimiter='\t',header=None)

        for index, row in data.iterrows():
            if index>100 and index%100==0:
                print (index)
            golds=[]
            model_predict=[]
            probes=[]
            #compute context representation
            if model_type=='context2vec-skipgram?skipgram':
                #context2vec
                context_avg_1=additive_model(row[1].lower(),'___', model_type.split('?')[0],context_model[0],n_result,w[0],index2word[0],weight[0],w2entropy[0],w_target[0],word2index_target[0],index2word_target[0],f_w)
                print ('context2vec avg embed',context_avg_1[:10])
                context_avg_2=additive_model(row[1].lower(),'___', model_type.split('?')[1],context_model[1],n_result,w[1],index2word[1],weight[1],w2entropy[1],w_target[1],word2index_target[1],index2word_target[1],f_w)
                print ('skipgram avg embed', context_avg_2[:10])
                context_avg=(context_avg_1+context_avg_2)/2
                print ('context2vec avg out', context_avg[:10])
                #compute probe embeddings in skipgram space
                w_out=w[1]
                w_target_out=w_target[1]
                word2index_out=word2index[1]
                word2index_target_out=word2index_target[1]

            else:
                # f_w is the last parameter of additive_model; passing it
                # first shifted every other argument by one position
                context_avg=additive_model(row[1].lower(),'___', model_type,context_model,n_result,w,index2word,weight,w2entropy,w_target,word2index_target,index2word_target,f_w)
                print ('context avg out', context_avg[:10])
                w_out=w
                w_target_out=w_target
                word2index_out=word2index
                word2index_target_out=word2index_target

            context_avg = context_avg / xp.sqrt((context_avg * context_avg).sum())

            #cosine similarity with probe embedding
            for gold,probe in zip(row[3].split(','),row[2].split(',')):
                try:
                    if word2index_target_out==None:
                        probe_w_vec=xp.array(w_out[word2index_out[probe]])
                    else:
                        probe_w_vec=xp.array(w_target_out[word2index_target_out[probe]])
                    probe_w_vec=probe_w_vec/xp.sqrt((probe_w_vec*probe_w_vec).sum())
                    cos=probe_w_vec.dot(context_avg)
                    if xp.isnan(cos):
                        continue
                    else:
                        model_predict.append(cos)
                        golds.append(gold)
                        probes.append(probe)
                except KeyError as e:
                    print ("====warning key error for probe=====: {0}".format(e))
            print ('probes',probes)
            print ('gold',golds)
            print ('model_predict',model_predict)
            sp=spearmanr(golds,model_predict)[0]
            print ('spearman correlation is {0}'.format(sp))
            if not math.isnan(sp):
                spearmans.append(sp)
        if spearmans:
            print ("AVERAGE RHO:",float(sum(spearmans))/float(len(spearmans)))
        else:
            # nothing to average; avoid a ZeroDivisionError
            print ('no valid spearman correlation was computed')

In [92]:
# Names of the supported weighting modes. The functions in this notebook
# compare their `weight` argument against these constants.
TOP_MUTUAL_SIM='top_mutual_sim'
TOP_CLUSTER_DENSITY='top_cluster_density'
LDA='lda'
INVERSE_S_FREQ='inverse_s_freq'
# NOTE(review): value reads 'inverse_w_q' while commented-out settings below
# use 'inverse_w_freq' -- confirm which spelling is intended
INVERSE_W_FREQ='inverse_w_q'
SUBSTITUTE_PROB='substitute_prob'
# Integer code (as given on the command line) -> weighting mode; 0 means no weighting.
WEIGHT_DICT={0:False,1:TOP_MUTUAL_SIM,2:LDA,3:INVERSE_S_FREQ,4:INVERSE_W_FREQ,5:TOP_CLUSTER_DENSITY, 6:SUBSTITUTE_PROB}


if __name__=="__main__":
    # Set-up: read the run parameters, pick the array backend, load the
    # model(s), filter stopwords out of the target vocabularies and load the
    # optional per-word salience weights. Everything assigned here is a
    # module-level name used by the functions and cells of this notebook.

    #params read in
    # Inside the notebook kernel sys.argv[0] is the ipykernel launcher; match
    # on its file name instead of one machine's absolute install path.
    if sys.argv[0].endswith('ipykernel_launcher.py'):

#         data='./eval_data/data-chimeras/dataset.l2.fixed.test.txt.punct'
        data='./eval_data/data-nonces/n2v.definitional.dataset.train.txt'
        weight=WEIGHT_DICT[0]
        gpu=1
#         ##context2vec
##         model_param_file='../models/context2vec/model_dir/context2vec.ukwac.model.params'
#         model_param_file='../models/context2vec/model_dir/MODEL-wiki.params.14'

        model_type='skipgram'

####skipgram
        if model_type=='skipgram':
            model_param_file='../models/wiki_all.model/wiki_all.sent.split.model'
    #         weight='inverse_w_freq'
    #         w2salience_f='../corpora/corpora/wiki.all.utf8.sent.split.tokenized.vocab'
    #         w2salience_f='../models/lda/w2entropy'
            n_result=20
            w2salience_f=None
        elif model_type=='context2vec-skipgram?skipgram':
            model_param_file='../models/context2vec/model_dir/MODEL-wiki.params.14?../models/wiki_all.model/wiki_all.sent.split.model'
    #         model_param_file='../models/context2vec/model_dir/context2vec.ukwac.model.params?../models/wiki_all.model/wiki_all.sent.split.model'
            n_result=20
            w2salience_f=None
        elif model_type=='skipgram?context2vec-skipgram':
        #####skipgram?context2vec-skipgram
            model_param_file='../models/context2vec/model_dir/MODEL-wiki.params.14?../models/wiki_all.model/wiki_all.sent.split.model'
    #         weight='inverse_w_freq'
    #         w2salience_f='../corpora/corpora/wiki.all.utf8.sent.split.tokenized.vocab'
    #         w2salience_f='../models/lda/w2entropy'
            n_result=20
            w2salience_f=None

    else:
        if len(sys.argv) < 6:
            # written directly to stderr; same text as the former print >> statement
            sys.stderr.write("Usage: {0} <model_param_file> <model_type: context2vec; context2vec-skipgram (context2vec substitutes in skipgram space); context2vec-skipgram?skipgram (context2vec substitutes in skipgram space plus skipgram context words)> <weight:{1}> <eval_data> <gpu> <w2salience> "  .format (sys.argv[0],WEIGHT_DICT.items())+'\n')
            sys.exit(1)

        model_param_file = sys.argv[1]
        model_type=sys.argv[2]

        # the weight argument is either "<code>" or "<code>-<n_result>"
        if '-' in sys.argv[3]:
            weight,n_result=sys.argv[3].split('-')
            weight=WEIGHT_DICT[int(weight)]
            n_result=int(n_result)
        else:
            weight=WEIGHT_DICT[int(sys.argv[3])]
            n_result=20 #default is 20 top

#         context_rm_stopw=int(sys.argv[4])
        data =sys.argv[4]

        gpu=int(sys.argv[5])

        if len(sys.argv)>6:
            # was the bare name `argv`, which is not defined anywhere
            w2salience_f=sys.argv[6]
        else:
            w2salience_f=None

    #gpu setup
    if gpu >= 0:
        cuda.check_cuda_available()
        cuda.get_device(gpu).use()
    # array backend used by every function in this notebook
    xp = cuda.cupy if gpu >= 0 else np

    # logging
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)


    #choose model type
    print ('read model....')
    if model_type=='context2vec':
        #read in model

        model_reader = ModelReader(model_param_file,gpu)
        w = xp.array(model_reader.w)
        index2word = model_reader.index2word
        word2index=model_reader.word2index
        model = model_reader.model
        w_target=None
        word2index_target=None
        index2word_target=None

    elif model_type=='skipgram':
        model_w2v = gensim.models.Word2Vec.load(model_param_file)
        w=xp.array(deepcopy(model_w2v.wv.vectors))
        #vector normalize for target w embedding, consistent with context2vec w and convenient for cosine computation among substitutes
        s = xp.sqrt((w * w).sum(1))
        s[s==0.] = 1.
        w /= s.reshape((s.shape[0], 1))

        index2word=model_w2v.wv.index2word
        word2index={key: model_w2v.wv.vocab[key].index for key in model_w2v.wv.vocab}
        w_target=None
        word2index_target=None
        index2word_target=None

        print ('filter words for context....')

        model=rm_stopw_context(model_w2v)

    elif model_type=='context2vec-skipgram':
        model_param_context,model_param_w2v=model_param_file.split('?')
        model_reader = ModelReader(model_param_context,gpu)
        w = xp.array(model_reader.w)
        index2word = model_reader.index2word
        word2index=model_reader.word2index
        model = model_reader.model

        model_w2v = gensim.models.Word2Vec.load(model_param_w2v)
        w_target=xp.array(model_w2v.wv.vectors)
        index2word_target=model_w2v.wv.index2word
        word2index_target={key: model_w2v.wv.vocab[key].index for key in model_w2v.wv.vocab}

    elif model_type=='context2vec-skipgram?skipgram':
        model_param_context,model_param_w2v=model_param_file.split('?')
        #context2vec-skipgram
        model_reader = ModelReader(model_param_context,gpu)
        w = xp.array(model_reader.w)
        index2word = model_reader.index2word
        word2index =model_reader.word2index
        model = model_reader.model

        model_w2v = gensim.models.Word2Vec.load(model_param_w2v)
        w_target=xp.array(model_w2v.wv.vectors)
        index2word_target=model_w2v.wv.index2word
        word2index_target={key: model_w2v.wv.vocab[key].index for key in model_w2v.wv.vocab}

        # skipgram
        model_skipgram = model_w2v
        w_skipgram=xp.array(deepcopy(model_skipgram.wv.vectors))
        #vector normalize for probe w embedding
        s = xp.sqrt((w_skipgram * w_skipgram).sum(1))
        s[s==0.] = 1.
        w_skipgram /= s.reshape((s.shape[0], 1))

        index2word_skipgram=model_skipgram.wv.index2word
        word2index_skipgram={key: model_skipgram.wv.vocab[key].index for key in model_skipgram.wv.vocab}
        w_target_skipgram=None
        word2index_target_skipgram=None
        index2word_target_skipgram=None

        print ('filter words for context....')

        model_skipgram=rm_stopw_context(model_skipgram)

    else:
        # without this the script went on and failed with a NameError on `w`
        print ('model type {0} not recognized'.format(model_type))
        sys.exit(1)


    #remove stop words in target word space and asarray
    print ('filter words for target....')
    w,word2index,index2word=filter_w(w,word2index,index2word)
    if  index2word_target!=None:
        w_target,word2index_target,index2word_target=filter_w(w_target,word2index_target,index2word_target)
    if model_type=='context2vec-skipgram?skipgram':
        w_skipgram,word2index_skipgram,index2word_skipgram=filter_w(w_skipgram,word2index_skipgram,index2word_skipgram)

    #per word weight
    w2salience=None
    if weight==LDA:
        print ('load vectors and entropy')
        # SECURITY: pickle.load executes code embedded in the file; only load
        # salience files from a trusted source. The file is now closed after
        # reading and opened in binary mode.
        with open(w2salience_f,'rb') as f_salience:
            w2salience=pickle.load(f_salience)
    elif weight in (INVERSE_W_FREQ,INVERSE_S_FREQ):
        print ('load w2freq')
        w2salience=load_w2salience(w2salience_f,weight)


    #combine parameters for context2vec-skipgram?skipgram: each name becomes a
    #pair (context2vec-skipgram value, skipgram value)
    if model_type=='context2vec-skipgram?skipgram':
        model=(model,model_skipgram)
        w=(w,w_skipgram)
        index2word=(index2word,index2word_skipgram)
        word2index=(word2index,word2index_skipgram)
        weight=(weight,WEIGHT_DICT[0])#assume that skipgram has no weight
        w2salience=(w2salience,w2salience)
        w_target=(w_target,w_target_skipgram)
        word2index_target=(word2index_target,word2index_target_skipgram)
        index2word_target=(index2word_target,index2word_target_skipgram)

    print (model_param_file,model_type,weight,data,w2salience_f)


read model....
filter words for context....
filter words for target....
('../models/wiki_all.model/wiki_all.sent.split.model', 'skipgram', False, './eval_data/data-nonces/n2v.definitional.dataset.train.txt', None)


In [76]:
# Scratch check: round-trips a small array through xp.asnumpy. Needs `xp` from
# the set-up cell; presumably only works when xp is cupy (gpu >= 0) -- confirm
# before relying on it in a fresh run.
xp.asnumpy(xp.array([1,2]))

array([1, 2])

In [93]:
    #read in data
    # Runs the evaluation selected by the name of the data file's parent
    # directory and prints the elapsed wall-clock time. Uses the globals
    # prepared by the set-up cell (data, model, model_type, w, ...).
    import time
    start_time = time.time()
    
    
    # 'data-chimeras' -> Spearman evaluation against probe ratings
    if data.split('/')[-2]== 'data-chimeras':

            eval_chimera(data,model,model_type,n_result,w,index2word,word2index,weight,w2salience,w_target,word2index_target,index2word_target)

    # 'data-nonces' -> MRR evaluation; model_w2v must have been loaded by the set-up cell
    elif data.split('/')[-2]== 'data-nonces':
            ranks=eval_nonce(data,model,model_w2v,model_type,n_result,w,index2word,word2index,weight,w2salience,w_target,word2index_target,index2word_target)
    # any other directory name runs nothing and only the timing line is printed
    print("--- %s seconds ---" % (time.time() - start_time))

0
 ___  is an inwardly directed emotion that carries two common meanings  .
('per word weights', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('context_embed original', array([ 0.79432904, -0.25426867, -0.25418369, -0.76199849,  1.25186341,
       -0.39327487,  0.13843274,  0.11596303, -0.03449698,  0.26046094]))
('context_embed_out', array([ 0.79432904, -0.25426867, -0.25418369, -0.76199849,  1.25186341,
       -0.39327487,  0.13843274,  0.11596303, -0.03449698,  0.26046094]))
('3', <type 'cupy.core.core.ndarray'>)
('weight mode', False)
('context_embedtype', <type 'cupy.core.core.ndarray'>)
('context_weights', [7.0])
producing top 20 words for new embedding
producing top 20 simwords
inwardly: 0.7999344806775163
meanings: 0.7882873256849461
emotion: 0.7806941378751409
emotions: 0.7692245591269006
connotative: 0.7684925647606728
conveys: 0.7626288127958546
directed: 0.7581507875460088
hopefulness: 0.7563031288965952
groundcolour: 0.7537158166299638
interneural: 0.7535815994685118
interiority: 



pride
99913 1.00087075756e-05
1
 ___  is a form of semi permanent hair removal which removes the hair from the root  .
('per word weights', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('context_embed original', array([ 0.94888639,  1.31163059, -0.50962209, -1.39630762,  0.25211979,
       -0.14729566, -0.77188834,  0.2900049 , -0.13358229,  0.51609061]))
('context_embed_out', array([ 0.94888639,  1.31163059, -0.50962209, -1.39630762,  0.25211979,
       -0.14729566, -0.77188834,  0.2900049 , -0.13358229,  0.51609061]))
('3', <type 'cupy.core.core.ndarray'>)
('weight mode', False)
('context_embedtype', <type 'cupy.core.core.ndarray'>)
('context_weights', [8.0])
producing top 20 words for new embedding
producing top 20 simwords
hair: 0.8611294432018397
skin: 0.7793710018611788
removes: 0.7766888060457521
removal: 0.7754887551079086
sloughing: 0.7721940088259063
permed: 0.7686139007859087
conk: 0.7676407881349901
regrows: 0.7652004920767349
hairs: 0.7648189554697824
forelock: 0.760587510346

scattering
1127 0.00137288765678
5
 ___  means small boy or child in the spanish language  .
('per word weights', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('context_embed original', array([-1.32520236,  0.30466162, -0.74345084, -1.00517324,  0.32056738,
        0.47383278, -0.2513013 , -0.0214376 ,  0.81346865, -0.22329051]))
('context_embed_out', array([-1.32520236,  0.30466162, -0.74345084, -1.00517324,  0.32056738,
        0.47383278, -0.2513013 , -0.0214376 ,  0.81346865, -0.22329051]))
('3', <type 'cupy.core.core.ndarray'>)
('weight mode', False)
('context_embedtype', <type 'cupy.core.core.ndarray'>)
('context_weights', [6.0])
producing top 20 words for new embedding
producing top 20 simwords
language: 0.8107781829938852
child: 0.8080988437550531
boy: 0.7867019715865207
spanish: 0.780738089091831
means: 0.772545237176486
girl: 0.76675885771924
afaan: 0.7571245528523608
sranan: 0.7567002120702339
euskara: 0.7530973885466647
pashtu: 0.7493367664155242
quichua: 0.7446756416333089
nawat: 0.744

needlework
28 0.491857128237
10
 ___  was the name given to two music focused online services  .
('per word weights', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('context_embed original', array([-0.76068512, -0.29181387, -0.32500441,  0.50229737,  0.71529116,
       -0.46951205,  0.3307074 , -0.02007996,  0.24418139, -0.19325032]))
('context_embed_out', array([-0.76068512, -0.29181387, -0.32500441,  0.50229737,  0.71529116,
       -0.46951205,  0.3307074 , -0.02007996,  0.24418139, -0.19325032]))
('3', <type 'cupy.core.core.ndarray'>)
('weight mode', False)
('context_embedtype', <type 'cupy.core.core.ndarray'>)
('context_weights', [7.0])
producing top 20 words for new embedding
producing top 20 simwords
online: 0.7908063376979906
music: 0.7827702559201372
babelgum: 0.7663703436873714
focused: 0.7641003963362214
services: 0.7579729458389786
1977-2010: 0.7574624424959631
given: 0.7550037584323233
asiavision: 0.7546952989961366
name: 0.7525729487675368
xbiz: 0.7477213643586584
website: 0.743962

heaven
367 0.494633593019
13
 ___  is a series of supercomputers designed and assembled by the centre for development of advanced computing c dac in pune india  .
('per word weights', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('context_embed original', array([-0.78334054, -0.8390517 ,  0.46454122, -0.71607614, -1.51228627,
       -2.21947449,  0.2124862 , -0.77562122,  2.10629128, -0.8393361 ]))
('context_embed_out', array([-0.78334054, -0.8390517 ,  0.46454122, -0.71607614, -1.51228627,
       -2.21947449,  0.2124862 , -0.77562122,  2.10629128, -0.8393361 ]))
('3', <type 'cupy.core.core.ndarray'>)
('weight mode', False)
('context_embedtype', <type 'cupy.core.core.ndarray'>)
('context_weights', [12.0])
producing top 20 words for new embedding
producing top 20 simwords
aptech: 0.8087679114921986
imrb: 0.8075232195472379
ihrd: 0.8022791086253331
coep: 0.797724935140973
iiser: 0.7975442748968744
stpi: 0.7955458563312581
teragrid: 0.794048709792009
npti: 0.7939117582687

kew
8467 0.705347316698
18
 ___  is an anglicisation of the scots haly ruid holy cross  .
('per word weights', [1.0, 1.0, 1.0, 1.0, 1.0])
('context_embed original', array([ 0.11515985, -0.05092442,  1.52270974, -1.83577566, -0.50970956,
       -0.41604559, -0.94852628, -0.01918735, -0.64712587, -0.69012797]))
('context_embed_out', array([ 0.11515985, -0.05092442,  1.52270974, -1.83577566, -0.50970956,
       -0.41604559, -0.94852628, -0.01918735, -0.64712587, -0.69012797]))
('3', <type 'cupy.core.core.ndarray'>)
('weight mode', False)
('context_embedtype', <type 'cupy.core.core.ndarray'>)
('context_weights', [5.0])
producing top 20 words for new embedding
producing top 20 simwords
anglicisation: 0.8222803686669402
scots: 0.8139362532031704
haly: 0.8011396869850937
holy: 0.77907217550632
cross: 0.7771178540022592
loughmoe: 0.7735449563604442
albanach: 0.7716653200042989
cathair: 0.7708680504256946
maol: 0.767788556836644
tuathal: 0.7662892865372246
bewcastle: 0.7647528472100272
cainnech

surbiton
181 0.756097976501
23
 ___  is a city and the county town of hampshire  .
('per word weights', [1.0, 1.0, 1.0, 1.0])
('context_embed original', array([ 0.35283998, -0.49776194, -0.04821746, -0.83089168, -0.12229645,
       -1.06563809, -1.35938233, -0.18101781,  0.84430812,  0.42096535]))
('context_embed_out', array([ 0.35283998, -0.49776194, -0.04821746, -0.83089168, -0.12229645,
       -1.06563809, -1.35938233, -0.18101781,  0.84430812,  0.42096535]))
('3', <type 'cupy.core.core.ndarray'>)
('weight mode', False)
('context_embedtype', <type 'cupy.core.core.ndarray'>)
('context_weights', [4.0])
producing top 20 words for new embedding
producing top 20 simwords
county: 0.893904578306215
town: 0.8689334406091753
city: 0.866417024690239
hampshire: 0.8294347885882571
somersworth: 0.8169204406233392
harrietstown: 0.8132117656322511
willsboro: 0.804948973674114
wawarsing: 0.798782910707407
francestown: 0.7973487587191721
lyndeborough: 0.7908400231619566
stratham: 0.7875840398667078


knitting
142 0.764390223843
28
 ___  is aromatic biotic material which releases fragrant smoke when burned  .
('per word weights', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('context_embed original', array([ 1.04505855,  0.40301503, -0.47866114, -0.59219648,  0.08658695,
        0.42006959, -0.6034055 ,  0.16837986,  1.02536014,  0.97063569]))
('context_embed_out', array([ 1.04505855,  0.40301503, -0.47866114, -0.59219648,  0.08658695,
        0.42006959, -0.6034055 ,  0.16837986,  1.02536014,  0.97063569]))
('3', <type 'cupy.core.core.ndarray'>)
('weight mode', False)
('context_embedtype', <type 'cupy.core.core.ndarray'>)
('context_weights', [7.0])
producing top 20 words for new embedding
producing top 20 simwords
aromatic: 0.8622604282893098
fragrant: 0.8569806231695989
terpenes: 0.8338150251103372
resinous: 0.8249195083705743
terpene: 0.8218723376777975
saponin: 0.8213660571254607
polyphenolic: 0.8211286969689842
linalool: 0.8203767282590637
rutin: 0.817539814791743
musks: 0.815389505948

industrialisation
33 1.0643411317
33
 ___  limburgish mestreech french maestricht spanish mastrique is a town and a municipality in the southeast of the european country the netherlands  .
('per word weights', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('context_embed original', array([-0.84395005, -0.24430103,  0.81405185,  0.32364276, -0.67276691,
       -0.05416395, -1.45424782, -0.01520694,  1.72217915, -0.75898351]))
('context_embed_out', array([-0.84395005, -0.24430103,  0.81405185,  0.32364276, -0.67276691,
       -0.05416395, -1.45424782, -0.01520694,  1.72217915, -0.75898351]))
('3', <type 'cupy.core.core.ndarray'>)
('weight mode', False)
('context_embedtype', <type 'cupy.core.core.ndarray'>)
('context_weights', [9.0])
producing top 20 words for new embedding
producing top 20 simwords
denderstreek: 0.8427021071009042
voeren: 0.8294328009905025
dilbeek: 0.8208187744231812
oldambt: 0.8197275691553048
limburgs: 0.8181196202118792
deelgemeente: 0.8169528709128082
heeze: 0.8135

refraction
82 1.08044806401
37
 ___  is a thin paper like material made from the pith of the plant cyperus a wetland sedge that was once abundant in the sudd of southern sudan along with the nile delta of egypt  .
('per word weights', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('context_embed original', array([ 2.30666774,  1.21689359,  0.99625879, -2.7442794 , -0.03814735,
        0.78426525, -1.66793234,  1.92984993, -0.75994772,  0.9457988 ]))
('context_embed_out', array([ 2.30666774,  1.21689359,  0.99625879, -2.7442794 , -0.03814735,
        0.78426525, -1.66793234,  1.92984993, -0.75994772,  0.9457988 ]))
('3', <type 'cupy.core.core.ndarray'>)
('weight mode', False)
('context_embedtype', <type 'cupy.core.core.ndarray'>)
is a thin paper like material made from the pith of the  ___  plant cyperus a wetland sedge that was once abundant in the sudd of southern sudan along with the nile delta of egypt  .
('per word weights', [1.0, 1.0, 

portsmouth
751 1.08196844449
40
 ___  or usefulness is the perceived ability of something to satisfy needs or wants  .
('per word weights', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('context_embed original', array([ 0.78819004,  1.2119654 , -1.37741803, -1.39229351,  0.67650049,
       -0.17133095, -0.07837855, -0.83816479, -0.68469913,  0.10487291]))
('context_embed_out', array([ 0.78819004,  1.2119654 , -1.37741803, -1.39229351,  0.67650049,
       -0.17133095, -0.07837855, -0.83816479, -0.68469913,  0.10487291]))
('3', <type 'cupy.core.core.ndarray'>)
('weight mode', False)
('context_embedtype', <type 'cupy.core.core.ndarray'>)
('context_weights', [7.0])
producing top 20 words for new embedding
producing top 20 simwords
needs: 0.8440691210764573
wants: 0.8317902022045065
ability: 0.8205192004223192
desires: 0.8104363382889546
want: 0.79858702760884
understands: 0.7824217500173147
thinks: 0.7816180565533751
need: 0.7796058096131986
desire: 0.7791854372237479
perceives: 0.7785509022380886

inverclyde
197 1.10063161098
45
 ___  is the preaching of the gospel or the practice of giving information about a particular doctrine or set of beliefs to others with the intention of converting or conversion of others to the christian faith  .
('per word weights', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('context_embed original', array([ 0.02870932, -0.84273499, -1.1325975 ,  1.02794142,  1.69649644,
        0.83010381, -0.05022299,  0.18525777, -0.56187328, -0.42578957]))
('context_embed_out', array([ 0.02870932, -0.84273499, -1.1325975 ,  1.02794142,  1.69649644,
        0.83010381, -0.05022299,  0.18525777, -0.56187328, -0.42578957]))
('3', <type 'cupy.core.core.ndarray'>)
('weight mode', False)
('context_embedtype', <type 'cupy.core.core.ndarray'>)
('context_weights', [16.0])
producing top 20 words for new embedding
producing top 20 simwords
faith: 0.8413334741176275
beliefs: 0.8104963586074214
teachings: 0.8019300135644808
preaching: 0.8

hawaii
1049 1.14043470812
49
 ___  is the systematic destruction of all or a significant part of a racial ethnic religious or national group  .
('per word weights', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('context_embed original', array([-1.2548502 , -0.40046015, -0.71322287,  0.58638947,  0.25331135,
       -1.02049924, -0.29729992, -1.46769691,  0.60626917, -1.28670338]))
('context_embed_out', array([-1.2548502 , -0.40046015, -0.71322287,  0.58638947,  0.25331135,
       -1.02049924, -0.29729992, -1.46769691,  0.60626917, -1.28670338]))
('3', <type 'cupy.core.core.ndarray'>)
('weight mode', False)
('context_embedtype', <type 'cupy.core.core.ndarray'>)
('context_weights', [9.0])
producing top 20 words for new embedding
producing top 20 simwords
ethnic: 0.839804252045476
racial: 0.81602245286243
religious: 0.8006035910836176
ethnical: 0.7816684941483087
ethno: 0.7808435941831107
minorities: 0.7802571483399513
systematic: 0.775774742058504
racialization: 0.7752547038938256
cultu

dodge
2683 1.24228396559
54
 ___  formerly spelled wykeham is a small historic village and civil parish in hampshire southern england located about three miles north of fareham  .
('per word weights', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('context_embed original', array([ 0.7456612 , -3.81980653,  1.15890369, -2.16431139, -1.30058488,
       -2.57150335, -2.15217293, -0.18809796,  0.62610362, -0.1733796 ]))
('context_embed_out', array([ 0.7456612 , -3.81980653,  1.15890369, -2.16431139, -1.30058488,
       -2.57150335, -2.15217293, -0.18809796,  0.62610362, -0.1733796 ]))
('3', <type 'cupy.core.core.ndarray'>)
('weight mode', False)
('context_embedtype', <type 'cupy.core.core.ndarray'>)
('context_weights', [16.0])
producing top 20 words for new embedding
producing top 20 simwords
leckford: 0.8494808315727556
scawby: 0.8411366291222915
perranzabuloe: 0.8398724267419158
shinfield: 0.8379825925321477
mickleton: 0.8375534599052308
whitstone: 0.8

fascism
58 1.29700782477
59
 ___  abbreviated from american standard code for information interchange is a character encoding scheme  .
('per word weights', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('context_embed original', array([-0.23822558, -0.99803961, -0.74180568, -0.0149061 , -1.21746975,
       -1.10543484, -0.2611392 ,  0.40880043,  1.85768812, -1.04685227]))
('context_embed_out', array([-0.23822558, -0.99803961, -0.74180568, -0.0149061 , -1.21746975,
       -1.10543484, -0.2611392 ,  0.40880043,  1.85768812, -1.04685227]))
('3', <type 'cupy.core.core.ndarray'>)
('weight mode', False)
('context_embedtype', <type 'cupy.core.core.ndarray'>)
('context_weights', [9.0])
producing top 20 words for new embedding
producing top 20 simwords
encoding: 0.819957796494692
code: 0.8037022249902552
coding: 0.797229636049982
iscii: 0.7925542938346914
26262: 0.7895445999256477
639-1: 0.7836575284232421
edifact: 0.7823512554012504
autosar: 0.7800157001290422
140-2: 0.7791156375908412
incit

candy
20 1.35758328546
63
 ___  is a district of south west london within the london borough of  .
('per word weights', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('context_embed original', array([ 1.77063454,  0.03001391, -1.03582431,  0.2603986 , -1.0891891 ,
       -1.01410849, -0.51857766, -0.36885352,  0.68220218, -0.26208202]))
('context_embed_out', array([ 1.77063454,  0.03001391, -1.03582431,  0.2603986 , -1.0891891 ,
       -1.01410849, -0.51857766, -0.36885352,  0.68220218, -0.26208202]))
('3', <type 'cupy.core.core.ndarray'>)
('weight mode', False)
('context_embedtype', <type 'cupy.core.core.ndarray'>)
is a district of south west london within the london borough of  ___   .
('per word weights', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('context_embed original', array([ 1.77063454,  0.03001391, -1.03582431,  0.2603986 , -1.0891891 ,
       -1.01410849, -0.51857766, -0.36885352,  0.68220218, -0.26208202]))
('context_embed_out', array([ 1.77063454,  0.03001391, -1.03582431,  0.2603986 , 

holland
4351 1.37148191435
67
 ___  or is the day of the week following friday and preceding sunday  .
('per word weights', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('context_embed original', array([ 0.21585111, -0.25725117,  0.78148122,  0.86785953,  1.28435724,
       -0.05363486,  0.01779146,  1.1008184 ,  1.28213986, -0.26247363]))
('context_embed_out', array([ 0.21585111, -0.25725117,  0.78148122,  0.86785953,  1.28435724,
       -0.05363486,  0.01779146,  1.1008184 ,  1.28213986, -0.26247363]))
('3', <type 'cupy.core.core.ndarray'>)
('weight mode', False)
('context_embedtype', <type 'cupy.core.core.ndarray'>)
('context_weights', [6.0])
producing top 20 words for new embedding
producing top 20 simwords
sunday: 0.8733060849547026
friday: 0.872279399279015
monday: 0.8651872465186858
thursday: 0.8636468886477429
saturday: 0.856833002059255
day: 0.8517265497889079
week: 0.8508147051929552
tuesday: 0.8431882365988286
wednesday: 0.8271264351817655
morning: 0.8086040074473471
weekend: 0.808543569

paranormal
5394 1.65539322013
72
 ___  is a chemical element with symbol i and atomic number 53  .
('per word weights', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('context_embed original', array([-0.03179501,  0.45248215, -0.50858312, -0.11370138,  0.07146294,
       -0.84148238, -0.13966478,  0.23905833,  0.77285884, -0.32538947]))
('context_embed_out', array([-0.03179501,  0.45248215, -0.50858312, -0.11370138,  0.07146294,
       -0.84148238, -0.13966478,  0.23905833,  0.77285884, -0.32538947]))
('3', <type 'cupy.core.core.ndarray'>)
('weight mode', False)
('context_embedtype', <type 'cupy.core.core.ndarray'>)
('context_weights', [6.0])
producing top 20 words for new embedding
producing top 20 simwords
atomic: 0.8188097081208456
chemical: 0.8086421878104606
element: 0.7688981296657705
fermium: 0.7658015163248548
seaborgium: 0.7656553496821932
darmstadtium: 0.7651173141416083
rutherfordium: 0.7641769814997044
bohrium: 0.7641551741421024
ununseptium: 0.7630710706036087
roentgenium: 0.76075267298

flour
33 2.68597944974
77
 ___  often abbreviated to chemo and sometimes ctx or ctx is a category of cancer treatment that uses chemical substances especially one or more anti cancer drugs chemotherapeutic agents that are given as part of a standardized regimen  .
('per word weights', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('context_embed original', array([-2.99942796,  1.42865107,  0.28370716, -0.40396592, -1.13518089,
       -1.11562847, -3.02218296,  0.56318833,  3.6374354 , -1.27860309]))
('context_embed_out', array([-2.99942796,  1.42865107,  0.28370716, -0.40396592, -1.13518089,
       -1.11562847, -3.02218296,  0.56318833,  3.6374354 , -1.27860309]))
('3', <type 'cupy.core.core.ndarray'>)
('weight mode', False)
('context_embedtype', <type 'cupy.core.core.ndarray'>)
often abbreviated to chemo and sometimes ctx or ctx is a category of cancer treatment that uses chemical substances especially one or more 

remarkable
222008 2.68830329087
80
 ___  or is a calcium carbonate or lime rich mud or mudstone which contains variable amounts of clays and silt  .
('per word weights', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('context_embed original', array([ 0.21991662, -2.45047771, -0.17930065, -2.2364975 ,  0.82038497,
        1.39421163, -0.84265464,  0.93216126, -0.16921205,  0.98816359]))
('context_embed_out', array([ 0.21991662, -2.45047771, -0.17930065, -2.2364975 ,  0.82038497,
        1.39421163, -0.84265464,  0.93216126, -0.16921205,  0.98816359]))
('3', <type 'cupy.core.core.ndarray'>)
('weight mode', False)
('context_embedtype', <type 'cupy.core.core.ndarray'>)
or  ___  is a calcium carbonate or lime rich mud or mudstone which contains variable amounts of clays and silt  .
('per word weights', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('context_embed original', array([ 0.21991662, -2.45047771, -0.17930065, -2.2364975 ,  0.82038497,
        1.39421163, -0.8

dominoes
3 3.02790181755
84
 ___  marketed as is one of the earliest arcade video games it is a tennis sports game featuring simple two dimensional graphics  .
('per word weights', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('context_embed original', array([-1.28333808, -0.47983714,  0.23375413,  0.48868254,  0.71980425,
       -2.63745444, -0.04957249,  0.75062905,  0.79340674,  2.30085867]))
('context_embed_out', array([-1.28333808, -0.47983714,  0.23375413,  0.48868254,  0.71980425,
       -2.63745444, -0.04957249,  0.75062905,  0.79340674,  2.30085867]))
('3', <type 'cupy.core.core.ndarray'>)
('weight mode', False)
('context_embedtype', <type 'cupy.core.core.ndarray'>)
marketed as  ___  is one of the earliest arcade video games it is a tennis sports game featuring simple two dimensional graphics  .
('per word weights', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('context_embed original', array([-1.28333808, -0.47983714,  0.2

sardinia
2 3.52954692286
88
 ___  typically refers to a mixture of different gases produced by the breakdown of organic matter in the absence of oxygen  .
('per word weights', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('context_embed original', array([ 1.18134957, -0.46943762, -0.13374119, -0.43846538, -0.35799033,
        0.57779253, -0.86642105,  0.9729013 ,  0.89797549,  0.1596962 ]))
('context_embed_out', array([ 1.18134957, -0.46943762, -0.13374119, -0.43846538, -0.35799033,
        0.57779253, -0.86642105,  0.9729013 ,  0.89797549,  0.1596962 ]))
('3', <type 'cupy.core.core.ndarray'>)
('weight mode', False)
('context_embedtype', <type 'cupy.core.core.ndarray'>)
('context_weights', [11.0])
producing top 20 words for new embedding
producing top 20 simwords
polysulfides: 0.8277472573674238
digestate: 0.8228428680398998
photodegradation: 0.8200438672467849
organobromine: 0.8187190262105704
hydroperoxides: 0.8176055397467754
methanethiol: 0.8169097966530092
radiolysis: 

abergavenny
185 3.54812056865
93
 ___  is the art of creating images with an assemblage of small pieces of colored glass stone or other materials  .
('per word weights', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('context_embed original', array([-0.60984241,  0.442928  ,  0.26890923, -1.22759319,  1.68507736,
       -2.15029245, -0.795968  ,  0.20610835, -0.52740974,  1.00029541]))
('context_embed_out', array([-0.60984241,  0.442928  ,  0.26890923, -1.22759319,  1.68507736,
       -2.15029245, -0.795968  ,  0.20610835, -0.52740974,  1.00029541]))
('3', <type 'cupy.core.core.ndarray'>)
('weight mode', False)
('context_embedtype', <type 'cupy.core.core.ndarray'>)
('context_weights', [10.0])
producing top 20 words for new embedding
producing top 20 simwords
glass: 0.8322165709669812
handpainted: 0.8176813458102818
ceramic: 0.8173943621783828
semiprecious: 0.8160796686363783
intaglios: 0.8086709042715532
patinated: 0.8083052166985818
silkscreens: 0.8079513321942686
woodcarvings: 

adenocarcinoma
263 3.55373728744
97
 ___  is the most populous city in nigeria the second fastest growing city in africa and the seventh in the world  .
('per word weights', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
('context_embed original', array([ 0.48297391,  0.82376051,  0.48350553,  0.58420712, -0.14238946,
       -2.04540177, -1.13648639, -0.09205657,  0.35276502, -0.56196348]))
('context_embed_out', array([ 0.48297391,  0.82376051,  0.48350553,  0.58420712, -0.14238946,
       -2.04540177, -1.13648639, -0.09205657,  0.35276502, -0.56196348]))
('3', <type 'cupy.core.core.ndarray'>)
('weight mode', False)
('context_embedtype', <type 'cupy.core.core.ndarray'>)
('context_weights', [10.0])
producing top 20 words for new embedding
producing top 20 simwords
populous: 0.7855135777441853
city: 0.7818646792522204
africa: 0.7772822177075019
fastest: 0.7766669758295832
fourth: 0.7744795991498692
third: 0.7696171595669328
world: 0.7664508168331746
fifth: 0.7663172990332183
koinadu