In [1]:
from gensim.models import KeyedVectors
import gensim.downloader as api
import numpy as np

In [2]:
# Load the pretrained "glove-wiki-gigaword-100" vectors through gensim's
# downloader (it fetches the dataset on first use and reuses the local copy
# afterwards). Every cell below queries this object.
word_vectors = api.load("glove-wiki-gigaword-100")

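### Skip-Gram

No skip-gram model is trained in this notebook; the function below approximates the word → context direction by listing each input word's nearest neighbours in the pretrained GloVe vectors loaded above.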

In [41]:
def skipgram(text, model, topn=3):
    """Print and collect the nearest neighbours of every known word in ``text``.

    The text is lower-cased and split on whitespace. Words that are not in
    ``model`` are skipped silently. No skip-gram model is trained here: each
    word is simply looked up with ``model.most_similar``.

    Args:
        text: Whitespace-separated words to look up.
        model: Object supporting ``word in model`` and
            ``model.most_similar(word, topn=...)``.
        topn: Number of neighbours requested for each word.

    Returns:
        A dict mapping each known word to the value returned by
        ``model.most_similar``, or ``None`` when no word of ``text`` is in
        ``model``.
    """
    result = {}

    for word in text.lower().split():
        if word not in model:
            continue

        print(f"Word: {word}")

        # Query the model only once per distinct word; the same value is used
        # for the printout and for the returned dict (the neighbour search is
        # the expensive step, and it used to run twice per word).
        if word not in result:
            result[word] = model.most_similar(word, topn=topn)

        print(result[word])

    return result if result else None

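### Continuous Bag of Words

No CBOW model is trained in this notebook; the function below approximates the context → word direction by averaging the pretrained GloVe vectors of the input words and returning the vocabulary word(s) closest to that average. The result can be one of the input words themselves.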

In [42]:
def cbow(text, model, topn=1):
    """Find the word(s) closest to the average vector of the words in ``text``.

    The text is lower-cased and split on whitespace; words missing from
    ``model`` are ignored. The remaining word vectors are averaged
    element-wise and the mean is passed to ``model.similar_by_vector``.

    Args:
        text: Whitespace-separated context words.
        model: Object supporting ``word in model``, ``model[word]`` and
            ``model.similar_by_vector(vector, topn=...)``.
        topn: Number of candidates requested from the model.

    Returns:
        Whatever ``model.similar_by_vector`` returns (also printed), or
        ``None`` when none of the words is in ``model``.
    """
    tokens = text.lower().split()
    known_vectors = [model[token] for token in tokens if token in model]

    # Nothing to average: no input word is known to the model.
    if len(known_vectors) == 0:
        return None

    centroid = np.mean(known_vectors, axis=0)
    nearest = model.similar_by_vector(centroid, topn=topn)

    print(nearest)
    return nearest

### Test

Royal

In [43]:
# Sample input: five space-separated words on a royalty theme.
text1 = "king queen throne castle royal"


In [44]:
# Run both helpers on the royal-themed sample. Each helper prints its own
# results, so the return values are not used here.
print("Skip-gram results:")
skipgram(text1, word_vectors)

print("\nCBOW results:")
cbow(text1, word_vectors)

# Trailing blank line to separate this cell's output from what follows.
print()

Skip-gram results:
Word: king
[('prince', 0.7682328820228577), ('queen', 0.7507690787315369), ('son', 0.7020887136459351)]
Word: queen
[('princess', 0.7947245240211487), ('king', 0.7507691383361816), ('elizabeth', 0.7355712056159973)]
Word: throne
[('heir', 0.7213122248649597), ('king', 0.6919990181922913), ('emperor', 0.6828049421310425)]
Word: castle
[('manor', 0.7622005939483643), ('mansion', 0.6870322823524475), ('fortress', 0.6852313876152039)]
Word: royal
[('imperial', 0.7388318181037903), ('british', 0.7135697603225708), ('queen', 0.7065026164054871)]

CBOW results:
[('queen', 0.8731532096862793)]



Emotions

In [45]:
# Sample input: five space-separated emotion-related words.
text2 = "pain joy love sad angry"

In [46]:
# Run both helpers on the emotion-themed sample. Each helper prints its own
# results, so the return values are not used here.
print("Skip-gram results:")
skipgram(text2, word_vectors)

print("\nCBOW results:")
cbow(text2, word_vectors)

# Trailing blank line to separate this cell's output from what follows.
print()

Skip-gram results:
Word: pain
[('discomfort', 0.78193199634552), ('suffering', 0.774366021156311), ('pains', 0.752337634563446)]
Word: joy
[('sadness', 0.7273973226547241), ('delight', 0.708582878112793), ('sorrow', 0.7009647488594055)]
Word: love
[('me', 0.73828125), ('passion', 0.7352136373519897), ('my', 0.7327208518981934)]
Word: sad
[('sorry', 0.7547181248664856), ('awful', 0.7284239530563354), ('tragic', 0.7238599061965942)]
Word: angry
[('furious', 0.8143534660339355), ('outraged', 0.7746473550796509), ('enraged', 0.7717769742012024)]

CBOW results:
[('feeling', 0.8278279304504395)]



Idiom

In [47]:
# Sample input: a three-word idiom; note that it contains the very common word "the".
text3 = "hit the road"  

In [48]:
# Run both helpers on the idiom sample. Each helper prints its own results,
# so the return values are not used here.
print("Skip-gram results:")
skipgram(text3, word_vectors)

print("\nCBOW results:")
cbow(text3, word_vectors)

# Trailing blank line to separate this cell's output from what follows.
print()

Skip-gram results:
Word: hit
[('hits', 0.8204647898674011), ('hitting', 0.804127037525177), ('struck', 0.8035117983818054)]
Word: the
[('this', 0.8573122620582581), ('part', 0.8507950305938721), ('one', 0.8503074645996094)]
Word: road
[('highway', 0.8092237710952759), ('route', 0.7784003019332886), ('roads', 0.7482966184616089)]

CBOW results:
[('the', 0.8342995047569275)]

