## 1) Find Odd One Out

#### In this task we are given a list of words and we're supposed to find the odd one out.

In [1]:
import numpy as np
import gensim
from gensim.models import word2vec
from gensim.models import KeyedVectors
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the pretrained word2vec embeddings from the binary file in the working directory.
# Every later cell looks words up through this `word_vectors` object.
word_vectors = KeyedVectors.load_word2vec_format(
    'GoogleNews-vectors-negative300.bin',
    binary=True,
)

In [3]:
# Sample word lists fed to the odd-one-out demo in the cells below.
input_1 = [
    "apple", "mango", "juice", "party", "orange",
]
input_2 = [
    "music", "dance", "sleep", "dancer", "food",
]
input_3 = [
    "match", "player", "football", "cricket", "dancer",
]
input_4 = [
    "india", "paris", "russia", "france", "germany",
]

In [4]:
def oddOneOut(words):
    """Return the word whose vector is least similar to the average of all the words.

    Each word is looked up in the module-level ``word_vectors``, the mean of
    those vectors is computed, and the word with the lowest cosine similarity
    to that mean is reported as the odd one out. The similarity of every word
    is printed as a side effect.

    Parameters
    ----------
    words : iterable of str
        Words to compare. Every word must be present in ``word_vectors``;
        a failed lookup propagates whatever error the lookup raises.

    Returns
    -------
    str or None
        The least similar word (the first one on ties), or ``None`` when
        ``words`` is empty.
    """
    # Materialise once so generators/iterators can be traversed twice below.
    words = list(words)
    if not words:
        # Nothing to compare; also avoids averaging an empty list (NaN + warning).
        return None

    all_word_vectors = [word_vectors[w] for w in words]

    # Find average vector of all the words
    avg_vector = np.mean(all_word_vectors, axis=0)

    odd_one_out = None
    # Start at +inf rather than 1.0: a similarity of exactly 1.0 (single word,
    # identical vectors) must still be able to win the strict "<" comparison.
    min_sim = float("inf")

    for w, vec in zip(words, all_word_vectors):
        # cosine_similarity returns a (1, 1) matrix; pull out the scalar so the
        # formatting and the comparison work on a plain float.
        sim = float(cosine_similarity([vec], [avg_vector])[0][0])

        print("Similarity of %s with the average word is : %.2f"%(w,sim))
        if sim < min_sim:
            odd_one_out = w
            min_sim = sim

    return odd_one_out
        

In [5]:
# input_1 mixes fruit/drink words with "party"; the recorded run returns 'party'.
oddOneOut(input_1)

Similarity of apple with the average word is : 0.78
Similarity of mango with the average word is : 0.76
Similarity of juice with the average word is : 0.71
Similarity of party with the average word is : 0.36
Similarity of orange with the average word is : 0.65


'party'

In [6]:
# input_2 has no single clear outlier: in the recorded run "sleep" and "food" score
# within 0.01 of each other, and 'sleep' is returned as the lowest.
oddOneOut(input_2)

Similarity of music with the average word is : 0.66
Similarity of dance with the average word is : 0.81
Similarity of sleep with the average word is : 0.51
Similarity of dancer with the average word is : 0.72
Similarity of food with the average word is : 0.52


'sleep'

In [7]:
# input_3 is sports vocabulary plus "dancer"; the recorded run returns 'dancer'.
oddOneOut(input_3)

Similarity of match with the average word is : 0.58
Similarity of player with the average word is : 0.68
Similarity of football with the average word is : 0.72
Similarity of cricket with the average word is : 0.70
Similarity of dancer with the average word is : 0.53


'dancer'

In [8]:
# input_4 is country names plus the city "paris"; the recorded run returns 'paris',
# although all five similarities are high (0.75-0.84).
oddOneOut(input_4)

Similarity of india with the average word is : 0.81
Similarity of paris with the average word is : 0.75
Similarity of russia with the average word is : 0.79
Similarity of france with the average word is : 0.81
Similarity of germany with the average word is : 0.84


'paris'

## 2) Word Analogies

#### In this task we complete the sentence "if a is to b then c is to ___"

In [9]:
def predict_word(a,b,c):
    """Complete the analogy "a is to b as c is to d" and return d.

    The three inputs are lower-cased and looked up in the module-level
    ``word_vectors``. Every other vocabulary word ``w`` is then scored by the
    cosine similarity between the offsets ``b - a`` and ``w - c``; the word
    with the highest score is returned.

    This is a linear scan over the whole vocabulary with one
    ``cosine_similarity`` call per word, so it is slow on large models.

    Parameters
    ----------
    a, b, c : str
        The analogy triad. Each word (after lower-casing) must be present in
        ``word_vectors``; a failed lookup propagates the lookup's error.

    Returns
    -------
    str or None
        The best-scoring vocabulary word, excluding ``a``, ``b`` and ``c``;
        ``None`` if the vocabulary holds no other word.
    """
    a,b,c = a.lower(),b.lower(),c.lower()

    wa,wb,wc = word_vectors[a],word_vectors[b],word_vectors[c]

    # Loop-invariant pieces, computed once instead of once per vocabulary word.
    target_offset = [wb - wa]
    excluded = {a, b, c}

    # gensim >= 4 exposes the vocabulary as `key_to_index`; older releases use
    # `vocab`. Support both so the notebook runs on either version.
    vocabulary = getattr(word_vectors, "key_to_index", None)
    if vocabulary is None:
        vocabulary = word_vectors.vocab

    max_sim = float("-inf")
    d = None

    for w in vocabulary:
        if w in excluded:
            continue

        # cosine_similarity returns a (1, 1) matrix; compare on the plain scalar.
        sim = float(cosine_similarity(target_offset, [word_vectors[w] - wc])[0][0])

        if sim > max_sim:
            max_sim = sim
            d = w

    return d

In [10]:
# Analogy demo: man is to woman as prince is to ?
triad_1 = "man", "woman", "prince"
predict_word(*triad_1)

'princess'