In [1]:
from nltk.corpus import wordnet as wn
from nltk import word_tokenize, pos_tag

In [2]:
def penn_to_wn(tag):
    """
        Map a Penn Treebank POS tag onto WordNet's one-letter POS code.

        Only the start of the tag is inspected:
        'N...' -> 'n', 'V...' -> 'v', 'J...' -> 'a', 'R...' -> 'r'.
        Any other tag yields None.
    """
    # Checked in order; the first matching prefix wins.
    prefix_to_pos = (
        ('N', 'n'),
        ('V', 'v'),
        ('J', 'a'),
        ('R', 'r'),
    )
    for prefix, wn_pos in prefix_to_pos:
        if tag.startswith(prefix):
            return wn_pos
    return None
 
def tagged_to_synset(word, tag):
    """
        Return the first WordNet synset for a POS-tagged word, or None.

        word: the token to look up.
        tag:  its Penn Treebank POS tag.

        None is returned when the tag has no WordNet equivalent (see
        penn_to_wn) or when WordNet lists no synset for the word under
        that part of speech.

        Errors raised by the WordNet lookup itself (for example a missing
        corpus) are deliberately NOT swallowed, so a broken setup is not
        mistaken for "no synset found".
    """
    wn_tag = penn_to_wn(tag)
    if wn_tag is None:
        return None

    # wn.synsets returns a list; take its first entry when there is one.
    candidates = wn.synsets(word, wn_tag)
    return candidates[0] if candidates else None
    
def sentence_similarity(sentence1, sentence2):
    """
        reference: https://nlpforhackers.io/wordnet-sentence-similarity/

        Score how well the words of sentence1 are covered by sentence2,
        using WordNet path similarity.

        Each sentence is tokenized and POS-tagged, and every token is mapped
        to a synset via tagged_to_synset (tokens without one are dropped).
        For each synset of sentence1, the highest path similarity against
        the synsets of sentence2 is taken; the result is the mean of those
        best values. Synsets of sentence1 with no usable similarity value
        are left out of the mean. Returns 0 when nothing could be compared.

        The measure is directional: swapping the arguments may change the
        result.
    """
    tagged_first = pos_tag(word_tokenize(sentence1))
    tagged_second = pos_tag(word_tokenize(sentence2))

    def usable_synsets(tagged_tokens):
        # Look up every (word, tag) pair, then discard the failed lookups.
        looked_up = [tagged_to_synset(*pair) for pair in tagged_tokens]
        return [syn for syn in looked_up if syn]

    first_synsets = usable_synsets(tagged_first)
    second_synsets = usable_synsets(tagged_second)

    total, matched = 0.0, 0
    for source in first_synsets:
        # Similarity of this word against every word of the other sentence,
        # keeping only the values that are usable (truthy).
        raw_values = [source.path_similarity(target) for target in second_synsets]
        usable_values = [value for value in raw_values if value]
        if usable_values:
            total += max(usable_values)
            matched += 1

    # Nothing comparable: report zero rather than dividing by zero.
    if matched == 0:
        return 0
    return total / matched

def symmetric_sentence_similarity(sentence1, sentence2):
    """ Average sentence_similarity taken in both directions, so the
        result does not depend on argument order. """
    forward = sentence_similarity(sentence1, sentence2)
    backward = sentence_similarity(sentence2, sentence1)
    return (forward + backward) / 2

class MyCompleter(object):  # Custom completer
    """Suggest completions from a vocabulary of previously added entries.

    Entries are stored with an occurrence count. Completions can be ranked
    either by that count (complete_frequency) or by WordNet-based sentence
    similarity to the typed text (complete_similarity).
    """

    def __init__(self, options):
        """Create a completer seeded with the iterable ``options``."""
        # Maps each entry to the number of times it has been added.
        self.freq = {}
        # Candidates computed by the most recent complete_* call. Created
        # here so the attribute always exists.
        self.matches = []
        self.add(options)

    def add(self, options):
        """Record every entry of ``options``, counting repeated entries."""
        for word in options:
            self.freq[word] = self.freq.get(word, 0) + 1

    def complete_frequency(self, text, k):
        """Return up to ``k`` entries starting with ``text``, most frequent first.

        Entries with equal counts keep the order in which they are stored
        in ``self.freq``. Empty ``text`` yields an empty list.
        """
        if not text:
            # Nothing typed: do not reuse candidates from an earlier query.
            self.matches = []
            return []

        self.matches = [(key, value) for (key, value) in self.freq.items()
                        if key and key.startswith(text)]

        # the output is sorted in frequency, it depends on corpus or user,
        # we could set more weight on words which user used
        ranked = sorted(self.matches, key=lambda entry: entry[1], reverse=True)

        # Slicing already copes with fewer than k candidates.
        return [key for (key, _count) in ranked[:k]]

    def complete_similarity(self, text, k):
        """Return up to ``k`` entries most similar to ``text``.

        Every stored entry is scored with symmetric_sentence_similarity
        against ``text`` and the highest-scoring entries are returned first.
        Empty ``text`` yields an empty list.
        """
        if not text:
            # Nothing typed: do not reuse candidates from an earlier query.
            self.matches = []
            return []

        self.matches = [(key, value, symmetric_sentence_similarity(key, text))
                        for (key, value) in self.freq.items()]

        # NOTE: debug output of all scored candidates, kept so existing
        # callers see the same printed output as before.
        print(self.matches)
        ranked = sorted(self.matches, key=lambda entry: entry[2], reverse=True)

        return [key for (key, _count, _score) in ranked[:k]]
    
    
# Demo completer seeded with a small vocabulary; "hi" is listed twice, so
# its stored frequency count is 2.
completer = MyCompleter(["hello", "hi", "hi", "how", "goodbye", "great"])

In [3]:
# Ask for the two most frequent stored entries that start with "h".
print(completer.complete_frequency("h",2))

['hi', 'hello']
[('hello', 1, 0.6549075924075924), ('hi', 2, 0.6549075924075924), ('how', 1, 0.0), ('goodbye', 1, 0.15490759240759241), ('great', 1, 0.0), ('Hello Mr. Jade, how are you', 1, 0.7795138888888888)]
['Hello Mr. Jade, how are you']
[('hello', 1, 0.6946386946386947), ('hi', 2, 0.6946386946386947), ('how', 1, 0.0), ('goodbye', 1, 0.16130536130536133), ('great', 1, 0.0), ('Hello Mr. Jade, how are you', 1, 0.6313657407407407)]
['hello']


In [4]:
# Store a whole sentence as an additional completion candidate.
completer.add(["Hello Mr. Jade, how are you"])
# Rank every stored entry by similarity to the partial input and keep the
# single best one. complete_similarity also prints its scored candidates.
print(completer.complete_similarity("Hello Mr. Jade, h", 1))
print(completer.complete_similarity("Hello Mr. Joe, h", 1))
# Score the stored sentence directly against the partial input.
print(symmetric_sentence_similarity("Hello Mr. Jade, how are you", "Hello Mr. Jade, h"))

0.7795138888888888
