In [2]:
import numpy as np
d = 64                           # dimension
nb = 100000                      # database size
nq = 10000                       # nb of queries
np.random.seed(1234)             # make reproducible
xb = np.random.random((nb, d)).astype('float32')
xb[:, 0] += np.arange(nb) / 1000.
xq = np.random.random((nq, d)).astype('float32')
xq[:, 0] += np.arange(nq) / 1000.

In [3]:
import faiss                   # make faiss available
index = faiss.IndexFlatL2(d)   # build the index
print(index.is_trained)
index.add(xb)                  # add vectors to the index
print(index.ntotal)

True
100000


In [4]:
import pandas as pd
from bert_serving.client import BertClient

# Bert Client must be running locally
bc = BertClient()

# Read quote data
quotes = pd.read_csv('quotes.csv', sep=';', skiprows=1)
quotes

Unnamed: 0,QUOTE,AUTHOR,GENRE
0,Age is an issue of mind over matter. If you do...,Mark Twain,age
1,"Anyone who stops learning is old, whether at t...",Henry Ford,age
2,Wrinkles should merely indicate where smiles h...,Mark Twain,age
3,True terror is to wake up one morning and disc...,Kurt Vonnegut,age
4,A diplomat is a man who always remembers a wom...,Robert Frost,age
...,...,...,...
216,"In the information age, you don't teach philos...",Timothy Leary,age
217,Once I planned to write a book of poems entire...,Gilbert K. Chesterton,age
218,The harvest of old age is the recollection and...,Marcus Tullius Cicero,age
219,It is the spirit of the age to believe that an...,Gore Vidal,age


In [5]:
embeddings = bc.encode(quotes.QUOTE.to_list())
quotes['EMBEDDINGS'] = embeddings.tolist()

# Persist to pickle
quotes.to_pickle('data/embedded_quotes.pkl')

here is what you can do:
- or, start a new server with a larger "max_seq_len"
  '- or, start a new server with a larger "max_seq_len"' % self.length_limit)


In [6]:
def load_quotes_and_embeddings():
    quotes = pd.read_pickle('data/embedded_quotes.pkl')

    # change dtype in place for memory efficiency
    quotes['EMBEDDINGS'] = quotes['EMBEDDINGS'].apply(
        lambda arr: np.array(arr, dtype='float32')
    )

    quote_embeddings = np.stack(quotes.EMBEDDINGS.values)

    # reduce memory footprint by dropping column
    quotes.drop('EMBEDDINGS', axis='columns')

    # normalize embeddings for cosine distance
    embedding_sums = quote_embeddings.sum(axis=1)
    normed_embeddings = quote_embeddings / embedding_sums[:, np.newaxis]
    return quotes, normed_embeddings

quotes, embeddings = load_quotes_and_embeddings()

In [7]:
def create_index(embeddings):
    """
    Create an index over the quote embeddings for fast similarity search.
    """
    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(embeddings)
    return index

index = create_index(embeddings)

In [8]:
bc = BertClient()
text="I dreamed a dream."

text_embedding = bc.encode([text])
normalized_text_embedding = text_embedding / text_embedding.sum()

_, idx = index.search(normalized_text_embedding, 5)

relevant_quotes = quotes.iloc[idx.flatten()].QUOTE.values
relevant_authors = quotes.iloc[idx.flatten()].AUTHOR.values

In [9]:
for q in range(5):
    print('>'+relevant_quotes[q])
    print(relevant_authors[q])

>I don't feel old. I don't feel anything till noon. That's when it's time for my nap.
Bob Hope
>I think the biggest disease the world suffers from in this day and age is the disease of people feeling unloved. I know that I can give love for a minute, for half an hour, for a day, for a month, but I can give. I am very happy to do that, I want to do that.
Princess Diana
>Once I planned to write a book of poems entirely about the things in my pocket. But I found it would be too long and the age of the great epics is past.
Gilbert K. Chesterton
>I don't think of myself as a poor deprived ghetto girl who made good. I think of myself as somebody who from an early age knew I was responsible for myself, and I had to make good.
Oprah Winfrey
>Dressing up is a bore. At a certain age, you decorate yourself to attract the opposite sex, and at a certain age, I did that. But I'm past that age.
Katharine Hepburn
