# Compare Transfomer Models Before and After Compression

In [54]:
# import timeit
import os
import timeit
import numpy as np
import pandas as pd
from tqdm import tqdm
from elasticsearch import Elasticsearch
from sentence_transformers import SentenceTransformer, util

In [31]:
st_time = timeit.default_timer()
model = SentenceTransformer("msmarco-distilbert-base-tas-b")
sentences = [
    "the fifty mannequin heads floating in the pool kind of freaked them out",
    "she swore she just saw her sushi move",
    "he embraced his new life as an eggplant",
    "my dentist tells me that chewing bricks is very bad for your teeth",
    "the dental specialist recommended an immediate stop to flossing with construction materials"
]

embeddings = model.encode(sentences)
print(embeddings.shape)
print("Time taken: ", timeit.default_timer() - st_time)

  return torch._C._cuda_getDeviceCount() > 0


(5, 768)
Time taken:  1.4741140305995941


In [5]:
from pathlib import Path
from typing import List

from optimum.onnxruntime import ORTModelForFeatureExtraction
from transformers import AutoTokenizer

In [6]:
import torch
import torch.nn.functional as F
from transformers import Pipeline


class SentenceEmbeddingPipeline(Pipeline):
    def _sanitize_parameters(self, **kwargs):
        # we don't have any hyperameters to sanitize
        self.preprocess_params = {}
        return self.preprocess_params, {}, {}

    def preprocess(self, inputs):
        encoded_inputs = self.tokenizer(
            inputs, padding=True, truncation=True, return_tensors="pt"
        )
        return encoded_inputs

    def _forward(self, model_inputs):
        outputs = self.model(**model_inputs)
        return {"outputs": outputs, "attention_mask": model_inputs["attention_mask"]}

    def postprocess(self, model_outputs):
        # Perform pooling
        sentence_embeddings = self.mean_pooling(
            model_outputs["outputs"], model_outputs["attention_mask"]
        )
        # Normalize embeddings
        sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
        return sentence_embeddings

    def mean_pooling(self, model_output, attention_mask):
        token_embeddings = model_output[
            0
        ]  # First element of model_output contains all token embeddings
        input_mask_expanded = (
            attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        )
        return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(
            input_mask_expanded.sum(1), min=1e-9
        )


In [7]:
onnx_path = Path("../ml/model/sentence-transformers/msmarco-distilbert-base-tas-b.onnx")
model = ORTModelForFeatureExtraction.from_pretrained(onnx_path)
tokenizer = AutoTokenizer.from_pretrained(onnx_path)
pipeline = SentenceEmbeddingPipeline(model=model, tokenizer=tokenizer)

def generate_embeddings(inputs: List[str]):
    embeddings = pipeline(inputs)
    return embeddings

In [8]:
foo = generate_embeddings("hello world, haha!")
print(foo.shape)

torch.Size([1, 768])


In [None]:
st_time = timeit.default_timer()
onnx_embeddings = generate_embeddings(sentences)
print(len(onnx_embeddings))
print("Time taken: ", timeit.default_timer() - st_time)

In [29]:
onnx_embeddings[0].shape

torch.Size([1, 768])

## Try Elastic Search

In [41]:
books = pd.read_csv("../search/books_embeddings.csv")
books.head()

Unnamed: 0,title,authors,description,link,average_rating,text_reviews_count,embeddings
0,The Blood Royal (Joe Sandilands #9),Barbara Cleverly,"A story of murder, mystery and espionage (with...",https://www.goodreads.com/book/show/10215672-t...,3.62,86,"[-0.15909269452095032, -0.1371370106935501, 0...."
1,Wedding at King's Convenience (Kings of Califo...,Maureen Child,Everyone did Jefferson King's bidding. Except ...,https://www.goodreads.com/book/show/11147472-w...,3.37,3,"[0.28028225898742676, 0.00014651630772277713, ..."
2,Alcibiades,Plato,The Alcibiades was widely read in antiquity as...,https://www.goodreads.com/book/show/1132944.Al...,3.82,10,"[-0.2626037299633026, 0.15822897851467133, 0.2..."
3,Crazy Enough: A Memoir,Storm Large,"Yes,Storm Large is her real name, though she's...",https://www.goodreads.com/book/show/11459673-c...,3.77,131,"[-0.1144125685095787, -0.3899981379508972, 0.0..."
4,"The Second Spy (The Books of Elsewhere, #3)",Jacqueline West,"In Olive's third adventure, what lurks below t...",https://www.goodreads.com/book/show/11737314-t...,4.29,93,"[0.2180931568145752, -0.09221331775188446, -0...."


In [57]:
# convert the string representation of the embeddings to numpy arrays
books["embeddings"] = books["embeddings"].apply(lambda x: np.fromstring(x[1:-1], sep=","))

In [58]:
books.head()

Unnamed: 0,title,authors,description,link,average_rating,text_reviews_count,embeddings
0,The Blood Royal (Joe Sandilands #9),Barbara Cleverly,"A story of murder, mystery and espionage (with...",https://www.goodreads.com/book/show/10215672-t...,3.62,86,"[-0.15909269452095032, -0.1371370106935501, 0...."
1,Wedding at King's Convenience (Kings of Califo...,Maureen Child,Everyone did Jefferson King's bidding. Except ...,https://www.goodreads.com/book/show/11147472-w...,3.37,3,"[0.28028225898742676, 0.00014651630772277713, ..."
2,Alcibiades,Plato,The Alcibiades was widely read in antiquity as...,https://www.goodreads.com/book/show/1132944.Al...,3.82,10,"[-0.2626037299633026, 0.15822897851467133, 0.2..."
3,Crazy Enough: A Memoir,Storm Large,"Yes,Storm Large is her real name, though she's...",https://www.goodreads.com/book/show/11459673-c...,3.77,131,"[-0.1144125685095787, -0.3899981379508972, 0.0..."
4,"The Second Spy (The Books of Elsewhere, #3)",Jacqueline West,"In Olive's third adventure, what lurks below t...",https://www.goodreads.com/book/show/11737314-t...,4.29,93,"[0.2180931568145752, -0.09221331775188446, -0...."


In [59]:
client = Elasticsearch("http://localhost:9200")
# construct the index
client.indices.delete(index="goodreads_index", ignore_unavailable=True)

# setup mappings
mappings = {
    "properties": {
        "title": {"type": "text"},
        "authors": {"type": "text"},
        "link": {"type": "text"},
        "description": {"type": "text"},
        "average_rating": {"type": "float"},
        # we are using different names for text_review_counts
        "reviews_count": {"type": "integer"},
        "embeddings": {
            "type": "dense_vector",
            "dims": 768,
            "index": "true",
            "similarity": "cosine",
        },
    }
}

# create the index
client.indices.create(index="goodreads_index", mappings=mappings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'goodreads_index'})

In [60]:
# when we add the dataset into the index, later we could use
# bulk API to add the data in batches
# format should be a list of dictionaries

# ealsticsearch uses 'operations' to refer to documents
t_start = timeit.default_timer()
operations = []
for i, row in books.iterrows():
    # tell elasticsearch where to store the data
    operations.append(
        {
            "index": {
                "_index": "goodreads_index"
            }
        }
    )
    book = {
            "title": row["title"],
            "authors": row["authors"],
            "link": row["link"],
            "description": row["description"],
            "average_rating": row["average_rating"],
            "reviews_count": row["text_reviews_count"],
            # make embeddings as a array
            "embeddings": row["embeddings"]
        }

    operations.append(book)

# add the data to the index
# if you have used postgres, this is similar to a commit

# since we have a lot of data, we will construct the data in batches
# and then add the data in batches

# we will add 1000 books at a time
batch_size = 1000
n_batches = len(operations) // batch_size
bulk_batches = [operations[i*batch_size:(i+1)*batch_size] for i in range(n_batches)]

# use tqdm to show a progress bar
for i, batch in enumerate(tqdm(bulk_batches)):
    if i % 100 == 0:
        print(f"Adding batch {i} to the index")
    client.bulk(index = "goodreads_index", operations=batch, refresh = True)
    
t_end = timeit.default_timer()
print(f"Time taken to add data to the index: {t_end - t_start} ms for {books.shape[0]} books")

  0%|          | 0/10 [00:00<?, ?it/s]

Adding batch 0 to the index


100%|██████████| 10/10 [00:04<00:00,  2.31it/s]

Time taken to add data to the index: 4.546241268515587 ms for 5083 books





In [62]:
# initialize the sentence transformer model
# it has to be the same model that was used to generate the embeddings
model = SentenceTransformer("msmarco-distilbert-base-tas-b")
# query based on the embeddings
# num_candidates has to be >> k
st_time = timeit.default_timer()
response = client.search(
    index="goodreads_index",
    knn = {
        "field": "embeddings",
        "query_vector": model.encode("A relaxing book to read on a rainy day"),
        "k": 3,
        "num_candidates": 100
    }
)

print(response)
print(f"Time taken to query the index: {timeit.default_timer() - st_time} ms")



Time taken to query the index: 0.02622746303677559 ms


In [50]:
# query based on the title
st_time = timeit.default_timer()
response = client.search(
    index="goodreads_index",
    query={
        "match": {
            "title": {
                "query": "Blood"
            }
        }
    }
)
print(f"Time taken to query the index: {timeit.default_timer() - st_time} ms")

Time taken to query the index: 0.00192289799451828 ms


In [32]:
test_query = "A romantic book for a sunny afternoon"
query_embedding1 = generate_embeddings(test_query)
query_embedding2 = model.encode(test_query)


In [63]:
response = client.search(
    index="goodreads_index",
    knn = {
        "field": "embeddings",
        "query_vector": query_embedding1[0].tolist(),
        "k": 5,
        "num_candidates": 50
    }
)

In [64]:
response

ObjectApiResponse({'took': 3, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 5, 'relation': 'eq'}, 'max_score': 0.89194703, 'hits': [{'_index': 'goodreads_index', '_id': 'ot6Wm48BvVabLECUayXj', '_score': 0.89194703, '_source': {'title': 'Lessness and More', 'authors': 'Cesar Nascimento', 'link': 'https://www.goodreads.com/book/show/16059163-lessness-and-more', 'description': "Lessness and More is a fresh poetic experiment that will be certainly most rewarding to the reader. Its themes are varied, from weather reports to approaching death.\nExcerpts:\nTHAT SUMMER AFTERNOON\nThat summer was not easy on us\nBut one afternoon the clouds came\nAnd the shadows gave way to quiet light\nAs we walked to the park half regretting\nHaving cursed the heat\nWe were tired of summer, and afraid of winter\nLying by me, you read a book, occasionally\nTurning your head to see what I was doing\nBut I was reading too\nAnd I would bend ov

In [38]:
def pretty_response(response):
    if len(response['hits']['hits']) == 0:
        print('Your search returned no results.')
    else:
        for hit in response['hits']['hits']:
            id = hit['_id']
            score = hit['_score']
            title = hit['_source']['title']
            author = hit['_source']['authors']
            description = hit['_source']['description']
            link = hit['_source']['link']
            reviews_count = hit['_source']['reviews_count']
            pretty_output = (f"\nID: {id}\nTitle: {title}\nAuthor: {author}\nDescription: {description}\nLink: {link}\nScore: {score}\n Reviews Count: {reviews_count}\n")
            print(pretty_output)

In [65]:
pretty_response(response)


ID: ot6Wm48BvVabLECUayXj
Title: Lessness and More
Author: Cesar Nascimento
Description: Lessness and More is a fresh poetic experiment that will be certainly most rewarding to the reader. Its themes are varied, from weather reports to approaching death.
Excerpts:
THAT SUMMER AFTERNOON
That summer was not easy on us
But one afternoon the clouds came
And the shadows gave way to quiet light
As we walked to the park half regretting
Having cursed the heat
We were tired of summer, and afraid of winter
Lying by me, you read a book, occasionally
Turning your head to see what I was doing
But I was reading too
And I would bend over to your side
And kiss you lightly on the head
It was a quiet summer afternoon, and
We had all life can offer
The rest being vanity and excess
I flipped a page and turned to you
To kiss you on the head
It was the last of that hot summer
But winter would be warm
A DOG IS A MAN'S BEST FRIEND
Should you peek
At the fugitive
Picture of death?
How will you face
Your death
