In [8]:
import numpy as np
from openai import OpenAI
import faiss
import sqlite3
import argparse
from os.path import exists
import torch
from transformers import AutoTokenizer, AutoModel
import torch.nn.functional as F
from dotenv import load_dotenv

In [9]:
# Load variables from a local .env file into the process environment.
# Presumably this supplies the OpenAI API key that `OpenAI()` reads later -- verify against the .env file.
load_dotenv()

True

In [10]:
class Embedder:
    """Produce text embeddings from either the OpenAI API or a local Hugging Face model.

    Args:
        model_name: OpenAI embedding model name (``use_openai=True``) or the
            Hugging Face model id / path to load (``use_openai=False``).
        to_cuda: For local models, run on ``'cuda'`` when true, otherwise on ``'cpu'``.
        client: Optional pre-built OpenAI client; a default ``OpenAI()`` client is
            created when omitted. Only used when ``use_openai=True``.
        use_openai: Select the OpenAI backend (default) or a local PyTorch model.
        attn_implementation: Attention implementation forwarded to
            ``AutoModel.from_pretrained`` (e.g. ``"eager"``, ``"sdpa"`` or
            ``"flash_attention_2"``). When falsy, the library default is used.
    """

    def __init__(self, model_name=None, to_cuda=True, client=None, use_openai=True, attn_implementation=None):
        self.use_openai = use_openai
        self.model_name = model_name
        # Stored for both backends so the attributes always exist on the instance.
        self.to_cuda = to_cuda
        self.attn_implementation = attn_implementation

        if use_openai:
            self.client = OpenAI() if client is None else client
            return

        # Local backend: load a PyTorch model and its tokenizer.
        load_kwargs = {"trust_remote_code": True, "torch_dtype": torch.float16}
        if attn_implementation:
            # Forward the caller's choice instead of always forcing flash attention.
            load_kwargs["attn_implementation"] = attn_implementation

        device = 'cuda' if to_cuda else 'cpu'
        self.model = AutoModel.from_pretrained(model_name, **load_kwargs).to(device)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model.eval()

    def get_embedding(self, text: str) -> np.ndarray:
        """Return the embedding of a single string as a 1-D float32 numpy array."""
        if self.use_openai:
            query_embedding_response = self.client.embeddings.create(
                model=self.model_name,
                input=text
            )
            return np.array(query_embedding_response.data[0].embedding, dtype='f')
        return np.array(self.encode([text])[0], dtype='f')

    def weighted_mean_pooling(self, hidden, attention_mask):
        """Pool token states into one vector per sequence using position weights.

        Each attended token is weighted by the running count of attended tokens
        up to and including it (``mask * cumsum(mask)``), so later tokens
        contribute more; masked positions get weight zero.

        Args:
            hidden: Token representations, indexed as (batch, token, feature).
            attention_mask: Mask indexed as (batch, token); non-zero marks real tokens.

        Returns:
            Tensor with one pooled vector per batch row.
        """
        attention_mask_ = attention_mask * attention_mask.cumsum(dim=1)
        s = torch.sum(hidden * attention_mask_.unsqueeze(-1).float(), dim=1)
        d = attention_mask_.sum(dim=1, keepdim=True).float()
        reps = s / d
        return reps

    @torch.no_grad()
    def encode(self, input_texts, max_length=512):
        """Embed a list of strings with the local model.

        Args:
            input_texts: List of strings to embed.
            max_length: Token limit per text; longer inputs are truncated.

        Returns:
            numpy array with one L2-normalised embedding per input text.

        Raises:
            RuntimeError: If the instance was created with ``use_openai=True``
                (no local model or tokenizer is loaded in that mode).
        """
        if self.use_openai:
            raise RuntimeError("encode() requires a local model; this Embedder uses the OpenAI backend.")

        device = 'cuda' if self.to_cuda else 'cpu'
        batch_dict = self.tokenizer(
            input_texts,
            max_length=max_length,
            padding=True,
            truncation=True,
            return_tensors='pt',
            return_attention_mask=True,
        ).to(device)

        outputs = self.model(**batch_dict)
        attention_mask = batch_dict["attention_mask"]
        hidden = outputs.last_hidden_state

        reps = self.weighted_mean_pooling(hidden, attention_mask)
        embeddings = F.normalize(reps, p=2, dim=1).detach().cpu().numpy()
        return embeddings

In [11]:
# Local MiniCPM embedding model, loaded onto the GPU with flash attention requested.
CPM_embedder = Embedder(
    model_name="openbmb/MiniCPM-Embedding",
    to_cuda=True,
    use_openai=False,
    attn_implementation="flash_attention_2",
)

# Smoke test: embed a short string and display the resulting vector.
CPM_embedder.get_embedding("Hello, world!")

You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.29s/it]


array([ 0.01122869, -0.02335457, -0.002179  , ..., -0.01329134,
       -0.02295973,  0.01809652], dtype=float32)

In [12]:
# OpenAI-backed embedder; a default OpenAI client is created inside Embedder.
GPT_embedder = Embedder(
    use_openai=True,
    model_name="text-embedding-3-small",
)

# Smoke test: embed a short string and display the resulting vector.
GPT_embedder.get_embedding("Hello, world!")

array([-0.01918462, -0.02527903, -0.00171952, ..., -0.02264216,
        0.00563363, -0.01059459], dtype=float32)

In [13]:
## This version of faiss search requires a sqlite3.Connection object AND an IVFPQ index object
def faiss_search(
    query: str,
    con: sqlite3.Connection,
    index: faiss.Index,
    embedder: Embedder,
    top: int = 5
) -> tuple[list[str], list[float]]:
    """Return the stored texts most related to ``query`` together with their FAISS scores.

    The query is embedded with ``embedder``, the ``top`` nearest entries are
    looked up in ``index``, and the matching text of every hit is read from
    the ``reviews`` table of ``con`` (``content`` column, keyed by ``row_number``).

    Args:
        query: Free-text search query.
        con: Open SQLite connection holding the ``reviews`` table.
        index: FAISS index whose ids correspond to ``reviews.row_number``.
        embedder: Embedder used to turn ``query`` into a vector.
        top: Maximum number of results to return.

    Returns:
        A pair ``(strings, relatednesses)`` of equal-length tuples, ordered from
        most to least related as ranked by FAISS. ``relatednesses`` holds the
        raw scores returned by ``index.search``. When nothing can be retrieved
        the result is ``(("No results found.",), (-1,))``.
    """
    query_embedding = embedder.get_embedding(query)

    # FAISS takes a 2-D float32 batch; one query row gives one row of scores and ids.
    scores, ids = index.search(np.array([query_embedding], dtype='f'), top)

    cur = con.cursor()
    related_text = []
    for row, err in zip(ids[0], scores[0]):
        # FAISS pads the id list with -1 when fewer than `top` neighbours exist.
        if row < 0:
            continue
        # sqlite3 cannot bind numpy integer types, so convert to a plain int
        # (this is why binding the raw id failed and an eval() hack was used before).
        record = cur.execute(
            'SELECT content FROM reviews WHERE row_number=?', (int(row),)
        ).fetchone()
        if record is None:
            # Index id without a matching database row: skip instead of crashing.
            continue
        related_text.append((record[0], err))

    if not related_text:
        related_text.append(("No results found.", -1))

    # FAISS already returns hits best-first for the index's metric (ascending
    # distance for L2, descending score for inner product), so that order is kept;
    # re-sorting descending would put the worst hits first on an L2 index.
    strings, relatednesses = zip(*related_text)
    return strings[:top], relatednesses[:top]

In [15]:
# Database and FAISS index used together with the OpenAI embedder.
con_GPT = sqlite3.connect("../Reviews.db")
index_GPT = faiss.read_index("../IVFPQ_index.bin")

# Database and FAISS index used together with the MiniCPM embedder.
con_CPM = sqlite3.connect("../stories_cn_test.db")
index_CPM = faiss.read_index("../stories_cn_ebd.index")

In [17]:
# Demo: English query run against the GPT connection/index pair with the OpenAI embedder.
faiss_search("I love apples, any recommendation for apple-related products?", con_GPT, index_GPT, GPT_embedder)

(('Summary: I thought that the Sprout apple are the best...|Text: ...I was wrong! The taste is sweet and sour, and texture just so good (unbelievably good). No regret, proud to find this product. Now, I am buying only this applesauce!"',
  "Summary: Sweet, Yummy, Super-Crispy|Text: *****<br />These apple chips are sweet, very crispy, and yummy! They don't have the processed or chemically taste that some dried apples have. They also are not soggy at all--I like my apple chips very crispy! They are 100% certified organic, baked with no added sugar (but still are quite sweet), no preservatives or chemicals, no pesticides or herbicides, gluten free, and made from Washington State apples with five apples in every bag. There are 6 servings in every 2.6 ounce bag, with 29 calories in each serving. These are a truly healthy and delicious snack.<br /><br />Highly recommended.<br />*****",
  'Summary: An Apple A Day? Actually 1 1/2 apples|Text: I have a sulfite allergy and was tremendously excit

In [18]:
# Demo: Chinese query ("Why does the sun rise in the east?") run against the CPM connection/index pair with the MiniCPM embedder.
faiss_search("太阳为什么从东边升起？", con_CPM, index_CPM, CPM_embedder)

(('name: 为什么天上会打雷|category: 寓言故事|content: 上帝造好人以后，本来是和人一起住在地上的。那时候，上帝和人混得很熟。人们都很尊敬上帝，天冷的时候，送柴禾给他取暖；缺吃的时候，送食物给他充饥。上帝呢，对大家是一视同仁，不管人间有什么矛盾去找他，他都能公平合理地给调解妥当，让双方心眼口服。因此，那时候，地上的人都生活得十分安宁和欢乐。可是以后上帝老了，人就慢慢对他冷淡起来了。例如旱季到了，天气凉了，人们围着火堆烤火，上帝也凑过来取暖，人们就往外推他。有一个女人甚至拿捣木薯的木杵①捣伤了他的眼睛。上帝生气了，便离开了人间回到了天上。上帝一走，地上可就乱了套了。那些酋长办事不公，正义得不到伸张，邪恶却到处蔓延，人间开始笼罩着痛苦和不幸。一天，上帝打开窗子往地上看，明白了地上发生的一切，他很同情人们的遭遇，便造了一座大桥，一头连着自己的房子，一头连着大地。这样，桥把天和地连接起来，人们如果有事便可以沿着桥去找他了。于是，人间又有了正义，人们又重新获得了安宁与欢乐的生活。这四个人全都怒容满面，三个女人指责她们的丈夫——这个男人抛弃了她们，又爱上了第四个老婆；这男人争辩说没有这回事。上帝听不清楚，让他们一个个地轮流讲，但是他们不听，总是四个喉咙一起响，上帝被吵得头昏脑胀，因此发起火来。上帝发火的声音大极了，不仅天上听得清清楚楚，连地上也听得清清楚楚。人们听到这可怕的声音，吓得赶快躲进屋里去了。发完火，上帝便对四个吵架的人说：“所有的动物都对我唯命是从，只有你们人不遵守我的规矩。你们赶快回去吧，从今以后，人间的事我再也不管了。”从此，人便无法到天上去了，但人间的事上帝仍然了如指掌。为什么天上会打雷一看到人间有什么不顺心的事发生，上帝就发火。一发火，人们就听到了那可怕的声音。这声音就是今天人们常常听到的雷声。\n\n\t①非洲人吃木薯一般有两种方法：一种是像中国人吃红薯一样蒸，另一种是把木薯埋在泥里沤几天，扒出来晒干，再用木杵在臼里捣碎，然后做成糕蒸熟或放油里炸。',
  'name: 天为什么这么高|category: 寓言故事|content: 听说从前天并不像现在这么高，天和地是离得很近的。多近呢？人站在地上一伸手就能很容易地把天摸到。上帝一直是住在天上的，他为地上造了人，造了动物，造了植物。他把他造的人当成自己的孩子，为人准备了美味可口的食