# Embeddings and Index

In [1]:
import os
import sys

# Directory added to sys.path so that `src` (imported in the next cell) can be
# found. The original checkout location stays the default; set YELP_RAG_ROOT
# to run the notebook from a different machine or folder.
current_dir = os.environ.get(
    "YELP_RAG_ROOT", "/Users/josephtolsma/Documents/dev/yelp_rag"
)
# Guard the insert so re-running this cell does not stack duplicate entries.
if current_dir not in sys.path:
    sys.path.insert(0,current_dir)

In [2]:
import os
from sentence_transformers import SentenceTransformer
from typing import List
import pandas as pd
import numpy as np
import faiss
from src.config import DATA_DIR_PROC, EMBEDDING_MODEL_NAME,EMBED_BATCH_SIZE,\
                        COL_RESTAURANT_ID,INDEX_METRIC,INDEX_DIR

In [3]:
# Read the review-chunk table stored under DATA_DIR_PROC.
review_chunks_path = os.path.join(DATA_DIR_PROC, "review_chunks.parquet")
chunks_df = pd.read_parquet(review_chunks_path)

In [4]:
def load_embedding_model(model_name,device = "mps") -> SentenceTransformer:
    """
    Load a local embedding model using the sentence_transformers library.

    Args:
        model_name: Model name or path handed to ``SentenceTransformer``.
        device: Device string the model should be loaded on
            (defaults to ``"mps"``).

    Returns:
        The instantiated ``SentenceTransformer``.
    """
    # `device` has to be passed by keyword: the constructor's second
    # positional parameter is `modules`, so a positional device string never
    # reaches the `device` argument.
    return SentenceTransformer(model_name, device = device)

In [5]:
def embed_texts(
        model:SentenceTransformer,
        texts:List[str],
        batch_size:int = 64,
        normalize_flag:bool = True
) -> np.ndarray:
    """
    Encode ``texts`` with ``model`` and return the vectors as float32.

    Args:
        model: Object whose ``encode`` method produces the embeddings.
        texts: Strings to embed.
        batch_size: Forwarded to ``encode`` as ``batch_size``.
        normalize_flag: Forwarded to ``encode`` as ``normalize_embeddings``.

    Returns:
        The ``encode`` result cast to ``np.float32``; no copy is made when
        the result already has that dtype.
    """
    encode_options = {
        "sentences": texts,
        "batch_size": batch_size,
        "normalize_embeddings": normalize_flag,
        "show_progress_bar": True,
        "convert_to_numpy": True,
    }
    vectors = model.encode(**encode_options)

    # copy=False avoids duplicating the array when it is already float32.
    return vectors.astype(np.float32, copy = False)

In [6]:
# Pull the chunk strings out of the frame as a plain Python list for encoding.
texts = chunks_df.loc[:, "chunk"].to_list()

In [7]:
# Load the embedding model, then embed every chunk in one pass.
model = load_embedding_model(EMBEDDING_MODEL_NAME, device = "mps")

X = embed_texts(
    model,
    texts,
    batch_size = EMBED_BATCH_SIZE,
    normalize_flag = True,
)

Batches:   0%|          | 0/39 [00:00<?, ?it/s]

In [8]:
def build_faiss_index(X,metric = "cosine"):
    """
    Build a flat FAISS index containing the rows of ``X``.

    Args:
        X: 2-D ``np.float32`` array with one vector per row.
        metric: ``"cosine"`` selects ``faiss.IndexFlatIP``; ``"l2"`` selects
            ``faiss.IndexFlatL2``.

    Returns:
        The index after all rows of ``X`` have been added.

    Raises:
        AssertionError: If ``X`` is not float32.
        ValueError: If ``metric`` is neither ``"cosine"`` nor ``"l2"``.
    """
    assert X.dtype == np.float32
    _, dim = X.shape

    if metric not in ("cosine", "l2"):
        raise ValueError("metric must be 'cosine' or 'l2'")

    # Inner product matches cosine similarity only for L2-normalised rows;
    # this function stores the vectors as given and does not normalise them.
    index_cls = faiss.IndexFlatIP if metric == "cosine" else faiss.IndexFlatL2
    flat_index = index_cls(dim)
    flat_index.add(X)
    return flat_index

In [9]:
def create_faiss_by_restaurant(df,embeddings):
    """
    Write one FAISS index and one metadata parquet file per restaurant.

    Rows are matched by position: row ``i`` of ``embeddings`` must be the
    vector for row ``i`` of ``df``. For every distinct value of
    ``COL_RESTAURANT_ID`` the function writes ``<id>.faiss`` and
    ``<id>_meta.parquet`` into ``INDEX_DIR``; the metadata rows are saved in
    the same order as the vectors added to the index.

    Args:
        df: DataFrame containing a ``COL_RESTAURANT_ID`` column.
        embeddings: Array of vectors, one per row of ``df``.

    Raises:
        ValueError: If ``df`` and ``embeddings`` differ in row count.
    """
    if len(df) != len(embeddings):
        raise ValueError(
            f"df has {len(df)} rows but embeddings has {len(embeddings)} rows"
        )

    # Make sure the output folder exists before the first write.
    os.makedirs(INDEX_DIR, exist_ok = True)

    # `.indices` gives positional row numbers, which is what indexing into
    # `embeddings` requires. `.groups` gives index labels, and those only
    # coincide with positions when `df` carries a default 0..n-1 index.
    for restaurant_id, positions in df.groupby(COL_RESTAURANT_ID).indices.items():
        X_r = embeddings[positions]
        meta_r = df.iloc[positions].reset_index(drop = True)

        index = build_faiss_index(X_r,metric = INDEX_METRIC)
        faiss.write_index(index,os.path.join(INDEX_DIR,f"{restaurant_id}.faiss"))

        meta_r.to_parquet(
            os.path.join(INDEX_DIR,f"{restaurant_id}_meta.parquet"),
            engine = "pyarrow",
            index = False,
        )

In [10]:
# Build and save the per-restaurant indexes and metadata files.
create_faiss_by_restaurant(df = chunks_df, embeddings = X)

In [11]:
# rid = chunks_df[COL_RESTAURANT_ID].iloc[0]
# index = faiss.read_index(os.path.join(INDEX_DIR,f"{rid}.faiss"))
# meta = pd.read_parquet(os.path.join(INDEX_DIR,f"{rid}_meta.parquet"))
# assert index.ntotal == len(meta)
# print(f"OK: {index.ntotal} vectors")

In [12]:
# Pick one restaurant id with a fixed seed, then load its saved index and
# metadata back from INDEX_DIR.
r_id = chunks_df[COL_RESTAURANT_ID].sample(random_state=42).values[0]
index_path = os.path.join(INDEX_DIR, f"{r_id}.faiss")
meta_path = os.path.join(INDEX_DIR, f"{r_id}_meta.parquet")
index = faiss.read_index(index_path)
meta = pd.read_parquet(meta_path)

# Encode the test query with normalisation enabled.
query = "I have dietary restrictions and my restrictions make my life difficult."
query_encoded = model.encode(
    [query],
    normalize_embeddings=True,
    convert_to_numpy=True,
)

In [13]:
pd.set_option("display.max_colwidth",None)
D,I = index.search(query_encoded,k=5)
# FAISS fills unused result slots with -1 when the index holds fewer than k
# vectors; drop them so they are never used as row lookups.
hit_positions = I[0][I[0] >= 0]
# The returned ids are insertion positions and the metadata was saved in the
# same order as the vectors, so look rows up by position.
chunks = meta.iloc[hit_positions]["chunk"].tolist()
pd.Series(chunks)


0                                                                                                I am vegan so finding something in New Orleans has been hard. These guys made me a special order. They even have soy milk! So happy!!
1                                                                         Sat at the counter in front of the kitchen and now I want to have everything. Good portions. Friendly service. Looks like our breakfast place for this trip.
2    This place is amazing! It is rare to go to a place with a small menu and want to try everything. The bread is to die for and everything we tried (we came here everyday of our trip) was amazing. I cant wait to come back again.
3               After waiting in line for 20 mins, my wife and I were denied service because I have I service dog that I need with me at all times. Very disappointing to be denied when service dogs are protected under federal law.
4                                                                           

In [14]:
# Display the metadata frame that was read back for the sampled restaurant.
meta

Unnamed: 0,chunk_index,chunk,business_id,review_id,restaurant_name,chunk_id,n_chars,stars,date
0,0,Came to French toast on our trip to New Orleans for breakfast! It was the best breakfast I had on our whole trip. I got the salmon toast with an over hard egg. The fried chicken and waffles looked delicious but too heavy for us because we were flying out that morning. The bread is fluffy and fresh. I also got a latte which was amazing!! I'm pretty sure everything on the menu is delicious and you can't go wrong. Staff was friendly as well! This would definitely be a place I would go back to when I visit Again in the future,MG_wIwRBwyNnCAEMXe9Jqw,KMh--tSMtHrrFjw1ruaGIw,French Toast,KMh--tSMtHrrFjw1ruaGIw_0,527,5.0,2018-11-16 18:45:13
1,0,"A very accessible place for breakfast on Decatur St in the French Quarter. Great selection and reasonable prices. Clean and bright with very good service. I had the crepes with mushroom, onion, spinach and goat cheese which was ample and perfectly done. My partner had the avocado toast with sunny egg which she loved.",MG_wIwRBwyNnCAEMXe9Jqw,FDCy6i2guGYQObxTAI0p-Q,French Toast,FDCy6i2guGYQObxTAI0p-Q_0,318,5.0,2018-10-20 15:10:08
2,0,"Fantastic breakfast spot. Always packed. The Marci Gras French toast, stuffed with cinnamon cream cheese was pretty great. Enough for two.",MG_wIwRBwyNnCAEMXe9Jqw,jcXZ_vCjvogsMCHVNLggcA,French Toast,jcXZ_vCjvogsMCHVNLggcA_0,138,4.0,2018-12-18 02:39:36
3,0,Made reservations through OpenTable. Arrived and it was busy weekend so we were seated at bar (party of two). Mimosas for drinks and cool part is we could see through the receiving window and watch our food cooked. Energy was upbeat and food was delicious,MG_wIwRBwyNnCAEMXe9Jqw,ejZ3oHmlmgtjd1CYyd95fQ,French Toast,ejZ3oHmlmgtjd1CYyd95fQ_0,255,5.0,2019-02-18 04:05:16
4,0,"Great place to stop in when in town. Servers were great, food great and the hot sauce amazing! Definitely call ahead, hostess was great to work with about the wait and everything.",MG_wIwRBwyNnCAEMXe9Jqw,rHyZ8n-3T4m9L8-5RVGH-Q,French Toast,rHyZ8n-3T4m9L8-5RVGH-Q_0,179,5.0,2018-12-30 17:15:07
...,...,...,...,...,...,...,...,...,...
279,0,"We were looking for a nice breakfast spot off of the French Market area and had stumbled upon French Toast NOLA. I had the bacon, onion, and gruyere crepe, while the other in my party had the traditional French toast. We also had an order of the aebleskivers with the lemon curd and jam dipping sauces. The portions for the prices were huge! We were not expecting so much food. The crepe was savory and loaded with bacon and creamy gruyere. The French toast was just sweet enough without being overbearing and was not soggy or cakey like most other breakfast restaurants. Next time, we will definitely try the King Cake French Toast!",MG_wIwRBwyNnCAEMXe9Jqw,H42ANOt0vI4OHdjszMk5zA,French Toast,H42ANOt0vI4OHdjszMk5zA_0,633,5.0,2021-08-22 19:07:55
280,0,"Good food for sure! I ordered chicken and waffles with a cayenne pepper butter and syrup. Talk about good eating! My friends ordered the king cake French toast (It was super sweet but tasty) and the spinach crepes with a side salad. Everything was good from service to food. The sodas weren't really strong but that's okay; apparently they also had good sweet tea as well my friend ordered that. Even though we arrived 30 minutes before closing time our service was as if we were the first to order upon opening; I really liked that, we didn't feel rushed. So will I return? Of course!",MG_wIwRBwyNnCAEMXe9Jqw,4D0JJh7iVJxVLa7_uAymKg,French Toast,4D0JJh7iVJxVLa7_uAymKg_0,585,4.0,2019-08-01 15:15:03
281,0,First time going to New Orleans and in search of a great breakfast location for my family. Thanks to yelp we found this location. The service was amazing and coffee & food was even better. Will definitely come back again.,MG_wIwRBwyNnCAEMXe9Jqw,fPJOUCA-eJIPtHo0Vr0xFw,French Toast,fPJOUCA-eJIPtHo0Vr0xFw_0,221,5.0,2020-03-22 19:03:57
282,0,"wait took forever, they lied about our table being available 2 hours early. staff was extremely rude. food wasn't even that good, except for those pancake balls which were incredible.",MG_wIwRBwyNnCAEMXe9Jqw,_eI6aUNdAE8QFuf8KnZXcA,French Toast,_eI6aUNdAE8QFuf8KnZXcA_0,183,1.0,2021-06-09 17:09:02
