## imports

In [54]:
from openai import OpenAI
import os
from sentence_transformers import SentenceTransformer
from huggingface_hub import snapshot_download
import faiss
import numpy as np
import openai
import pandas as pd
from dotenv import load_dotenv
import pickle
# Load key/value pairs from a local .env file into the process environment so the
# os.getenv(...) lookups further down (TOKEN, BASE_URL, MODEL_NAME) can see them.
# As the last expression in the cell, the call's return value is what gets displayed.
load_dotenv()

True

In [2]:
# API client built from environment configuration: the credential comes from
# TOKEN and the endpoint from BASE_URL, so neither is hard-coded in the notebook.
client = OpenAI(base_url=os.getenv("BASE_URL"), api_key=os.getenv("TOKEN"))

# Name of the model to request, also read from the environment (MODEL_NAME).
model_name = os.getenv("MODEL_NAME")

In [4]:
# Directory holding the local copy of the BAAI/bge-small-en embedding model.
local_model_path = "../bge-small-en"

# Fetch the model from the Hugging Face Hub only when the local copy is missing,
# so the notebook runs on a fresh checkout (Restart & Run All) while every later
# run keeps loading from disk without touching the network.
if not os.path.isdir(local_model_path):
    snapshot_download(repo_id="BAAI/bge-small-en", local_dir=local_model_path)

# Now load the locally saved model
model = SentenceTransformer(local_model_path)

## trying out embeddings

In [7]:
# Smoke test: embed one short string and display the length of the result.
# normalize_embeddings=True is the same setting used for the documents and the
# query later in the notebook.
ouput = model.encode(
    "hello world",
    normalize_embeddings=True,
)
len(ouput)

384

## wrangle dataset

In [16]:
# Folder expected to hold the product images referenced by the dataset.
image_folder_path = './products/images/'

# The catalogue is stored as JSON Lines (one record per line), hence lines=True.
products_df = pd.read_json(
    'products/products.jsonl',
    lines=True,
)

# Preview the first two rows.
products_df.head(2)

Unnamed: 0,name,category,description,ingredients,price,rating,image_path
0,Cappuccino,Coffee,A rich and creamy cappuccino made with freshly...,"[Espresso, Steamed Milk, Milk Foam]",4.5,4.7,cappuccino.jpg
1,Jumbo Savory Scone,Bakery,"Deliciously flaky and buttery, this jumbo savo...","[Flour, Butter, Cheese, Herbs, Baking Powder, ...",3.25,4.3,SavoryScone.webp


In [17]:
# Flatten every product row into a single descriptive string for embedding:
#   "<name> : <description> -- Ingredients: <ingredients> -- Price: <price> -- Rating: <rating>"
# The non-string columns are cast with astype(str) before being concatenated.
products_df['text'] = (
    products_df['name']
    + ' : '
    + products_df['description']
    + ' -- Ingredients: '
    + products_df['ingredients'].astype(str)
    + ' -- Price: '
    + products_df['price'].astype(str)
    + ' -- Rating: '
    + products_df['rating'].astype(str)
)

# Preview the first two generated strings.
products_df['text'].head(2)

0    Cappuccino : A rich and creamy cappuccino made...
1    Jumbo Savory Scone : Deliciously flaky and but...
Name: text, dtype: object

In [18]:
# Plain Python list of the per-product strings; this is the list that is later
# embedded and indexed, so its order defines the document ids.
texts = products_df['text'].to_list()

# Show the first two entries in full.
texts[:2]

["Cappuccino : A rich and creamy cappuccino made with freshly brewed espresso, steamed milk, and a frothy milk cap. This delightful drink offers a perfect balance of bold coffee flavor and smooth milk, making it an ideal companion for relaxing mornings or lively conversations. -- Ingredients: ['Espresso', 'Steamed Milk', 'Milk Foam'] -- Price: 4.5 -- Rating: 4.7",
 "Jumbo Savory Scone : Deliciously flaky and buttery, this jumbo savory scone is filled with herbs and cheese, creating a mouthwatering experience. Perfect for a hearty snack or a light lunch, it pairs beautifully with your favorite coffee or tea. -- Ingredients: ['Flour', 'Butter', 'Cheese', 'Herbs', 'Baking Powder', 'Salt'] -- Price: 3.25 -- Rating: 4.3"]

In [19]:
# Add two free-text documents (the "about us" page and the menu) to `texts`.
#
# - Files are decoded as UTF-8 explicitly instead of relying on the platform's
#   default encoding, which differs between operating systems.
# - Each document is appended only if it is not already in the list, so
#   re-running this cell does not create duplicates. Positions in `texts` are
#   used as document ids when search results are looked up, so duplicates would
#   put the list out of step with an index built from an earlier run.

# about section
with open("products/Merry's_way_about_us.txt", encoding="utf-8") as f:
    Merry_about_us = f.read()
Merry_about_us = "coffe shop merry's way about section : " + Merry_about_us
if Merry_about_us not in texts:
    texts.append(Merry_about_us)

# menu items
with open("products/menu_items_text.txt", encoding="utf-8") as f:
    menu_items = f.read()
menu_items = "Menu Items : " + menu_items
if menu_items not in texts:
    texts.append(menu_items)

In [None]:
# Persist the assembled `texts` list to data.pkl with pickle (binary write mode).
with open("data.pkl", "wb") as pkl_file:
    pickle.dump(texts, pkl_file)

'Menu Items : Menu Items\n\nCappuccino - $4.50\nJumbo Savory Scone - $3.25\nLatte - $4.75\nChocolate Chip Biscotti - $2.50\nEspresso shot - $2.00\nHazelnut Biscotti - $2.75\nChocolate Croissant - $3.75\nDark chocolate (Drinking Chocolate) - $5.00\nCranberry Scone - $3.50\nCroissant - $3.25\nAlmond Croissant - $4.00\nGinger Biscotti - $2.50\nOatmeal Scone - $3.25\nGinger Scone - $3.50\nChocolate syrup - $1.50\nHazelnut syrup - $1.50\nCarmel syrup - $1.50\nSugar Free Vanilla syrup - $1.50\nDark chocolate (Packaged Chocolate) - $3.00'

In [None]:
# Read the pickled list back from data.pkl to check that it was written.
# NOTE: pickle.load can execute arbitrary code while deserializing, so it should
# only be pointed at files this notebook produced itself.
with open("data.pkl", "rb") as pkl_file:
    loaded_data = pickle.load(pkl_file)

# Dump the restored list to the cell output.
print(loaded_data)

["Cappuccino : A rich and creamy cappuccino made with freshly brewed espresso, steamed milk, and a frothy milk cap. This delightful drink offers a perfect balance of bold coffee flavor and smooth milk, making it an ideal companion for relaxing mornings or lively conversations. -- Ingredients: ['Espresso', 'Steamed Milk', 'Milk Foam'] -- Price: 4.5 -- Rating: 4.7", "Jumbo Savory Scone : Deliciously flaky and buttery, this jumbo savory scone is filled with herbs and cheese, creating a mouthwatering experience. Perfect for a hearty snack or a light lunch, it pairs beautifully with your favorite coffee or tea. -- Ingredients: ['Flour', 'Butter', 'Cheese', 'Herbs', 'Baking Powder', 'Salt'] -- Price: 3.25 -- Rating: 4.3", "Latte : Smooth and creamy, our latte combines rich espresso with velvety steamed milk, creating a perfect balance of flavor and texture. Enjoy it as a comforting treat any time of day, whether you're starting your morning or taking a midday break. -- Ingredients: ['Espress

## Generate Embeddings

In [22]:
# Embed every document in `texts` with a single encode call, using the same
# normalize_embeddings=True setting as the query embedding later on.
ouput = model.encode(
    texts,
    normalize_embeddings=True,
)

# Number of embeddings produced.
len(ouput)

20

In [25]:
# Build a flat FAISS index over the document embeddings and write it to disk.

# Destination file for the serialized index.
index_file = "faiss_product.index"

# float32 copy of the encoder output.
embeddings = np.array(ouput, dtype=np.float32)

# The index is fed a (num_vectors, dimension) matrix, so reject any other rank early.
if not embeddings.ndim == 2:
    raise ValueError("Embeddings must be a 2D NumPy array of shape (num_vectors, dimension)")

# Width of each vector (384 in the recorded run).
dimension = embeddings.shape[1]

# IndexFlatL2 compares vectors by L2 (Euclidean) distance.
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Persist the populated index so it can be reloaded later with faiss.read_index.
faiss.write_index(index, index_file)

print(f"Stored {embeddings.shape[0]} vectors of dimension {embeddings.shape[1]} successfully!")


Stored 20 vectors of dimension 384 successfully!


## Retrieve the closest documents as context

In [46]:
# Retrieve the documents closest to a question and join them into one context string.

# Number of nearest neighbours to fetch from the index.
top_k = 2

# Embed the question with the same normalization used for the documents.
query_embedding = model.encode('is cappuccino lactose free?', normalize_embeddings=True)

# Reshape to a (1, dimension) float32 matrix: one row per query.
query_embedding = np.array(query_embedding, dtype=np.float32).reshape(1, -1)

# Load FAISS index
index = faiss.read_index("faiss_product.index")

# D holds the distances and I the row ids of the nearest vectors; both have one
# row per query, so row 0 belongs to the single question above.
D, I = index.search(query_embedding, top_k)

# FAISS pads the id array with -1 when fewer than top_k neighbours are available.
# Without the filter, texts[-1] would silently return the last document instead
# of signalling "no result".
retrieved_docs = [texts[i] for i in I[0] if i >= 0]
context = "\n".join(retrieved_docs)

print(context)

Cappuccino : A rich and creamy cappuccino made with freshly brewed espresso, steamed milk, and a frothy milk cap. This delightful drink offers a perfect balance of bold coffee flavor and smooth milk, making it an ideal companion for relaxing mornings or lively conversations. -- Ingredients: ['Espresso', 'Steamed Milk', 'Milk Foam'] -- Price: 4.5 -- Rating: 4.7
Menu Items : Menu Items

Cappuccino - $4.50
Jumbo Savory Scone - $3.25
Latte - $4.75
Chocolate Chip Biscotti - $2.50
Espresso shot - $2.00
Hazelnut Biscotti - $2.75
Chocolate Croissant - $3.75
Dark chocolate (Drinking Chocolate) - $5.00
Cranberry Scone - $3.50
Croissant - $3.25
Almond Croissant - $4.00
Ginger Biscotti - $2.50
Oatmeal Scone - $3.25
Ginger Scone - $3.50
Chocolate syrup - $1.50
Hazelnut syrup - $1.50
Carmel syrup - $1.50
Sugar Free Vanilla syrup - $1.50
Dark chocolate (Packaged Chocolate) - $3.00
