In [1]:
# Importing the required libraries
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec
from openai import OpenAI
import os
import json
from dotenv import load_dotenv
from pprint import pprint
import pandas as pd
import time
import re
load_dotenv()

True

In [2]:
# Read the service credentials / endpoint id from the environment (populated
# by load_dotenv() in the import cell). Each value is None when the variable
# is not set.
pinecone_api_key, runpod_embedding_model_endpoint_id, runpod_api_key = (
    os.environ.get(env_var_name)
    for env_var_name in (
        "PINECONE_API_KEY",
        "RUNPOD_EMBEDDING_MODEL_ENDPOINT_ID",
        "RUNPOD_API_KEY",
    )
)

In [3]:
# Load the product catalogue from a JSON Lines file (one JSON object per line).
# The encoding is given explicitly so the result does not depend on the
# platform default; the other text files in this notebook are read as UTF-8 too.
with open("products/products.jsonl", encoding="UTF-8") as f:
    # Skip blank lines (e.g. a trailing newline at the end of the file), which
    # json.loads would otherwise reject.
    product_rows = [json.loads(line) for line in f if line.strip()]

data = pd.DataFrame(product_rows)
data.head()

Unnamed: 0,name,category,description,ingredients,price,rating,image_path
0,Cappuccino,Coffee,A rich and creamy cappuccino made with freshly...,"[Espresso, Steamed Milk, Milk Foam]",4.5,4.7,cappuccino.jpg
1,Jumbo Savory Scone,Bakery,"Deliciously flaky and buttery, this jumbo savo...","[Flour, Butter, Cheese, Herbs, Baking Powder, ...",3.25,4.3,SavoryScone.webp
2,Latte,Coffee,"Smooth and creamy, our latte combines rich esp...","[Espresso, Steamed Milk, Milk Foam]",4.75,4.8,Latte.jpg
3,Chocolate Chip Biscotti,Bakery,"Crunchy and delightful, this chocolate chip bi...","[Flour, Sugar, Chocolate Chips, Eggs, Almonds,...",2.5,4.6,chocolat_biscotti.jpg
4,Espresso shot,Coffee,"A bold shot of rich espresso, our espresso is ...",[Espresso],2.0,4.9,Espresso_shot.webp


In [4]:
# Render every product row as one labelled text document, one field per line.
# NOTE: `data` is rebound here from the products DataFrame to a list of
# strings, so re-running this cell without re-running the loading cell fails.
product_documents = (
    "product name: " + data["name"].astype(str) + "\n"
    + "category: " + data["category"].astype(str) + "\n"
    + "description: " + data["description"].astype(str) + "\n"
    + "ingredients: " + data["ingredients"].astype(str) + "\n"
    + "price: " + data["price"].astype(str) + "\n"
    + "rating: " + data["rating"].astype(str)
)

data = product_documents.tolist()

# Append the two free-text documents. The order matters: the record-building
# cell identifies them by position (second-to-last = menu, last = about us).
# Forward slashes are used because "\m" / "\M" are invalid escape sequences in
# a normal string literal and backslash paths do not resolve on non-Windows
# systems; "/" works on every platform.
for extra_document_path in (
    "products/menu_items_text.txt",
    "products/Merry's_way_about_us.txt",
):
    with open(extra_document_path, encoding="UTF-8") as f:
        data.append(f.read())

print(len(data))
print(data)

20
["product name: Cappuccino\ncategory: Coffee\ndescription: A rich and creamy cappuccino made with freshly brewed espresso, steamed milk, and a frothy milk cap. This delightful drink offers a perfect balance of bold coffee flavor and smooth milk, making it an ideal companion for relaxing mornings or lively conversations.\ningredients: ['Espresso', 'Steamed Milk', 'Milk Foam']\nprice: 4.5\nrating: 4.7", "product name: Jumbo Savory Scone\ncategory: Bakery\ndescription: Deliciously flaky and buttery, this jumbo savory scone is filled with herbs and cheese, creating a mouthwatering experience. Perfect for a hearty snack or a light lunch, it pairs beautifully with your favorite coffee or tea.\ningredients: ['Flour', 'Butter', 'Cheese', 'Herbs', 'Baking Powder', 'Salt']\nprice: 3.25\nrating: 4.3", "product name: Latte\ncategory: Coffee\ndescription: Smooth and creamy, our latte combines rich espresso with velvety steamed milk, creating a perfect balance of flavor and texture. Enjoy it as a

In [5]:
# The OpenAI client is pointed at the RunPod endpoint's "/openai/v1" route
# instead of the default OpenAI base URL, authenticated with the RunPod key.
runpod_openai_base_url = (
    f"https://api.runpod.ai/v2/{runpod_embedding_model_endpoint_id}/openai/v1"
)
embedding_client = OpenAI(base_url=runpod_openai_base_url, api_key=runpod_api_key)

def get_embeddings(embedding_client, model_name, input_text):
    """Return the embedding of `input_text` produced by `model_name`.

    Args:
        embedding_client: Client object exposing `embeddings.create(...)`.
        model_name: Name of the embedding model, passed as `model`.
        input_text: Text to embed, passed as `input`.

    Returns:
        The `embedding` attribute of the first item in the response's `data`.
    """
    embedding_response = embedding_client.embeddings.create(
        model=model_name,
        input=input_text,
    )
    # Only the first result is used; any further items in the response are ignored.
    first_result = embedding_response.data[0]
    return first_result.embedding

# Embed every document, one request per document, keeping the same order as
# `data` so the two lists can be zipped together later.
model_name = "BAAI/bge-small-en-v1.5"
data_embeddings = [
    get_embeddings(embedding_client, model_name, document)
    for document in data
]

print(len(data_embeddings))
pprint(data_embeddings)

20
[[-0.012874513864517212,
  -0.05732514336705208,
  0.0017427881248295307,
  0.013682983815670013,
  0.012897394597530365,
  -0.05262686312198639,
  0.03804388269782066,
  0.03386424109339714,
  -0.04167437180876732,
  -0.03825744241476059,
  -0.04271165654063225,
  -0.026542242616415024,
  -0.012355872429907322,
  -0.030965950340032578,
  0.042284540832042694,
  -0.024162594228982925,
  0.012325363233685493,
  -0.05720311030745506,
  -0.05781327933073044,
  -0.005502939224243164,
  0.07602674514055252,
  -0.04438961297273636,
  -0.11300283670425415,
  -0.03450491651892662,
  0.027442239224910736,
  -0.053389571607112885,
  0.0894504114985466,
  -0.009480462409555912,
  -0.017862625420093536,
  -0.14705012738704681,
  -0.031972724944353104,
  0.01900668628513813,
  -0.013789763674139977,
  -0.012592311948537827,
  -0.012127059511840343,
  -0.0032834585290402174,
  0.058972593396902084,
  -0.03018798865377903,
  0.023933781310915947,
  -0.002707613864913583,
  -0.022713448852300644,
 

In [6]:
# Connect to Pinecone using the API key read from the environment
pc = Pinecone(api_key=pinecone_api_key)

# Name of the serverless index used throughout the notebook
index_name = "coffee-shop-app-index"

# Create the index only when it does not exist yet, so re-running is harmless
index_already_exists = pc.has_index(index_name)
if not index_already_exists:
    # dimension=384 has to equal the length of the embedding vectors
    # (presumably the output size of the embedding model -- confirm if the
    # model is changed)
    serverless_spec = ServerlessSpec(cloud='aws', region='us-east-1')
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec=serverless_spec,
    )

# Poll once per second until Pinecone reports the index as ready
while True:
    if pc.describe_index(index_name).status['ready']:
        break
    time.sleep(1)

In [7]:
# Target the index where you'll store the vector embeddings.
# Uses `index_name` (defined in the index-creation cell) instead of repeating
# the literal, so create / target / delete always refer to the same index.
index = pc.Index(index_name)

# Helper that extracts the product name, which is used as the record id
def get_product_name(text):
    """Extract the value following "product name:" on the same line.

    The label is matched case-insensitively and the first occurrence in
    `text` is used.

    Args:
        text: Document text that may contain a "product name: <name>" line.

    Returns:
        The name with surrounding whitespace stripped, or None when the label
        is absent or nothing but whitespace follows it on that line.
    """
    # "[ \t]*" rather than "\s*": "\s" also matches newlines, so an empty name
    # would let the pattern run onto the next line and capture that instead.
    match = re.search(r"product name:[ \t]*(.*)", text, re.IGNORECASE)
    if match is None:
        return None
    name = match.group(1).strip()
    return name or None

# Prepare the records for upsert
# Each contains an 'id', the embedding 'values', and the original text as 'metadata'

# The last two documents are the free-text files appended after the products;
# they contain no "product name:" line, so their ids are fixed by position.
fixed_ids_by_position = {
    len(data) - 2: "Menu Items",
    len(data) - 1: "Merry's Way Coffee",
}

records = []
for position, (document, embedding) in enumerate(zip(data, data_embeddings)):
    # `record_id` rather than `id`, to avoid shadowing the builtin for the
    # rest of the notebook session.
    if position in fixed_ids_by_position:
        record_id = fixed_ids_by_position[position]
    else:
        record_id = get_product_name(document)

    # Fail here with a clear message instead of sending a record without an id.
    if not record_id:
        raise ValueError(f"Could not determine a record id for document #{position}")

    records.append({
        "id": record_id,
        "values": embedding,
        "metadata": {'text': document}
    })

# Upsert the records into the index
index.upsert(
    vectors=records,
    namespace="coffee-shop-app-namespace"
)

upserted_count: 20

In [8]:
# Wait for the upserted vectors to be indexed before reading the statistics
time.sleep(10)

index_stats = index.describe_index_stats()
print(index_stats)

{'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'coffee-shop-app-namespace': {'vector_count': 20}},
 'total_vector_count': 20}


In [9]:
# Define your query
query = "Tell me about the cappuccino coffee."

# Embed the query with the same client and model that were used for the
# documents, so the query vector is comparable with the stored vectors
query_embedding = get_embeddings(embedding_client, model_name, query)

# Search the index for the two most similar vectors (top_k=2). The stored
# metadata (the original text) is returned; the raw vector values are not.
results = index.query(
    namespace="coffee-shop-app-namespace",
    vector=query_embedding,
    top_k=2,
    include_values=False,
    include_metadata=True
)

print(results)

{'matches': [{'id': 'Cappuccino',
              'metadata': {'text': 'product name: Cappuccino\n'
                                   'category: Coffee\n'
                                   'description: A rich and creamy cappuccino '
                                   'made with freshly brewed espresso, steamed '
                                   'milk, and a frothy milk cap. This '
                                   'delightful drink offers a perfect balance '
                                   'of bold coffee flavor and smooth milk, '
                                   'making it an ideal companion for relaxing '
                                   'mornings or lively conversations.\n'
                                   "ingredients: ['Espresso', 'Steamed Milk', "
                                   "'Milk Foam']\n"
                                   'price: 4.5\n'
                                   'rating: 4.7'},
              'score': 0.7987942,
              'sparse_values': {'ind

In [21]:
# Collect the stored text of every match, in the order they were returned,
# and join them with newlines into a single string
matched_texts = []
for match in results["matches"]:
    matched_texts.append(match["metadata"]["text"])

relevant_results = "\n".join(matched_texts)
print(relevant_results)

product name: Cappuccino
category: Coffee
description: A rich and creamy cappuccino made with freshly brewed espresso, steamed milk, and a frothy milk cap. This delightful drink offers a perfect balance of bold coffee flavor and smooth milk, making it an ideal companion for relaxing mornings or lively conversations.
ingredients: ['Espresso', 'Steamed Milk', 'Milk Foam']
price: 4.5
rating: 4.7
Menu Items

Cappuccino - $4.50
Jumbo Savory Scone - $3.25
Latte - $4.75
Chocolate Chip Biscotti - $2.50
Espresso shot - $2.00
Hazelnut Biscotti - $2.75
Chocolate Croissant - $3.75
Dark chocolate (Drinking Chocolate) - $5.00
Cranberry Scone - $3.50
Croissant - $3.25
Almond Croissant - $4.00
Ginger Biscotti - $2.50
Oatmeal Scone - $3.25
Ginger Scone - $3.50
Chocolate syrup - $1.50
Hazelnut syrup - $1.50
Carmel syrup - $1.50
Sugar Free Vanilla syrup - $1.50
Dark chocolate (Packaged Chocolate) - $3.00


In [22]:
# Clean up: remove the index created by this notebook.
# Guarded with has_index (as in the creation cell) so that re-running this
# cell after the index is already gone does not raise.
if pc.has_index(index_name):
    pc.delete_index(index_name)