In [1]:
from pinecone import Pinecone, ServerlessSpec
from openai import OpenAI
import pandas as pd
import time
import os
import dotenv

# Load settings from a local .env file so the os.getenv() lookups in the
# configuration cell can find them.
dotenv.load_dotenv()

True

In [2]:
# Connection settings come from the environment; each value is None when the
# corresponding variable is not set.
env = os.environ

# Embedding service (OpenAI-compatible endpoint).
token = env.get("RUNPOD_TOKEN")
open_ai_base_url = env.get("RUNPOD_EMBEDDING_URL")
model_name = env.get("MODEL_NAME")

# Pinecone vector database.
pinecone_api_key = env.get("PINECONE_API_KEY")
index_name = env.get("PINECONE_INDEX_NAME")

In [3]:
# Confirm that the key was loaded WITHOUT echoing the secret: cell outputs are
# saved with the notebook and end up wherever the file is shared or committed.
# NOTE: an earlier revision of this cell displayed the raw key, so that key
# should be treated as compromised and rotated.
bool(pinecone_api_key)

True

In [4]:
# Vector database client.
pc = Pinecone(
    api_key=pinecone_api_key,
)

# OpenAI-compatible client aimed at the endpoint read from RUNPOD_EMBEDDING_URL.
client = OpenAI(
    base_url=open_ai_base_url,
    api_key=token,
)

Try out Embeddings

In [5]:
# Smoke-test the embedding endpoint with a single short input.
output = client.embeddings.create(input=["hello world"], model=model_name)
embedding = output.data[0].embedding
# Preview only the first few components: the full vector is hundreds of floats
# and would bloat the saved notebook output.
embedding[:5]

[0.015176702290773392, -0.02265065535902977, 0.00859504658728838, -0.0742514431476593, 0.003908572718501091]

In [6]:
# Length of one embedding vector; the `dimension` passed when the Pinecone
# index is created has to match this value.
len(embedding)

384

Wrangle dataset

In [7]:
# Load the product catalogue; lines=True parses the file as JSON Lines
# (one JSON record per line).
df = pd.read_json("products/products.jsonl", lines=True)

In [8]:
# Peek at the first two rows to check the columns that were loaded.
df.head(2)

Unnamed: 0,name,category,description,ingredients,price,rating,image_path
0,Cappuccino,Coffee,A rich and creamy cappuccino made with freshly...,"[Espresso, Steamed Milk, Milk Foam]",385,4.7,cappuccino.jpg
1,Jumbo Savory Scone,Bakery,"Deliciously flaky and buttery, this jumbo savo...","[Flour, Butter, Cheese, Herbs, Baking Powder, ...",280,4.3,SavoryScone.webp


In [9]:
# Flatten every product row into a single descriptive string to embed.
# Non-string columns are cast to str so they can be concatenated.
product_text = df["name"] + " : " + df["description"]
product_text = product_text + " -- Ingredients: " + df["ingredients"].astype(str)
product_text = product_text + " -- Price: " + df["price"].astype(str)
product_text = product_text + " -- Rating: " + df["rating"].astype(str)

df["text"] = product_text

In [10]:
# Spot-check the combined text for the first two products.
df["text"].head(2)

0    Cappuccino : A rich and creamy cappuccino made...
1    Jumbo Savory Scone : Deliciously flaky and but...
Name: text, dtype: object

In [11]:
# Plain Python list of documents to embed; later cells append extra documents
# that are not part of the product table.
texts = df["text"].tolist()

In [12]:
# Read the "about us" copy with an explicit encoding so the result does not
# depend on the platform's default (assumes the file is UTF-8 — confirm).
with open("products/Merry's_way_about_us.txt", encoding="utf-8") as f:
    Merry_way_about_section = f.read()

# Prefix the document with a label so its origin is part of the embedded text.
Merry_way_about_section = (
    "Coffee shop Merry's Way about section: " + Merry_way_about_section
)

# Guard the append so re-running this cell does not add a duplicate document.
if Merry_way_about_section not in texts:
    texts.append(Merry_way_about_section)

In [13]:
with open("products/menu_items_text.txt") as f:
    menu_items_text = f.read()

menu_items_text = "Menu Items: " + menu_items_text
texts.append(menu_items_text)
texts

["Cappuccino : A rich and creamy cappuccino made with freshly brewed espresso, steamed milk, and a frothy milk cap. This delightful drink offers a perfect balance of bold coffee flavor and smooth milk, making it an ideal companion for relaxing mornings or lively conversations. -- Ingredients: ['Espresso', 'Steamed Milk', 'Milk Foam'] -- Price: 385 -- Rating: 4.7",
 "Jumbo Savory Scone : Deliciously flaky and buttery, this jumbo savory scone is filled with herbs and cheese, creating a mouthwatering experience. Perfect for a hearty snack or a light lunch, it pairs beautifully with your favorite coffee or tea. -- Ingredients: ['Flour', 'Butter', 'Cheese', 'Herbs', 'Baking Powder', 'Salt'] -- Price: 280 -- Rating: 4.3",
 "Latte : Smooth and creamy, our latte combines rich espresso with velvety steamed milk, creating a perfect balance of flavor and texture. Enjoy it as a comforting treat any time of day, whether you're starting your morning or taking a midday break. -- Ingredients: ['Espres

Generate Embeddings

In [14]:
# Embed all collected documents in one batched request.
output = client.embeddings.create(input=texts, model=model_name)

In [15]:
embeddings = output.data
# The upsert cell pairs texts and embeddings with zip(), which silently
# truncates to the shorter sequence, so fail loudly here on any mismatch.
assert len(embeddings) == len(texts), "embedding count does not match text count"
len(embeddings)

20

Push data to database

In [None]:
# Create the serverless index only when it does not exist yet, so this cell
# can be re-run (Restart & Run All) without failing on an existing index.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,  # must equal the embedding length checked earlier
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

In [17]:
# wait for the index to be ready
while not pc.describe_index(index_name).status.ready:
    time.sleep(1)

index = pc.Index(index_name)

vectors = []
for text, e in zip(texts, embeddings):
    print(text.split(":")[0])
    entry_id = text.split(":")[0]
    vectors.append({"id": entry_id, "values": e.embedding, "metadata": {"text": text}})

index.upsert(vectors=vectors, namespace="ns1")

Cappuccino 
Jumbo Savory Scone 
Latte 
Chocolate Chip Biscotti 
Espresso shot 
Hazelnut Biscotti 
Chocolate Croissant 
Dark chocolate 
Cranberry Scone 
Croissant 
Almond Croissant 
Ginger Biscotti 
Oatmeal Scone 
Ginger Scone 
Chocolate syrup 
Hazelnut syrup 
Carmel syrup 
Sugar Free Vanilla syrup 
Coffee shop Merry's Way about section
Menu Items


{'upserted_count': 20}

Get Closest Documents

In [18]:
# Embed the user question with the same model that embedded the documents.
query_text = "Is Cappuccino lactose-free"
output = client.embeddings.create(model=model_name, input=[query_text])
first_result = output.data[0]
embedding = first_result.embedding

In [19]:
# Fetch the three nearest documents; return their metadata (the source text)
# but not the raw vector values.
query_options = {
    "namespace": "ns1",
    "top_k": 3,
    "include_values": False,
    "include_metadata": True,
}
results = index.query(vector=embedding, **query_options)

In [20]:
# Display the matches: id, similarity score and the stored text metadata.
results

{'matches': [{'id': 'Cappuccino ',
              'metadata': {'text': 'Cappuccino : A rich and creamy cappuccino '
                                   'made with freshly brewed espresso, steamed '
                                   'milk, and a frothy milk cap. This '
                                   'delightful drink offers a perfect balance '
                                   'of bold coffee flavor and smooth milk, '
                                   'making it an ideal companion for relaxing '
                                   'mornings or lively conversations. -- '
                                   "Ingredients: ['Espresso', 'Steamed Milk', "
                                   "'Milk Foam'] -- Price: 385 -- Rating: 4.7"},
              'score': 0.746191,
              'values': []},
             {'id': 'Menu Items',
              'metadata': {'text': 'Menu Items: Menu Items\n'
                                   'Cappuccino - ₹385\n'
                                   'Jumbo Sa