In [27]:
from pinecone import Pinecone, ServerlessSpec
from openai import OpenAI
import os
import time
import pandas as pd
import dotenv
dotenv.load_dotenv()

True

In [13]:
token = os.getenv("RUNPOD_TOKEN")
open_ai_base_url = os.getenv("RUNPOD_EMBEDDING_URL")
model_name = os.getenv("MODEL_NAME")
pinecone_api_key = os.getenv("PINECONE_SECRET")
pinecone_index_name = os.getenv("PINECONE_INDEX_NAME")

In [14]:
pc = Pinecone(api_key=pinecone_api_key)
client = OpenAI(
    api_key=token,
    base_url=open_ai_base_url
)

In [15]:
df = pd.read_json("products/products.jsonl", lines=True)
df.head()

Unnamed: 0,name,category,description,ingredients,price,rating,image_path
0,Cappuccino,Coffee,A rich and creamy cappuccino made with freshly...,"[Espresso, Steamed Milk, Milk Foam]",4.5,4.7,cappuccino.jpg
1,Latte,Coffee,"Smooth and creamy, our latte combines rich esp...","[Espresso, Steamed Milk, Milk Foam]",4.75,4.8,latte.jpg
2,Chocolate Chip Biscotti,Bakery,"Crunchy and delightful, this chocolate chip bi...","[Flour, Sugar, Chocolate Chips, Eggs, Almonds,...",2.5,4.6,chocolate_biscotti.jpg
3,Espresso shot,Coffee,"A bold shot of rich espresso, our espresso is ...",[Espresso],2.0,4.9,espresso.jpg
4,Hazelnut Biscotti,Bakery,These delicious hazelnut biscotti are perfect ...,"[Flour, Sugar, Hazelnuts, Eggs, Baking Powder]",2.75,4.4,hazelnut_biscotti.jpg


In [16]:
df["text"] = df["name"] + " : " + df["description"] + \
" -- Ingredients: " + df["ingredients"].astype(str) + \
" -- Price: " + df["price"].astype(str) + \
" -- Rating: " + df["rating"].astype(str) 

In [17]:
df["text"].head()

0    Cappuccino : A rich and creamy cappuccino made...
1    Latte : Smooth and creamy, our latte combines ...
2    Chocolate Chip Biscotti : Crunchy and delightf...
3    Espresso shot : A bold shot of rich espresso, ...
4    Hazelnut Biscotti : These delicious hazelnut b...
Name: text, dtype: object

In [18]:
texts = df["text"].to_list()

In [19]:
with open("products/Harvest_Roast_about_us.txt", "r") as f:
    harvest_roast_about_section = f.read()

harvest_roast_text = "Coffee shop Harvest Roast's way about section: " + harvest_roast_about_section
texts.append(harvest_roast_text)

In [20]:
with open("products/menu_items_txt.txt", "r") as f:
    menu_items = f.read()

menu_items_text = "Menu items: " + menu_items
texts.append(menu_items_text)

# Generate Embeddings

In [21]:
output = client.embeddings.create(input=texts, model=model_name)

In [22]:
embeddings = output.data

In [26]:
len(embeddings[0].embedding)

384

# Push data to database

In [28]:
pc.create_index(
    name=pinecone_index_name,
    dimension=384,
    metric="cosine",
    spec=ServerlessSpec(
        cloud="aws",
        region="us-east-1"
    )
)

In [29]:
# wait for the index to be ready
while not pc.describe_index(pinecone_index_name).status.ready:
    time.sleep(1)

index = pc.Index(pinecone_index_name)

vectors = []

for text, e in zip(texts, embeddings):
    entry_id = text.split(":")[0]
    vectors.append(
        {
            "id": entry_id,
            "values": e.embedding,
            "metadata": {"text": text}
        }
    )

index.upsert(vectors=vectors, namespace="ns1")

{'upserted_count': 19}

# Get closest documents

In [30]:
output = client.embeddings.create(input=["Is cappuccino lactose-free?"], model=model_name)
embedding = output.data[0].embedding

In [33]:
results = index.query(
    namespace="ns1",
    vector=embedding,
    top_k=3,
    include_values = False,
    include_metadata = True
)

In [34]:
results

{'matches': [{'id': 'Cappuccino ',
              'metadata': {'text': 'Cappuccino : A rich and creamy cappuccino '
                                   'made with freshly brewed espresso, steamed '
                                   'milk, and a frothy milk cap. This '
                                   'delightful drink offers a perfect balance '
                                   'of bold coffee flavor and smooth milk, '
                                   'making it an ideal companion for relaxing '
                                   'mornings or lively conversations. -- '
                                   "Ingredients: ['Espresso', 'Steamed Milk', "
                                   "'Milk Foam'] -- Price: 4.5 -- Rating: 4.7"},
              'score': 0.734804273,
              'values': []},
             {'id': 'Sugar Free Vanilla syrup ',
              'metadata': {'text': 'Sugar Free Vanilla syrup : Enjoy the sweet '
                                   'flavor of vanilla without th