In [4]:
import os
from time import sleep
from time import time

import dotenv
import pandas as pd
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec

# Load variables from a local .env file into the process environment so the
# os.getenv lookups in the next cell can find the API keys and model name.
dotenv.load_dotenv()

True

In [5]:
# Pull the service credentials and the embedding model name from the process
# environment; any variable that is not set comes back as None.
openai_api_key, openai_model_name, pinecone_api_key = (
    os.getenv(variable)
    for variable in (
        "OPENAI_API_KEY",
        "OPENAI_EMBEDDING_MODEL_NAME",
        "PINECONE_API_KEY",
    )
)

In [6]:
# Instantiate one client per external service, each with its own API key.
client = OpenAI(api_key=openai_api_key)
pc = Pinecone(api_key=pinecone_api_key)

### Try out embeddings

In [8]:
# Smoke-test the embedding endpoint with a single short string and keep the
# resulting vector of the first (only) input.
output = client.embeddings.create(
    model=openai_model_name,
    input=["helloo there"],
)
embedings = output.data[0].embedding

In [9]:
# Dump the raw vector to eyeball the values (long output: one float per dimension).
print(embedings)

[0.026682760566473007, -0.05748298019170761, -0.006223286036401987, 0.01130956131964922, -0.009271014481782913, -0.0031183708924800158, 0.001998179592192173, 0.03245528042316437, -0.0362498015165329, -0.04297767952084541, -0.022632578387856483, -0.025646667927503586, 0.0037777030374854803, 0.019201360642910004, 0.019201360642910004, 0.038483455777168274, -0.05516859143972397, 0.0013304377207532525, -0.0035287714563310146, 0.07502928376197815, 0.05414595454931259, -0.0008628502255305648, -0.017896153032779694, 0.03517334163188934, 0.006811975501477718, 0.04356973245739937, 0.05635269731283188, 0.04090549051761627, 0.01746556907892227, 0.012137089855968952, 0.010858793742954731, -0.05489947646856308, 0.005627869162708521, 0.021246636286377907, -0.027409370988607407, -0.013859426602721214, 0.0009839519625529647, 0.0374608188867569, -0.002627236070111394, -0.05037834495306015, -0.02638673409819603, -0.05293493717908859, 0.058559440076351166, 0.03641127049922943, -0.02486623264849186, -0.01

In [10]:
# Number of dimensions in the returned vector; the Pinecone index dimension must match it.
len(embedings)

1536

### Wrangle dataset

In [11]:
# Load the product catalogue; lines=True because the file is JSON Lines
# (one JSON object per line).
df=pd.read_json('products/products.jsonl',lines=True)

In [12]:
# Peek at the first two rows to check which columns were loaded.
df.head(2)

Unnamed: 0,name,category,description,ingredients,price,rating,image_path
0,Cappuccino,Coffee,A rich and creamy cappuccino made with freshly...,"[Espresso, Steamed Milk, Milk Foam]",4.5,4.7,cappuccino.jpg
1,Jumbo Savory Scone,Bakery,"Deliciously flaky and buttery, this jumbo savo...","[Flour, Butter, Cheese, Herbs, Baking Powder, ...",3.25,4.3,SavoryScone.webp


In [13]:
# Flatten every product row into a single descriptive string; this is the text
# that gets embedded. Non-string columns are cast to str before concatenation.
headline = df["name"] + " : " + df["description"]
details = (
    " -- Ingredients: " + df["ingredients"].astype(str)
    + " -- Price: " + df["price"].astype(str)
    + " -- rating: " + df["rating"].astype(str)
)
df["text"] = headline + details

In [14]:
# Preview the first five combined strings to confirm the formatting.
df['text'].head()

0    Cappuccino : A rich and creamy cappuccino made...
1    Jumbo Savory Scone : Deliciously flaky and but...
2    Latte : Smooth and creamy, our latte combines ...
3    Chocolate Chip Biscotti : Crunchy and delightf...
4    Espresso shot : A bold shot of rich espresso, ...
Name: text, dtype: object

In [15]:
# Plain Python list of the documents to embed; the cells below append more entries to it.
texts = df["text"].to_list()

In [16]:
# Read the shop's "about us" text. The encoding is given explicitly because the
# default used by open() depends on the OS locale, which can garble non-ASCII
# characters on some platforms.
with open('products/Merry\'s_way_about_us.txt', encoding='utf-8') as f:
    Merry_way_about_section = f.read()

# Prefix a label before adding the document to the corpus. The part before the
# first ":" is what the upsert cell later uses as the vector id.
Merry_way_about_section = "Coffee shop Merry's Way about section: " + Merry_way_about_section
texts.append(Merry_way_about_section)

In [17]:
# Read the plain-text menu listing. The encoding is given explicitly because
# the default used by open() depends on the OS locale, which can garble
# non-ASCII characters on some platforms.
with open('products/menu_items_text.txt', encoding='utf-8') as f:
    menue_items_text = f.read()

# Prefix a label before adding the document to the corpus. The part before the
# first ":" is what the upsert cell later uses as the vector id.
menue_items_text = "Menu Items: " + menue_items_text
texts.append(menue_items_text)

### Generate Embeddings

In [18]:
# Embed the whole corpus (product rows plus the two appended documents) in one request.
output = client.embeddings.create(
    model=openai_model_name,
    input=texts,
)

In [19]:
# One result object per input text, each exposing its vector as `.embedding`.
# NOTE(review): assumed to be in the same order as `texts`, since the upsert
# cell zips the two together - confirm against the API contract.
embeddings = output.data

### Push data to database

In [None]:
# Name of the Pinecone index that stores the document vectors.
index_name = os.getenv("PINECONE_INDEX_NAME")

# Create the index only when it does not exist yet, so the notebook can be
# re-run from a fresh kernel without failing on an already-existing index.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=1536,  # must equal the embedding length of the model in use
        metric="cosine",  # similarity metric used when querying
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

In [21]:
# Read the index name in this cell so it does not depend on leftover kernel state.
index_name = os.getenv("PINECONE_INDEX_NAME")

# Wait for the index to be ready.
# `sleep` is imported directly: the name `time` in this notebook is the
# function from `from time import time`, so `time.sleep` would raise
# AttributeError the first time the index is not ready yet.
while not pc.describe_index(index_name).status['ready']:
    sleep(1)

index = pc.Index(index_name)

# One record per document: the id is the label before the first ":" in the
# text, the values are the embedding, and the full text is kept as metadata so
# query results are human-readable.
# NOTE(review): two texts sharing the same label would get the same id -
# confirm that labels are unique in the source data.
vectors = [
    {
        "id": text.split(":")[0].strip(),
        "values": e.embedding,
        "metadata": {'text': text},
    }
    for text, e in zip(texts, embeddings)
]

index.upsert(
    vectors=vectors,
    namespace="ns1"
)

{'upserted_count': 20}

### Get Closest documents

In [22]:
# Embed the user question with the same model that was used for the documents.
output = client.embeddings.create(
    model=openai_model_name,
    input=["Is Cappuccino lactose-free?"],
)
embeding = output.data[0].embedding

In [23]:
# Fetch the three closest documents in the "ns1" namespace. Only the metadata
# (the original text) is requested back; the raw vectors are left out.
results = index.query(
    vector=embeding,
    top_k=3,
    namespace="ns1",
    include_metadata=True,
    include_values=False,
)

print(results)

{'matches': [{'id': 'Cappuccino',
              'metadata': {'text': 'Cappuccino : A rich and creamy cappuccino '
                                   'made with freshly brewed espresso, steamed '
                                   'milk, and a frothy milk cap. This '
                                   'delightful drink offers a perfect balance '
                                   'of bold coffee flavor and smooth milk, '
                                   'making it an ideal companion for relaxing '
                                   'mornings or lively conversations. -- '
                                   "Ingredients: ['Espresso', 'Steamed Milk', "
                                   "'Milk Foam'] -- Price: 4.5 -- rating: 4.7"},
              'score': 0.606460929,
              'values': []},
             {'id': 'Latte',
              'metadata': {'text': 'Latte : Smooth and creamy, our latte '
                                   'combines rich espresso with velvety '
                 