In [1]:
from pinecone import Pinecone, ServerlessSpec
import os
import pandas as pd
from time import time
import dotenv
from langchain_community.embeddings import HuggingFaceEmbeddings
dotenv.load_dotenv()


True

In [2]:
pinecone_api_key = os.getenv("PINECONE_API_KEY")
model_name = "sentence-transformers/all-MiniLM-L6-v2" 

In [3]:
pc = Pinecone(api_key=pinecone_api_key)

embedding_model = HuggingFaceEmbeddings(model_name=model_name)

  embedding_model = HuggingFaceEmbeddings(model_name=model_name)





## Try out embeddings

In [4]:

embeddings = embedding_model.embed_documents(["helloo there"])
embeddings[0]

[-0.07149805873632431,
 0.05053558945655823,
 0.009315689094364643,
 0.07794801145792007,
 -0.05504470318555832,
 -0.08670640736818314,
 0.062370289117097855,
 0.021486937999725342,
 -0.05310399830341339,
 0.01443465519696474,
 0.009361579082906246,
 -0.016171343624591827,
 -0.07071266323328018,
 -0.013639172539114952,
 -0.028334571048617363,
 0.03151993080973625,
 0.06559285521507263,
 0.0188792422413826,
 -0.1317654699087143,
 0.022574087604880333,
 -0.01794448308646679,
 0.02280488796532154,
 -0.1425701081752777,
 0.06657303124666214,
 -0.06678564846515656,
 -0.06102047115564346,
 0.022202422842383385,
 0.05682176351547241,
 -0.03793434426188469,
 0.01472124271094799,
 -0.030035153031349182,
 0.08263616263866425,
 0.06364276260137558,
 0.04875898361206055,
 0.03309981897473335,
 0.024811426177620888,
 -0.07450362294912338,
 -0.06536608934402466,
 0.033822279423475266,
 -0.022015586495399475,
 0.014845259487628937,
 -0.033400602638721466,
 0.02044857107102871,
 -0.041234295815229416,

In [5]:
len(embeddings[0])

384

## Wrangle dataset

In [6]:
df=pd.read_json('products/products.jsonl',lines=True)

In [7]:
df.head(2)

Unnamed: 0,name,category,description,ingredients,price,rating,image_path
0,Cappuccino,Coffee,A rich and creamy cappuccino made with freshly...,"[Espresso, Steamed Milk, Milk Foam]",4.5,4.7,cappuccino.jpg
1,Jumbo Savory Scone,Bakery,"Deliciously flaky and buttery, this jumbo savo...","[Flour, Butter, Cheese, Herbs, Baking Powder, ...",3.25,4.3,SavoryScone.webp


In [8]:
df['text'] =  df['name']+" : "+df['description'] + \
                " -- Ingredients: " + df['ingredients'].astype(str) + \
                " -- Price: " + df['price'].astype(str) + \
                " -- rating: " + df['rating'].astype(str) 

In [9]:
df['text'].head()

0    Cappuccino : A rich and creamy cappuccino made...
1    Jumbo Savory Scone : Deliciously flaky and but...
2    Latte : Smooth and creamy, our latte combines ...
3    Chocolate Chip Biscotti : Crunchy and delightf...
4    Espresso shot : A bold shot of rich espresso, ...
Name: text, dtype: object

In [10]:
texts = df['text'].tolist()

In [11]:
len(texts)

18

In [None]:

with open('products/Coffee_Ghar_about_us.txt') as f:
    Coffee_Ghar_about_section = f.read()
    
Coffee_Ghar_about_section = "Coffee shop Coffee Ghar about section: " + Coffee_Ghar_about_section
texts.append(Coffee_Ghar_about_section)

In [13]:
with open('products/menu_items_text.txt') as f:
    menue_items_text = f.read()
    
menue_items_text = "Menu Items: " + menue_items_text
texts.append(menue_items_text)

## Generate Embeddings

In [14]:
embeddings = embedding_model.embed_documents(texts)


In [15]:
len(embeddings)

20

## Push data to database

In [16]:
index_name = "coffeeshop"

pc.create_index(
    name=index_name,
    dimension=384, # Replace with your model dimensions
    metric="cosine", # Replace with your model metric
    spec=ServerlessSpec(
        cloud="aws",
        region="us-east-1"
    ) 
)

In [17]:
# Wait for the index to be ready
while not pc.describe_index(index_name).status['ready']:
    time.sleep(1)

# Connect to the index
index = pc.Index(index_name)

# Prepare vectors for insertion
vectors = []
for i, (text, emb) in enumerate(zip(texts, embeddings)):
    entry_id = text.split(":")[0].strip()  # Unique ID for each text
    vectors.append({
        "id": entry_id,
        "values": emb,  # Directly use emb (entire embedding)
        "metadata": {'text': text}
    })

# Upsert into Pinecone
index.upsert(
    vectors=vectors,
    namespace="ns1"
)

{'upserted_count': 20}

## Get Closest documents

In [18]:
output = embedding_model.embed_documents(["Is Cappuccino lactose-free?"])

In [19]:
embeding = output[0]

In [20]:
len(embeding)

384

In [21]:
results = index.query(
    namespace="ns1",
    vector=embeding,
    top_k=3,
    include_values=False,
    include_metadata=True
)

print(results)

{'matches': [{'id': 'Cappuccino',
              'metadata': {'text': 'Cappuccino : A rich and creamy cappuccino '
                                   'made with freshly brewed espresso, steamed '
                                   'milk, and a frothy milk cap. This '
                                   'delightful drink offers a perfect balance '
                                   'of bold coffee flavor and smooth milk, '
                                   'making it an ideal companion for relaxing '
                                   'mornings or lively conversations. -- '
                                   "Ingredients: ['Espresso', 'Steamed Milk', "
                                   "'Milk Foam'] -- Price: 4.5 -- rating: 4.7"},
              'score': 0.632982314,
              'values': []},
             {'id': 'Sugar Free Vanilla syrup',
              'metadata': {'text': 'Sugar Free Vanilla syrup : Enjoy the sweet '
                                   'flavor of vanilla without the 