<a href="https://colab.research.google.com/github/prakhar00007/vector_db/blob/main/pinecone.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install pinecone

In [None]:
import pinecone
pinecone.__version__

'8.0.0'

In [None]:
from pinecone import Pinecone, ServerlessSpec

In [None]:
# Demo corpus: each record carries a unique id, the text that gets embedded,
# and descriptive fields (category, tag, difficulty, url).
documents = [
    dict(
        id="doc-001",
        text="Pinecone is a fully managed vector database for search and recommendation.",
        category="documentation",
        tag="pinecone",
        difficulty="beginner",
        url="https://example.com/pinecone-intro",
    ),
    dict(
        id="doc-002",
        text="To use Pinecone with Python, you create an index and upsert vectors with metadata.",
        category="documentation",
        tag="python",
        difficulty="beginner",
        url="https://example.com/pinecone-python",
    ),
    dict(
        id="doc-003",
        text="Vector databases store embeddings that capture semantic meaning for semantic search.",
        category="blog",
        tag="vector-db",
        difficulty="intermediate",
        url="https://example.com/vector-db-concepts",
    ),
    dict(
        id="doc-004",
        text="You can filter Pinecone search results using metadata such as category or difficulty.",
        category="faq",
        tag="metadata",
        difficulty="beginner",
        url="https://example.com/pinecone-metadata",
    ),
    dict(
        id="doc-005",
        text="In Retrieval-Augmented Generation, a vector database like Pinecone stores document chunks.",
        category="blog",
        tag="rag",
        difficulty="intermediate",
        url="https://example.com/rag-pinecone",
    ),
]


In [None]:
import os

# Take the key from the PINECONE_API_KEY environment variable so a real secret
# never has to be pasted into (and saved with) the notebook. The placeholder is
# kept as the fallback, so the cell behaves as before when the variable is unset.
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY", "YOUR_API_KEY"))

In [None]:
pc


<pinecone.pinecone.Pinecone at 0x7909703df9e0>

In [None]:
import requests
import numpy as np
from typing import List, Union

import os

# Read the key from the environment so a real secret never has to be pasted into
# (and saved with) the notebook; the placeholder is kept as the fallback.
EURON_API_KEY = os.environ.get("EURON_API_KEY", "YOUR_API_KEY")

def generate_embeddings(texts: Union[str, List[str]], timeout: float = 30.0):
    """Embed one string or a batch of strings via the Euron embeddings endpoint.

    Args:
        texts: A single string, or a non-empty list of strings.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A 1-D float32 array when ``texts`` is a single string. Otherwise a 2-D
        float32 array with one row per returned embedding -- also for a
        one-element list, so a batch can always be iterated row by row.

    Raises:
        ValueError: If ``texts`` is an empty list.
        requests.HTTPError: If the API responds with a 4xx/5xx status.
    """
    # Remember what the caller passed: it decides the shape of the result.
    single_input = isinstance(texts, str)
    if single_input:
        texts = [texts]

    # Reject an empty batch up front instead of failing later inside np.stack.
    if not texts:
        raise ValueError("texts must contain at least one string")

    url = "https://api.euron.one/api/v1/euri/embeddings"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {EURON_API_KEY}"
    }
    payload = {
        "input": texts,
        "model": "text-embedding-3-small"
    }

    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    # Surface HTTP errors (bad key, quota, ...) here rather than as a KeyError below.
    response.raise_for_status()
    data = response.json()

    # Convert each embedding to a float32 numpy array, in the order the API lists them.
    embeddings = [np.array(item["embedding"], dtype=np.float32) for item in data["data"]]

    # String in -> single vector out; list in -> stacked batch out.
    return embeddings[0] if single_input else np.stack(embeddings)


In [None]:
# Name of the Pinecone index this notebook creates and then opens for upserts and queries.
INDEX_NAME = "euron-pinecone-euri-demo"

In [None]:
pc.list_indexes()

[
    {
        "name": "euron-pinecone-euri-demo",
        "metric": "cosine",
        "host": "euron-pinecone-euri-demo-gftomil.svc.aped-4627-b74a.pinecone.io",
        "spec": {
            "serverless": {
                "region": "us-east-1",
                "cloud": "aws",
                "read_capacity": {
                    "mode": "OnDemand",
                    "status": {
                        "state": "Ready",
                        "current_shards": null,
                        "current_replicas": null
                    }
                }
            }
        },
        "status": {
            "ready": true,
            "state": "Ready"
        },
        "vector_type": "dense",
        "dimension": 1536,
        "deletion_protection": "disabled",
        "tags": null
    }
]

In [None]:
# Create the index only when it does not exist yet, so re-running this cell
# (or Restart & Run All) does not fail with 409 ALREADY_EXISTS.
if not pc.has_index(INDEX_NAME):
    pc.create_index(
        name=INDEX_NAME,
        dimension=1536,  # must equal the length of the vectors upserted later
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

PineconeApiException: (409)
Reason: Conflict
HTTP response headers: HTTPHeaderDict({'content-type': 'text/plain; charset=utf-8', 'access-control-allow-origin': '*', 'vary': 'origin,access-control-request-method,access-control-request-headers', 'access-control-expose-headers': '*', 'x-pinecone-api-version': '2025-10', 'x-cloud-trace-context': 'ce982789d9dadb5ab4c26a5f85bb12f3', 'date': 'Mon, 08 Dec 2025 09:05:25 GMT', 'server': 'Google Frontend', 'Content-Length': '85', 'Via': '1.1 google', 'Alt-Svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000'})
HTTP response body: {"error":{"code":"ALREADY_EXISTS","message":"Resource  already exists"},"status":409}


In [None]:
documents

[{'id': 'doc-001',
  'text': 'Pinecone is a fully managed vector database for search and recommendation.',
  'category': 'documentation',
  'tag': 'pinecone',
  'difficulty': 'beginner',
  'url': 'https://example.com/pinecone-intro'},
 {'id': 'doc-002',
  'text': 'To use Pinecone with Python, you create an index and upsert vectors with metadata.',
  'category': 'documentation',
  'tag': 'python',
  'difficulty': 'beginner',
  'url': 'https://example.com/pinecone-python'},
 {'id': 'doc-003',
  'text': 'Vector databases store embeddings that capture semantic meaning for semantic search.',
  'category': 'blog',
  'tag': 'vector-db',
  'difficulty': 'intermediate',
  'url': 'https://example.com/vector-db-concepts'},
 {'id': 'doc-004',
  'text': 'You can filter Pinecone search results using metadata such as category or difficulty.',
  'category': 'faq',
  'tag': 'metadata',
  'difficulty': 'beginner',
  'url': 'https://example.com/pinecone-metadata'},
 {'id': 'doc-005',
  'text': 'In Retrie

In [None]:
documents[0]['text']

'Pinecone is a fully managed vector database for search and recommendation.'

In [None]:
# Pull out just the text of every document, in list order, as the embedding input.
texts = [entry["text"] for entry in documents]

In [None]:
texts

['Pinecone is a fully managed vector database for search and recommendation.',
 'To use Pinecone with Python, you create an index and upsert vectors with metadata.',
 'Vector databases store embeddings that capture semantic meaning for semantic search.',
 'You can filter Pinecone search results using metadata such as category or difficulty.',
 'In Retrieval-Augmented Generation, a vector database like Pinecone stores document chunks.']

In [None]:
doc_embeddings = generate_embeddings(texts)

# Fail fast if the batch does not line up with the inputs or with the index:
# one row per text, 1536 values per row (the dimension the index was created with).
assert doc_embeddings.shape == (len(texts), 1536), doc_embeddings.shape

In [None]:
doc_embeddings

array([[-0.001367  ,  0.00097596,  0.02587722, ...,  0.01030115,
        -0.00445685, -0.00447976],
       [ 0.02906471,  0.02790113,  0.04518151, ..., -0.01396294,
         0.00476263,  0.01302218],
       [-0.02904676,  0.02357966,  0.01055084, ...,  0.02753553,
         0.00489206,  0.0044698 ],
       [ 0.02287637, -0.01052978,  0.05373645, ..., -0.00688338,
         0.02043264, -0.01085604],
       [ 0.01344805,  0.02365223,  0.04482685, ..., -0.00082055,
         0.04612951,  0.01821171]], dtype=float32)

In [None]:
# Eyeball the pairing: zip walks both sequences in step, so each document is
# printed next to the embedding row at the same position.
# NOTE(review): this presumes the API returns embeddings in input order -- confirm.
for doc,emb in zip(documents , doc_embeddings):
  print(doc,emb)

{'id': 'doc-001', 'text': 'Pinecone is a fully managed vector database for search and recommendation.', 'category': 'documentation', 'tag': 'pinecone', 'difficulty': 'beginner', 'url': 'https://example.com/pinecone-intro'} [-0.001367    0.00097596  0.02587722 ...  0.01030115 -0.00445685
 -0.00447976]
{'id': 'doc-002', 'text': 'To use Pinecone with Python, you create an index and upsert vectors with metadata.', 'category': 'documentation', 'tag': 'python', 'difficulty': 'beginner', 'url': 'https://example.com/pinecone-python'} [ 0.02906471  0.02790113  0.04518151 ... -0.01396294  0.00476263
  0.01302218]
{'id': 'doc-003', 'text': 'Vector databases store embeddings that capture semantic meaning for semantic search.', 'category': 'blog', 'tag': 'vector-db', 'difficulty': 'intermediate', 'url': 'https://example.com/vector-db-concepts'} [-0.02904676  0.02357966  0.01055084 ...  0.02753553  0.00489206
  0.0044698 ]
{'id': 'doc-004', 'text': 'You can filter Pinecone search results using metad

In [None]:
# Document fields copied into each vector's metadata, in stored order.
metadata_fields = ("category", "tag", "difficulty", "url", "text")

# One record per (document, embedding) pair: the document id, the embedding as a
# plain Python list, and the selected document fields as metadata.
vector_to_upsert = [
    {
        "id": doc["id"],
        "values": emb.tolist(),
        "metadata": {field: doc[field] for field in metadata_fields},
    }
    for doc, emb in zip(documents, doc_embeddings)
]

In [None]:
# Preview the records without dumping every float of every embedding:
# show each id, the vector length, and the attached metadata.
[
    {"id": item["id"], "dim": len(item["values"]), "metadata": item["metadata"]}
    for item in vector_to_upsert
]

In [None]:
vector_to_upsert[0]['metadata']

{'category': 'documentation',
 'tag': 'pinecone',
 'difficulty': 'beginner',
 'url': 'https://example.com/pinecone-intro',
 'text': 'Pinecone is a fully managed vector database for search and recommendation.'}

In [None]:
# Open a handle to the index named INDEX_NAME; the upsert and query cells below go through it.
index = pc.Index(INDEX_NAME)

In [None]:
index

<pinecone.db_data.index.Index at 0x79095b7078f0>

In [None]:
INDEX_NAME

'euron-pinecone-euri-demo'

In [None]:
# Send every prepared record (id, values, metadata) to the index in a single call.
index.upsert(vectors=vector_to_upsert)

UpsertResponse(upserted_count=5, _response_info={'raw_headers': {'date': 'Mon, 08 Dec 2025 09:35:02 GMT', 'content-type': 'application/json', 'content-length': '19', 'connection': 'keep-alive', 'x-pinecone-request-lsn': '1', 'x-pinecone-request-logical-size': '31805', 'x-pinecone-request-latency-ms': '278', 'x-pinecone-request-id': '7682488162771850726', 'x-envoy-upstream-service-time': '268', 'grpc-status': '0', 'server': 'envoy'}})

In [None]:
# Embed the question with the same function used for the documents.
# Passing a plain string (not a list) makes generate_embeddings return one 1-D vector.
query_text = "How to use Pinecone with Python?"
query_embedding = generate_embeddings(query_text)


In [None]:
query_embedding

array([ 0.00058833, -0.03537055,  0.04636481, ..., -0.01094834,
       -0.01569109,  0.03009645], dtype=float32)

In [None]:
# Keyword arguments for index.query: the query vector as a plain list,
# the three closest matches, with stored metadata included in each match.
query_request = dict(
    vector=query_embedding.tolist(),
    top_k=3,
    include_metadata=True,
)

In [None]:
# Unpack the request dict into keyword arguments and run the similarity search.
index.query(**query_request)

QueryResponse(matches=[{'id': 'doc-002',
 'metadata': {'category': 'documentation',
              'difficulty': 'beginner',
              'tag': 'python',
              'text': 'To use Pinecone with Python, you create an index and '
                      'upsert vectors with metadata.',
              'url': 'https://example.com/pinecone-python'},
 'score': 0.702996254,
 'values': []}, {'id': 'doc-001',
 'metadata': {'category': 'documentation',
              'difficulty': 'beginner',
              'tag': 'pinecone',
              'text': 'Pinecone is a fully managed vector database for search '
                      'and recommendation.',
              'url': 'https://example.com/pinecone-intro'},
 'score': 0.566223145,
 'values': []}, {'id': 'doc-004',
 'metadata': {'category': 'faq',
              'difficulty': 'beginner',
              'tag': 'metadata',
              'text': 'You can filter Pinecone search results using metadata '
                      'such as category or difficul

In [None]:
# Same similarity query as before, restricted by a metadata filter to records
# whose "difficulty" equals "beginner".
beginner_only = {"difficulty": {"$eq": "beginner"}}
pi_response_meta = index.query(
    vector=query_embedding.tolist(),
    top_k=3,
    include_metadata=True,
    filter=beginner_only,
)

In [None]:
pi_response_meta

QueryResponse(matches=[{'id': 'doc-002',
 'metadata': {'category': 'documentation',
              'difficulty': 'beginner',
              'tag': 'python',
              'text': 'To use Pinecone with Python, you create an index and '
                      'upsert vectors with metadata.',
              'url': 'https://example.com/pinecone-python'},
 'score': 0.702996254,
 'values': []}, {'id': 'doc-001',
 'metadata': {'category': 'documentation',
              'difficulty': 'beginner',
              'tag': 'pinecone',
              'text': 'Pinecone is a fully managed vector database for search '
                      'and recommendation.',
              'url': 'https://example.com/pinecone-intro'},
 'score': 0.566223145,
 'values': []}, {'id': 'doc-004',
 'metadata': {'category': 'faq',
              'difficulty': 'beginner',
              'tag': 'metadata',
              'text': 'You can filter Pinecone search results using metadata '
                      'such as category or difficul