### Pinecone Vector DB

In [32]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment first,
# so the lookup below can see them.
load_dotenv()

# Pinecone API key; this is None when PINECONE_TOKEN is not set anywhere.
PINECONE_TOKEN = os.environ.get("PINECONE_TOKEN")

In [48]:
from pinecone import Pinecone, ServerlessSpec

# Pass the key as a plain string. The earlier `{PINECONE_TOKEN}` was a set
# literal (a one-element set containing the token), not the token itself.
pc = Pinecone(api_key=PINECONE_TOKEN)

print(pc)

<pinecone.control.pinecone.Pinecone object at 0x132319010>


In [12]:
index_name = "sudhanshu"

# Only create the index when it is missing, so re-running this cell against
# an account that already has it does not fail on a name conflict.
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=1536,  # must equal the length of the embedding vectors stored in it
        metric="cosine",  # similarity metric used when querying
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

# Show the index description whether it was just created or already existed.
pc.describe_index(index_name)

{
    "name": "sudhanshu",
    "metric": "cosine",
    "host": "sudhanshu-lbd4slx.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 1536,
    "deletion_protection": "disabled",
    "tags": null
}

In [13]:
# List the indexes currently available to this client.
pc.list_indexes()

[
    {
        "name": "sudhanshu",
        "metric": "cosine",
        "host": "sudhanshu-lbd4slx.svc.aped-4627-b74a.pinecone.io",
        "spec": {
            "serverless": {
                "cloud": "aws",
                "region": "us-east-1"
            }
        },
        "status": {
            "ready": true,
            "state": "Ready"
        },
        "vector_type": "dense",
        "dimension": 1536,
        "deletion_protection": "disabled",
        "tags": null
    }
]

In [14]:
# Destructive: removes the index named "sudhanshu" (it is re-created in the next cell).
pc.delete_index("sudhanshu")

In [15]:

index_name = "sudhanshu"

# Serverless placement for the index.
serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")

# Create the index again after the delete above.
pc.create_index(
    name=index_name,
    dimension=1536,  # replace with your model's dimension
    metric="cosine",  # replace with your model's metric
    spec=serverless_spec,
)

{
    "name": "sudhanshu",
    "metric": "cosine",
    "host": "sudhanshu-lbd4slx.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 1536,
    "deletion_protection": "disabled",
    "tags": null
}

In [None]:
import requests
import numpy as np

EURI_API_KEY = os.getenv("EURI_API_KEY")

def generate_embeddings(data, verbose=True, timeout=30):
    """Request an embedding for ``data`` from the Euri embeddings endpoint.

    Args:
        data: Value to embed; sent as the ``input`` field of the request.
        verbose: When True (the default), print the embedding's shape, its
            first five values and its vector norm.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        numpy.ndarray built from the first embedding in the response.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
    """
    url = "https://api.euron.one/api/v1/euri/alpha/embeddings"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {EURI_API_KEY}"
    }
    payload = {
        "input": data,
        "model": "text-embedding-3-small"
    }

    # A timeout keeps a stalled connection from hanging the kernel indefinitely.
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    # Surface auth/quota/server failures here rather than as a confusing
    # KeyError on the lookup below.
    response.raise_for_status()
    # Separate name so the `data` argument is not overwritten by the response.
    body = response.json()

    # Convert to numpy array for vector operations
    embedding = np.array(body['data'][0]['embedding'])

    if verbose:
        print(f"Generated embedding with shape: {embedding.shape}")
        print(f"First 5 values: {embedding[:5]}")

        # Example: Calculate vector norm
        norm = np.linalg.norm(embedding)
        print(f"Vector norm: {norm}")

    return embedding

In [19]:
# Embed a sample sentence; this vector is reused by the Pinecone upserts further down.
embeddings_data = generate_embeddings("this is my own data for sudhanshu kumar")

Generated embedding with shape: (1536,)
First 5 values: [-0.01381046 -0.01698404  0.05499979 -0.02343879 -0.00204905]
Vector norm: 0.9999999807339258


In [None]:
# Inspect the embedding array.
embeddings_data

array([-0.01381046, -0.01698404,  0.05499979, ...,  0.00326939,
        0.01023346, -0.01964663])

In [21]:
# Number of components in the embedding; it has to match the index dimension (1536).
len(embeddings_data)

1536

In [22]:
# Handle to the index, then a single record written as an
# (id, values, metadata) tuple.
index = pc.Index('sudhanshu')
first_record = ("item-id-001", embeddings_data.tolist(), {"name": "sudhanshu"})
index.upsert([first_record])

{'upserted_count': 1}

In [23]:

# Same kind of upsert, this time with a dict-format record and an explicit batch size.
# NOTE(review): the id below is spelled with the letters "oo", not the digits "00",
# so it is stored as a separate record from "item-id-001" — confirm that is intended.
lorem_record = {
    "id": "item-id-oo1",
    "values": embeddings_data.tolist(),
    "metadata": {
        "text": "Lorem Ipsum is simply dummy text of the printing and typesetting industry."
    },
}
index.upsert(vectors=[lorem_record], batch_size=1)

Upserted vectors: 100%|██████████| 1/1 [00:01<00:00,  1.53s/it]


{'upserted_count': 1}

In [24]:

# Tuple-format record carrying two metadata fields.
second_record = (
    "item-id-002",
    embeddings_data.tolist(),
    {"name": "ankit warathe", "info": "personal info"},
)
index.upsert(vectors=[second_record])

{'upserted_count': 1}

In [25]:
# Fetch a stored record by id; the response includes the record's full list of vector values.
index.fetch(ids=["item-id-002"])

FetchResponse(namespace='', vectors={'item-id-002': Vector(id='item-id-002', values=[-0.0138104623, -0.0169840436, 0.0549997948, -0.0234387871, -0.00204904494, -0.00540920952, 0.00762466574, 0.0420634113, -0.00569832837, -0.000362869207, 0.0235194713, -0.0323006138, -0.0432198867, -0.0375450924, 0.053681951, 0.0310365595, -0.00923162792, -0.0227529705, -0.00351312873, 0.0537088476, 0.0219864696, 0.00913749635, 0.0484643653, -0.0194045734, 0.0164864901, -0.0402345695, -0.0278091878, -0.0177908875, 0.0261551589, -0.0555645861, -0.0458286814, -0.0321392454, 0.00344925374, -0.0106973927, 0.00815583766, 0.00740278419, -0.0279167667, 0.00365096447, -0.031897191, -0.0287505034, 0.0112621831, -0.0710021779, -0.00767173152, 0.0301221367, -0.013467554, -0.0234522354, -0.0276478194, -0.0126808817, -0.0321930349, -0.0306062419, -0.0313592963, 0.0115378546, 0.0253079738, 0.0528213196, 0.0370340906, 0.00831048284, 0.017441256, 0.0693884939, -0.0290194508, -0.0615890138, 0.0126876058, -0.00124892569,

In [26]:
# Embed the search text, then ask the index for its two closest records
# (metadata included in the matches).
text = "my name is sudhanshu"
embedings_to_search = generate_embeddings(text)
query_vector = embedings_to_search.tolist()
result = index.query(vector=query_vector, top_k=2, include_metadata=True)
print(result)

Generated embedding with shape: (1536,)
First 5 values: [-0.00645238 -0.02682346 -0.02852873 -0.00903333 -0.06539947]
Vector norm: 1.0000000004372394
{'matches': [{'id': 'item-id-001',
              'metadata': {'name': 'sudhanshu'},
              'score': 0.643921196,
              'values': []},
             {'id': 'item-id-oo1',
              'metadata': {'text': 'Lorem Ipsum is simply dummy text of the '
                                   'printing and typesetting industry.'},
              'score': 0.643921196,
              'values': []}],
 'namespace': '',
 'usage': {'read_units': 6}}


In [27]:
# Nine records (item-id-001 through item-id-009) that all share one vector and
# the same metadata, written into the 'sudh1' namespace.
sudh1_records = [
    (f"item-id-{n:03d}", embeddings_data.tolist(), {"name": "sudhanshu"})
    for n in range(1, 10)
]
index.upsert(sudh1_records, namespace='sudh1')

{'upserted_count': 9}

In [28]:
# Nine records (item-id-001 through item-id-009) that all share one vector and
# the same metadata, written into the 'sudh2' namespace.
sudh2_records = [
    (f"item-id-{n:03d}", embeddings_data.tolist(), {"name": "sudhanshu"})
    for n in range(1, 10)
]
index.upsert(sudh2_records, namespace='sudh2')

{'upserted_count': 9}

### Weaviate Vector DB

In [None]:
import os
import weaviate
from weaviate.classes.init import Auth

# Cluster URL and API key are read from the environment.
weaviate_url = os.environ.get("WEAVIATE_URL")
weaviate_api_key = os.environ.get("WEAVIATE_API_KEY")

# Open a connection to the Weaviate Cloud cluster using API-key auth.
api_key_auth = Auth.api_key(weaviate_api_key)
client = weaviate.connect_to_weaviate_cloud(
    cluster_url=weaviate_url,
    auth_credentials=api_key_auth,
)

# Print whether the client reports itself ready.
print(client.is_ready())

True


In [35]:
# Sample document text and its embedding, used by the Weaviate insert below.
text_data = "this is to test my weaviate databsed by sudhanshu"
embedings   = generate_embeddings(text_data)

Generated embedding with shape: (1536,)
First 5 values: [-0.01164255 -0.01081737  0.08393172 -0.01693335 -0.02432522]
Vector norm: 1.0000000177512753


In [40]:
# Look up the "Document" collection and store the text together with its
# embedding, supplied explicitly as the object's vector.
collection = client.collections.get("Document")
document_properties = {"text": text_data, "author": "sudhanshu"}
collection.data.insert(properties=document_properties, vector=embedings.tolist())

UUID('4604eb50-9e44-416a-9404-540e09693e7d')

In [45]:
# Embed the query text and fetch the single nearest stored object.
test_search = "sudhanshu is mentor"
embedings_search_data = generate_embeddings(test_search)

search_vector = embedings_search_data.tolist()
result = collection.query.near_vector(near_vector=search_vector, limit=1)

Generated embedding with shape: (1536,)
First 5 values: [-0.03895542 -0.00112551  0.03618279 -0.01580398 -0.02324848]
Vector norm: 0.9999999445699245


In [46]:
# Print the stored text of every object the search returned.
for match in result.objects:
    print(match.properties['text'])

this is to test my weaviate databsed by sudhanshu


In [47]:
# Show the raw result objects (uuid, metadata, properties, vector, collection).
result.objects

[GenerativeObject(uuid=_WeaviateUUIDInt('282335f4-81a2-40e0-ab28-f03640765ace'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=None, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None), properties={'text': 'this is to test my weaviate databsed by sudhanshu', 'author': 'sudhanshu'}, references=None, vector={}, collection='Document')]