In [30]:
!pip install pinecone



In [31]:
import pinecone
from pinecone import Pinecone, ServerlessSpec

In [32]:
API_KEY="99505dca-9b45-4c1b-b3fe-398ff62a76d9"

In [33]:
pc = Pinecone(api_key=API_KEY)

In [8]:
index_name = "test"

In [9]:
pc.create_index(
    name=index_name,
    dimension=2, # Replace with your model dimensions
    metric="cosine", # Replace with your model metric
    spec=ServerlessSpec(
        cloud="aws",
        region="us-east-1"
    )
)
# pod_type = 's1'

In [26]:
description = pc.describe_index(index_name) # describe an index

host = description.host
print(f"Your index is hosted at {description.host}")

Your index is hosted at test-cbjmo6h.svc.aped-4627-b74a.pinecone.io


In [27]:
if(description.status['state']=='Ready'):
  print("Index is ready")
else:
  print("Index is not ready")

Index is ready


In [28]:
description.dimension

2

In [29]:
description.pod_type # to get the pod type (hardware config for efficient storage)

# **Insertion and Updation**

In [10]:
index = pc.Index(index_name) # connect to an index

index.upsert(
    vectors=[
        {"id": "vec1", "values": [1.0, 1.5]},
        {"id": "vec2", "values": [2.0, 1.0]},
        {"id": "vec3", "values": [0.1, 3.0]},
        {"id": "vec4", "values": [1.0, -2.5]},
        {"id": "vec5", "values": [3.0, -2.0]},
        {"id": "vec6", "values": [0.5, -1.5]},
    ],
    namespace="ns1"
)

# index.upsert(
#     vectors=[
#         {"id": "vec1", "values": [1.0, -2.5]},
#         {"id": "vec2", "values": [3.0, -2.0]},
#         {"id": "vec3", "values": [0.5, -1.5]},
#     ],
#     namespace="ns1"
# )

# index.upsert(
#     vectors=[
#         {"id": "vec1", "values": [1.0, 0.5]},
#         {"id": "vec2", "values": [3.0, -1.0]},
#         {"id": "vec3", "values": [0.5, -2.5]},
#     ],
#     namespace="ns2"
# )

{'upserted_count': 6}

In [13]:
index.update(id="vec1", values=[1.0, 11.5], namespace="ns1")

{}

# **Query Vector Data**

In [18]:
index.query(vector=[1,2],top_k=2,namespace="ns1") #cosine similarity score

{'matches': [{'id': 'vec1', 'score': 0.92980659, 'values': []},
             {'id': 'vec3', 'score': 0.90882951, 'values': []}],
 'namespace': 'ns1',
 'usage': {'read_units': 5}}

# **Fetch Vectors By Id**

In [20]:
index.fetch(ids=["vec1","vec3"],namespace="ns1")

{'namespace': 'ns1',
 'usage': {'read_units': 1},
 'vectors': {'vec1': {'id': 'vec1', 'values': [1.0, 11.5]},
             'vec3': {'id': 'vec3', 'values': [0.1, 3.0]}}}

# **Delete Vectors**

In [21]:
index.delete(ids=["vec1","vec2"],namespace="ns1")

{}

In [23]:
index.delete(delete_all=True,namespace="ns1")

{}

# **Vector_Paritioning-Namespace**

In [34]:
import numpy as np

In [35]:
emails_with_subject = 20
emails_with_body = 45
emails_with_other = 45
dimensions = 2

In [36]:
# create vectors, assume embedded
# python list of lists
vects_subj= np.random.rand(emails_with_subject, dimensions).tolist()
vects_body= np.random.rand(emails_with_body, dimensions).tolist()
vects_other= np.random.rand(emails_with_other, dimensions).tolist()

# create ids for these vectors ,IDs are string

ids_subj = map(str, np.arange(emails_with_subject).tolist())
ids_body = map(str, np.arange(emails_with_body).tolist())
ids_other = map(str, np.arange(emails_with_other).tolist())

# zip id and vector
vectors_subj = list(zip(ids_subj, vects_subj)) # list of tuples (id, values)
vectors_body = list(zip(ids_body, vects_body) )
vectors_other = list(zip(ids_other, vects_other))


# upsert vectors into index with different namespace

index.upsert(vectors_subj, namespace='subject')
index.upsert(vectors_body, namespace='body')
index.upsert(vectors_other) # default namespace is ""

{'upserted_count': 45}

In [38]:
index.query(vector = list(np.random.rand(2)),
          top_k=3,
          namespace='',
          include_values=True)

{'matches': [{'id': '10',
              'score': 0.999987721,
              'values': [0.345027089, 0.401302]},
             {'id': '7',
              'score': 0.99981463,
              'values': [0.880247831, 0.975007772]},
             {'id': '15',
              'score': 0.998124957,
              'values': [0.69944948, 0.71220994]}],
 'namespace': '',
 'usage': {'read_units': 6}}

In [39]:
# update and fetch
vectors_other[44]  # current value

('44', [0.9142305745341301, 0.0638388481053932])

In [42]:
new_value = [0.1, 0.1]

In [43]:
index.update(id = '44',
          values= new_value,
          namespace = '')
index.fetch(ids = ['44'], namespace = '')

{'namespace': '',
 'usage': {'read_units': 1},
 'vectors': {'44': {'id': '44', 'values': [0.1, 0.1]}}}

In [44]:
# delete
index.delete(ids = ['0', '1'], namespace='subject')

{}

In [45]:
index.delete(delete_all=True) # deleted vector without namespace

{}

In [46]:
index.delete(delete_all=True, namespace='body')

{}

# **Vector_Partitioning-Metadata**

In [47]:
index.upsert([
    ("1", [0.1, 0.1], {"topic": "subject", "year": 2023}),
    ("2", [0.2, 0.2], {"topic": "other", "year": 2022}),
    ("3", [0.3, 0.3], {"topic": "body", "year": 2023}),
    ("4", [0.4, 0.4], {"topic": "body"}),
    ("5", [0.5, 0.5], {"topic": "subject"})
])

{'upserted_count': 5}

In [48]:
index.query(vector =[0,0],
          top_k=2,
          include_metadata=True,
          include_values=True,
          filter={
             "topic" : {"$eq": "subject"},
              "year" : 2023
         })

{'matches': [{'id': '1',
              'metadata': {'topic': 'subject', 'year': 2023.0},
              'score': 0.0,
              'values': [0.1, 0.1]}],
 'namespace': '',
 'usage': {'read_units': 6}}

In [49]:
index.fetch(ids=['1'])

{'namespace': '',
 'usage': {'read_units': 1},
 'vectors': {'1': {'id': '1',
                   'metadata': {'topic': 'subject', 'year': 2023.0},
                   'values': [0.1, 0.1]}}}

In [51]:
index.update(id='1',
           values=[0.1,0.11],
           set_metadata = {'topic': 'other', 'year': 2020})

{}

In [52]:
index.fetch(ids=['1'])

{'namespace': '',
 'usage': {'read_units': 1},
 'vectors': {'1': {'id': '1',
                   'metadata': {'topic': 'other', 'year': 2020.0},
                   'values': [0.1, 0.11]}}}

In [55]:
# index.delete(filter = {
#     "topic" : {"$eq": "other"}
# })
# serverless doesn't support