### [Quickstart](https://docs.pinecone.io/docs/quickstart)

In [5]:
from pinecone import Pinecone

PINECONE_API_KEY='4fd8d1d8-b9a5-4c8f-8552-718de23ba269'
pc = Pinecone(api_key=PINECONE_API_KEY)

### [Create a starter index](https://docs.pinecone.io/docs/create-an-index#create-a-starter-index)
This is what you get in the free plan

In [7]:
from pinecone import PodSpec

pc.create_index(
  name="starter-index",
  dimension=1536,
  metric="cosine",
  spec=PodSpec(
    environment="gcp-starter"
  )
)

### [View and describe index](https://docs.pinecone.io/docs/view-index-information)

In [18]:
indexes = pc.list_indexes()
indexes

{'indexes': [{'dimension': 1536,
              'host': 'starter-index-btoei15.svc.gcp-starter.pinecone.io',
              'metric': 'cosine',
              'name': 'starter-index',
              'spec': {'pod': {'environment': 'gcp-starter',
                               'pod_type': 'starter',
                               'pods': 1,
                               'replicas': 1,
                               'shards': 1}},
              'status': {'ready': True, 'state': 'Ready'}}]}

In [33]:
index_name=indexes[0].name
index_desc = pc.describe_index(index_name)
index_desc

{'dimension': 1536,
 'host': 'starter-index-btoei15.svc.gcp-starter.pinecone.io',
 'metric': 'cosine',
 'name': 'starter-index',
 'spec': {'pod': {'environment': 'gcp-starter',
                  'pod_type': 'starter',
                  'pods': 1,
                  'replicas': 1,
                  'shards': 1}},
 'status': {'ready': True, 'state': 'Ready'}}

#### Refer to index by name and use the `pc` object instantied from the API to get an `index` object to use for operations (eg. upsert) on the index

In [29]:
index = pc.Index(index_name)
index

<pinecone.data.index.Index at 0x105594c70>

### [Upsert data](https://docs.pinecone.io/docs/upsert-data)

#### Upserting vectors of dim of 8 shouldn't work when our starter index has a dim of 1536

In [30]:
index.upsert(
  vectors=[
    {"id": "A", "values": [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]},
    {"id": "B", "values": [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2]},
    {"id": "C", "values": [0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3]},
    {"id": "D", "values": [0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4]}
  ]
)

PineconeApiException: (400)
Reason: Bad Request
HTTP response headers: HTTPHeaderDict({'content-type': 'application/json', 'Content-Length': '101', 'x-pinecone-request-latency-ms': '25', 'x-pinecone-request-id': '5598630691600012422', 'date': 'Fri, 01 Mar 2024 19:15:45 GMT', 'x-envoy-upstream-service-time': '26', 'server': 'envoy', 'Via': '1.1 google', 'Alt-Svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000'})
HTTP response body: {"code":3,"message":"Vector dimension 8 does not match the dimension of the index 1536","details":[]}


In [32]:
import numpy as np

In [48]:
sample_vectors = np.random.rand(4, index_desc.dimension)
sample_vectors

array([[0.0837335 , 0.61704947, 0.78371755, ..., 0.2786163 , 0.56873498,
        0.69632044],
       [0.97706767, 0.10809191, 0.49855633, ..., 0.91404073, 0.59277767,
        0.10946874],
       [0.79173738, 0.19034994, 0.61152529, ..., 0.16482291, 0.13104325,
        0.41498728],
       [0.66236913, 0.84809796, 0.95425312, ..., 0.59358758, 0.57428864,
        0.58601205]])

In [59]:
vectors = []
for i in range(sample_vectors.shape[0]):
    vectors.append({
        'id': chr(i), # has to be string, so conver to ascii
        'values': sample_vectors[i]
    })
vectors
index.upsert(vectors)

{'upserted_count': 4}

In [66]:
data1 = [ {'id': f'ID:{i}', 'values': sample_vectors[i] } for i in range(sample_vectors.shape[0]) ]
index.upsert(data1)

{'upserted_count': 4}

In [63]:
data_generator = map(lambda i: { 'id': '{i}', 'values': sample_vectors[i] }, range(sample_vectors.shape[0]))
data_generator 

<map at 0x11773d330>

In [65]:
index.upsert(data_generator)

{'upserted_count': 4}

### [Fetch data by ID](https://docs.pinecone.io/docs/fetch-data)

#### Fetching a vector by an ID that exists

In [71]:
index.fetch(['1'])

{'namespace': '',
 'usage': {'read_units': 1},
 'vectors': {'1': {'id': '1',
                   'values': [0.977067649,
                              0.108091906,
                              0.498556316,
                              0.11178229,
                              0.853596687,
                              0.111725874,
                              0.995046198,
                              0.038121704,
                              0.552848697,
                              0.643537581,
                              0.614486814,
                              0.0086817937,
                              0.52388519,
                              0.780967891,
                              0.853241205,
                              0.25937888,
                              0.805024624,
                              0.693695068,
                              0.173523724,
                              0.0621305332,
                              0.439709127,
                     

#### Fetching a vector by an ID that doesn't exist

In [73]:
index.fetch(['Z'])

{'namespace': '', 'usage': {'read_units': 1}, 'vectors': {}}

### [Querying data](https://docs.pinecone.io/docs/query-data)

In [67]:
q = np.random.rand(index_desc.dimension)
q

array([0.9712577 , 0.96002629, 0.04077189, ..., 0.26359871, 0.53915806,
       0.17021803])

In [68]:
index.query(q)

TypeError: Index.query() missing 1 required keyword-only argument: 'top_k'

In [69]:
index.query(
    vector=q,
    top_k=10
)

PineconeApiValueError: Unable to prepare type ndarray for serialization

#### query() needs to pass in a vector of type list
##### Not retrieving values in query

In [70]:
index.query(
    vector=q.tolist(),
    top_k=10
)

{'matches': [{'id': '\x03', 'score': 0.750776, 'values': []},
             {'id': '3', 'score': 0.750776, 'values': []},
             {'id': '{i}', 'score': 0.750776, 'values': []},
             {'id': 'ID:3', 'score': 0.750776, 'values': []},
             {'id': '0', 'score': 0.748378754, 'values': []},
             {'id': 'ID:0', 'score': 0.748378754, 'values': []},
             {'id': '\x00', 'score': 0.748378754, 'values': []},
             {'id': '\x01', 'score': 0.741489172, 'values': []},
             {'id': '1', 'score': 0.741489172, 'values': []},
             {'id': 'ID:1', 'score': 0.741489172, 'values': []}],
 'namespace': '',
 'usage': {'read_units': 5}}

##### Retrieving values as well

In [75]:
index.query(
    vector=q.tolist(),
    top_k=10,
    include_values=True
)

{'matches': [{'id': '\x03',
              'score': 0.750776,
              'values': [0.662369132,
                         0.848098,
                         0.954253137,
                         0.949824572,
                         0.333357,
                         0.507745802,
                         0.186786294,
                         0.790866911,
                         0.734642744,
                         0.800082743,
                         0.628827393,
                         0.798687398,
                         0.0945301503,
                         0.95218569,
                         0.716072917,
                         0.455044419,
                         0.743742,
                         0.562635779,
                         0.5042606,
                         0.551556885,
                         0.241829365,
                         0.753199399,
                         0.181394517,
                         0.899932086,
                         0.419179857,


##### Retrieving values and metadata as well

In [78]:
results = index.query(
    vector=q.tolist(),
    top_k=10,
    include_values=True,
    include_metadata=True
)
results.matches[0]

{'id': '\x03',
 'score': 0.750776,
 'values': [0.662369132,
            0.848098,
            0.954253137,
            0.949824572,
            0.333357,
            0.507745802,
            0.186786294,
            0.790866911,
            0.734642744,
            0.800082743,
            0.628827393,
            0.798687398,
            0.0945301503,
            0.95218569,
            0.716072917,
            0.455044419,
            0.743742,
            0.562635779,
            0.5042606,
            0.551556885,
            0.241829365,
            0.753199399,
            0.181394517,
            0.899932086,
            0.419179857,
            0.598113716,
            0.0285456963,
            0.0925497711,
            0.568334103,
            0.228952289,
            0.559048533,
            0.858965337,
            0.251897931,
            0.0180070922,
            0.840171874,
            0.519462764,
            0.790516436,
            0.468188167,
            0.813349366

### Update data

In [80]:
index.update(
    id = '1',
    values = np.random.rand(index_desc.dimension).tolist()
)

{}

In [81]:
index.fetch(['1'])

{'namespace': '',
 'usage': {'read_units': 1},
 'vectors': {'1': {'id': '1',
                   'values': [0.169400916,
                              0.596117,
                              0.865663171,
                              0.702722132,
                              0.860423625,
                              0.98516494,
                              0.517791271,
                              0.886729777,
                              0.358082235,
                              0.28342852,
                              0.71981746,
                              0.886285961,
                              0.5786376,
                              0.271163762,
                              0.16139856,
                              0.179932266,
                              0.659023464,
                              0.10340561,
                              0.366430253,
                              0.16832307,
                              0.00411810586,
                             

### Delete data

In [84]:
index.delete(
    ids=['1']
)

{}

In [85]:
index.fetch(['1'])

{'namespace': '', 'usage': {'read_units': 1}, 'vectors': {}}

### Delete index

In [86]:
pc.delete_index?

[0;31mSignature:[0m [0mpc[0m[0;34m.[0m[0mdelete_index[0m[0;34m([0m[0mname[0m[0;34m:[0m [0mstr[0m[0;34m,[0m [0mtimeout[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mint[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Deletes a Pinecone index.

Deleting an index is an irreversible operation. All data in the index will be lost.
When you use this command, a request is sent to the Pinecone control plane to delete 
the index, but the termination is not synchronous because resources take a few moments to
be released. 

You can check the status of the index by calling the `describe_index()` command.
With repeated polling of the describe_index command, you will see the index transition to a 
`Terminating` state before eventually resulting in a 404 after it has been removed.

:param name: the name of the index.
:type name: str
:param timeout: Number of seconds to poll status checking whether the index has been deleted.

In [88]:
pc.delete_index(name=index_name)

### [RAG documents](https://docs.pinecone.io/docs/manage-rag-documents)

In [93]:
rag_index = pc.create_index(
    name='ragcorpus',
    dimension=1024,
    spec=PodSpec(
        environment="gcp-starter"
    ),
    metric='dotproduct'
)

#### Trying to upsert into an already deleted index (by mistake)

In [94]:
index.upsert(
  vectors=[
    {"id": "doc1#chunk1", "values": np.random.rand(1024).tolist()},
    {"id": "doc1#chunk2", "values": np.random.rand(1024).tolist()},
    {"id": "doc1#chunk3", "values": np.random.rand(1024).tolist()},
    {"id": "doc1#chunk4", "values": np.random.rand(1024).tolist()}
  ],
  namespace="ns1"
)

UnauthorizedException: (401)
Reason: Unauthorized
HTTP response headers: HTTPHeaderDict({'x-pinecone-auth-rejected-reason': 'Malformed domain', 'www-authenticate': 'Malformed domain', 'Content-Length': '12', 'content-type': 'text/plain', 'date': 'Fri, 01 Mar 2024 20:59:48 GMT', 'server': 'envoy', 'Via': '1.1 google', 'Alt-Svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000'})
HTTP response body: Unauthorized


In [96]:
rag_index = pc.Index(name='ragcorpus')
rag_index.upsert(
  vectors=[
    {"id": "doc1#chunk1", "values": np.random.rand(1024).tolist()},
    {"id": "doc1#chunk2", "values": np.random.rand(1024).tolist()},
    {"id": "doc1#chunk3", "values": np.random.rand(1024).tolist()},
    {"id": "doc1#chunk4", "values": np.random.rand(1024).tolist()}
  ],
  namespace="ns1"
)

{'upserted_count': 4}

#### [List all records under a parent document ](https://docs.pinecone.io/docs/manage-rag-documents#list-all-record-ids-for-a-parent-document)
##### (only for serverless indexes as `.list` isn't possible in pod based indexes)

In [97]:
# To iterate over all result pages using a generator function
for ids in rag_index.list(prefix='doc1', namespace='ns1'):
    print(ids)
# To manually control pagination, use list_paginate().
# See https://docs.pinecone.io/docs/get-record-ids#paginate-through-results for details.

# Response:
# ['doc1#chunk1', 'doc1#chunk2', 'doc1#chunk3']

AttributeError: 'Index' object has no attribute 'list'

##### Use metadata key, value pairs for pod based indexes [link](https://docs.pinecone.io/docs/upsert-data#upsert-records-with-metadata)

In [98]:
rag_index.describe_index_stats()

{'dimension': 1024,
 'index_fullness': 4e-05,
 'namespaces': {'ns1': {'vector_count': 4}},
 'total_vector_count': 4}

##### Add parent RAG document metadata during upsert

In [99]:
rag_index.upsert(
  vectors=[
    {"id": "chunk1", "values": np.random.rand(1024).tolist(), "metadata": {"parent": "doc1"}}, # part of doc1
    {"id": "chunk2", "values": np.random.rand(1024).tolist(), "metadata": {"parent": "doc1"}}, # part of doc1
    {"id": "chunk1", "values": np.random.rand(1024).tolist(), "metadata": {"parent": "doc2"}}, # part of doc2
    {"id": "chunk3", "values": np.random.rand(1024).tolist(), "metadata": {"parent": "doc1"}}  # part of doc1
  ],
  namespace="ns1"
)

{'upserted_count': 4}

##### [Use metadata filters in queries](https://docs.pinecone.io/docs/query-data#using-metadata-filters-in-queries)

##### vectors under doc1

In [110]:
rag_index.query(
    vector=np.random.rand(1024).tolist(),
    filter={"parent": "doc1"},
    namespace="ns1",
    top_k=3,
    include_metadata=True
)

{'matches': [{'id': 'chunk3',
              'metadata': {'parent': 'doc1'},
              'score': 268.298492,
              'values': []},
             {'id': 'chunk2',
              'metadata': {'parent': 'doc1'},
              'score': 256.528198,
              'values': []}],
 'namespace': 'ns1',
 'usage': {'read_units': 6}}

##### vectors under doc2

In [111]:
rag_index.query(
    vector=np.random.rand(1024).tolist(),
    filter={"parent": "doc2"},
    namespace="ns1",
    top_k=3,
    include_metadata=True
)

{'matches': [{'id': 'chunk1',
              'metadata': {'parent': 'doc2'},
              'score': 254.33812,
              'values': []}],
 'namespace': 'ns1',
 'usage': {'read_units': 6}}

### [Text documents](https://docs.pinecone.io/docs/manage-rag-documents)