## Diving into Pinecone

In [2]:
import os
from dotenv import load_dotenv, find_dotenv

# Locate the nearest .env file and load its variables into the process
# environment; override=True lets the file's values replace ones already set.
load_dotenv(dotenv_path=find_dotenv(), override=True)

True

In [4]:
import pinecone

# Initialise the Pinecone client with credentials read from environment
# variables (presumably supplied by the .env file) instead of hard-coded values.
pinecone.init(
    api_key=os.environ.get("PINECONE_API_KEY"),
    environment=os.environ.get("PINECONE_ENV"),
)

  from tqdm.autonotebook import tqdm


In [7]:
# Display the server and client versions reported by Pinecone.
pinecone.info.version()

VersionResponse(server='2.0.11', client='2.2.4')

### Pinecone Indexes
An index is the highest-level organizational unit of vector data. It accepts and stores vectors, serves queries over the vectors it contains, and performs other vector operations over its contents. Each index runs on at least one pod.

In [32]:
# List the names of the indexes that exist in this Pinecone project.
pinecone.list_indexes()

['langchain-pinecone']

In [33]:
# Create the index only when it is missing, so the cell is safe to re-run.
index_name = 'langchain-pinecone'
existing_indexes = pinecone.list_indexes()

if index_name in existing_indexes:
    print(f'Index {index_name} already exist!')
else:
    print(f"Creating index {index_name}...")
    # dimension=1536 matches the length of the vectors built later in this notebook.
    pinecone.create_index(
        index_name,
        dimension=1536,
        metric='cosine',
        pods=1,
        pod_type='p1.x2',
    )
    print("Done")

Index langchain-pinecone already exist!


In [34]:
# Show the configuration and readiness status of the index.
pinecone.describe_index(index_name)

IndexDescription(name='langchain-pinecone', metric='cosine', replicas=1, dimension=1536.0, shards=1, pods=1, pod_type='starter', status={'ready': True, 'state': 'Ready'}, metadata_config=None, source_collection='')

In [35]:
# Deleting an index. This is destructive, so the deletion code below is left
# commented out; uncomment it to actually remove the index.
index_name = 'langchain-pinecone'
# if index_name in pinecone.list_indexes():
#     print(f"Deleting index {index_name}...")
#     pinecone.delete_index(index_name)
#     print("Done")
# else:
#     print(f'Index {index_name} does not exist!')

In [38]:
# To do any work with an index, you must first select it. `index` is the handle
# that the later cells use for upsert, fetch, delete and query.
index_name = 'langchain-pinecone'
index = pinecone.Index(index_name)
# Last expression in the cell, so the stats are displayed as the cell output.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

#### Namespace
Pinecone allows you to partition the vectors in an index into namespaces.

Queries and other operations are then limited to one namespace, so different requests can search different subsets of your index, e.g., one namespace for indexing articles by content and another for indexing them by title.

Every index is made of one or more namespaces.

Every vector exists in exactly one namespace.

Namespaces are uniquely identified by a namespace name.

In [42]:
# Build some random demo vectors to insert into the Pinecone index.
import random

# Seed the generator so that a fresh "Restart & Run All" yields the same vectors.
random.seed(42)

# A list of 5 vectors, each with 1536 components to match the index dimension.
vectors = [[random.random() for _ in range(1536)] for _ in range(5)]



In [63]:
# To insert a vector into an index we need the vector and its id.
ids = list('abcde')

# Bind the handle to `index`, not `index_name`: assigning it to `index_name`
# would replace the name string with an Index object, so re-running this cell
# (or any later cell that reuses `index_name` as a string) would break.
index = pinecone.Index(index_name)

# zip pairs each id with its vector; list() materialises the pairs into a
# concrete list of (id, vector) tuples before they are handed to the client.
# upsert inserts a new vector, or updates the stored one if the id already exists.
index.upsert(vectors=list(zip(ids, vectors)))

{'upserted_count': 5}

In [64]:
# Updating a vector: upserting under an id that already exists ('c')
# overwrites the values stored for that id.
replacement_values = [0.3] * 1536
index.upsert(vectors=[('c', replacement_values)])

{'upserted_count': 1}

In [None]:
# Fetching vectors: re-select the index by name, then look up the entries
# stored under the ids 'c' and 'd'.
index = pinecone.Index('langchain-pinecone')
index.fetch(ids=['c', 'd'])

In [65]:
# Show the index statistics: dimension, fullness and per-namespace vector counts.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 5e-05,
 'namespaces': {'': {'vector_count': 5}},
 'total_vector_count': 5}

In [67]:
# Deleting vectors: remove the entries stored under the ids 'b' and 'c'.
index.delete(ids=['b', 'c'])

{}

In [70]:
# Check the statistics again to see the vector count after the deletion.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 3e-05,
 'namespaces': {'': {'vector_count': 3}},
 'total_vector_count': 3}

In [79]:
# Fetching an id that does not exist does not raise an error; the response
# simply comes back with an empty 'vectors' mapping.
index.fetch(ids=['b'])

{'namespace': '', 'vectors': {}}

In [81]:
# Deleting all vectors. This is destructive, so the call is left commented out;
# uncomment it to run.
# index.delete(delete_all=True)

In [100]:
# Querying: build a single random query vector with 1536 components (the same
# length as the stored vectors), wrapped in an outer list.
queries = [[random.random() for _ in range(1536)]]

In [101]:
# Retrieve the ids of the vectors most similar to the query vector in the
# index's namespace, along with their similarity scores.
index.query(
    # `vector` expects one flat list of floats, while `queries` is a list that
    # wraps a single vector, so pass its only element rather than the wrapper.
    vector=queries[0],
    top_k=3,  # return the 3 most similar vectors
    include_values=False,  # report only ids and scores, not the vector values
)

{'matches': [{'id': 'a', 'score': 0.755741358, 'values': []},
             {'id': 'e', 'score': 0.755475819, 'values': []},
             {'id': 'd', 'score': 0.754842341, 'values': []}],
 'namespace': ''}