In [1]:
from dotenv import load_dotenv, find_dotenv
# Locate a .env file with find_dotenv() and load it; override=True is passed so
# that (presumably, per python-dotenv semantics) values from the file replace
# variables already present in the environment. The call's return value is the
# cell output.
load_dotenv(find_dotenv(), override=True)

True

In [4]:
from pinecone import Pinecone

# Create the Pinecone client with no explicit arguments.
# NOTE(review): presumably the API key is picked up from the environment
# populated by the load_dotenv cell — confirm.
pc = Pinecone()

# Display the indexes visible to this client.
pc.list_indexes()

{'indexes': [{'deletion_protection': 'disabled',
              'dimension': 1536,
              'host': 'langchain-vb8smdh.svc.aped-4627-b74a.pinecone.io',
              'metric': 'cosine',
              'name': 'langchain',
              'spec': {'serverless': {'cloud': 'aws', 'region': 'us-east-1'}},
              'status': {'ready': True, 'state': 'Ready'}}]}

## Working with Pinecone Indexes

In [19]:
from pinecone import ServerlessSpec

# Make sure the 'langchain' index exists: report it if already present,
# otherwise create it as a serverless index.
index_name = 'langchain'

if index_name in pc.list_indexes().names():
    print(f'index {index_name} already exists!')
else:
    print(f'Creating index: {index_name}')

    # 1536-dimensional, cosine-metric index hosted on AWS us-east-1.
    pc.create_index(
        name=index_name,
        dimension=1536,
        metric='cosine',
        spec=ServerlessSpec(cloud='aws', region='us-east-1'),
    )

    print(f'Index {index_name} created!')

Creating index: langchain
Index langchain created!


In [18]:
# Delete the 'langchain' index if it is present; otherwise report that it is missing.
# NOTE(review): later cells call pc.Index(index_name). On a top-to-bottom run this
# cell removes the index right after the create cell above — re-run the create
# cell before continuing (the recorded execution counts show it was run that way).
index_name = 'langchain'

if index_name in pc.list_indexes().names():
    print(f'Deleting index {index_name} ...')
    pc.delete_index(index_name)
    print('Done')
else:
    # Must be an f-string, otherwise the literal text "{index_name}" is printed.
    print(f'Index {index_name} does not exists.')

Deleting index langchain ...
Done


In [8]:
# Get a handle to the index named by index_name (set in an earlier cell).
index = pc.Index(name=index_name)

# Display the index statistics as the cell output.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

## Working with vectors

In [20]:
import random

# Connect to the index, then insert five random 1536-dimensional vectors
# under the ids 'a'..'e'.
index_name = 'langchain'
index = pc.Index(index_name)

ids = ['a', 'b', 'c', 'd', 'e']
vectors = [
    [random.random() for _ in range(1536)]
    for _ in range(5)
]

# Each (id, values) pair from zip() becomes one record; no namespace is given.
index.upsert(vectors=zip(ids, vectors))

{'upserted_count': 5}

In [11]:
# Upsert one record with id 'c' whose 1536 components are all 0.5.
# 'c' is among the ids upserted earlier; the fetch output below shows the 0.5 values.
index.upsert(vectors=[('c', [0.5] * 1536)])

{'upserted_count': 1}

In [12]:
# Re-acquire the index handle, then fetch the records with ids 'c' and 'd'
# (no namespace argument is passed).
index = pc.Index(index_name)

index.fetch(ids=['c', 'd'])

{'namespace': '',
 'usage': {'read_units': 1},
 'vectors': {'c': {'id': 'c',
                   'values': [0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
                              0.5,
             

In [17]:
# Delete the records with ids 'b' and 'c' (no namespace argument is passed).
index.delete(ids=['b', 'c'])

{}

In [15]:
# Display the index statistics to check the vector count after the deletion.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 3}},
 'total_vector_count': 3}

In [16]:
# Fetch id 'x' without a namespace; the recorded output shows an empty
# 'vectors' dict for this request rather than an error.
index.fetch(ids=['x'])

{'namespace': '', 'usage': {'read_units': 1}, 'vectors': {}}

In [21]:
# Build a random query vector with 1536 components, the same length as the
# dimension used when the index was created.
query_vector = [random.random() for _ in range(1536)]

In [22]:
# Query the index with query_vector and ask for the 3 top matches.
# include_values=False is passed; the recorded output shows empty 'values'
# lists alongside each match's id and score.
index.query(
    vector=query_vector,
    top_k=3,
    include_values=False
)

{'matches': [{'id': 'e', 'score': 0.759537578, 'values': []},
             {'id': 'b', 'score': 0.757509291, 'values': []},
             {'id': 'c', 'score': 0.75406611, 'values': []}],
 'namespace': '',
 'usage': {'read_units': 5}}

## Namespaces

In [23]:
import random

# Re-connect to the 'langchain' index and insert five random 1536-dimensional
# vectors with ids 'a'..'e'; no namespace argument is passed.
index = pc.Index('langchain')

ids = ['a', 'b', 'c', 'd', 'e']
vectors = [
    [random.random() for _ in range(1536)]
    for _ in range(5)
]

index.upsert(vectors=zip(ids, vectors))

{'upserted_count': 5}

In [24]:
# Insert three random 1536-dimensional vectors with ids 'x', 'y', 'z' into
# the 'first-namespace' namespace.
ids = list('xyz')

# Generate exactly one vector per id. Building more vectors than ids would be
# silently truncated by zip(), hiding the mismatch.
vectors = [[random.random() for _ in range(1536)] for _ in ids]

index.upsert(vectors=zip(ids, vectors), namespace='first-namespace')

{'upserted_count': 3}

In [25]:
# Insert two random 1536-dimensional vectors with ids 'q' and 'p' into
# the 'second-namespace' namespace.
ids = ['q', 'p']
vectors = [
    [random.random() for _ in range(1536)]
    for _ in range(2)
]

index.upsert(namespace='second-namespace', vectors=zip(ids, vectors))

{'upserted_count': 2}

In [26]:
# Display the index statistics; the recorded output breaks the vector counts
# down per namespace.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 5},
                'first-namespace': {'vector_count': 3},
                'second-namespace': {'vector_count': 2}},
 'total_vector_count': 10}

In [28]:
# Fetch id 'x' from 'first-namespace' by passing the namespace explicitly.
index.fetch(ids=['x'], namespace='first-namespace')

{'namespace': 'first-namespace',
 'usage': {'read_units': 1},
 'vectors': {'x': {'id': 'x',
                   'values': [0.556296,
                              0.184888914,
                              0.476913214,
                              0.946484447,
                              0.143904328,
                              0.70780158,
                              0.671874404,
                              0.34754771,
                              0.351841778,
                              0.652597427,
                              0.0382664949,
                              0.887811244,
                              0.816363037,
                              0.203408286,
                              0.118193947,
                              0.881812215,
                              0.746587694,
                              0.854560733,
                              0.235955015,
                              0.303676546,
                              0.256475,
            

In [29]:
# Delete the record with id 'x' from 'first-namespace'.
index.delete(ids=['x'], namespace='first-namespace')

{}

In [30]:
# Request deletion of everything in 'first-namespace' via delete_all=True;
# the stats output of the following cell no longer lists that namespace.
index.delete(delete_all=True, namespace='first-namespace')

{}

In [31]:
# Display the index statistics again to confirm which namespaces remain.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 5},
                'second-namespace': {'vector_count': 2}},
 'total_vector_count': 7}

## Splitting and Embedding Text Using LangChain