# Setup

In [1]:
import numpy as np
from pinecone import Pinecone
import yaml
import os 

In [2]:
# Parse config.yaml (resolved against the current working directory) and copy
# its PINECONE_API_KEY entry into the process environment.
with open('config.yaml', 'r') as config_file:
    config = yaml.safe_load(config_file)

api_key = config['PINECONE_API_KEY']
os.environ['PINECONE_API_KEY'] = api_key

In [3]:
# Build the Pinecone client, handing it the key from the environment explicitly.
# NOTE(review): .get() yields None when the variable is unset, which presumably
# leaves the SDK on its own PINECONE_API_KEY lookup, as with a bare Pinecone() — confirm.
pinecone_client = Pinecone(api_key=os.environ.get('PINECONE_API_KEY'))

## Pinecone

In [4]:
# Go through every index returned by list_indexes(), printing its name and then
# the full description returned by describe_index() for that name.
indices = pinecone_client.list_indexes()
for index_name in (index['name'] for index in indices):
    print(f"Index: {index_name}")
    print(pinecone_client.describe_index(index_name))

Index: nlp
{'dimension': 2048,
 'host': 'nlp-d4fzflu.svc.aped-4627-b74a.pinecone.io',
 'metric': 'cosine',
 'name': 'nlp',
 'spec': {'serverless': {'cloud': 'aws', 'region': 'us-east-1'}},
 'status': {'ready': True, 'state': 'Ready'}}


In [5]:
# Name of the Pinecone index the rest of the notebook works on; 'nlp' is the
# index shown in the list_indexes()/describe_index() output above.
indice_nome = 'nlp'

In [6]:
# Seeded generator: the demo vectors are identical on every Restart & Run All
# (the previous np.random.normal draw was unseeded and changed on each run).
rng = np.random.default_rng(42)

ids = ['a', 'b', 'c', 'd', 'e']

# Ask the index for its dimension instead of hard-coding 2048, so the vectors
# always match whatever index `indice_nome` points at.
dimensao = pinecone_client.describe_index(indice_nome).dimension

# One standard-normal vector per id. Driving the loop from `ids` keeps both
# lists the same length, so zip() below cannot silently drop an id.
vetores = [rng.normal(0, 1, dimensao).tolist() for _ in ids]

# Handle used by the following cells for all data operations on the index.
indice = pinecone_client.Index(indice_nome)

# Upsert (id, values) pairs without a namespace argument; as the last
# expression of the cell, the upsert response is displayed.
indice.upsert(vectors=list(zip(ids, vetores)))

{'upserted_count': 5}

In [7]:
# Sanity check of the generated data: how many vectors there are, followed by
# the full contents of the vector at position 4.
total_vetores = len(vetores)
print(total_vetores)

vetor_posicao_4 = vetores[4]
print(vetor_posicao_4)

5
[-2.1758204228674107, -0.14253050820713303, 0.338607739954057, -0.8469830555106357, -0.3949716511478302, -1.3058659896887501, 0.6323285782501152, 0.5821405575981631, 0.8891157235813932, -0.6062439755581227, 0.4011899916443502, -0.633533406769824, -1.3542657966539573, -1.8077031145022495, -0.5420860687865191, -1.0231091241171049, -0.27697808587855066, -0.6693962242291929, -1.1031629709953354, 1.5331202899172545, -0.3810019249915311, 0.5109488452536509, 0.7765480673297569, 1.858375125342297, 1.3998539917561053, -1.7655250430436444, 0.029308060829323926, 1.2031348717588117, 1.7312770605748162, 0.7566237018455267, 0.9450829372214199, -1.6783736374192477, 0.24094262325810595, -1.1763077175892227, 0.4157494434656131, 1.1668827087213407, -1.2130902387442895, -0.8315332042508935, 0.7768646965746405, 2.0332495068227296, -0.724687725923582, -0.9279752998759502, 0.7952809940221585, -0.689535856040337, 2.5069192055892815, -0.40092968809732527, -0.42334376044734623, 1.318874594869727, 0.574832696

In [None]:
# Fetch the record stored under id 'c' and print the whole response.
resposta_c = indice.fetch(ids=['c'])
print(resposta_c)
# NOTE(review): this cell carried the bare note "1.29802501" — presumably a
# value observed in an earlier fetch of 'c'; confirm its meaning or remove it.

In [None]:
# Read vector 'c' back, add 1 to every component and upsert the result under
# the same id. The increment is applied to whatever the fetch returns, so each
# run of this cell shifts the stored vector by another +1.
response = indice.fetch(ids=['c'])
if 'vectors' not in response or 'c' not in response['vectors']:
    # No 'vectors' entry, or nothing under 'c': report it and change nothing.
    print("Vetor com ID 'c' não encontrado no índice.")
else:
    retorna_vetor = response['vectors']['c']['values']
    atualiza_vetor = [valor + 1 for valor in retorna_vetor]
    indice.upsert(vectors=[('c', atualiza_vetor)])
    # Fetch once more to show the record after the upsert.
    print(indice.fetch(ids=['c']))

In [12]:
# Delete the records with ids 'd' and 'e' (no namespace argument is passed).
# The call stays the last expression of the cell, so its response is displayed.
ids_removidos = ['d', 'e']
indice.delete(ids=ids_removidos)

{}

In [13]:
# Look up ids 'd' and 'e' again and print the complete fetch response.
resposta_removidos = indice.fetch(ids=['d', 'e'])
print(resposta_removidos)

{'namespace': '', 'usage': {'read_units': 1}, 'vectors': {}}


In [14]:
# Print the statistics the index reports about itself.
estatisticas_indice = indice.describe_index_stats()
print(estatisticas_indice)

{'dimension': 2048,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 3}},
 'total_vector_count': 3}


In [15]:
# Store the existing (ids, vetores) pairs a second time, now under 'namespace1'.
indice.upsert(vectors=list(zip(ids, vetores)), namespace='namespace1')

# Fresh vectors for 'namespace2'. The generator is seeded so the cell is
# reproducible (the previous np.random.normal draw was not); the seed differs
# from the usual 42 so these vectors do not simply repeat another seeded draw.
rng = np.random.default_rng(43)
ids_namespace2 = ['x', 'y', 'z']
# Reuse the dimension of the existing vectors instead of repeating the literal
# 2048, and derive the vector count from the id list instead of a literal 3.
dimensao = len(vetores[0])
vetores_namespace2 = [rng.normal(0, 1, dimensao).tolist() for _ in ids_namespace2]

# Last expression of the cell: the response of this second upsert is displayed.
indice.upsert(vectors=list(zip(ids_namespace2, vetores_namespace2)),
              namespace='namespace2')

{'upserted_count': 3}

In [16]:
# Fetch id 'a' from 'namespace1' and print the complete response.
resposta_a = indice.fetch(ids=['a'], namespace='namespace1')
print(resposta_a)

{'namespace': 'namespace1',
 'usage': {'read_units': 1},
 'vectors': {'a': {'id': 'a',
                   'values': [0.321271092,
                              -1.19110286,
                              -0.158609182,
                              1.7483418,
                              0.148385569,
                              -0.0419360287,
                              -1.92524755,
                              -0.227817461,
                              -0.485063523,
                              0.621390283,
                              0.404230982,
                              0.281891882,
                              0.763986886,
                              -1.39394295,
                              0.95260334,
                              -1.14189947,
                              0.415561348,
                              -0.531248152,
                              0.609962225,
                              0.414119333,
                              0.952329457,
       

In [17]:
# Fetch id 'x' from 'namespace2' and print the complete response.
resposta_x = indice.fetch(ids=['x'], namespace='namespace2')
print(resposta_x)

{'namespace': 'namespace2',
 'usage': {'read_units': 1},
 'vectors': {'x': {'id': 'x',
                   'values': [-1.36476791,
                              -0.0617415644,
                              1.53858435,
                              -0.804278612,
                              0.112696782,
                              0.25088647,
                              -0.282600194,
                              1.82487237,
                              -0.471699417,
                              -0.281049669,
                              0.162631184,
                              -2.06593037,
                              -0.292142272,
                              0.876026511,
                              -0.838127792,
                              0.0782847479,
                              -1.83179057,
                              0.575292349,
                              -0.906042874,
                              0.952325106,
                              -0.31837216,
   

In [18]:
# Delete id 'x' from 'namespace2', then fetch the same id again to inspect
# what the index returns for it afterwards.
indice.delete(ids=['x'], namespace='namespace2')
resposta_x_removido = indice.fetch(ids=['x'], namespace='namespace2')
print(resposta_x_removido)  # Should come back empty

{'namespace': 'namespace2', 'usage': {'read_units': 1}, 'vectors': {}}


In [19]:
# Seeded generator so the query vector is the same on every run (the previous
# np.random.normal draw was unseeded); the seed differs from the usual 42 so
# the query does not simply repeat another seeded draw.
rng = np.random.default_rng(44)

# Take the dimension from the index statistics instead of hard-coding 2048,
# so the query vector always matches the index being queried.
dimensao = indice.describe_index_stats().dimension
query_vector = rng.normal(0, 1, dimensao).tolist()

# Top-3 matches; no namespace argument is passed, and include_values=False
# asks for ids and scores only.
print(indice.query(vector=query_vector, top_k=3, include_values=False))

{'matches': [{'id': 'b', 'score': 0.0129087754, 'values': []},
             {'id': 'c', 'score': -0.0201933552, 'values': []},
             {'id': 'a', 'score': -0.0301291179, 'values': []}],
 'namespace': '',
 'usage': {'read_units': 1}}
