# Pinecone demo

In [8]:
from pinecone import Pinecone, PodSpec
from pinecone import ServerlessSpec
from sentence_transformers import SentenceTransformer
import os
from dotenv import load_dotenv

_ = load_dotenv()

In [2]:
model_name = 'distilbert-base-nli-stsb-mean-tokens'
model = SentenceTransformer(
    model_name
)

In [3]:
pinecone_key = os.environ.get("PINECONE_API_KEY")
pc_database = Pinecone(
    api_key=pinecone_key
)

In [4]:
pc_database.list_indexes()

[]

In [9]:
pc_database.create_index(
    name="vector-demo",
    dimension=768,
    metric="euclidean",
    spec=ServerlessSpec(
        cloud="aws",
        region="us-east-1"
    )
)

{
    "name": "vector-demo",
    "metric": "euclidean",
    "host": "vector-demo-weinz5z.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 768,
    "deletion_protection": "disabled",
    "tags": null
}

In [10]:
data = [
    {"id": "vector1",  "text": "I love using vector databases"},
    {"id": "vector2",  "text": "Vector databases are great for storing and retrieving vectors"},
    {"id": "vector3",  "text": "Using vector databases makes my life easier"},
    {"id": "vector4",  "text": "Vector databases are efficient for storing vectors"},
    {"id": "vector5",  "text": "I enjoy working with vector databases"},
    {"id": "vector6",  "text": "Vector databases are useful for many applications"},
    {"id": "vector7",  "text": "I find vector databases very helpful"},
    {"id": "vector8",  "text": "Vector databases can handle large amounts of data"},
    {"id": "vector9",  "text": "I think vector databases are the future of data storage"},
    {"id": "vector10", "text": "Using vector databases has improved my workflow"}
]

In [11]:
vector_data = []

for sentence in data:
    embedding = model.encode(sentence['text'])
    vector_info = {
        "id": sentence["id"],
        "values": embedding.tolist()
    }
    vector_data.append(vector_info)

In [16]:
index = pc_database.Index("vector-demo")

In [17]:
index.upsert(vectors=vector_data)

{'upserted_count': 10}

In [18]:
index.describe_index_stats()

{'dimension': 768,
 'index_fullness': 0.0,
 'metric': 'euclidean',
 'namespaces': {'': {'vector_count': 10}},
 'total_vector_count': 10,
 'vector_type': 'dense'}

In [19]:
# Search text and generate its embedding
search_text = "Vector database are really helpful"

search_embedding = model.encode(search_text).tolist()

search_embedding

[-1.3508154153823853,
 -0.04717322066426277,
 0.6145597100257874,
 0.388064444065094,
 -0.3705011010169983,
 -0.3702497184276581,
 -0.035115618258714676,
 -0.2716139256954193,
 -0.806709885597229,
 -0.7500272989273071,
 0.8581597208976746,
 0.4651612639427185,
 -0.43425750732421875,
 -0.06276769191026688,
 -0.12459816783666611,
 -0.32911768555641174,
 -0.15257646143436432,
 -0.01020719762891531,
 -0.4419441819190979,
 -0.3211641013622284,
 0.004632068332284689,
 -0.5454490780830383,
 0.26186707615852356,
 0.5611477494239807,
 0.25307440757751465,
 -0.2177196741104126,
 0.538478434085846,
 0.9433020949363708,
 0.07330197840929031,
 0.4561034142971039,
 0.3259100914001465,
 -0.414325088262558,
 -0.5881977081298828,
 0.6617789268493652,
 -0.2866050601005554,
 0.0727635845541954,
 -0.13625988364219666,
 0.3189540505409241,
 0.29715466499328613,
 -0.4441143572330475,
 0.1369938999414444,
 0.4173368811607361,
 0.3039737343788147,
 0.4968508183956146,
 0.20381347835063934,
 -0.479876309633255

In [21]:
result = index.query(vector=search_embedding, top_k=3)['matches']

In [29]:
for res in result:
    for i in data:
        if i['id'] == res['id']:
            print(i['text'])

I find vector databases very helpful
Vector databases are efficient for storing vectors
I enjoy working with vector databases
