https://docs.pinecone.io/guides/get-started/quickstart

In [1]:
%pip install python-dotenv

Collecting python-dotenv
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)
Downloading python_dotenv-1.1.1-py3-none-any.whl (20 kB)
Installing collected packages: python-dotenv
[0mSuccessfully installed python-dotenv-1.1.1

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
%pip install  "pinecone[grpc]" 

Collecting pinecone[grpc]
  Downloading pinecone-7.3.0-py3-none-any.whl.metadata (9.5 kB)
Collecting googleapis-common-protos>=1.66.0 (from pinecone[grpc])
  Downloading googleapis_common_protos-1.70.0-py3-none-any.whl.metadata (9.3 kB)
Collecting grpcio>=1.59.0 (from pinecone[grpc])
  Downloading grpcio-1.74.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Collecting lz4>=3.1.3 (from pinecone[grpc])
  Downloading lz4-4.4.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Collecting pinecone-plugin-assistant<2.0.0,>=1.6.0 (from pinecone[grpc])
  Downloading pinecone_plugin_assistant-1.8.0-py3-none-any.whl.metadata (30 kB)
Collecting pinecone-plugin-interface<0.0.8,>=0.0.7 (from pinecone[grpc])
  Downloading pinecone_plugin_interface-0.0.7-py3-none-any.whl.metadata (1.2 kB)
Collecting protobuf<6.0,>=5.29 (from pinecone[grpc])
  Downloading protobuf-5.29.5-cp38-abi3-manylinux2014_x86_64.whl.metadata (592 bytes)
Collecting protoc-g

In [3]:
import os
import dotenv
dotenv.load_dotenv()

True

In [4]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec
import time

api_key=os.environ.get('PINECONE_API_KEY')
pc = Pinecone(api_key=api_key)

In [5]:
data = [
    {"id": "vec1", "text": "Apple is a popular fruit known for its sweetness and crisp texture."},
    {"id": "vec2", "text": "The tech company Apple is known for its innovative products like the iPhone."},
    {"id": "vec3", "text": "Many people enjoy eating apples as a healthy snack."},
    {"id": "vec4", "text": "Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces."},
    {"id": "vec5", "text": "An apple a day keeps the doctor away, as the saying goes."},
    {"id": "vec6", "text": "Apple Computer Company was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne as a partnership."}
]

# Convert the text into numerical vectors that Pinecone can index
embeddings = pc.inference.embed(
    model="multilingual-e5-large",
    inputs=[d['text'] for d in data],
    parameters={"input_type": "passage", "truncate": "END"}
)


In [6]:
print(embeddings)

EmbeddingsList(
  model='multilingual-e5-large',
  vector_type='dense',
  data=[
    {'vector_type': dense, 'values': [0.049346923828125, -0.01326751708984375, ..., -0.0197601318359375, -0.0109100341796875]},
    {'vector_type': dense, 'values': [0.03253173828125, -0.0278167724609375, ..., -0.0200042724609375, -0.02105712890625]},
    ... (2 more embeddings) ...,
    {'vector_type': dense, 'values': [0.03131103515625, -0.01861572265625, ..., -0.0299072265625, -0.032958984375]},
    {'vector_type': dense, 'values': [0.039306640625, -0.010162353515625, ..., 0.0011491775512695312, -0.04278564453125]}
  ],
  usage={'total_tokens': 130}
)


In [8]:
# Create a serverless index
index_name = "example-index"

if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=1024,
        metric="cosine",
        spec=ServerlessSpec(
            cloud='aws', 
            region='us-east-1'
        ) 
    ) 

# Wait for the index to be ready
while not pc.describe_index(index_name).status['ready']:
    time.sleep(1)

In [9]:
# Target the index where you'll store the vector embeddings
index = pc.Index("example-index")

# Prepare the records for upsert
# Each contains an 'id', the embedding 'values', and the original text as 'metadata'
records = []
for d, e in zip(data, embeddings):
    records.append({
        "id": d['id'],
        "values": e['values'],
        "metadata": {'text': d['text']}
    })

# Upsert the records into the index
index.upsert(
    vectors=records,
    namespace="example-namespace"
)

upserted_count: 6

In [10]:
# Define your query
query = "Tell me about the tech company known as Apple."

# Convert the query into a numerical vector that Pinecone can search with
query_embedding = pc.inference.embed(
    model="multilingual-e5-large",
    inputs=[query],
    parameters={
        "input_type": "query"
    }
)

# Search the index for the three most similar vectors
results = index.query(
    namespace="example-namespace",
    vector=query_embedding[0].values,
    top_k=3,
    include_values=False,
    include_metadata=True
)

print(results)

{'matches': [], 'namespace': 'example-namespace', 'usage': {'read_units': 1}}
