# Diving Pinecone 

In [1]:
import os 
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv(), override=True)

True

In [10]:
from pinecone import Pinecone, ServerlessSpec
pc = Pinecone(
    api_key=os.environ.get("PINECONE_API_KEY")
)


In [12]:
pc.list_indexes()

[]

In [15]:
index_name = 'langchain-pinecone'
if index_name not in pc.list_indexes():
    print(f"{index_name} index name is not in the list")
    print()
    print(f"Creating index {index_name}....")
    pc.create_index(
        index_name, 
        dimension=1536,
        metric='cosine',
        spec=ServerlessSpec(
            cloud='aws',
            region='us-east-1'
        )
    )
    print()
    print("Completed creating indexes.")
else:
    print(f"Index {index_name} already exists.")

langchain-pinecone index name is not in the list

Creating index langchain-pinecone....

Completed creating indexes.


In [17]:
pc.describe_index(index_name)

{
    "name": "langchain-pinecone",
    "metric": "cosine",
    "host": "langchain-pinecone-m5fta1m.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 1536,
    "deletion_protection": "disabled",
    "tags": null
}

In [18]:
"""

# deleting an index name
index_name = "langchain-pinecone"
if index_name in pc.list_indexes():
    print(f"Found an list index name {index_name}")
    take_input = input("If you want to delete the index, write delete: ")
    if take_input == "delete":
        print(f"deleting index {index_name}....")
        pc.delete_index(index_name)
        print()
        print("Completed deleting index !!")
    else:
        print(f"index {index_name} does not exist")

"""
        

'\n\n# deleting an index name\nindex_name = "langchain-pinecone"\nif index_name in pc.list_indexes():\n    print(f"Found an list index name {index_name}")\n    take_input = input("If you want to delete the index, write delete: ")\n    if take_input == "delete":\n        print(f"deleting index {index_name}....")\n        pc.delete_index(index_name)\n        print()\n        print("Completed deleting index !!")\n    else:\n        print(f"index {index_name} does not exist")\n\n'

In [19]:
print(f"connecting the index with {index_name}...")
index = pc.Index(index_name)

print(f"Completed connecting {index_name}")
print(index.describe_index_stats())

connecting the index with langchain-pinecone...
Completed connecting langchain-pinecone
{'dimension': 1536,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {},
 'total_vector_count': 0,
 'vector_type': 'dense'}


In [23]:
import random 
vectors = [random.random() for _ in range(1536)]
print(len(vectors))

1536


In [27]:
vectors = [[random.random() for _ in range(1536)] for v in range(5)]
print(len(vectors[0]))

1536


In [51]:
vectors[0]

[0.9554567588020764,
 0.44791446551159686,
 0.4375962533288683,
 0.7189379624828529,
 0.7009861906353245,
 0.8148169625972299,
 0.768253646546099,
 0.762628716814208,
 0.3686423837845335,
 0.4310955431638509,
 0.7365559634267901,
 0.16474534050666623,
 0.14133964311146308,
 0.5576072610077341,
 0.6120105358086377,
 0.4728334856561388,
 0.011784984332983806,
 0.04891392845515652,
 0.3147255219492574,
 0.19423905888157744,
 0.6580939049948623,
 0.6011694657454058,
 0.19225928364992895,
 0.5421062632183979,
 0.3639980806491687,
 0.29083923378115784,
 0.9369800713834006,
 0.0795614855812996,
 0.6866220646982046,
 0.3896181420067044,
 0.8561852027617917,
 0.19474131788063298,
 0.5720529834891899,
 0.740708053218165,
 0.5890860207754385,
 0.8245378594588675,
 0.8879813429968116,
 0.6770173035815737,
 0.6447630277722856,
 0.16244225349469876,
 0.5371331790407382,
 0.29987538067302777,
 0.8096595015277157,
 0.43745021266311546,
 0.4970042017454801,
 0.0996341150366925,
 0.44763043099488575,
 0

In [52]:
vectors[1]

[0.7676067652451137,
 0.7305672362681057,
 0.4779607286333708,
 0.1849233658418833,
 0.24618903215704768,
 0.23949245090665972,
 0.15789506824293054,
 0.5563935526350373,
 0.236125196816643,
 0.9762989770748163,
 0.81318085348036,
 0.8491338623258458,
 0.984930079954797,
 0.8328518702671808,
 0.21223344117907517,
 0.4793471101310973,
 0.910874536946371,
 0.37604033224955435,
 0.8516818692441533,
 0.9297298515420628,
 0.8838161613335709,
 0.5575697083168178,
 0.1580017242071926,
 0.5988739566868717,
 0.2949887465610085,
 0.800271102242407,
 0.9378255096692837,
 0.5139283615993729,
 0.9082701279454158,
 0.8612216620733569,
 0.136657610728121,
 0.6365313438286683,
 0.9605318168396538,
 0.41047175763350774,
 0.29799657908340504,
 0.7789525327264332,
 0.4822008093602619,
 0.5978540074262974,
 0.8652599772885103,
 0.20084611475416814,
 0.8792490872589437,
 0.9487159849757146,
 0.7802113479804605,
 0.34250628728352517,
 0.25381978788597426,
 0.1530006780854959,
 0.6458391024245507,
 0.2418943

In [53]:
vectors[2]

[0.007433018316358453,
 0.3361071675203219,
 0.30904216319458766,
 0.9346856534958817,
 0.8379493497468559,
 0.612729255379023,
 0.1394185074269424,
 0.3415634348455998,
 0.7323348999219952,
 0.5746544833737934,
 0.9222469961195104,
 0.009324892635219961,
 0.24145957827221887,
 0.41125445950559647,
 0.4560913478003217,
 0.1179416312045315,
 0.6759736471702511,
 0.2366863641398974,
 0.6961605506716965,
 0.10486815020566731,
 0.8549416074592973,
 0.2943392333286913,
 0.7745571691851941,
 0.7204098368260444,
 0.04194600773545665,
 0.769515194551377,
 0.2936374428099767,
 0.3935337607285514,
 0.5968445491618996,
 0.7237684752901444,
 0.034518599799239436,
 0.9238865467830649,
 0.48189080139574114,
 0.5865814559985849,
 0.043171036087235826,
 0.22447608219177895,
 0.32796852167441837,
 0.0974306703816219,
 0.34122422750402703,
 0.0680375263084042,
 0.5696054635188635,
 0.9373624974130361,
 0.9431529677608067,
 0.06144910295152084,
 0.7121215152562808,
 0.843478358245341,
 0.8967688570039803

In [25]:
# vectors 
ids = list('abcde')


In [43]:
index.upsert(vectors=zip(ids, vectors))

{'upserted_count': 5}

In [30]:
print([('c', [0.3] * 1536)])

[('c', [0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.

In [44]:
output = [('c', [0.3] * 1536)]
print(len(output))

1


In [45]:
# updating a vector 
index.upsert(vectors=[('c', [0.3] * 1536)])

{'upserted_count': 1}

In [33]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 5}},
 'total_vector_count': 5,
 'vector_type': 'dense'}

In [34]:
index.delete(ids=['b','c'])

{}

In [46]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 5}},
 'total_vector_count': 5,
 'vector_type': 'dense'}

In [36]:
index.fetch(ids=['a', 'c'])

FetchResponse(namespace='', vectors={'a': Vector(id='a', values=[0.955456734, 0.447914451, 0.437596262, 0.718937933, 0.700986207, 0.814816952, 0.768253624, 0.762628734, 0.36864239, 0.431095541, 0.736555934, 0.164745346, 0.141339645, 0.557607234, 0.612010539, 0.472833484, 0.0117849847, 0.0489139296, 0.314725518, 0.194239065, 0.658093929, 0.601169467, 0.192259282, 0.542106271, 0.363998085, 0.290839225, 0.936980069, 0.0795614868, 0.686622083, 0.389618129, 0.856185198, 0.194741324, 0.572052956, 0.740708053, 0.589086, 0.824537873, 0.887981355, 0.677017331, 0.644763052, 0.162442252, 0.537133157, 0.299875379, 0.809659481, 0.4374502, 0.497004211, 0.0996341184, 0.447630435, 0.822763503, 0.149325624, 0.147085786, 0.405459553, 0.300112486, 0.359396607, 0.80938518, 0.877882719, 0.604908705, 0.964303851, 0.0536376946, 0.147600651, 0.82270664, 0.365199655, 0.734436214, 0.642081201, 0.51803261, 0.252872288, 0.157280236, 0.931336, 0.462126583, 0.242030188, 0.340939909, 0.865406096, 0.656972051, 0.9912

In [37]:
index.delete(delete_all=True)

{}

In [38]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {},
 'total_vector_count': 0,
 'vector_type': 'dense'}

In [58]:
queries = [[random.random() for _ in range(1536)] for v in range(2)]

In [59]:
queries

[[0.39864179160823787,
  0.33170102182345174,
  0.7444376469266724,
  0.008187219956791836,
  0.4378417826450417,
  0.02307940968833877,
  0.4822574731199658,
  0.5812231916975293,
  0.6204264914175339,
  0.1516502292576869,
  0.1063172389724315,
  0.40812324350586826,
  0.13979119550213936,
  0.7246616472108763,
  0.40436441588587557,
  0.7100251577862834,
  0.9309674874388337,
  0.9243597789084809,
  0.6080315006040183,
  0.31792580973745155,
  0.6041948771613836,
  0.1797544091242571,
  0.7359180704107292,
  0.12346857646615228,
  0.4066295492591524,
  0.9338140365031691,
  0.5413045409850028,
  0.22649079371798153,
  0.5279891670232941,
  0.1696614126036584,
  0.14831290172163358,
  0.42139179304675134,
  0.17263876891585683,
  0.9525002772051966,
  0.6084203586465392,
  0.10081636086481027,
  0.22074397536180157,
  0.46918769938079674,
  0.8782269142617158,
  0.3711352352701278,
  0.6433336352145649,
  0.060106733681917324,
  0.8243263915177194,
  0.3976349869987402,
  0.159220352

In [60]:
index.query(
    queries=queries,
    top_k=3,
    include_values=False
)

PineconeApiException: (400)
Reason: Bad Request
HTTP response headers: HTTPHeaderDict({'Date': 'Sat, 20 Sep 2025 10:03:02 GMT', 'Content-Type': 'text/plain', 'Content-Length': '89', 'Connection': 'keep-alive', 'server': 'envoy'})
HTTP response body: queries[804]: invalid value 0.072556786289301511 for type type.googleapis.com/QueryVector


In [None]:
index.upsert(
vectors=[
    {
        "id": "doc1",
        "values": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6],
        "metadata": {"title": "my document", "category": "tech"}
    }
]
)

# multiple vectors 
vectors_to_upsert = [
    
]

In [61]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
with open('WeShallFightOnTheBeaches.txt', 'rb') as f:
    churchill_speech = f.read()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
    length_function=len
)

In [65]:
# since the churchill_speech is in the bytes structure and RecursiveCharacterTextSplitter wants
# in the string. so we might need to convert it into the string 
if isinstance(churchill_speech, bytes):
    churchill_speech = churchill_speech.decode('utf-8')

# Now create the chunks
chunks = text_splitter.create_documents([churchill_speech])
print(chunks[0])

page_content='I have, myself, full confidence that if all do their duty, if nothing is neglected, and if the best'


In [68]:
print(chunks[1].page_content)

and if the best arrangements are made, as they are being made, we shall prove ourselves once again


In [69]:
print(f"Now you have {len(chunks)}")

Now you have 22


In [70]:
# Embedding Cost 
def print_embedding_cost(texts):
    import tiktoken
    enc = tiktoken.encoding_for_model('text-embedding-ada-002')
    total_tokens = sum([len(enc.encode(page.page_content)) for page in texts])
    print(f"Total Tokens: {total_tokens}")
    print(f"Embedding Cost in USD: {total_tokens / 1000 * 0.0004:.6f}")

print_embedding_cost(chunks)

Total Tokens: 412
Embedding Cost in USD: 0.000165


In [None]:
from langchain.embeddings import GooglePalmEmbeddings
embeddings = 4