In [None]:
pip install --upgrade pip

In [None]:
!pip install pinecone-client pandas

In [None]:
from pinecone import Pinecone
import os

In [None]:
# Connect to Pinecone (API keys are issued at app.pinecone.io).
# Each setting is read from the environment; when the variable is unset or
# empty, the placeholder string on the right is used instead and is meant to
# be replaced by hand.
api_key = os.getenv('PINECONE_API_KEY') or 'PINECONE_API_KEY'
environment = os.getenv('PINECONE_ENVIRONMENT') or 'PINECONE_ENVIRONMENT'

# Build the client from the API key (`environment` is not passed to it).
pc = Pinecone(api_key=api_key)

In [None]:
# Display the result of listing the indexes available to this client
# (the cell's last expression is rendered as its output).
pc.list_indexes()

In [None]:
# List indexes and print the name of the first one.
indexes = pc.list_indexes()
index_names = indexes.names()

# With no indexes yet, indexing position 0 would raise IndexError,
# so only print a name when there is one.
if index_names:
    print(index_names[0])
else:
    print("No indexes exist in this project yet.")

In [None]:
# Describe the first index returned by the listing above.
# The conditional prevents an IndexError when the listing is empty; in that
# case the expression is None, which the notebook does not render.
pc.describe_index(indexes[0].name) if indexes.names() else None

In [None]:
# Settings for the demo index that the following cells delete, create and query.
new_index_name = "stock-prices"  # name of the index
metric = "cosine"                # similarity metric passed to create_index
dimension = 3                    # vector length; every vector upserted below has 3 components

In [None]:
# Delete the index if it already exists, so the create cell starts clean.
# Membership is checked by name instead of looping with `for index in ...`:
# that loop variable would rebind the notebook-wide `index` name, which the
# upsert/query cells rely on, whenever this cell is re-run.
if new_index_name in pc.list_indexes().names():
    pc.delete_index(new_index_name)

In [None]:
from pinecone import PodSpec

# Pod-based spec for the index: a single "starter" pod with one replica and
# one shard, in the "gcp-starter" environment.
starter_spec = PodSpec(
    environment="gcp-starter",
    pod_type="starter",
    pods=1,
    replicas=1,
    shards=1,
)

# Create the index with the name, dimension and metric chosen earlier.
pc.create_index(
    name=new_index_name,
    dimension=dimension,
    metric=metric,
    spec=starter_spec,
)

In [None]:
# Show the description of the index named by new_index_name.
pc.describe_index(new_index_name)

In [None]:
# Get a handle to the index; the upsert, stats and query cells below all go
# through this object.
index = pc.Index(name=new_index_name)

In [None]:
# Echo the index handle so its repr is shown as the cell output.
index

In [None]:
# Insert some data.
# Each record is an (id, vector) tuple: [(id1, vector1), (id2, vector2), ...]
stock_vectors = [
    ("TSLA", [1., 1., 1.]),
    ("IBM",  [1., 2., 3.]),
    ("GM",   [2., 1., 1.]),
    ("AMZN", [3., 2., 1.]),
    ("C",    [2., 2., 2.]),
]
index.upsert(stock_vectors)

In [None]:
# Inspect the index statistics after the upsert above.
index.describe_index_stats()

In [None]:
# Add metadata to two records that were inserted earlier.
# Upserting an existing id overwrites the stored record, so each vector is
# repeated exactly as it was first inserted ("C" is [2, 2, 2], not [1, 1, 1]);
# otherwise this cell would silently change C's vector while adding metadata.
# Citi is a bank, so its sector is "financial" rather than "automotive".
index.upsert(
    [
        ("TSLA", [1., 1., 1.], {"stock_name": "Tesla", "sector": "automotive"}),
        ("C",    [2., 2., 2.], {"stock_name": "Citi", "sector": "financial"}),
    ]
)

In [None]:
# Column-oriented sample records that a later cell loads into a DataFrame:
# "id" holds the record ids and "vector" the matching 3-component vectors.
data = dict(
    id=["NIO", "v"],
    vector=[[4., 4., 4.], [2., 2., 5.]],
)
data

In [None]:
import pandas as pd

# Load the `data` dict into a DataFrame: one row per record, with the dict
# keys ("id", "vector") as columns. The bare `df` renders the frame.
df = pd.DataFrame(data=data)
df

In [None]:
# Upsert the DataFrame rows as (id, vector) tuples.
# The pairs are materialised into a list because upsert is documented to take
# a list of vectors, whereas a zip object is a one-shot iterator.
# Bracket access is used so column names can never be confused with
# DataFrame attributes.
index.upsert(list(zip(df["id"], df["vector"])))

In [None]:
# Inspect the index statistics again after the DataFrame upsert above.
index.describe_index_stats()

In [None]:
# Query the vector data: ask for the single best match (top_k=1) for the
# query vector and include the matched vector's values in the response.
query_vector = [2., 2., 4.]
index.query(vector=query_vector, top_k=1, include_values=True)

In [None]:
# Query by metadata: the same query vector, restricted by a filter on the
# "sector" metadata field, returning up to 3 matches with their metadata.
sector_filter = {
    # Other filters to experiment with:
    #   "stock_name": {"$eq": "Tesla"},
    #   "sector": "automotive",
    "sector": {"$eq": "automotive"},
}
index.query(
    vector=[2., 2., 4.],
    top_k=3,
    include_metadata=True,
    filter=sector_filter,
)