## Querying data in Pinecone

Make sure you have your Pinecone API key and environment name, as well as your OpenAI API key, handy.

In [None]:
## Install the client library into your Jupyter environment
!pip3 install pinecone-client
!pip3 install pandas
!pip3 install openai

In [None]:
# Import dependencies
import os

import numpy as np
import openai
import pandas as pd
import pinecone

In [2]:
# Init the openai client
## IMPORTANT: YOUR API KEY IS SENSITIVE INFORMATION
## The key is read from the OPENAI_API_KEY environment variable instead of
## being pasted into the notebook, so it cannot end up on GitHub by accident.
## If the variable is not set this cell raises KeyError immediately, rather
## than failing later with an authentication error.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [3]:
## A vector database is queried by first embedding the query text into a
## vector and then running a similarity search across the vector space.
## So let's say that we want to query our dataset for the "awesome string".

query_string = "What are the most awesome strings?"

embedding_response = openai.Embedding.create(
    model="text-embedding-ada-002",
    input=[query_string],
)
## We sent a single input, so take the first entry of the response data.
query_vector = embedding_response["data"][0]["embedding"]

In [4]:
# Init the pinecone client
## IMPORTANT: YOUR API KEY IS SENSITIVE INFORMATION
## Both settings are read from environment variables (PINECONE_API_KEY and
## PINECONE_ENVIRONMENT) instead of being pasted into the notebook, so they
## cannot end up on GitHub by accident. A missing variable raises KeyError here.
## NOTE(review): pinecone.init appears to configure the library globally and
## not return a client object, so `client` is presumably None; the name is
## kept only so that anything referring to it still resolves -- TODO confirm.
client = pinecone.init(
    api_key=os.environ["PINECONE_API_KEY"],
    environment=os.environ["PINECONE_ENVIRONMENT"],
)

In [5]:
## Reference the Pinecone index named "test"; the query below runs against it.
index_name = "test"
index = pinecone.Index(index_name)

In [6]:
## Search the index with the embedded query and keep the top 3 matches.
## include_values=True is passed through unchanged -- presumably it asks for
## the stored vector values to be returned with each match; TODO confirm.
query_options = {
    "vector": query_vector,
    "top_k": 3,
    "include_values": True,
}
rows = index.query(**query_options)

In [13]:
## Below we print the top 3 rows that were returned, each with a score
## which represents how closely the row matches the query.
for hit in rows["matches"]:
    print("  ".join([hit["id"], str(hit["score"])]))

First Line  0.847613811
Third Line  0.694703043
Second Line  0.687299669


In [None]:
## At this point you're probably wondering how we can get back
## the original text. That's the job of the key
## that we stored alongside the embedding vector. This key
## should correspond to a primary key in a PostgreSQL DB
## or maybe just a row ID in Airtable. The important part to
## understand is that vector DBs act as an index: a way to
## use natural language to search for data. The underlying storage
## should still be somewhere else for retrieval.