<a href="https://colab.research.google.com/github/midshipman/chatgpt/blob/main/vector_pinecone_openai.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pinecone-client openai

import os
import pinecone
import openai

# Credentials are taken from the environment when the corresponding variable
# is set, so real keys never have to be pasted into (and saved with) the
# notebook. The original placeholder strings remain as fallbacks.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", "YOUR_PINECONE_API_KEY")
PINECONE_ENV = os.environ.get("PINECONE_ENV", "YOUR_PINECONE_ENVIRONMENT")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY")

# Configure both client libraries once, at module level, before any helper
# below is called.
pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)
openai.api_key = OPENAI_API_KEY

# Name of the Pinecone index used by every cell in this notebook.
PINECONE_TABLE_NAME = "NAME-OF-YOUR-PINECONE-INDEX"

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
def create_pinecone_index(table_name, dimension=1536, metric="cosine", pod_type="p1"):
    """Create the Pinecone index ``table_name`` unless it is already listed.

    Args:
        table_name: Name of the index to look up and, if absent, create.
        dimension: Forwarded to ``pinecone.create_index`` as ``dimension``.
        metric: Forwarded to ``pinecone.create_index`` as ``metric``.
        pod_type: Forwarded to ``pinecone.create_index`` as ``pod_type``.
    """
    existing_indexes = pinecone.list_indexes()
    if table_name in existing_indexes:
        # Nothing to do: an index with this name is already present.
        return
    pinecone.create_index(
        table_name,
        dimension=dimension,
        metric=metric,
        pod_type=pod_type,
    )

def complete(prompt):
    """Return the chat model's reply to ``prompt`` with outer whitespace removed.

    The prompt is sent as a single ``user`` message to ``gpt-3.5-turbo`` and
    the content of the first returned choice is used.
    """
    user_message = {"role": "user", "content": prompt}
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[user_message],
    )
    first_choice = response['choices'][0]
    return first_choice['message']['content'].strip()

def get_ada_embedding(text):
    """Return the ``text-embedding-ada-002`` embedding for ``text``.

    Newlines in ``text`` are replaced with spaces before the request is made;
    the embedding of the first item in the response is returned.
    """
    single_line = text.replace("\n", " ")
    response = openai.Embedding.create(
        input=single_line,
        model="text-embedding-ada-002",
    )
    return response["data"][0]["embedding"]

def upsert_to_index(index, texts):
    """Embed every entry of ``texts`` and upsert them into ``index`` in one call.

    Each vector is an ``(id, embedding, metadata)`` tuple: ids are
    ``test-openai-<n>`` with ``n`` counting from 1 in iteration order, and the
    metadata stores the original text under the ``"text"`` key.
    """
    vectors = [
        (f"test-openai-{position}", get_ada_embedding(body), {"text": body})
        for position, body in enumerate(texts, start=1)
    ]
    index.upsert(vectors=vectors)

def query_index(index, query_text, top_k=3):
    """Embed ``query_text`` and return the result of querying ``index`` with it.

    The query asks for ``top_k`` matches with metadata included and vector
    values omitted; whatever ``index.query`` returns is passed back unchanged.
    """
    return index.query(
        get_ada_embedding(query_text),
        top_k=top_k,
        include_values=False,
        include_metadata=True,
    )

def print_results(pineQ):
    """Print a query result: the raw object first, then one line per match.

    The raw ``pineQ`` is printed wrapped in one pair of ANSI escape codes,
    followed by a blank gap. Each entry of ``pineQ.matches`` is then printed
    as its ``metadata['text']`` and its score as a percentage rounded to two
    decimals, wrapped in a second pair of ANSI escape codes.
    """
    reset = "\033[0m"
    print("".join(("\033[36m", str(pineQ), reset)))
    print("\n")
    for hit in pineQ.matches:
        label = hit.metadata['text']
        percent = round(hit.score * 100, 2)
        print("".join(("\033[1m\033[32m", label, " (", str(percent), "%)", reset)))


In [None]:
def main(seed_index=False, query_text="are vector dbs the future?"):
    """Run the demo: ensure the index exists, optionally seed it, then query it.

    Args:
        seed_index: When true, embed and upsert the three sample sentences
            before querying. Defaults to False, matching the previous
            behavior where the seeding code was disabled inside a string
            literal.
        query_text: Text to embed and search for. Defaults to the query the
            demo originally hard-coded.
    """
    create_pinecone_index(PINECONE_TABLE_NAME)
    index = pinecone.Index(PINECONE_TABLE_NAME)

    if seed_index:
        texts = [
            "AI Agents as virtual employees are the future",
            "Vector Databases are the future",
            "AGI is not here....yet.",
        ]
        upsert_to_index(index, texts)

    results = query_index(index, query_text)
    print_results(results)

In [None]:
# Run the demo: make sure the index exists, query it, and print the matches.
main()

[36m{'matches': [{'id': 'test-openai-2',
              'metadata': {'text': 'Vector Databases are the future'},
              'score': 0.917166531,
              'values': []},
             {'id': 'test-openai-1',
              'metadata': {'text': 'AI Agents as virtual employees are the '
                                   'future'},
              'score': 0.774859846,
              'values': []},
             {'id': 'test-openai-3',
              'metadata': {'text': 'AGI is not here....yet.'},
              'score': 0.712659299,
              'values': []}],
 'namespace': ''}[0m


[1m[32mVector Databases are the future (91.72%)[0m
[1m[32mAI Agents as virtual employees are the future (77.49%)[0m
[1m[32mAGI is not here....yet. (71.27%)[0m


In [None]:
def fetch_vector(index, vector_id):
    """Fetch a single vector from ``index`` by id.

    ``vector_id`` is wrapped in a one-element list and passed to
    ``index.fetch`` as ``ids``; the fetch result is returned unchanged.
    """
    ids = [vector_id]
    return index.fetch(ids=ids)

# Open a handle to the index named by PINECONE_TABLE_NAME for ad-hoc inspection.
index = pinecone.Index(PINECONE_TABLE_NAME)
# print(fetch_vector(index, 'test-openai-3'))

# Last expression of the cell, so its return value (the index statistics)
# is what the notebook shows as the cell output.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 3}},
 'total_vector_count': 3}

In [None]:
# index.delete(ids=["test-openai-1", "test-openai-2", "test-openai-3"])