In [2]:
from dotenv import load_dotenv 
load_dotenv()
import os
from pinecone import Pinecone, ServerlessSpec
from openai import OpenAI

# Pinecone credential, read from the environment populated by load_dotenv();
# evaluates to None when the variable is not set.
api_key = os.getenv("PINECONE_API_KEY")


  from tqdm.autonotebook import tqdm


In [3]:
# Connect to Pinecone and make sure the serverless index used by this
# notebook exists.
pc = Pinecone(api_key=api_key)
spec = ServerlessSpec(cloud='aws', region='us-east-1')
# Creating an index whose name is already taken is rejected by Pinecone, so
# only create it when it is missing. This keeps the cell safe to re-run
# (e.g. Restart Kernel -> Run All).
if 'forest-rag' not in pc.list_indexes().names():
    pc.create_index(
        name='forest-rag',
        dimension=1536,  # length of every vector stored in this index
        metric="cosine",
        spec=spec
    )

In [None]:
import json

# Parse the dataset from disk. The context manager closes the file handle as
# soon as parsing finishes instead of leaving it open for the garbage
# collector, and the explicit encoding avoids depending on the platform
# default when decoding the JSON text.
with open('animals.json', encoding='utf-8') as animals_file:
    data = json.load(animals_file)
# Last expression of the cell: displays the 'animals' entries for inspection.
data['animals']

In [18]:
# Embed every animal description with OpenAI and collect one Pinecone-ready
# record (vector values, id, metadata) per animal in `processed_data`.
processed_data = []
client = OpenAI()
for animal in data['animals']:
    # One embeddings request per animal; the description is the embedded text.
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=animal['description'],
    )
    embedding = response.data[0].embedding
    processed_data.append({
        "values": embedding,
        # The species name doubles as the vector id.
        "id": animal["species"],
        # Copy the descriptive fields straight from the source entry.
        "metadata": {
            field: animal[field]
            for field in ("scientific_name", "category", "description", "traits")
        },
    })

In [58]:
# Embed every review text with OpenAI and collect one Pinecone-ready record
# (vector values, id, metadata) per review in `processed_data`.
# NOTE(review): this rebinds `processed_data`, replacing whatever an earlier
# cell stored under that name — confirm that is intended before upserting.
# NOTE(review): the only load visible in this notebook reads 'animals.json';
# confirm `data` actually holds a 'reviews' list when this cell runs.
processed_data = []
client = OpenAI()
for review in data['reviews']:
    # One embeddings request per review; the 'review' field is the embedded text.
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=review['review'],
    )
    embedding = response.data[0].embedding
    processed_data.append({
        "values": embedding,
        # The state name doubles as the vector id.
        "id": review["state"],
        # NOTE(review): metadata reads review["description"] while the text
        # embedded above is review['review'] — verify both keys exist.
        "metadata": {
            field: review[field]
            for field in ("year", "description", "link")
        },
    })

In [19]:
# Get a handle on the 'forest-rag' index and write the prepared records into
# namespace 'ns1'. The upsert call is the cell's last expression, so its
# response is displayed as the cell output.
index = pc.Index('forest-rag')
index.upsert(vectors=processed_data, namespace='ns1')

{'upserted_count': 55}

In [16]:
# Display the index statistics for the `index` handle.
# NOTE(review): the recorded output of this cell comes from execution In [16],
# which ran before the recorded upsert (In [19]) — re-run this cell after
# upserting to see current counts.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 32}},
 'total_vector_count': 32}