In [6]:
from dotenv import load_dotenv
load_dotenv()
from pinecone import Pinecone, ServerlessSpec
from openai import OpenAI
import os
import json

In [11]:
# Pinecone client; the API key is looked up in the process environment
# (load_dotenv() has already been called in the imports cell).
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

In [12]:
# Create the serverless "rag" index only when it does not exist yet, so that
# re-running this cell (or the whole notebook) does not fail because an index
# with the same name is already present.
# dimension=1536 has to agree with the length of the embedding vectors that
# are upserted into this index later in the notebook.
if "rag" not in pc.list_indexes().names():
    pc.create_index(
        name="rag",
        dimension=1536,
        metric='cosine',
        spec=ServerlessSpec(
            cloud='aws',
            region='us-east-1'
        )
    )

In [13]:
# Load the raw reviews. The context manager closes the file handle right after
# parsing (a bare open() call leaves that to the garbage collector), and the
# explicit encoding keeps the read independent of the platform default.
with open("reviews.json", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)

In [15]:
# OpenAI client used by the embedding loop. It is constructed without
# arguments, so credentials presumably come from the environment -- TODO confirm.
client = OpenAI()
# Accumulator for the vector records that are later passed to index.upsert().
processed_data = list()

In [17]:
# Embed every review and build one Pinecone record per review.
# The list is rebuilt from scratch in this cell: appending to a list created
# in a different cell meant that running this cell a second time duplicated
# every record (and re-sent the duplicates on the next upsert).
# NOTE(review): the professor name is used as the vector id, so two reviews of
# the same professor would share an id -- confirm names are unique in the data.
processed_data = []
for review in data:
    review_text = review["review"]
    response = client.embeddings.create(
        input=review_text,
        model='text-embedding-3-small'
    )
    # One input string was sent, so the single embedding is read from data[0].
    embedding = response.data[0].embedding
    processed_data.append(
        {
            "values": embedding,
            "id": review['professor'],
            "metadata": {
                "review": review_text,
                "subject": review["subject"],
                "stars": review["stars"]
            }
        }
    )

In [19]:
from pprint import pprint

In [21]:
# Handle to the "rag" index; the upsert and stats calls further down use it.
index = pc.Index("rag")

In [22]:
# Write all prepared records into namespace "ns1" of the index, then report
# the upserted_count field of the response.
upsert_response = index.upsert(namespace="ns1", vectors=processed_data)
print(f"Upserted count: {upsert_response['upserted_count']}")

Upserted count: 20


In [23]:
# Print the index statistics as a sanity check on the vector count after the upsert.
print(index.describe_index_stats())

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 20}},
 'total_vector_count': 20}
