In [None]:
from dotenv import load_dotenv
load_dotenv()

import os
import google.generativeai as genai
import json
from pinecone import Pinecone, ServerlessSpec


In [None]:
# Build the Pinecone client from the API key exposed through the environment
# (load_dotenv() in the first cell populates it from a .env file, if present).
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Creating an index whose name is already taken raises an error, which made
# this cell fail on every run after the first. Only create "rag1" when it is
# missing so the notebook survives Restart Kernel -> Run All.
if "rag1" not in pc.list_indexes().names():
    pc.create_index(
        name="rag1",
        # Presumably the vector length produced by the embedding model used
        # further down ("models/text-embedding-004") -- TODO confirm.
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [None]:
import json

# Parse the raw reviews file. The context manager guarantees the file handle
# is closed (a bare open() inside json.load() leaves it dangling), and the
# explicit encoding keeps the read independent of the platform default.
with open("reviews.json", encoding="utf-8") as infile:
    data = json.load(infile)

# Last expression of the cell: display the entries stored under "reviews".
data['reviews']

In [None]:
# Embed every review with Gemini and collect one vector record per review in
# the {"values", "id", "metadata"} layout that is later passed to index.upsert().
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
processed_data = []

for review in data["reviews"]:
    response = genai.embed_content(
        model="models/text-embedding-004",
        content=review["review"],
    )
    embedding = response.get("embedding")

    # Best effort: report a response without an embedding and move on to the
    # next review instead of aborting the whole run.
    if embedding is None:
        print("Error: 'embedding' key not found in response")
        continue

    # NOTE(review): the professor field doubles as the vector id; if two
    # reviews share a professor their ids would collide -- confirm against
    # the contents of reviews.json.
    processed_data.append({
        "values": embedding,
        "id": review["professor"],
        "metadata": {
            "review": review["review"],
            "subject": review["subject"],
            "stars": review["stars"],
        },
    })

# Keep a copy of the prepared records on disk.
with open("processed_data.json", "w") as outfile:
    json.dump(processed_data, outfile, indent=4)

In [None]:
# Get a handle on the "rag1" index and send every prepared record to the
# "ns1" namespace in a single upsert call.
index = pc.Index("rag1")
upsert_response = index.upsert(namespace="ns1", vectors=processed_data)

# Report the count taken from the upsert response.
print(f"Upserted count: {upsert_response['upserted_count']}")

In [None]:
# Last expression of the cell, so the notebook renders whatever
# describe_index_stats() returns (presumably per-namespace vector counts for
# the index -- confirm against the Pinecone docs).
index.describe_index_stats()