In [1]:
#Importing the required libraries
from dotenv import load_dotenv
load_dotenv()
from pinecone import Pinecone, ServerlessSpec
import google.generativeai as genai
import os
import json

  from tqdm.autonotebook import tqdm


In [2]:
#Build the Pinecone client, authenticating with the key from the environment
pc = Pinecone(
    api_key=os.getenv("PINECONE_API_KEY"),  # os.getenv yields None when the variable is unset
)

In [3]:
#Create the Pinecone index, but only when it does not exist yet
# create_index fails if the name is already taken, which would break
# "Restart Kernel -> Run All" on every run after the first one, so the
# existing index names are checked before creating.
index_name = "rmp-ai-assistant"
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=768,  # vector size; must match the embeddings stored in this index
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [4]:
#Load the review data
# A context manager closes the file handle deterministically (the bare open()
# passed straight to json.load() was never closed). The encoding is pinned to
# UTF-8 so the result does not depend on the platform's default locale.
with open("reviews.json", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)

In [5]:
#Configure the Gemini SDK and create a handle to the gemini-1.5-flash model
# os.environ[...] raises KeyError right here when GEMINI_API_KEY is not set.
genai.configure(
    api_key=os.environ["GEMINI_API_KEY"],
)
client = genai.GenerativeModel(model_name="gemini-1.5-flash")

In [6]:
#Embed every review and shape it into a vector record (values / id / metadata)
# NOTE(review): the professor name doubles as the vector id -- presumably names
# are unique within reviews.json; confirm, since records sharing an id would collide.
model = 'models/embedding-001'
processed_data = []
for entry in data["reviews"]:
    review_text = entry["review"]
    # One embedding request per review
    response = genai.embed_content(
        model=model,
        content=review_text,
        task_type="retrieval_document",
    )
    vector = response["embedding"]
    vector_id = entry["professor"]
    # The original review fields are kept alongside the vector as metadata
    metadata = {
        "review": review_text,
        "subject": entry["subject"],
        "stars": entry["stars"],
    }
    processed_data.append({"values": vector, "id": vector_id, "metadata": metadata})

In [7]:
#Write all prepared vector records into namespace "ns1" of the index
index = pc.Index("rmp-ai-assistant")
upsert_response = index.upsert(
    namespace="ns1",
    vectors=processed_data,
)
# Report how many vectors the service says it accepted
print(f"Upserted count: {upsert_response['upserted_count']}")

Upserted count: 20


In [8]:
#Fetch the index statistics and print them
index_stats = index.describe_index_stats()
print(index_stats)

{'dimension': 768,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 20}},
 'total_vector_count': 20}
