In [2]:
from dotenv import load_dotenv
load_dotenv()
import os
import google.generativeai as genai
from pinecone import Pinecone, ServerlessSpec

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
# Connect to Pinecone with the API key read from the environment
# (populated by load_dotenv() in the import cell).
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Creating an index whose name is already taken raises an error, so only
# create "rag" when it is missing. This keeps the cell safe to re-run
# (e.g. under Restart Kernel -> Run All).
if "rag" not in pc.list_indexes().names():
    pc.create_index(
        name="rag",
        dimension=768,  # must equal the length of the vectors upserted into this index
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [4]:
import json

# Read the reviews file inside a context manager so the handle is closed as
# soon as parsing finishes (the bare open() call left it to the garbage
# collector). UTF-8 is stated explicitly so the result does not depend on the
# platform's default encoding.
with open("reviews.json", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)

# Display the list of review records stored under the "reviews" key.
data['reviews']

[{'professor': 'Dr. Sarah Thompson',
  'subject': 'Calculus',
  'stars': 4,
  'review': 'Dr. Thompson explains complex topics clearly, but the exams are challenging.'},
 {'professor': 'Professor Michael Richards',
  'subject': 'Literature',
  'stars': 5,
  'review': 'One of the best literature professors. His passion for the subject is contagious.'},
 {'professor': 'Dr. Linda Carter',
  'subject': 'Chemistry',
  'stars': 3,
  'review': 'Her lectures are informative but often rushed. Labs are well organized.'},
 {'professor': 'Professor John Blake',
  'subject': 'History',
  'stars': 2,
  'review': 'Not very engaging and assignments are vague.'},
 {'professor': 'Dr. Emily Cooper',
  'subject': 'Physics',
  'stars': 4,
  'review': 'Great lecturer, but problem sets are tough.'},
 {'professor': 'Professor Alan Gray',
  'subject': 'Philosophy',
  'stars': 5,
  'review': 'Thought-provoking discussions and fair grading.'},
 {'professor': 'Dr. Maria Garcia',
  'subject': 'Biology',
  'stars': 

In [5]:
# Accumulates one Pinecone-ready vector record per review.
processed_data = []

# Authenticate the Gemini client; os.environ[...] raises KeyError when the
# key is missing.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])
model = genai.GenerativeModel('gemini-1.5-flash')


def _review_to_vector(entry):
    """Embed one review's text and return its vector record.

    The record uses the professor's name as the vector id and carries the
    review text, subject and star rating as metadata.
    """
    embedded = genai.embed_content(
        model="models/text-embedding-004",
        content=entry['review'],
    )
    return {
        "values": embedded['embedding'],
        "id": entry["professor"],
        "metadata": {
            "review": entry["review"],
            "subject": entry["subject"],
            "stars": entry["stars"],
        },
    }


# Embed the reviews in file order, appending each record as it is produced.
processed_data.extend(_review_to_vector(entry) for entry in data['reviews'])

In [8]:
# Handle to the "rag" index created earlier in the notebook.
index = pc.Index("rag")

# Write every embedded review into the "ns1" namespace; the call's response
# is the cell output.
index.upsert(namespace="ns1", vectors=processed_data)

{'upserted_count': 20}

In [35]:
# Show the index statistics (dimension, fullness, per-namespace and total
# vector counts) to confirm the upsert landed in "ns1".
index.describe_index_stats()

{'dimension': 768,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 20}},
 'total_vector_count': 20}