In [13]:
from dotenv import load_dotenv
load_dotenv()
import os
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec


In [10]:
# Connect to Pinecone and make sure the "rag" index exists.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# create_index fails when the index already exists, so only create it when it
# is missing. This keeps the cell safe to re-run (Restart Kernel -> Run All).
if "rag" not in pc.list_indexes().names():
  pc.create_index(
    name="rag",
    dimension=1536, # make sure this matches the dimension of your embeddings in the OpenAI API
    metric="cosine",
    spec=ServerlessSpec(cloud="aws", region="us-east-1"),
  )



In [12]:
import json

# Load the professor reviews. The context manager closes the file handle as
# soon as parsing finishes, and the explicit encoding avoids depending on the
# platform's default text encoding (JSON files are UTF-8).
with open("reviews.json", encoding="utf-8") as reviews_file:
  data = json.load(reviews_file)

# Last expression of the cell: display the list of review records.
data["reviews"]

[{'professor': 'Dr. Emily Johnson',
  'subject': 'Physics',
  'stars': 4,
  'review': 'Dr. Johnson explains complex concepts clearly and is always willing to help during office hours. Her enthusiasm for physics is contagious!'},
 {'professor': 'Prof. Michael Chang',
  'subject': 'Computer Science',
  'stars': 5,
  'review': "Brilliant instructor! Prof. Chang's lectures are engaging and his programming assignments are challenging but rewarding. Highly recommended!"},
 {'professor': 'Dr. Sarah Martinez',
  'subject': 'Biology',
  'stars': 3,
  'review': 'Dr. Martinez knows her subject well, but her lectures can be a bit dry. The lab sessions are interesting though.'},
 {'professor': 'Prof. David Wilson',
  'subject': 'History',
  'stars': 2,
  'review': 'Prof. Wilson seems knowledgeable but often goes off-topic during lectures. Grading can be inconsistent and feedback is minimal.'},
 {'professor': 'Dr. Lisa Thompson',
  'subject': 'Psychology',
  'stars': 5,
  'review': 'Dr. Thompson is 

In [17]:
# Embed every review text with OpenAI and shape the results into the record
# format that gets upserted into Pinecone: {"values", "id", "metadata"}.
EMBEDDING_MODEL = "text-embedding-3-small"
EMBEDDING_BATCH_SIZE = 100  # reviews sent per embeddings request

processed_data = []
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

reviews = data["reviews"]
for batch_start in range(0, len(reviews), EMBEDDING_BATCH_SIZE):
  batch = reviews[batch_start:batch_start + EMBEDDING_BATCH_SIZE]
  # The embeddings endpoint accepts a list of inputs, so each batch costs one
  # network round trip instead of one request per review.
  response = client.embeddings.create(
    input=[review["review"] for review in batch],
    model=EMBEDDING_MODEL,
  )
  # Every returned item carries the position of its input in `index`; sort on
  # it so each embedding is paired with the review it was computed from.
  embedded_items = sorted(response.data, key=lambda item: item.index)
  for review, embedded in zip(batch, embedded_items):
    processed_data.append({
      "values": embedded.embedding,
      # NOTE(review): the professor name is used as the vector id, so two
      # reviews for the same professor would overwrite each other on upsert --
      # confirm names are unique in reviews.json.
      "id": review["professor"],
      "metadata": {
        "review": review["review"],
        "subject": review["subject"],
        "stars": review["stars"],
      },
    })

In [19]:
# Open a handle to the "rag" index and write all prepared records into the
# "ns1" namespace; the call's return value is displayed as the cell output.
index = pc.Index("rag")
index.upsert(vectors=processed_data, namespace="ns1")

{'upserted_count': 5}

In [20]:
# Sanity check: show the index statistics so the vector count reported for the
# "ns1" namespace can be compared with the upserted_count from the upsert cell.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 5}},
 'total_vector_count': 5}