In [8]:
from dotenv import load_dotenv
# Called immediately after its import, ahead of the remaining imports.
# NOTE(review): presumably this loads variables such as PINECONE_API_KEY (read
# via os.getenv in the next cell) from a local .env file — confirm the .env
# location and contents.
load_dotenv()
import os 
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec



In [6]:
# Pinecone client; the API key is read from the PINECONE_API_KEY environment variable.
pc = Pinecone(api_key = os.getenv("PINECONE_API_KEY"))

# Creating an index that already exists raises an error, so only create "rag"
# when it is missing. This keeps the cell safe under Restart & Run All.
if "rag" not in pc.list_indexes().names():
    pc.create_index(
        name = "rag",
        dimension = 1536,  # must equal the length of the embedding vectors upserted into this index
        metric = "cosine",
        spec = ServerlessSpec(cloud = "aws", region = "us-east-1"),
    )

In [7]:
import json

# Read the reviews file through a context manager so the handle is closed as
# soon as parsing finishes, and pin the encoding so the result does not depend
# on the platform default.
with open("reviews.json", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)

# Show the list of review records stored under the 'reviews' key.
data['reviews']

[{'professor': 'Dr. Sarah Johnson',
  'subject': 'Biology',
  'stars': 4,
  'review': "Dr. Johnson's lectures are engaging and informative. She's always willing to help during office hours."},
 {'professor': 'Prof. Michael Chang',
  'subject': 'Computer Science',
  'stars': 5,
  'review': "Excellent teacher! Prof. Chang explains complex concepts in a way that's easy to understand."},
 {'professor': 'Dr. Emily Rodriguez',
  'subject': 'Psychology',
  'stars': 3,
  'review': 'Dr. Rodriguez knows her subject well, but her assignments can be unclear at times.'},
 {'professor': 'Prof. David Lee',
  'subject': 'Mathematics',
  'stars': 4,
  'review': "Prof. Lee's passion for math is contagious. His problem-solving techniques are very helpful."},
 {'professor': 'Dr. Lisa Patel',
  'subject': 'Chemistry',
  'stars': 5,
  'review': "Dr. Patel's lab sessions are well-organized and her explanations are crystal clear."},
 {'professor': 'Prof. Robert Brown',
  'subject': 'History',
  'stars': 2,
  

In [9]:
client = OpenAI()


def _to_vector_record(entry):
    """Embed one review record and package it as a vector record.

    `entry` is one item of data['reviews']; its 'review' text is sent to the
    embeddings endpoint and the first returned embedding becomes "values".
    The professor name is used as the record id, and the review text, subject
    and star rating are carried along as metadata.

    NOTE(review): because the id is the professor name, two reviews for the
    same professor would share an id — confirm names are unique in the data.
    """
    result = client.embeddings.create(
        model = "text-embedding-3-small",
        input = entry['review'],
    )
    return {
        "values": result.data[0].embedding,
        "id": entry["professor"],
        "metadata": {field: entry[field] for field in ("review", "subject", "stars")},
    }


# One embeddings request per review, in the order the reviews appear in the file.
processed_data = [_to_vector_record(entry) for entry in data['reviews']]

In [11]:
# Get a handle to the 'rag' index and write every prepared record into
# namespace "ns1"; the upsert response is displayed as the cell output.
index = pc.Index('rag')
index.upsert(vectors = processed_data, namespace = "ns1")

{'upserted_count': 20}

In [12]:
# Display the index statistics (dimension, fullness, per-namespace vector
# counts) as a sanity check that the vectors landed in namespace "ns1".
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 20}},
 'total_vector_count': 20}