In [13]:
from dotenv import load_dotenv
load_dotenv()
import os
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec

In [9]:
# Pinecone client; the API key is read from the environment (populated by load_dotenv()).
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# create_index fails when an index with this name already exists, so check
# first -- this keeps the cell safe under Restart Kernel -> Run All.
if "rag" not in pc.list_indexes().names():
    pc.create_index(
        name="rag",
        dimension=1536,  # must match the length of the embedding vectors upserted later
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [11]:
import json

# Load the review records. The context manager closes the file handle
# deterministically (a bare open() inside json.load() leaves it to the GC),
# and the explicit encoding avoids depending on the platform default.
with open("reviews.json", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)

# Display the list of review dicts (professor / subject / stars / review).
data["reviews"]

[{'professor': 'Dr. Emily Johnson',
  'subject': 'Biology',
  'stars': 4,
  'review': "Dr. Johnson's lectures are engaging and well-structured. She really knows how to make complex concepts understandable."},
 {'professor': 'Prof. Michael Chen',
  'subject': 'Computer Science',
  'stars': 5,
  'review': "Brilliant professor! His passion for coding is contagious. Challenging assignments, but you'll learn a ton."},
 {'professor': 'Dr. Sarah Thompson',
  'subject': 'Psychology',
  'stars': 3,
  'review': 'Decent lecturer, but the course material could be more up-to-date. Office hours are helpful.'},
 {'professor': 'Prof. David Martinez',
  'subject': 'History',
  'stars': 4,
  'review': 'Prof. Martinez brings history to life with his storytelling. Exams are tough but fair.'},
 {'professor': 'Dr. Rachel Kim',
  'subject': 'Chemistry',
  'stars': 5,
  'review': "Best chemistry professor I've had! Clear explanations and great lab sessions. Always available for extra help."},
 {'professor': '

In [16]:
# Build the Pinecone upsert payload: one record per review, holding the
# embedding of the review text plus the review fields as metadata.
EMBEDDING_MODEL = "text-embedding-3-small"
EMBEDDING_BATCH_SIZE = 100  # reviews sent per embeddings request

client = OpenAI()
reviews = data["reviews"]
processed_data = []

# The embeddings endpoint accepts a list of inputs, so send the reviews in
# batches instead of one network round trip per review. An empty review list
# results in no request at all.
for start in range(0, len(reviews), EMBEDDING_BATCH_SIZE):
    batch = reviews[start:start + EMBEDDING_BATCH_SIZE]
    response = client.embeddings.create(
        input=[review["review"] for review in batch],
        model=EMBEDDING_MODEL,
    )
    # Each returned item carries the index of the input it belongs to; sort on
    # it so embeddings are paired with the right review.
    embedded = sorted(response.data, key=lambda item: item.index)
    for review, item in zip(batch, embedded):
        processed_data.append({
            "values": item.embedding,
            # NOTE(review): the professor name is used as the vector id, so two
            # reviews for the same professor would collide on upsert -- confirm
            # names are unique in reviews.json.
            "id": review["professor"],
            "metadata": {
                "review": review["review"],
                "subject": review["subject"],
                "stars": review["stars"],
            },
        })

In [17]:
# Sanity-check the first prepared record (embedding values, id, metadata)
# before sending anything to Pinecone.
first_record = processed_data[0]
first_record

{'values': [0.00823426,
  -0.0067942156,
  0.022698136,
  0.027100736,
  0.01735665,
  -0.005208264,
  0.0014781066,
  0.048821926,
  0.005090904,
  0.013347367,
  0.046335153,
  -0.003324154,
  -0.01630358,
  0.013233178,
  0.025400596,
  0.019462794,
  -0.04780692,
  -0.02687236,
  0.059276517,
  0.06592482,
  0.04704566,
  -0.015542323,
  0.03207428,
  -0.014121311,
  -0.031363774,
  -0.04841592,
  0.021822691,
  0.0152124455,
  0.03212503,
  0.009693335,
  0.055013478,
  0.0012243544,
  -0.039788347,
  -0.039534595,
  -0.024169898,
  0.018130595,
  -0.0054302975,
  0.003907784,
  0.019526232,
  0.012078606,
  0.03052639,
  -0.0054176096,
  -0.034205798,
  -0.0013274413,
  0.05846451,
  0.017001398,
  0.008595856,
  -0.005798238,
  0.046208277,
  0.04339163,
  -0.034434177,
  0.013360054,
  0.005791894,
  -0.025908101,
  -0.028318746,
  -0.01900604,
  -0.007929756,
  0.05633299,
  -0.02213988,
  -0.029333755,
  0.03648957,
  -0.00027377484,
  -0.0149206305,
  0.008652951,
  -0.03156

In [18]:
# Open a handle to the "rag" index and write every prepared record into the
# "ms1" namespace; the response is displayed as the cell output.
index = pc.Index("rag")
upsert_response = index.upsert(namespace="ms1", vectors=processed_data)
upsert_response

{'upserted_count': 20}

In [19]:
# Confirm the upsert landed: show the index dimension and per-namespace vector counts.
index_stats = index.describe_index_stats()
index_stats

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ms1': {'vector_count': 20}},
 'total_vector_count': 20}