In [72]:
from dotenv import load_dotenv
load_dotenv()
import os
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec

In [63]:
# Connect to Pinecone using the API key taken from the environment
# (populated by load_dotenv() in the imports cell).
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# create_index fails when an index with this name already exists, so guard the
# call: this keeps the cell safe to re-run (e.g. Restart Kernel -> Run All).
if "rag-m" not in pc.list_indexes().names():
    pc.create_index(
        name="rag-m",
        dimension=1536,  # must equal the length of the vectors upserted later
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [64]:
import json

# Load the review dataset. The context manager closes the file handle
# deterministically (the bare open() call leaked it), and the explicit
# encoding avoids depending on the platform's default text encoding.
with open("reviews.json", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)

# Last expression of the cell: display the list of review records.
data["reviews"]

[{'professor': 'Dr. Alice Johnson',
  'subject': 'Biology',
  'stars': 4,
  'review': 'Dr. Johnson is very knowledgeable and passionate about the subject. The classes are engaging, but the workload can be heavy.'},
 {'professor': 'Prof. Robert Smith',
  'subject': 'Mathematics',
  'stars': 5,
  'review': 'Prof. Smith explains complex concepts clearly and is always available for help. Highly recommended!'},
 {'professor': 'Dr. Emily Davis',
  'subject': 'History',
  'stars': 3,
  'review': 'Dr. Davis is well-organized, but the lectures can be a bit dry. More interactive elements would be helpful.'},
 {'professor': 'Dr. Michael Brown',
  'subject': 'Physics',
  'stars': 4,
  'review': 'Dr. Brown makes difficult topics more understandable with real-world examples. Tests are challenging but fair.'},
 {'professor': 'Prof. Sarah Wilson',
  'subject': 'Chemistry',
  'stars': 2,
  'review': "Prof. Wilson's classes are difficult to follow, and there is little feedback on assignments."},
 {'prof

In [68]:
EMBEDDING_MODEL = "text-embedding-ada-002"
# Number of reviews sent per embeddings request. Chosen conservatively;
# NOTE(review): confirm against the API's current per-request input limits.
EMBED_BATCH_SIZE = 100

processed_data = []
client = OpenAI()
reviews = data["reviews"]

# Create embeddings for each review.
# The embeddings endpoint accepts a list of inputs, so the reviews are sent in
# batches instead of one network round-trip per review. An empty review list
# results in no API call at all, as before.
for start in range(0, len(reviews), EMBED_BATCH_SIZE):
    batch = reviews[start:start + EMBED_BATCH_SIZE]
    response = client.embeddings.create(
        input=[review["review"] for review in batch],
        model=EMBEDDING_MODEL,
    )
    # Each returned item carries the position of its input in the request;
    # sort on it so every embedding is paired with the right review.
    ordered_items = sorted(response.data, key=lambda item: item.index)
    for review, item in zip(batch, ordered_items):
        processed_data.append(
            {
                "values": item.embedding,
                # The professor name is the vector id, so two reviews of the
                # same professor would share an id when upserted.
                "id": review["professor"],
                "metadata": {
                    "review": review["review"],
                    "subject": review["subject"],
                    "stars": review["stars"],
                },
            }
        )


In [66]:
# Open a handle to the "rag-m" index and write every prepared vector
# into the "ns1" namespace in a single upsert call.
index = pc.Index("rag-m")
upsert_response = index.upsert(namespace="ns1", vectors=processed_data)

# Report how many vectors the service acknowledged.
print(f"Upserted count: {upsert_response['upserted_count']}")


Upserted count: 21


In [67]:
# Fetch the index statistics and print them as a sanity check on the upsert.
index_stats = index.describe_index_stats()
print(index_stats)

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 21}},
 'total_vector_count': 21}
