In [3]:
from dotenv import load_dotenv
load_dotenv()
from pinecone import Pinecone, ServerlessSpec
from openai import OpenAI
import os



In [4]:
# Initialize the Pinecone client; the API key is read from the
# PINECONE_API_KEY environment variable.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Create the "rag" index only when it does not exist yet. An unconditional
# create_index call fails once the index is present, which would break
# re-running this cell (or Restart & Run All).
# The index dimension must match the length of the vectors upserted into it.
if "rag" not in pc.list_indexes().names():
    pc.create_index(
        name="rag",
        dimension=1536,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [5]:
import json

# Load the review records. The context manager guarantees the file handle is
# closed (a bare json.load(open(...)) leaves it open), and the explicit
# encoding avoids depending on the platform's default text encoding.
with open("reviews.json", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)

# Display the list stored under the "reviews" key.
data['reviews']

[{'professor': 'Dr. John Smith',
  'review': 'Great professor who explains concepts clearly and is always available during office hours.',
  'subject': 'Math',
  'stars': 5},
 {'professor': 'Dr. Emily Johnson',
  'review': 'Engaging lectures with practical examples, but the grading can be tough.',
  'subject': 'Computer Science',
  'stars': 4},
 {'professor': 'Dr. Michael Brown',
  'review': 'Provides detailed feedback on assignments, but the workload is heavy.',
  'subject': 'History',
  'stars': 3},
 {'professor': 'Dr. Sarah Davis',
  'review': 'Very supportive and understanding, especially with first-year students.',
  'subject': 'English',
  'stars': 5},
 {'professor': 'Dr. Robert Wilson',
  'review': 'Lectures can be a bit dry, but the material covered is important for the exams.',
  'subject': 'Physics',
  'stars': 3},
 {'professor': 'Dr. Jessica Lee',
  'review': 'Highly knowledgeable and passionate about the subject, but sometimes hard to follow.',
  'subject': 'Chemistry',
  '

In [6]:
# Embed every review text and build one upsert record per review.
# The embeddings endpoint accepts a list of inputs, so reviews are sent in
# batches instead of issuing one HTTP request per review.
EMBED_BATCH_SIZE = 100  # number of review texts sent per embeddings request

processed_data = []
client = OpenAI()

reviews = data['reviews']
for start in range(0, len(reviews), EMBED_BATCH_SIZE):
    batch = reviews[start:start + EMBED_BATCH_SIZE]
    response = client.embeddings.create(
        input=[review['review'] for review in batch],
        model="text-embedding-3-small"
    )
    # Every returned item carries the position of its input in the request;
    # sort on it so each embedding is paired with the right review.
    batch_embeddings = sorted(response.data, key=lambda item: item.index)
    for review, item in zip(batch, batch_embeddings):
        processed_data.append({
            "values": item.embedding,
            # NOTE(review): the professor name is used as the record id, so
            # two reviews for the same professor would share an id — confirm
            # the dataset has one review per professor.
            "id": review["professor"],
            "metadata": {
                "review": review["review"],
                "subject": review["subject"],
                "stars": review["stars"],
            }
        })

In [7]:
# Spot-check the first prepared record: a dict with "values" (the embedding),
# "id" (the professor name) and "metadata" (review, subject, stars).
processed_data[0]

{'values': [-0.039836287,
  -0.012422336,
  0.0030790805,
  0.029334674,
  -0.0021436636,
  -0.017062003,
  0.016263781,
  0.038988177,
  -1.0072783e-05,
  -0.033076342,
  0.023535088,
  0.029733784,
  0.0040316465,
  -0.041856788,
  0.021901228,
  0.03908795,
  -0.011804961,
  0.033076342,
  0.035720453,
  0.03464784,
  0.0035670563,
  -0.025742672,
  0.013307864,
  0.0350719,
  -0.049290232,
  -0.01665042,
  0.0045087095,
  0.03522156,
  0.02163931,
  -0.001884865,
  0.08690646,
  -0.012266433,
  -0.0074459184,
  -0.039162785,
  -0.056075122,
  0.0516849,
  -0.013731919,
  0.0198932,
  0.009510072,
  0.014230808,
  -0.018433949,
  0.040335175,
  0.0020828615,
  0.014567559,
  0.038314674,
  0.0028374312,
  -0.051535234,
  -0.018770698,
  0.03442334,
  0.040983733,
  -0.03582023,
  -0.018808115,
  0.042305786,
  0.009011183,
  -0.04287951,
  0.01207935,
  0.024482977,
  -0.00044549227,
  0.037516452,
  -0.01956892,
  0.041881733,
  -0.0025115942,
  0.005587557,
  -0.004976418,
  -0.02

In [8]:
# Get a handle to the "rag" index, then write every prepared record into the
# "ns1" namespace. The upsert call is the last expression of the cell, so its
# response is what the cell displays.
index = pc.Index("rag")
index.upsert(namespace="ns1", vectors=processed_data)

{'upserted_count': 9}

In [9]:
import time

# Stats read immediately after an upsert can still report zero vectors because
# the write is not visible yet. Poll for a bounded time (at most ~10 s) until
# the index reports at least as many vectors as were prepared, then display
# the latest stats either way.
for _ in range(10):
    index_stats = index.describe_index_stats()
    if index_stats.total_vector_count >= len(processed_data):
        break
    time.sleep(1)

index_stats

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}