In [2]:
from dotenv import load_dotenv
load_dotenv()
import os
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec 


In [11]:
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
pc.create_index(
    name="professor-chatbot",
    dimension=1536,
    metric="cosine",
    spec=ServerlessSpec(cloud="aws", region='us-east-1'),
)

In [12]:
import json
data = json.load(open("reviews.json"))
data['reviews']

[{'professor': 'Dr. Sarah Thompson',
  'subject': 'Biology',
  'stars': 4,
  'review': "Dr. Thompson's lectures are engaging and well-structured. She's always willing to help during office hours."},
 {'professor': 'Prof. Michael Chen',
  'subject': 'Computer Science',
  'stars': 5,
  'review': "Brilliant instructor! Prof. Chen's passion for coding is contagious. His projects are challenging but rewarding."},
 {'professor': 'Dr. Emily Rodriguez',
  'subject': 'Psychology',
  'stars': 3,
  'review': 'Dr. Rodriguez knows her stuff, but her exams can be overly difficult. More practice questions would be helpful.'},
 {'professor': 'Prof. David Anderson',
  'subject': 'History',
  'stars': 4,
  'review': 'Prof. Anderson brings history to life with his storytelling. Sometimes goes off-topic, but always interesting.'},
 {'professor': 'Dr. Lisa Patel',
  'subject': 'Chemistry',
  'stars': 5,
  'review': 'Dr. Patel is an amazing teacher. Her lab demonstrations are top-notch and she explains comp

In [13]:
professed_data = []
client = OpenAI()

for review in data['reviews']:
    response = client.embeddings.create(
        input = review['review'],
        model = "text-embedding-3-small"
    )
    embedding = response.data[0].embedding
    professed_data.append({
        'values': embedding,
        'id': review['professor'],
        'metadata': {
            'review': review['review'],
            'subject': review['subject'],
            'stars': review['stars']
        }
    })

In [14]:
professed_data[0]

{'values': [0.01592425,
  0.0067771273,
  0.03219468,
  0.047666237,
  0.009080552,
  0.027241655,
  0.028413339,
  0.01223611,
  -0.008368221,
  0.004254012,
  -0.0028143718,
  0.015870994,
  0.025031433,
  -0.038905237,
  0.0056653586,
  0.037919957,
  0.01774835,
  -0.02206228,
  0.02511132,
  0.057891846,
  0.042180628,
  -0.011903245,
  0.03586951,
  -0.034431536,
  -0.04689399,
  -0.048438486,
  -0.0004150408,
  0.026309634,
  -0.017668463,
  0.0036182404,
  0.062152512,
  -0.031422436,
  0.010638359,
  -0.03328648,
  -0.0541105,
  0.043805007,
  -0.0064242906,
  0.017948069,
  0.033126704,
  0.00448036,
  0.042473547,
  0.01676307,
  -0.013447737,
  0.01302167,
  0.009479989,
  -0.028786147,
  -0.030410528,
  -0.007935497,
  0.03823951,
  0.01777498,
  -0.03618906,
  0.027294913,
  0.020251494,
  -0.02395295,
  -0.08068643,
  0.022754638,
  -0.008268361,
  0.019332787,
  0.021103628,
  -0.038958494,
  0.041887704,
  -0.010112433,
  0.005492269,
  -0.01792144,
  -0.037999846,
  -

In [15]:
index = pc.Index('professor-chatbot')
index.upsert(
    vectors = professed_data,
    namespace = 'ns1'
)

{'upserted_count': 20}