In [2]:
# Environment setup. load_dotenv() is called before the remaining imports so
# that any variables it loads are already set when those packages are imported
# (presumably from a local .env file — confirm the file exists next to the notebook).
from dotenv import load_dotenv
load_dotenv()
import os
import openai
# The OpenAI key is read from the environment rather than hardcoded.
# NOTE(review): openai is not used by any cell visible in this notebook section.
openai.api_key = os.getenv("OPENAI_API_KEY")
from pinecone import Pinecone, ServerlessSpec
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

  from tqdm.autonotebook import tqdm


In [None]:
# Pinecone client; the API key comes from the environment, never from the notebook.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Create the serverless index only if it does not exist yet, so this cell can be
# re-run (Restart Kernel -> Run All) without failing because the index is already there.
# The dimension must equal the length of the vectors upserted later (768 here).
if "aiprofessor" not in pc.list_indexes().names():
    pc.create_index(
        name="aiprofessor",
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region='us-east-1'),
    )

In [4]:
import json

# Load the review dataset. The context manager closes the file handle as soon
# as parsing finishes instead of leaving it open for the lifetime of the kernel.
with open('reviews.json') as reviews_file:
    data = json.load(reviews_file)

In [5]:
# Turn every review into a record with the three keys used for the upsert:
# "values" (embedding of the review text), "id" and "metadata".
# NOTE(review): the id is taken directly from review["professor"], so two reviews
# for the same professor would share an id — confirm names are unique in reviews.json.
processed_data = []
client = HuggingFaceEmbeddings()

# Review fields copied verbatim into the vector's metadata, in this order.
metadata_fields = (
    "review",
    "department",
    "teaching_quality",
    "workload",
    "difficulty",
    "overall_rating",
)

for entry in data['reviews']:
    vector = client.embed_query(entry['review'])
    record = {
        "values": vector,
        "id": entry["professor"],
        "metadata": {field: entry[field] for field in metadata_fields},
    }
    processed_data.append(record)

   



In [6]:
# Spot-check the first prepared record (its embedding values, id and metadata).
processed_data[0]


{'values': [0.017434455454349518,
  0.07394008338451385,
  -0.004039899446070194,
  0.013087536208331585,
  0.00628536706790328,
  0.03147275000810623,
  -0.013149258680641651,
  0.0073314243927598,
  -0.04639557749032974,
  0.037899624556303024,
  -0.021421300247311592,
  -0.006698096636682749,
  -0.013563210144639015,
  -0.010219656862318516,
  -0.020973095670342445,
  -0.0514836423099041,
  -0.0013711462961509824,
  0.055241659283638,
  -0.03693746030330658,
  -0.0026855014730244875,
  -0.0024509995710104704,
  0.004839594475924969,
  -0.04981016367673874,
  -0.014707223512232304,
  0.029904132708907127,
  -3.106467556790449e-05,
  0.012841638177633286,
  0.013946257531642914,
  0.03102918155491352,
  0.03752608224749565,
  0.006209965329617262,
  0.028190860524773598,
  -0.04783743992447853,
  0.039105795323848724,
  1.877981958386954e-06,
  0.006335105746984482,
  -0.0191709753125906,
  0.0005332417786121368,
  -0.023446721956133842,
  0.004853643011301756,
  0.03079759143292904,


In [40]:
# Get a handle to the 'aiprofessor' index and write all prepared records into
# namespace 'ns1'. The upsert call is the cell's last expression, so its
# response (the upserted count) is displayed as the cell output.
index = pc.Index('aiprofessor')
index.upsert(vectors=processed_data, namespace='ns1')



{'upserted_count': 30}

In [41]:
# Sanity check after the upsert: show the index statistics
# (dimension, per-namespace vector counts, total vector count).
index.describe_index_stats()

{'dimension': 768,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 30}},
 'total_vector_count': 30}