In [7]:
from dotenv import load_dotenv
load_dotenv()
import os
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec 

In [8]:
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
pc.create_index(
    name="rag-doc", dimension=1536, metric="cosine", spec=ServerlessSpec(cloud= "aws", region="us-east-1") 
)

In [9]:
import json
data = json.load(open("data_doctors.json"))
data['doctors']

[{'doctor': 'Dr. John Smith',
  'specialty': 'Cardiology',
  'location': 'New York, NY',
  'clinic_name': 'New York Heart Clinic',
  'clinic_location': '1234 Heart St, New York, NY 10001',
  'website': 'https://www.nyheartclinic.com',
  'Phone': '(123) 456-7890',
  'Email': 'info@nyheartclinic.com'},
 {'doctor': 'Dr. Emily Davis',
  'specialty': 'Dermatology',
  'location': 'Los Angeles, CA',
  'clinic_name': 'LA Skin Care Center',
  'clinic_location': '5678 Skin Ave, Los Angeles, CA 90001',
  'website': 'https://www.laskincare.com',
  'Phone': '(987) 654-3210',
  'Email': 'appointments@laskincare.com'},
 {'doctor': 'Dr. Michael Brown',
  'specialty': 'Orthopedics',
  'location': 'Chicago, IL',
  'clinic_name': 'Chicago Bone & Joint Hospital',
  'clinic_location': '91011 Joint Dr, Chicago, IL 60601',
  'website': 'https://www.chicagobonejoint.com',
  'Phone': '(312) 123-4567',
  'Email': 'contact@chicagobonejoint.com'},
 {'doctor': 'Dr. Linda Johnson',
  'specialty': 'Pediatrics',
  'l

In [10]:
processed_data = []
client = OpenAI()

for doctor in data['doctors']:
    response = client.embeddings.create(
        input = doctor['doctor'],
        model = "text-embedding-3-small"
    )
    embedding = response.data[0].embedding
    processed_data.append({
        "values": embedding,
        "id": doctor["doctor"],
        "metadata" : {
            "specialty": doctor["specialty"],
            "location": doctor["location"],
            "clinic_name": doctor["clinic_name"],
            "clinic_location": doctor["clinic_location"],
            "website": doctor["website"],
            "phone": doctor["Phone"],
            "email": doctor["Email"],
        }

    })

In [11]:
processed_data[0]

{'values': [0.008299129,
  0.022991437,
  -0.018744325,
  -0.004971223,
  -0.024912395,
  -0.035027426,
  0.05018497,
  0.00023062255,
  -0.007064765,
  -0.08548253,
  0.03724853,
  -0.03097541,
  -0.0025118748,
  0.018384146,
  0.029744798,
  0.018204056,
  -0.02777882,
  -0.0017014715,
  -0.056968343,
  0.03229607,
  0.015142533,
  -0.012928932,
  0.023786834,
  -0.07101533,
  -0.004911193,
  0.0024086984,
  0.024207043,
  0.010535241,
  0.027703783,
  -0.00061155425,
  0.041960876,
  -0.020770334,
  0.0357778,
  0.04733355,
  -0.024297088,
  0.0113231335,
  0.005312643,
  0.014624775,
  0.0053951843,
  -0.020680288,
  0.031485666,
  -0.0377888,
  -0.01113554,
  0.009409681,
  0.020155028,
  -0.034577202,
  0.002894565,
  -0.008089025,
  0.035237532,
  0.048894323,
  -0.013311623,
  0.007893927,
  0.009214585,
  0.027238552,
  -0.013746839,
  -0.0044872323,
  -0.00029452154,
  0.016027974,
  0.03772877,
  -0.010895421,
  0.016688302,
  0.012118529,
  -0.027178522,
  0.009927439,
  -0

In [13]:
index = pc.Index('rag-doc')
index.upsert(
    vectors=processed_data,
    namespace="ns1"
)

{'upserted_count': 20}

In [14]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 20}},
 'total_vector_count': 20}