In [1]:
# Use the %pip magic so the install targets this kernel's environment (bare
# `pip` only works through automagic), pinned to the versions this notebook
# was run against for reproducibility.
%pip install python-dotenv==1.1.1 pandas==2.3.1

Collecting python-dotenv
  Using cached python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)
Collecting pandas
  Using cached pandas-2.3.1-cp310-cp310-win_amd64.whl.metadata (19 kB)
Collecting numpy>=1.22.4 (from pandas)
  Using cached numpy-2.2.6-cp310-cp310-win_amd64.whl.metadata (60 kB)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Using cached tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Using cached python_dotenv-1.1.1-py3-none-any.whl (20 kB)
Using cached pandas-2.3.1-cp310-cp310-win_amd64.whl (11.3 MB)
Using cached numpy-2.2.6-cp310-cp310-win_amd64.whl (12.9 MB)
Using cached pytz-2025.2-py2.py3-none-any.whl (509 kB)
Using cached tzdata-2025.2-py2.py3-none-any.whl (347 kB)
Installing collected packages: pytz, tzdata, python-dotenv, numpy, pandas

   ---------------------------------------- 0/5 [pytz]
   ---------------------------------------- 0/5 [pytz]
   ----------------

In [2]:
import os, json, re, uuid, glob
from pathlib import Path
from typing import List, Dict

import pandas as pd
from dotenv import load_dotenv

# ---- 1. Load Environment Variables ----
# Single source of truth for the project location. Defaults to the original
# local checkout; set HOMESHIELD_ROOT in the OS environment to run the notebook
# from a different machine or directory.
PROJECT_ROOT = Path(os.getenv("HOMESHIELD_ROOT", r"C:\Users\kalva\AI_Projects\HomeShield_AI"))
env_path = PROJECT_ROOT / ".env"
load_dotenv(env_path)

# ---- 2. Required Variables Check ----
# Lists every variable the client-setup code in this notebook reads, so an
# incomplete .env fails here with one complete report instead of part-way
# through a later cell.
required_vars = [
    "AZURE_OPENAI_ENDPOINT",
    "AZURE_OPENAI_API_KEY",
    "AZURE_OPENAI_API_VERSION",
    "AZURE_OPENAI_EMBEDDING_DEPLOYMENT",
    "AZURE_OPENAI_CHAT_DEPLOYMENT",
    "PINECONE_API_KEY",
    "PINECONE_INDEX",
    "PINECONE_REGION",
]

# `not os.getenv(var)` is true for unset AND for empty values, so a blank
# entry in the .env file is reported as missing too.
missing = [var for var in required_vars if not os.getenv(var)]
if missing:
    raise RuntimeError(f"Missing env vars: {missing}")

# ---- 3. Path Configuration ----
# All data locations are derived from PROJECT_ROOT rather than repeated as
# absolute literals.
SAMPLE_DATA_DIR = PROJECT_ROOT / "homeshield_sample_data"
POLICY_DIR = PROJECT_ROOT / "policies_docs"
CUSTOMERS_CSV = SAMPLE_DATA_DIR / "customers.csv"
EVAL_PAIRS = SAMPLE_DATA_DIR / "evaluation_pairs.jsonl"

# ---- 4. Validation ----
# Prints booleans only; secret values are never echoed.
print("Azure endpoint configured:", bool(os.getenv("AZURE_OPENAI_ENDPOINT")))
print("Policy directory exists:", POLICY_DIR.exists())
print("Customer data exists:", CUSTOMERS_CSV.exists())

Azure endpoint configured: True
Policy directory exists: True
Customer data exists: True


In [3]:
# Use the %pip magic so the install targets this kernel's environment (bare
# `pip` only works through automagic), pinned to the versions this notebook
# was run against for reproducibility.
%pip install openai==1.99.9 pinecone==7.3.0

Collecting openai
  Downloading openai-1.99.9-py3-none-any.whl.metadata (29 kB)
Collecting pinecone
  Using cached pinecone-7.3.0-py3-none-any.whl.metadata (9.5 kB)
Collecting anyio<5,>=3.5.0 (from openai)
  Using cached anyio-4.10.0-py3-none-any.whl.metadata (4.0 kB)
Collecting distro<2,>=1.7.0 (from openai)
  Using cached distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Using cached httpx-0.28.1-py3-none-any.whl.metadata (7.1 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.10.0-cp310-cp310-win_amd64.whl.metadata (5.3 kB)
Collecting pydantic<3,>=1.9.0 (from openai)
  Using cached pydantic-2.11.7-py3-none-any.whl.metadata (67 kB)
Collecting sniffio (from openai)
  Using cached sniffio-1.3.1-py3-none-any.whl.metadata (3.9 kB)
Collecting tqdm>4 (from openai)
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting idna>=2.8 (from anyio<5,>=3.5.0->openai)
  Using cached idna-3.10-py3-none-any.whl.metadata (10 kB)

In [5]:
import os
from openai import AzureOpenAI
from pinecone import Pinecone

# ----- Secure Configuration -----
def get_env_var(name, default=None):
    """Return the value of environment variable ``name``.

    A variable that is unset, empty, or whitespace-only is treated as
    missing, so a blank entry cannot slip through as a "configured" value
    and fail later inside an API client.

    Args:
        name: Name of the environment variable to read.
        default: Fallback returned when the variable is missing. ``None``
            (the default) marks the variable as required.

    Returns:
        The variable's value (unmodified), or ``default`` when the variable
        is missing and a default was supplied.

    Raises:
        ValueError: If the variable is missing and no default was given.
    """
    value = os.environ.get(name)
    # Only a value with real content counts as configured.
    if value is not None and value.strip():
        return value
    if default is None:
        raise ValueError(f"Missing required environment variable: {name}")
    return default

# ----- Settings read from the environment (all required) -----
# Azure OpenAI: endpoint, key, API version, and the two deployment names
AZURE_OPENAI_ENDPOINT = get_env_var("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_KEY = get_env_var("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_API_VER = get_env_var("AZURE_OPENAI_API_VERSION")
EMBED_MODEL = get_env_var("AZURE_OPENAI_EMBEDDING_DEPLOYMENT")
CHAT_MODEL = get_env_var("AZURE_OPENAI_CHAT_DEPLOYMENT")

# Pinecone: key, index name, and region
PC_API_KEY = get_env_var("PINECONE_API_KEY")
PINECONE_INDEX = get_env_var("PINECONE_INDEX")
PINECONE_REGION = get_env_var("PINECONE_REGION")

# ----- Service clients -----
oai_client = AzureOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_key=AZURE_OPENAI_API_KEY,
    api_version=AZURE_OPENAI_API_VER,
)
pc = Pinecone(api_key=PC_API_KEY)

# ----- Attach to the existing index and report its stats -----
print(f"Connecting to existing Pinecone index: {PINECONE_INDEX}")
index = pc.Index(PINECONE_INDEX)
index_stats = index.describe_index_stats()

# One print call, newline-separated, emits the same lines as individual prints.
print(
    "\nIndex Configuration:",
    f"- Dimensions: {index_stats.dimension}",
    f"- Metric: {index_stats.metric}",
    f"- Vector Count: {index_stats.total_vector_count}",
    f"- Embedding Model: {EMBED_MODEL}",
    sep="\n",
)

# ----- Embedding dimension sanity check (warns only, never raises) -----
expected_dim = 1536  # text-embedding-3-small uses 1536 dimensions
if index_stats.dimension == expected_dim:
    print("\nIndex validation successful - ready for operations!")
else:
    print(f"\nWARNING: Index dimension ({index_stats.dimension}) doesn't match expected ({expected_dim})")
    print("You may need to recreate your index with the correct dimensions")

Connecting to existing Pinecone index: homeshield-policies

Index Configuration:
- Dimensions: 1536
- Metric: cosine
- Vector Count: 0
- Embedding Model: text-embedding-3-small

Index validation successful - ready for operations!
