## Initializations

In [16]:
from openai import OpenAI
from supabase import create_client
from dotenv import load_dotenv
import pandas as pd
import json
import os


# Load environment variables via python-dotenv before reading the credentials below.
load_dotenv()

# Credentials come from the environment rather than being hard-coded in the notebook.
# os.getenv returns None for any variable that is not set.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")

## Functions

### Embed First 3 Benefits Chunks

In [None]:
def embed_benefits_chunks():
    """
    Embed the first 3 chunks from benefits_wellbeing_with_context.json
    into the Supabase test_chunks table (only if they don't already exist).

    A chunk is treated as already present when the table holds a row with the
    same (source_file, chunk_index) pair. For every other chunk an embedding is
    generated from "heading + blank line + text" and the row is inserted.
    Progress, a summary (including failures) and the final table contents are
    printed.

    Returns:
        None.

    Raises:
        OSError, json.JSONDecodeError: if the benefits JSON file cannot be
            read or parsed (these are not caught here).
    """
    source_file = "benefits_wellbeing_with_context.json"

    # Initialize clients
    print("Initializing clients...")
    openai_client = OpenAI(api_key=OPENAI_API_KEY)
    supabase = create_client(SUPABASE_URL, SUPABASE_KEY)

    # Check what chunks already exist
    print("Checking for existing chunks...")
    existing_set = set()
    try:
        existing_chunks = supabase.table("test_chunks").select("source_file, chunk_index, chunk_heading").execute()
        for chunk in existing_chunks.data:
            existing_set.add((chunk['source_file'], chunk['chunk_index']))
            print(f"  📋 Found existing: {chunk['chunk_heading']} (index {chunk['chunk_index']})")

        print(f"📊 Found {len(existing_chunks.data)} existing chunks in database")

    except Exception as e:
        # Best-effort: continue with an empty set. NOTE(review): if this lookup
        # fails but inserts succeed, already-stored chunks will be inserted again.
        print(f"❌ Error checking existing chunks: {e}")
        existing_set = set()

    # Load the benefits data (explicit encoding so the platform default is not used)
    print("\nLoading benefits data...")
    with open("data/benefits_wellbeing_with_context.json", "r", encoding="utf-8") as f:
        benefits_data = json.load(f)

    # Take first 3 chunks for testing
    test_chunks = benefits_data[:3]
    print(f"📊 Processing {len(test_chunks)} chunks...")

    # Track what we actually process
    processed_count = 0
    skipped_count = 0
    error_count = 0

    # Process each chunk
    for i, chunk in enumerate(test_chunks):
        chunk_key = (source_file, i)
        heading = chunk.get('chunk_heading', 'Untitled')

        print(f"\nProcessing chunk {i+1}: {heading}")

        # Check if this chunk already exists
        if chunk_key in existing_set:
            print("Skipping - chunk already exists in database")
            skipped_count += 1
            continue

        # A chunk without text cannot be embedded; count it instead of crashing
        if 'text' not in chunk:
            print("❌ Skipping - chunk missing 'text' field")
            error_count += 1
            continue
        content = chunk['text']

        # Prepare content for embedding (combine heading + text for better context)
        embedding_content = f"{heading}\n\n{content}"

        # Generate embedding
        print(f"🧠 Generating embedding for '{heading}'...")
        try:
            response = openai_client.embeddings.create(
                model="text-embedding-3-small",
                input=embedding_content
            )
            embedding = response.data[0].embedding
            print(f"✅ Generated embedding with {len(embedding)} dimensions")

        except Exception as e:
            print(f"❌ Error generating embedding: {e}")
            error_count += 1
            continue

        # Prepare data for insertion
        chunk_data = {
            "source_file": source_file,
            "chunk_index": i,
            "chunk_heading": heading,
            "content": content,
            "situational_context": chunk.get("situational_context", ""),
            "embedding": embedding
        }

        # Insert into Supabase
        print("💾 Inserting chunk into Supabase...")
        try:
            supabase.table("test_chunks").insert(chunk_data).execute()
            print(f"✅ Successfully inserted chunk: {heading}")
            processed_count += 1

        except Exception as e:
            print(f"❌ Error inserting into Supabase: {e}")
            error_count += 1
            continue

    # Summary (failures are reported so the total always matches the chunks seen)
    print("\n🎉 Processing complete!")
    print(f"   ✅ Newly embedded: {processed_count} chunks")
    print(f"   ⏭️  Skipped existing: {skipped_count} chunks")
    print(f"   ❌ Errors: {error_count} chunks")
    print(f"   📊 Total chunks: {processed_count + skipped_count + error_count}")

    # Report what the table holds now; only the printed columns are fetched
    print("\n🔍 Final database state...")
    try:
        test_query = supabase.table("test_chunks").select("id, chunk_heading").execute()
        print(f"📊 Total chunks in database: {len(test_query.data)}")
        for chunk in test_query.data:
            # str() keeps the preview working whether the id is a string or a number
            print(f"  - {chunk['chunk_heading']} (ID: {str(chunk['id'])[:8]}...)")

    except Exception as e:
        print(f"❌ Error testing retrieval: {e}")


In [6]:
def test_similarity_search(query_text="health insurance plans"):
    """
    Rank the rows of the Supabase test_chunks table against a text query.

    The query is embedded with text-embedding-3-small, every stored chunk
    embedding is compared with it using cosine similarity, and the three best
    matches are printed. Stored embeddings may arrive as a list or as a
    JSON-encoded string; both are accepted. Chunks whose embedding is empty,
    unparseable, zero-length or of a different dimension than the query are
    reported and skipped instead of aborting the search.

    Args:
        query_text: Text to search for.

    Returns:
        None. Results are printed; any unexpected error is caught and printed
        together with its traceback.
    """
    import json
    import traceback

    import numpy as np

    print(f"\n🔍 Testing similarity search for: '{query_text}'")

    # Initialize clients
    openai_client = OpenAI(api_key=OPENAI_API_KEY)
    supabase = create_client(SUPABASE_URL, SUPABASE_KEY)

    try:
        # Generate embedding for query (converted once, outside the loop)
        print("🧠 Generating query embedding...")
        response = openai_client.embeddings.create(
            model="text-embedding-3-small",
            input=query_text
        )
        query_vec = np.asarray(response.data[0].embedding, dtype=float)
        query_norm = np.linalg.norm(query_vec)

        # Get all chunks
        print("🔍 Retrieving chunks from database...")
        all_chunks = supabase.table("test_chunks").select("*").execute()
        print(f"📊 Retrieved {len(all_chunks.data)} chunks for similarity comparison")

        # Debug: Check what type the embedding is
        if all_chunks.data:
            sample_embedding = all_chunks.data[0]['embedding']
            print(f"🔍 Debug - Embedding type: {type(sample_embedding)}")
            print(f"🔍 Debug - Embedding preview: {str(sample_embedding)[:100]}...")

        similarities = []

        for chunk in all_chunks.data:
            raw_embedding = chunk['embedding']
            if not raw_embedding:
                continue

            # Stored embeddings can come back as a JSON array string
            if isinstance(raw_embedding, str):
                try:
                    raw_embedding = json.loads(raw_embedding)
                except ValueError:  # json.JSONDecodeError is a ValueError
                    print(f"❌ Could not parse embedding for {chunk['chunk_heading']}")
                    continue

            try:
                chunk_vec = np.asarray(raw_embedding, dtype=float)
            except (TypeError, ValueError):
                print(f"❌ Could not parse embedding for {chunk['chunk_heading']}")
                continue

            # A mismatched dimension would make np.dot raise and end the whole search
            if chunk_vec.shape != query_vec.shape:
                print(
                    f"❌ Embedding shape {chunk_vec.shape} does not match query shape "
                    f"{query_vec.shape} for {chunk['chunk_heading']}"
                )
                continue

            denominator = query_norm * np.linalg.norm(chunk_vec)
            if denominator == 0:
                print(f"❌ Zero-length embedding for {chunk['chunk_heading']}")
                continue

            # Cosine similarity: dot product divided by the product of the norms
            similarity = np.dot(query_vec, chunk_vec) / denominator

            similarities.append({
                'chunk': chunk,
                'similarity': float(similarity)  # Ensure it's a regular float
            })

        # Sort by similarity
        similarities.sort(key=lambda x: x['similarity'], reverse=True)

        print(f"\n🎯 Top matches for '{query_text}':")
        for i, match in enumerate(similarities[:3]):
            chunk = match['chunk']
            score = match['similarity']
            print(f"  {i+1}. {chunk['chunk_heading']} (similarity: {score:.3f})")

    except Exception as e:
        print(f"❌ Error in similarity search: {e}")
        print(f"Full traceback: {traceback.format_exc()}")


### Embedding & Testing

In [4]:
# Embed the first three benefits chunks; chunks already present in test_chunks are skipped
embed_benefits_chunks()


🔧 Initializing clients...
🔍 Checking for existing chunks...
  📋 Found existing: Leaves (index 1)
  📋 Found existing: Health Benefits (index 0)
  📋 Found existing: Perks (index 2)
📊 Found 3 existing chunks in database

📂 Loading benefits data...
📊 Processing 3 chunks...

🔄 Processing chunk 1: Health Benefits
⏭️  Skipping - chunk already exists in database

🔄 Processing chunk 2: Leaves
⏭️  Skipping - chunk already exists in database

🔄 Processing chunk 3: Perks
⏭️  Skipping - chunk already exists in database

🎉 Processing complete!
   ✅ Newly embedded: 0 chunks
   ⏭️  Skipped existing: 3 chunks
   📊 Total chunks: 3

🔍 Final database state...
📊 Total chunks in database: 3
  - Leaves (ID: a64140ab...)
  - Health Benefits (ID: fd4e01a2...)
  - Perks (ID: d62a02fe...)


In [7]:
# Run the similarity search for two sample queries; each call prints its top matches
test_similarity_search("health insurance plans")
test_similarity_search("vacation time off")


🔍 Testing similarity search for: 'health insurance plans'
🧠 Generating query embedding...
🔍 Retrieving chunks from database...
📊 Retrieved 3 chunks for similarity comparison
🔍 Debug - Embedding type: <class 'str'>
🔍 Debug - Embedding preview: [-0.016314207,0.03778174,0.030891964,0.039434165,-0.038173843,-0.025262512,-0.018246705,-0.004253596...

🎯 Top matches for 'health insurance plans':
  1. Health Benefits (similarity: 0.489)
  2. Leaves (similarity: 0.173)
  3. Perks (similarity: 0.137)

🔍 Testing similarity search for: 'vacation time off'
🧠 Generating query embedding...
🔍 Retrieving chunks from database...
📊 Retrieved 3 chunks for similarity comparison
🔍 Debug - Embedding type: <class 'str'>
🔍 Debug - Embedding preview: [-0.016314207,0.03778174,0.030891964,0.039434165,-0.038173843,-0.025262512,-0.018246705,-0.004253596...

🎯 Top matches for 'vacation time off':
  1. Leaves (similarity: 0.407)
  2. Perks (similarity: 0.270)
  3. Health Benefits (similarity: 0.200)


## Embed Entire Contextual Benefits Doc

### Multi-Document Embedding Function


In [None]:
def embed_multiple_documents(document_files, table_name="faq_docs"):
    """
    Embed multiple JSON documents into a Supabase table with duplicate checking.

    Each document is expected to be a JSON array of chunk objects with a
    'text' field and, optionally, 'chunk_heading' and 'situational_context'.
    A chunk is identified by (file name without directories, position in the
    array); chunks whose identifier is already in the table are skipped.
    Per-document and overall summaries are printed.

    Args:
        document_files: List of file paths (str or path-like) to JSON documents
        table_name: Supabase table name to insert into

    Returns:
        None.
    """

    # Initialize clients
    print("🔧 Initializing clients...")
    openai_client = OpenAI(api_key=OPENAI_API_KEY)
    supabase = create_client(SUPABASE_URL, SUPABASE_KEY)

    # Check what chunks already exist across all documents
    # NOTE(review): a single select may be capped by the API's row limit; for a
    # large table this lookup could miss rows - confirm and paginate if needed.
    print("🔍 Checking for existing chunks...")
    existing_set = set()
    existing_by_file = {}
    try:
        existing_chunks = supabase.table(table_name).select("source_file, chunk_index, chunk_heading").execute()

        for chunk in existing_chunks.data:
            existing_set.add((chunk['source_file'], chunk['chunk_index']))
            # Track by file for reporting
            existing_by_file.setdefault(chunk['source_file'], []).append(chunk['chunk_heading'])

        print(f"📊 Found {len(existing_chunks.data)} existing chunks in database")
        for file_name, headings in existing_by_file.items():
            print(f"  📋 {file_name}: {len(headings)} chunks")

    except Exception as e:
        # Best-effort: continue as if the table were empty
        print(f"❌ Error checking existing chunks: {e}")
        existing_set = set()
        existing_by_file = {}

    # Process each document file
    documents_loaded = 0
    total_processed = 0
    total_skipped = 0
    total_errors = 0

    for doc_file in document_files:
        print(f"\n📂 Processing document: {doc_file}")

        # Load the document data
        try:
            with open(doc_file, "r", encoding="utf-8") as f:
                document_data = json.load(f)
            if not isinstance(document_data, list):
                raise ValueError(
                    f"expected a JSON array of chunks, got {type(document_data).__name__}"
                )
            print(f"📊 Loaded {len(document_data)} chunks from {doc_file}")
        except Exception as e:
            print(f"❌ Error loading {doc_file}: {e}")
            total_errors += 1
            continue
        documents_loaded += 1

        # Only the file name is stored, so the row key does not depend on the
        # directory. NOTE: two files with the same name in different folders
        # therefore share keys.
        source_file = os.path.basename(doc_file)

        # Track progress for this document
        doc_processed = 0
        doc_skipped = 0
        doc_errors = 0

        # Process each chunk in the document
        for i, chunk in enumerate(document_data):
            chunk_key = (source_file, i)
            is_object = isinstance(chunk, dict)
            heading = chunk.get('chunk_heading', 'Untitled') if is_object else 'Untitled'

            print(f"\n  🔄 Processing chunk {i+1}/{len(document_data)}: {heading}")

            # Check if this chunk already exists
            if chunk_key in existing_set:
                print("  ⏭️  Skipping - chunk already exists in database")
                doc_skipped += 1
                continue

            # Validate chunk structure
            if not is_object or 'text' not in chunk:
                print("  ❌ Skipping - chunk missing 'text' field")
                doc_errors += 1
                continue

            # Prepare content for embedding
            content = chunk['text']
            embedding_content = f"{heading}\n\n{content}"

            # Generate embedding
            print("  🧠 Generating embedding...")
            try:
                response = openai_client.embeddings.create(
                    model="text-embedding-3-small",
                    input=embedding_content
                )
                embedding = response.data[0].embedding
                print(f"  ✅ Generated embedding with {len(embedding)} dimensions")

            except Exception as e:
                print(f"  ❌ Error generating embedding: {e}")
                doc_errors += 1
                continue

            # Prepare data for insertion
            chunk_data = {
                "source_file": source_file,
                "chunk_index": i,
                "chunk_heading": heading,
                "content": content,
                "situational_context": chunk.get("situational_context", ""),
                "embedding": embedding
            }

            # Insert into Supabase
            print("  💾 Inserting chunk into Supabase...")
            try:
                supabase.table(table_name).insert(chunk_data).execute()
                print(f"  ✅ Successfully inserted: {heading}")
                doc_processed += 1
                # Remember the key so a file listed twice is not inserted twice
                existing_set.add(chunk_key)

            except Exception as e:
                print(f"  ❌ Error inserting into Supabase: {e}")
                doc_errors += 1
                continue

        # Document summary
        print(f"\n📋 Document '{doc_file}' summary:")
        print(f"   ✅ Newly embedded: {doc_processed} chunks")
        print(f"   ⏭️  Skipped existing: {doc_skipped} chunks")
        print(f"   ❌ Errors: {doc_errors} chunks")

        # Update totals
        total_processed += doc_processed
        total_skipped += doc_skipped
        total_errors += doc_errors

    # Final summary (only documents that actually loaded count as processed)
    print("\n🎉 Multi-document processing complete!")
    print(f"   📁 Documents processed: {documents_loaded}")
    print(f"   ✅ Total newly embedded: {total_processed} chunks")
    print(f"   ⏭️  Total skipped existing: {total_skipped} chunks")
    print(f"   ❌ Total errors: {total_errors} chunks")

    # Final database state
    print("\n🔍 Final database state...")
    try:
        final_query = supabase.table(table_name).select("source_file, chunk_heading").execute()
        print(f"📊 Total chunks in '{table_name}' table: {len(final_query.data)}")

        # Count rows per source file
        chunks_per_file = {}
        for chunk in final_query.data:
            file_name = chunk['source_file']
            chunks_per_file[file_name] = chunks_per_file.get(file_name, 0) + 1

        for file_name, count in chunks_per_file.items():
            print(f"  📋 {file_name}: {count} chunks")

    except Exception as e:
        print(f"❌ Error querying final state: {e}")


### Test Multi-Document Embedding


In [6]:
# Example usage function
def embed_all_available_documents():
    """
    Run the multi-document embedding over whichever known data files exist.

    Every candidate path is reported as found or missing. If none of them
    exists, a message is printed and nothing is embedded; otherwise the paths
    that exist are handed to embed_multiple_documents for the "faq_docs" table.
    """
    import os

    # Candidate documents, in the order they should be processed
    candidate_paths = (
        "data/benefits_wellbeing_with_context.json",
        "data/employee_handbook_with_context.json",
    )

    # Keep only the paths that are actually on disk, reporting each one
    present_paths = []
    for path in candidate_paths:
        found = os.path.exists(path)
        print(f"✅ Found: {path}" if found else f"❌ Missing: {path}")
        if found:
            present_paths.append(path)

    if len(present_paths) == 0:
        print("❌ No document files found!")
        return None

    # Hand the surviving paths to the shared embedding routine
    embed_multiple_documents(present_paths, table_name="faq_docs")

In [7]:
# Embed every available document into the faq_docs table (chunks already stored are skipped)
embed_all_available_documents()

✅ Found: data/benefits_wellbeing_with_context.json
✅ Found: data/employee_handbook_with_context.json
🔧 Initializing clients...
🔍 Checking for existing chunks...
📊 Found 5 existing chunks in database
  📋 benefits_wellbeing_with_context.json: 5 chunks

📂 Processing document: data/benefits_wellbeing_with_context.json
📊 Loaded 5 chunks from data/benefits_wellbeing_with_context.json

  🔄 Processing chunk 1/5: Health Benefits
  ⏭️  Skipping - chunk already exists in database

  🔄 Processing chunk 2/5: Leaves
  ⏭️  Skipping - chunk already exists in database

  🔄 Processing chunk 3/5: Perks
  ⏭️  Skipping - chunk already exists in database

  🔄 Processing chunk 4/5: 401k & Financial Benefits
  ⏭️  Skipping - chunk already exists in database

  🔄 Processing chunk 5/5: Time Off &  Holidays
  ⏭️  Skipping - chunk already exists in database

📋 Document 'data/benefits_wellbeing_with_context.json' summary:
   ✅ Newly embedded: 0 chunks
   ⏭️  Skipped existing: 5 chunks
   ❌ Errors: 0 chunks

📂 Proc

## Job Descriptions

### Convert Job Descriptions CSV to JSON Chunks

In [17]:
import re

def clean_html(text):
    """Strip HTML tags and decode HTML entities (reused from greenhouse.ipynb).

    Args:
        text: Raw value, normally an HTML string. Missing values (None / NaN,
            as detected by pd.isna) produce an empty string; any other value is
            converted with str() first.

    Returns:
        str: The text with tags removed, entities decoded in a single pass,
        non-breaking spaces replaced by regular spaces, and surrounding
        whitespace stripped.
    """
    import html  # local import: not among the notebook's top-level imports

    if pd.isna(text):
        return ""
    # [^>]* also matches newlines, so a tag wrapped over several lines is removed
    without_tags = re.sub(r'<[^>]*>', '', str(text))
    # One decoding pass handles every named/numeric entity and avoids double
    # unescaping: "&amp;lt;" becomes the literal text "&lt;", not "<".
    decoded = html.unescape(without_tags)
    # &nbsp; decodes to U+00A0; keep the previous contract of a plain space
    return decoded.replace('\xa0', ' ').strip()

def convert_jobs_csv_to_chunks(csv_path='data/cleaned_job_dataset.csv',
                               output_path='data/job_descriptions.json'):
    """
    Convert the cleaned job dataset CSV to chunked JSON format.

    The CSV must provide the columns job_id, job_title, job_department and
    job_description. One chunk is produced per row; the description is passed
    through clean_html. Rows without a job_id are skipped (and counted in the
    output), and a missing title or department is written as an empty string
    so the JSON file never contains NaN.

    Args:
        csv_path: Path of the CSV file to read.
        output_path: Path of the JSON file to write.

    Returns:
        list[dict] | None: The chunks that were written, or None when the CSV
        file does not exist.
    """
    print("🔄 Converting job descriptions CSV to JSON chunks...")

    if not os.path.exists(csv_path):
        print(f"❌ CSV file not found: {csv_path}")
        return None

    df = pd.read_csv(csv_path)
    print(f"📊 Loaded {len(df)} job descriptions from CSV")

    def _as_text(value):
        # Missing cells arrive as NaN; json.dump would emit them as invalid JSON
        return "" if pd.isna(value) else str(value)

    chunks = []
    skipped_rows = 0
    for _, row in df.iterrows():
        if pd.isna(row['job_id']):
            # int(NaN) raises; a row without an id cannot be deduplicated later
            skipped_rows += 1
            continue

        job_id = int(row['job_id'])
        title = _as_text(row['job_title'])
        department = _as_text(row['job_department'])

        chunks.append({
            "job_id": job_id,
            "chunk_heading": f"{title} - {department}",
            # Clean HTML from job description
            "text": clean_html(row['job_description']),
            "department": department,
            "title": title,
            # Built from the int id so a float-typed column cannot yield "job_123.0"
            "chunk_link": f"job_{job_id}"  # Could be actual job URL if available
        })

    if skipped_rows:
        print(f"⚠️  Skipped {skipped_rows} rows without a job_id")

    # Save as JSON
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(chunks, f, indent=2)

    print(f"✅ Created {len(chunks)} job description chunks")
    print(f"📄 Saved to: {output_path}")

    # Show department breakdown
    dept_counts = {}
    for chunk in chunks:
        dept = chunk['department']
        dept_counts[dept] = dept_counts.get(dept, 0) + 1

    print("\n📈 Department breakdown:")
    for dept, count in sorted(dept_counts.items(), key=lambda x: x[1], reverse=True):
        print(f"  📋 {dept}: {count} jobs")

    return chunks


In [18]:
# Convert the jobs CSV to JSON chunks and keep the returned list (None if the CSV is missing)
job_chunks = convert_jobs_csv_to_chunks()


🔄 Converting job descriptions CSV to JSON chunks...
📊 Loaded 236 job descriptions from CSV
✅ Created 236 job description chunks
📄 Saved to: data/job_descriptions.json

📈 Department breakdown:
  📋 Engineering: 82 jobs
  📋 People: 25 jobs
  📋 Business Operations & Strategy: 25 jobs
  📋 Product Management: 20 jobs
  📋 Customer Experience: 19 jobs
  📋 Legal & Policy: 17 jobs
  📋 Communications: 12 jobs
  📋 Business Development: 12 jobs
  📋 Research: 9 jobs
  📋 Marketing: 6 jobs
  📋 Data Science: 4 jobs
  📋 Design: 3 jobs
  📋 Test: 2 jobs


### Job Description Embedding Function


In [21]:
def embed_job_descriptions_modified():
    """
    Embed the job descriptions from data/job_descriptions.json into the
    Supabase job_descriptions table.

    Modified version of embed_multiple_documents specifically for job
    descriptions: job_id (instead of chunk_index) is used for deduplication.
    Jobs already in the table, and repeated job_ids within the file, are
    skipped. Entries that are malformed or have an empty description are
    counted as errors. Progress and a summary are printed.

    Returns:
        None. Returns early (after printing the error) if the JSON file cannot
        be loaded.
    """

    # Initialize clients
    print("🔧 Initializing clients...")
    openai_client = OpenAI(api_key=OPENAI_API_KEY)
    supabase = create_client(SUPABASE_URL, SUPABASE_KEY)

    # Check what job descriptions already exist (using job_id instead of chunk_index)
    # NOTE(review): a single select may be capped by the API's row limit; for a
    # large table this lookup could miss rows - confirm and paginate if needed.
    print("🔍 Checking for existing job descriptions...")
    existing_job_ids = set()
    existing_by_dept = {}
    try:
        existing_jobs = supabase.table("job_descriptions").select("job_id, job_title, department").execute()

        for job in existing_jobs.data:
            existing_job_ids.add(job['job_id'])
            # Track by department for reporting
            existing_by_dept.setdefault(job['department'], []).append(job['job_title'])

        print(f"📊 Found {len(existing_jobs.data)} existing job descriptions in database")
        for dept, titles in existing_by_dept.items():
            print(f"  📋 {dept}: {len(titles)} jobs")

    except Exception as e:
        # Best-effort: continue as if the table were empty
        print(f"❌ Error checking existing jobs: {e}")
        existing_job_ids = set()
        existing_by_dept = {}

    # Load job descriptions
    job_file = "data/job_descriptions.json"
    print(f"\n📂 Processing job descriptions from: {job_file}")

    try:
        with open(job_file, "r", encoding="utf-8") as f:
            job_data = json.load(f)
        print(f"📊 Loaded {len(job_data)} job descriptions from file")
    except Exception as e:
        print(f"❌ Error loading {job_file}: {e}")
        return

    # Track progress
    processed_count = 0
    skipped_count = 0
    error_count = 0

    required_keys = ("job_id", "title", "department", "chunk_heading")

    # Process each job description
    for job in job_data:
        # A malformed entry is counted as an error instead of stopping the run
        if not isinstance(job, dict) or any(key not in job for key in required_keys):
            print("\n  ❌ Skipping - job entry is missing required fields")
            error_count += 1
            continue

        job_id = job['job_id']
        job_title = job['title']
        department = job['department']

        print(f"\n  🔄 Processing job {job_id}: {job_title} ({department})")

        # Check if this job already exists
        if job_id in existing_job_ids:
            print("  ⏭️  Skipping - job already exists in database")
            skipped_count += 1
            continue

        # Validate job structure (a non-string description counts as missing)
        content = job.get('text')
        if not isinstance(content, str) or not content.strip():
            print("  ❌ Skipping - job missing or empty description")
            error_count += 1
            continue

        # Prepare content for embedding (job-specific format). Real newlines
        # separate heading and body, matching the other embedding functions in
        # this notebook; "\\n" would put a literal backslash-n into the text.
        heading = job['chunk_heading']
        embedding_content = f"{heading}\n\n{content}"

        # Generate embedding
        print("  🧠 Generating embedding...")
        try:
            response = openai_client.embeddings.create(
                model="text-embedding-3-small",
                input=embedding_content
            )
            embedding = response.data[0].embedding
            print(f"  ✅ Generated embedding with {len(embedding)} dimensions")

        except Exception as e:
            print(f"  ❌ Error generating embedding: {e}")
            error_count += 1
            continue

        # Prepare data for insertion (job-specific schema)
        job_data_insert = {
            "job_id": job_id,
            "department": department,
            "job_title": job_title,
            "content": content,
            "chunk_heading": heading,
            "chunk_link": job.get("chunk_link", ""),
            "embedding": embedding
        }

        # Insert into Supabase
        print("  💾 Inserting job into Supabase...")
        try:
            supabase.table("job_descriptions").insert(job_data_insert).execute()
            print(f"  ✅ Successfully inserted: {job_title}")
            processed_count += 1
            # Remember the id so a duplicate later in the file is skipped
            existing_job_ids.add(job_id)

        except Exception as e:
            print(f"  ❌ Error inserting into Supabase: {e}")
            error_count += 1
            continue

    # Final summary
    print("\n🎉 Job description processing complete!")
    print(f"   ✅ Newly embedded: {processed_count} jobs")
    print(f"   ⏭️  Skipped existing: {skipped_count} jobs")
    print(f"   ❌ Errors: {error_count} jobs")

    # Final database state
    print("\n🔍 Final database state...")
    try:
        final_query = supabase.table("job_descriptions").select("department, job_title").execute()
        print(f"📊 Total jobs in database: {len(final_query.data)}")

        # Group by department
        by_dept = {}
        for job in final_query.data:
            by_dept.setdefault(job['department'], []).append(job['job_title'])

        for dept, titles in sorted(by_dept.items(), key=lambda x: len(x[1]), reverse=True):
            print(f"  📋 {dept}: {len(titles)} jobs")

    except Exception as e:
        print(f"❌ Error querying final state: {e}")


In [20]:
# Embed the job descriptions from data/job_descriptions.json into the job_descriptions table

embed_job_descriptions_modified()


🔧 Initializing clients...
🔍 Checking for existing job descriptions...
📊 Found 0 existing job descriptions in database
\n📂 Processing job descriptions from: data/job_descriptions.json
📊 Loaded 236 job descriptions from file
\n  🔄 Processing job 4002141005: Senior Recruiter (People)
  🧠 Generating embedding...
  ✅ Generated embedding with 1536 dimensions
  💾 Inserting job into Supabase...
  ✅ Successfully inserted: Senior Recruiter
\n  🔄 Processing job 4002859005: Recruiting Coordinator (People)
  🧠 Generating embedding...
  ✅ Generated embedding with 1536 dimensions
  💾 Inserting job into Supabase...
  ✅ Successfully inserted: Recruiting Coordinator
\n  🔄 Processing job 4002971005: Enterprise Partnerships (Business Operations & Strategy)
  🧠 Generating embedding...
  ✅ Generated embedding with 1536 dimensions
  💾 Inserting job into Supabase...
  ✅ Successfully inserted: Enterprise Partnerships
\n  🔄 Processing job 4002972005: Community Lead (Business Operations & Strategy)
  🧠 Generatin