# In [None]:
import json
import os
from typing import List, Dict, Any
from langchain_cohere import CohereEmbeddings
from supabase import create_client
from dotenv import load_dotenv

# Load environment variables via python-dotenv before reading credentials.
load_dotenv()

# Credentials for Supabase and Cohere; each is None when the variable is unset.
supabase_url = os.getenv("SUPABASE_URL")
supabase_key = os.getenv("SUPABASE_KEY")
cohere_api_key = os.getenv("COHERE_API_KEY")

# Module-level clients shared by the functions in this file.
supabase_client = create_client(supabase_url, supabase_key)
embeddings_model = CohereEmbeddings(
    model="embed-english-v3.0",
    cohere_api_key=cohere_api_key,
)

def load_forms_from_json(file_path: str) -> List[Dict[str, Any]]:
    """Read the form definitions stored under the top-level ``"forms"`` key.

    Args:
        file_path: Path to a JSON file whose top-level value is an object.

    Returns:
        The value found under ``"forms"``, or an empty list when the key is
        absent or its value is ``null`` / empty.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, which would garble non-ASCII text where the locale differs.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    # `or []` also covers an explicit `"forms": null`, so the result is
    # always iterable as the return annotation promises.
    return data.get('forms') or []

def upsert_forms_to_supabase(json_file_path: str):
    """Embed every form in a JSON file and upsert it into the ``forms`` table.

    For each form, the text ``"<name>: <description>"`` is embedded with the
    module-level Cohere model, and the form is upserted (one row per request)
    together with its embedding. A success or error line is printed per form.

    Args:
        json_file_path: Path to a JSON file readable by
            ``load_forms_from_json``. Every form must provide the keys
            ``id``, ``name``, ``description`` and ``structure``.

    Raises:
        KeyError: If a form lacks one of the required keys.

    Exceptions raised by the embedding call or by the Supabase client are not
    caught here; they propagate to the caller, so processing stops at the
    first form that fails.
    """
    # `or []` keeps the loop safe should the loader hand back None
    # (e.g. a file containing `"forms": null`).
    forms = load_forms_from_json(json_file_path) or []

    for form in forms:
        # Text that represents the form in embedding space.
        text_to_embed = f"{form['name']}: {form['description']}"

        # These vectors are written to the table (presumably for similarity
        # search), so embed them as *documents*. Cohere's v3 models embed
        # documents and queries with different input types, and
        # `embed_query` is intended for the search side only.
        embedding_vector = embeddings_model.embed_documents([text_to_embed])[0]

        # Row payload covering every column written by this script.
        form_data = {
            "form_id": form['id'],
            "form_name": form['name'],
            "form_description": form['description'],
            "form_structure": form['structure'],  # Expected to land in a JSONB column
            "embedding": embedding_vector,
        }

        response = supabase_client.table("forms").upsert(form_data).execute()

        # Some client versions expose failures as `response.error`.
        # NOTE(review): recent supabase-py releases appear to raise from
        # execute() instead, in which case this branch never fires - confirm.
        error = getattr(response, 'error', None)
        if error is not None:
            print(f"Error upserting form {form['id']}: {error}")
        else:
            print(f"Successfully upserted form {form['id']}")

# Script entry point: ingest the forms from data.json (relative to the
# current working directory) when this file is executed directly.
if __name__ == "__main__":
    upsert_forms_to_supabase(json_file_path="data.json")
