In [0]:
# Declare the notebook's input widgets. Every widget is a free-text field
# with an empty default; the first group also carries a display label.
for widget_name, widget_label in (
	("catalog_use", "Catalog to use"),
	("schema_use", "Schema to use"),
	("vector_search_endpoint", "Vector Search Endpoint"),
	("embedding_model", "Embedding Model"),
	("source_table", "Source Table"),
):
	dbutils.widgets.text(widget_name, "", widget_label)

# Connection / credential-lookup widgets are declared without a label.
for widget_name in (
	"workspace_url",
	"sp_secret_scope",
	"sp_secret_key_client_id",
	"sp_secret_key_secret",
):
	dbutils.widgets.text(widget_name, "")

In [0]:
# Read every widget value into a module-level variable of the same name.
# The lookups run in the order the names are listed below.
(
	catalog_use,
	schema_use,
	vector_search_endpoint,
	embedding_model,
	source_table,
	workspace_url,
	sp_secret_scope,
	sp_secret_key_client_id,
	sp_secret_key_secret,
) = (
	dbutils.widgets.get(widget_name)
	for widget_name in (
		"catalog_use",
		"schema_use",
		"vector_search_endpoint",
		"embedding_model",
		"source_table",
		"workspace_url",
		"sp_secret_scope",
		"sp_secret_key_client_id",
		"sp_secret_key_secret",
	)
)

In [0]:
# Echo the resolved widget parameters, one "name: value" pair per line,
# framed by a blank line above and below. Only the secret scope and key
# *names* are shown here, not the secret values themselves.
widget_values = {
	"catalog_use": catalog_use,
	"schema_use": schema_use,
	"vector_search_endpoint": vector_search_endpoint,
	"embedding_model": embedding_model,
	"source_table": source_table,
	"workspace_url": workspace_url,
	"sp_secret_scope": sp_secret_scope,
	"sp_secret_key_client_id": sp_secret_key_client_id,
	"sp_secret_key_secret": sp_secret_key_secret,
}
print(
	"\n"
	+ "\n".join(f"{label}: {value}" for label, value in widget_values.items())
	+ "\n"
)

In [0]:
# Resolve the service principal credentials from the secret scope and key
# names supplied through the widgets.
service_principal_client_id = dbutils.secrets.get(
	scope=sp_secret_scope,
	key=sp_secret_key_client_id,
)
service_principal_client_secret = dbutils.secrets.get(
	scope=sp_secret_scope,
	key=sp_secret_key_secret,
)

In [0]:
# Confirm that the credentials were retrieved WITHOUT echoing their values.
# Notebook output is persisted with the notebook and may be exported or
# shared, so credential material must never be interpolated into it; only a
# loaded / missing marker is reported for each value.
client_id_status = "<loaded>" if service_principal_client_id else "<missing>"
client_secret_status = "<loaded>" if service_principal_client_secret else "<missing>"

print(f"""
service_principal_client_id: {client_id_status}
service_principal_client_secret: {client_secret_status}
""")

In [0]:
from databricks.vector_search.client import VectorSearchClient

In [0]:
# Build the Vector Search client for the target workspace, authenticating
# with the service principal credentials fetched from the secret scope.
vs_client = VectorSearchClient(
	workspace_url=workspace_url,
	service_principal_client_id=service_principal_client_id,
	service_principal_client_secret=service_principal_client_secret,
)

In [0]:
# Report the state of the vector search endpoint.
# A state other than 'ONLINE' only produces a warning; a failure while
# fetching or reporting the endpoint is logged and then re-raised.
try:
	endpoint = vs_client.get_endpoint(vector_search_endpoint)
	# Fall back to 'UNKNOWN' when the response carries no status/state entry.
	status_section = endpoint.get('endpoint_status', {})
	endpoint_status = status_section.get('state', 'UNKNOWN')

	print(f"Endpoint: {vector_search_endpoint}")
	print(f"Status: {endpoint_status}")

	print(
		"✓ Endpoint is online and ready."
		if endpoint_status == 'ONLINE'
		else f"⚠ Warning: Endpoint is not online. Current state: {endpoint_status}"
	)
except Exception as err:
	print(f"Error checking endpoint status: {err}")
	raise

In [0]:
# Derive the fully qualified source table name (catalog.schema.table) and
# the index name, which is the source table name plus an "_index_raw" suffix.
source_table_name = ".".join((catalog_use, schema_use, source_table))
index_name = source_table_name + "_index_raw"

# Summarise the settings the index step will use, one per line.
print(
	f"Index name: {index_name}",
	f"Source table: {source_table_name}",
	f"Embedding model: {embedding_model}",
	f"Vector search endpoint: {vector_search_endpoint}",
	sep="\n",
)

In [0]:
# Create the index if it is missing, otherwise sync the existing one.
#
# Only the lookup is wrapped in the try block. Previously index.sync() ran
# inside the same try, so a sync failure whose message happened to contain
# "not found" / "does not exist" was misread as "index missing" and led to
# an attempt to create an index that already existed. Sync failures now
# propagate unchanged.
try:
	index = vs_client.get_index(
		endpoint_name=vector_search_endpoint
		,index_name=index_name
	)
	index_exists = True
except Exception as e:
	# The lookup failure is classified by its message text; anything that is
	# not a "missing index" error is reported and re-raised.
	lookup_error = str(e).lower()
	if "does not exist" not in lookup_error and "not found" not in lookup_error:
		print(f"Error: {e}")
		raise
	index_exists = False

if index_exists:
	print(f"Index '{index_name}' already exists.")
	print("Syncing index...")

	# Sync the index to refresh it from the source table.
	# NOTE(review): sync() may only trigger the refresh rather than wait for
	# it to finish - confirm before relying on the message below.
	index.sync()
	print("Index synced successfully.")
else:
	print("Index does not exist. Creating new index...")

	# Create a new delta sync index over the source table. The primary key
	# and embedding source columns are fixed to "path" and "text".
	index = vs_client.create_delta_sync_index(
		endpoint_name=vector_search_endpoint
		,index_name=index_name
		,source_table_name=source_table_name
		,pipeline_type="TRIGGERED"
		,primary_key="path"
		,embedding_source_column="text"
		,embedding_model_endpoint_name=embedding_model
	)
	print(f"Index '{index_name}' created successfully.")