## 1. Create & Populate Azure AI Search Index

Crearemos un índice mínimo llamado `courses`, que contendrá algunos elementos de ejemplo. Asegúrate de configurar tus variables de entorno `PROJECT_CONNECTION_STRING` y `AZURE_OPENAI_ENDPOINT`; el endpoint y la clave de Azure AI Search se obtienen de la conexión predeterminada del proyecto. Utilizaremos las clases de `azure.search.documents.indexes` para gestionar el esquema del índice. También subiremos algunos datos de ejemplo.


1.1 **Connect to the hub** and retrieve connections

In [None]:
# Import required Azure libraries
import os
from azure.core.credentials import AzureKeyCredential  # For authentication
from azure.search.documents.indexes import SearchIndexClient  # For managing search indexes
from azure.search.documents.indexes.models import SearchIndex, SimpleField, SearchFieldDataType, SearchableField  # Index schema components
from azure.search.documents import SearchClient  # For document operations (upload/search)
from azure.identity import DefaultAzureCredential  # For Azure authentication
from azure.ai.projects import AIProjectClient  # To access project resources
from azure.ai.projects.models import ConnectionType  # Enum for connection types
from pathlib import Path
from dotenv import load_dotenv

# Load environment variables from a local .env file. override=True lets values
# from .env replace variables that are already set in the process environment.
load_dotenv(override=True)

# Initialize the AI Project client, which gives us access to project resources.
# It authenticates with DefaultAzureCredential and is located through the
# PROJECT_CONNECTION_STRING environment variable (KeyError if it is not set).
project_client = AIProjectClient.from_connection_string(
    credential=DefaultAzureCredential(),
    conn_str=os.environ["PROJECT_CONNECTION_STRING"]
)

# Get the project's default Azure AI Search connection. include_credentials=True
# is requested because the endpoint URL and the API key are both read below.
search_conn = project_client.connections.get_default(
    connection_type=ConnectionType.AZURE_AI_SEARCH,
    include_credentials=True
)
if not search_conn:
    raise RuntimeError("❌ No default Azure AI Search connection found in your project.")

# Name of the search index the SearchClient below is bound to.
# NOTE(review): a later cell rebinds index_name to "courses" and creates that
# index; the search_client built here stays bound to "senaindex" — confirm that
# this is intended.
index_name = "senaindex"

try:
    # SearchIndexClient manages the index itself (create/update/delete).
    credential = AzureKeyCredential(search_conn.key)
    index_client = SearchIndexClient(endpoint=search_conn.endpoint_url, credential=credential)
    print("✅ Created SearchIndexClient from project_client connection")

    # SearchClient handles document operations (upload/search/delete documents)
    # against the index named above.
    search_client = SearchClient(
        endpoint=search_conn.endpoint_url,
        index_name=index_name,
        credential=credential
    )
    print("✅ Created SearchClient for document operations")

except Exception as e:
    print(f"❌ Error creating search clients: {e}")
    # Re-raise so the cell visibly fails here. Swallowing the error would leave
    # index_client / search_client undefined and surface later as a NameError
    # that hides the real cause.
    raise

1.2 **Define the index** schema for the `courses` index, with an `Id` key and a few fields to store course info.

In [None]:
import os
from azure.search.documents.indexes.models import (
    AzureOpenAIEmbeddingSkill,
    AzureOpenAIParameters,
    AzureOpenAIVectorizer,
    FieldMapping,
    HnswAlgorithmConfiguration,
    HnswParameters,
    IndexProjectionMode,
    InputFieldMappingEntry,
    OutputFieldMappingEntry,
    SearchableField,
    SearchField,
    SearchFieldDataType,
    SearchIndex,
    SearchIndexer,
    SearchIndexerDataContainer,
    SearchIndexerDataSourceConnection,
    SearchIndexerDataSourceType,
    SearchIndexerIndexProjections,
    SearchIndexerIndexProjectionSelector,
    SearchIndexerIndexProjectionsParameters,
    SearchIndexerSkillset,
    SemanticConfiguration,
    SemanticField,
    SemanticPrioritizedFields,
    SemanticSearch,
    SimpleField,
    SplitSkill,
    VectorSearch,
    VectorSearchAlgorithmMetric,
    VectorSearchProfile,
)

# Module-level configuration read by create_index() and upload_docs() below.
# index_name is rebound here: from this point on the notebook targets "courses".
index_name = "courses"
# Endpoint handed to the Azure OpenAI vectorizer; raises KeyError if the
# AZURE_OPENAI_ENDPOINT environment variable is not set.
azure_openai_embedding_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
# Build the key credential and the index-management client from the project's
# Azure AI Search connection (search_conn).
credential = AzureKeyCredential(search_conn.key)
index_client = SearchIndexClient(endpoint=search_conn.endpoint_url, credential=credential)
print("✅ Created SearchIndexClient from project_client connection")


def create_index():
    """Create (or recreate) the search index named by ``index_name``.

    Builds the field schema, a vector-search configuration and a semantic
    configuration, deletes any existing index with the same name, then creates
    the index through the module-level ``index_client``.

    Reads the module-level names ``index_name``, ``index_client`` and
    ``azure_openai_embedding_endpoint``. Returns nothing; progress is printed.
    Errors raised by the SDK calls are not caught here.
    """
    # Define the fields (columns) for our search index.
    # Each field has specific attributes that control how it can be used in searches:
    fields = [
        # Primary key field - must be unique for each document
        SimpleField(name="Id", type=SearchFieldDataType.String, key=True),

        # Name field - SearchableField means we can do full-text search on it
        # filterable=True lets us filter results by name
        SearchableField(name="Name", type=SearchFieldDataType.String, filterable=True),

        # Category field - SearchableField for text search
        # filterable=True lets us filter by category
        # facetable=True enables category grouping in results
        SearchableField(name="Category", type=SearchFieldDataType.String, filterable=True, facetable=True),

        # Duration field - SimpleField for numeric values
        # filterable=True enables duration range filters
        # sortable=True lets us sort by duration
        # facetable=True enables duration range grouping
        SimpleField(name="Duration", type=SearchFieldDataType.Double, filterable=True, sortable=True, facetable=True),

        # Description field - SearchableField for full-text search on course descriptions
        SearchableField(name="Description", type=SearchFieldDataType.String),
    ]

    # Vector search configuration: one HNSW algorithm (cosine metric), one
    # Azure OpenAI vectorizer, and a profile tying the two together. None of the
    # fields above references the "vp" profile, so this only prepares the index
    # for a vector field to be added.
    # NOTE(review): a recorded run of this cell ended with
    # "NameError: name 'AzureOpenAIParameters' is not defined". Newer
    # azure-search-documents releases appear to have renamed this model and some
    # of the keyword arguments used here — confirm against the installed SDK.
    vector_search = VectorSearch(
        algorithms=[
            HnswAlgorithmConfiguration(
                name="algo",
                parameters=HnswParameters(metric=VectorSearchAlgorithmMetric.COSINE),
            )
        ],
        vectorizers=[
            AzureOpenAIVectorizer(
                name="openai_vectorizer",
                azure_open_ai_parameters=AzureOpenAIParameters(
                    resource_uri=azure_openai_embedding_endpoint,
                    deployment_id='text-embedding-ada-002',
                    model_name='text-embedding-ada-002',
                ),
            )
        ],
        profiles=[
            VectorSearchProfile(name="vp", algorithm_configuration_name="algo", vectorizer="openai_vectorizer")
        ],
    )

    # Semantic configuration. The prioritized fields must name fields that exist
    # in `fields` above: the schema defines "Name" and "Description", not
    # "title" / "description", so those are the names used here.
    semantic_search = SemanticSearch(
        configurations=[
            SemanticConfiguration(
                name="default",
                prioritized_fields=SemanticPrioritizedFields(
                    title_field=SemanticField(field_name="Name"),
                    content_fields=[SemanticField(field_name="Description")],
                ),
            )
        ],
        default_configuration_name="default",
    )

    # Create an index definition with our fields and search configurations
    index = SearchIndex(name=index_name, fields=fields, vector_search=vector_search, semantic_search=semantic_search)

    # Check if index already exists - if so, delete it to start fresh.
    # This is useful during development but be careful in production!
    if index_name in [x.name for x in index_client.list_indexes()]:
        index_client.delete_index(index_name)
        print(f"🗑️ Deleted existing index: {index_name}")

    # Create the new index with our schema
    created = index_client.create_index(index)
    print(f"🎉 Created index: {created.name}")

# Execute the function to create our search index
create_index()

NameError: name 'AzureOpenAIParameters' is not defined

1.3 **Upload some sample documents** to `courses`. We'll add a few items for demonstration.


In [None]:
def upload_docs():
    """Upload four sample course documents to the index named by ``index_name``.

    Builds a SearchClient from the module-level ``search_conn`` and
    ``index_name``, uploads the documents in one batch, and prints how many
    were indexed.

    Raises:
        RuntimeError: if the service reports any document as not indexed.
    """
    # Create a SearchClient to interact with our search index, using the
    # endpoint and key from the project's Azure AI Search connection.
    search_client = SearchClient(
        endpoint=search_conn.endpoint_url,
        index_name=index_name,
        credential=AzureKeyCredential(search_conn.key)
    )

    # Keys must match the field names defined in the index schema.
    sample_docs = [
        {
            "Id": "1",
            "Name": "Curso Profissional de Data Science",
            "Category": "Cursos Profissionais",
            "Duration": 40.0,
            "Description": "Um curso intensivo desenhado para profissionais que buscam aprofundar seus conhecimentos em Data Science."
        },
        {
            "Id": "2",
            "Name": "Curso Profissional de Desenvolvimento Web",
            "Category": "Cursos Profissionais",
            "Duration": 35.0,
            "Description": "Aprenda tecnologias modernas de desenvolvimento web em um ambiente profissional."
        },
        {
            "Id": "3",
            "Name": "Curso Profissional de Cibersegurança",
            "Category": "Cursos Profissionais",
            "Duration": 45.0,
            "Description": "Um curso completo para dominar a segurança da informação e evitar ameaças."
        },
        {
            "Id": "4",
            "Name": "Curso Profissional de Gestão de Projetos",
            "Category": "Cursos Profissionais",
            "Duration": 30.0,
            "Description": "Adquira habilidades para gerir projetos de maneira eficiente em ambientes corporativos."
        }
    ]

    # Upload all documents to the search index in a single batch operation.
    # The call returns one IndexingResult per document; each carries its own
    # `succeeded` flag, so individual documents must be checked.
    results = search_client.upload_documents(documents=sample_docs)
    failed = [r for r in results if not r.succeeded]

    # Print a readable summary rather than the raw list, whose elements only
    # show up as "<...IndexingResult object at 0x...>".
    print(f"🚀 Upload result: {len(results) - len(failed)}/{len(results)} documents indexed")

    if failed:
        details = "; ".join(f"{r.key}: {r.error_message}" for r in failed)
        raise RuntimeError(f"❌ Failed to index {len(failed)} document(s): {details}")

# Call the function to upload the documents; the confirmation below is only
# reached when upload_docs() did not raise.
upload_docs()
print("✅ Documents uploaded to search index")


🚀 Upload result: [<azure.search.documents._generated.models._models_py3.IndexingResult object at 0x000002298DD32490>, <azure.search.documents._generated.models._models_py3.IndexingResult object at 0x000002298DD324D0>, <azure.search.documents._generated.models._models_py3.IndexingResult object at 0x000002298DD32550>, <azure.search.documents._generated.models._models_py3.IndexingResult object at 0x000002298DD325D0>]
✅ Documents uploaded to search index
