# How to update the ACS index and embeddings container after deleting data from the source

In [None]:
%%writefile config.json  
{  
    "subscription_id": "",  
    "resource_group": "",  
    "workspace_name": ""  
}  

In [2]:
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential  
from azure.ai.ml import MLClient  
  
# Prefer the ambient credential chain; fall back to an interactive browser login.
try:
    credential = DefaultAzureCredential()
    # Check if given credential can get token successfully.
    credential.get_token("https://management.azure.com/.default")
except Exception as ex:
    # Fall back to InteractiveBrowserCredential in case DefaultAzureCredential does not work,
    # and report why so the browser prompt does not come as a surprise.
    print(f"DefaultAzureCredential failed ({ex}); falling back to InteractiveBrowserCredential.")
    credential = InteractiveBrowserCredential()

In [None]:
# Workspace client, configured from the config.json written at the top of the notebook.
ml_client = MLClient.from_config(credential=credential)
# Client for the "azureml" registry. Reuse the public `credential` object directly
# instead of reaching into the workspace client's private `_credential` attribute.
ml_registry = MLClient(credential=credential, registry_name="azureml")

In [4]:

  
# Fetch the latest published versions of the two registry components used by the pipeline below.
generate_chunkembed_component = ml_registry.components.get(
    name="llm_rag_crack_and_chunk_and_embed",
    label="latest",
)
update_acs_index_component = ml_registry.components.get(
    name="llm_rag_update_acs_index",
    label="latest",
)

In [5]:
from azure.ai.ml import Input, Output  
from azure.ai.ml.dsl import pipeline  
from azure.ai.ml.entities._job.pipeline._io import PipelineInput  
  
def use_automatic_compute(component, instance_count=1, instance_type="Standard_e4ds_v4"):
    """Apply resource settings to a pipeline step and hand the step back.

    Calls ``component.set_resources`` with the given instance count and instance
    type, plus a ``compute_specification`` property flagged as automatic.

    Args:
        component: Object exposing ``set_resources(**kwargs)``.
        instance_count: Number of instances to request.
        instance_type: VM size name to request.

    Returns:
        The same ``component`` object that was passed in.
    """
    resource_settings = {
        "instance_count": instance_count,
        "instance_type": instance_type,
        "properties": {"compute_specification": {"automatic": True}},
    }
    component.set_resources(**resource_settings)
    return component
  
def optional_pipeline_input_provided(input: PipelineInput):
    """Tell whether an optional pipeline input has a value bound to it.

    Returns True when the input's private ``_data`` attribute is anything
    other than None, and False otherwise.
    """
    nothing_bound = input._data is None
    return not nothing_bound
  
# Pipeline definition: run the chunk-and-embed registry component on the input data,
# then hand the resulting embeddings to the ACS index-update registry component.
#
# Parameters:
#   input_data           - source data passed to the chunk/embed step.
#   embeddings_model     - model identifier forwarded to the chunk/embed step.
#   acs_config           - forwarded unchanged to the index-update step
#                          (the caller in this notebook passes a JSON string).
#   acs_connection_id    - exported to the index-update step as the environment
#                          variable AZUREML_WORKSPACE_CONNECTION_ID_ACS.
#   asset_name           - accepted but not referenced anywhere in the body.
#   chunk_size, chunk_overlap - chunking settings forwarded to the chunk/embed step.
#   data_source_glob     - forwarded to the chunk/embed step as `input_glob`.
#   aoai_connection_id   - forwarded as `embeddings_connection_id` and exported as
#                          AZUREML_WORKSPACE_CONNECTION_ID_AOAI.
#   embeddings_container - optional; when provided it is passed to the chunk/embed
#                          step and its path becomes the root of the embeddings output.
#
# There is no return statement, so the function itself yields None.
#
# NOTE(review): documented with `#` comments rather than a docstring, and locals left
# un-renamed, because the @pipeline decorator may surface the docstring and the local
# variable names (as job description / node names) - confirm before changing either.
@pipeline(default_compute="serverless")  
def uri_to_acs(  
    input_data: Input,  
    embeddings_model: str,  
    acs_config: str,  
    acs_connection_id: str,  
    asset_name: str,  
    chunk_size: int = 526,  
    chunk_overlap: int = 100,  
    data_source_glob: str = None,  
    aoai_connection_id: str = None,  
    embeddings_container: Input = None,  
):  
    # Step 1: chunk the source documents and embed the chunks.
    generate_embeddings = generate_chunkembed_component(  
        input_data=input_data,  
        input_glob=data_source_glob,  
        chunk_size=chunk_size,  
        use_rcts=True,  
        chunk_overlap=chunk_overlap,  
        embeddings_connection_id=aoai_connection_id,  
        embeddings_container=embeddings_container,  
        embeddings_model=embeddings_model,  
    )  
    use_automatic_compute(generate_embeddings)  
    # Only export the connection id when the optional input was actually supplied.
    if optional_pipeline_input_provided(aoai_connection_id):  
        generate_embeddings.environment_variables[  
            "AZUREML_WORKSPACE_CONNECTION_ID_AOAI"  
        ] = aoai_connection_id  
    # When a container is supplied, write the embeddings output underneath it.
    # `{{name}}` is an escaped brace, so the path ends with the literal text `{name}`
    # (presumably a placeholder resolved by Azure ML at run time - confirm).
    if optional_pipeline_input_provided(embeddings_container):  
        generate_embeddings.outputs.embeddings = Output(  
            type="uri_folder", path=f"{embeddings_container.path}/{{name}}"  
        )  
  
    # Step 2: feed the embeddings produced above into the index-update component.
    update_acs_index = update_acs_index_component(  
        embeddings=generate_embeddings.outputs.embeddings,  
        acs_config=acs_config,  
    )  
    use_automatic_compute(update_acs_index)  
    # Only export the connection id when the optional input was actually supplied.
    if optional_pipeline_input_provided(acs_connection_id):  
        update_acs_index.environment_variables[  
            "AZUREML_WORKSPACE_CONNECTION_ID_ACS"  
        ] = acs_connection_id

In [None]:
# Look up the two workspace connections by name.
# The empty strings are placeholders: fill in the connection names before running.
aoai_connection = ml_client.connections.get(name="")  # passed to the pipeline as aoai_connection_id
acs_connection = ml_client.connections.get(name="")  # passed to the pipeline as acs_connection_id

In [13]:
import json  
from azure.ai.ml import Input  
  
# Embedding model URI handed to the pipeline's chunk-and-embed step.
embeddings_model = "azure_open_ai://deployment/text-embedding-ada-002/model/text-embedding-ada-002"
# Datastore folder holding the source documents.
data_source = "azureml://datastores/workspaceblobstore/paths/shoreshdata"
# Used below as index name, embeddings sub-folder, job display name and experiment name.
# Plain string literal: the previous f-string had no placeholders.
asset_name = "shoresh_1"
  
# Folder under which the embeddings for this asset are stored.
embeddings_container_uri = f"azureml://datastores/workspaceblobstore/paths/embeddings/{asset_name}"
# Index settings, serialized to the JSON string the pipeline expects.
acs_config_json = json.dumps({"index_name": asset_name})

# Build the pipeline job from the DSL function defined above.
pipeline_job = uri_to_acs(
    input_data=Input(type="uri_folder", path=data_source),
    data_source_glob="**/*",
    embeddings_model=embeddings_model,
    aoai_connection_id=aoai_connection.id,
    embeddings_container=Input(type="uri_folder", path=embeddings_container_uri),
    acs_config=acs_config_json,
    acs_connection_id=acs_connection.id,
    asset_name=asset_name,
)

pipeline_job.display_name = asset_name
# NOTE(review): presumably forces every step to execute instead of reusing earlier results - confirm.
pipeline_job.settings.force_rerun = True

# Properties for Vector Index UI
for property_key, property_value in (
    ("azureml.mlIndexAssetName", asset_name),
    ("azureml.mlIndexAssetKind", "acs"),
    ("azureml.mlIndexAssetSource", "AzureML Data"),
):
    pipeline_job.properties[property_key] = property_value

In [None]:
# Submit the job under an experiment named after the asset, then print its Studio link.
print(f"Submitting pipeline job to experiment: {asset_name}")
running_pipeline_job = ml_client.jobs.create_or_update(
    pipeline_job,
    experiment_name=asset_name,
)
print(f"Submitted run, url: {running_pipeline_job.studio_url}")

# How to create and register custom components


### 1- Create a registry


https://learn.microsoft.com/en-us/azure/machine-learning/how-to-manage-registries?view=azureml-api-2&tabs=studio#create-a-registry

https://learn.microsoft.com/en-us/azure/machine-learning/how-to-share-models-pipelines-across-workspaces-with-registries?view=azureml-api-2&tabs=cli

https://github.com/Azure/azureml-examples/tree/main/sdk/python/jobs/pipelines/2e_image_classification_keras_minist_convnet


In [3]:
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential  
from azure.ai.ml import MLClient  
  
# Prefer the ambient credential chain; fall back to an interactive browser login.
try:
    credential = DefaultAzureCredential()
    # Check if given credential can get token successfully.
    credential.get_token("https://management.azure.com/.default")
except Exception as ex:
    # Fall back to InteractiveBrowserCredential in case DefaultAzureCredential does not work,
    # and report why so the browser prompt does not come as a surprise.
    print(f"DefaultAzureCredential failed ({ex}); falling back to InteractiveBrowserCredential.")
    credential = InteractiveBrowserCredential()

In [None]:


# Workspace-scoped client.
# The empty strings are placeholders: fill in subscription, resource group and workspace.
ml_client_workspace = MLClient(
    credential=credential,
    subscription_id="",
    resource_group_name="",
    workspace_name="",
)
print(ml_client_workspace)



In [None]:
# Registry-scoped client: environments and components created through it
# are registered in the "shoresh" registry rather than in a workspace.
ml_client_registry = MLClient(
    credential=credential,
    registry_name="shoresh",
    registry_location="swedencentral",
)
print(ml_client_registry)

### How to create an environment
https://learn.microsoft.com/en-us/azure/machine-learning/how-to-manage-environments-v2?view=azureml-api-2&tabs=python

https://learn.microsoft.com/en-us/azure/machine-learning/how-to-share-models-pipelines-across-workspaces-with-registries?view=azureml-api-2&tabs=python

In [4]:
from pathlib import Path
from azure.ai.ml import Input, Output, command
from azure.ai.ml.entities import BuildContext, Environment

In [None]:
# Docker build context: the `doc_intel_env` folder in the current working directory.
doc_intel_build_dir = Path.cwd() / "doc_intel_env"

# Environment definition, version "1", built from that context.
llm_rag_embeddings_doc_intel_environment = Environment(
    name="shoresh_doc_intel",
    description="AzureML RAGs base crack_and_chunk environment with azure-ai-formrecognizer installed.",
    version="1",
    build=BuildContext(path=doc_intel_build_dir),
)

# Register it in the registry; the returned object is shown as the cell output.
ml_client_registry.environments.create_or_update(llm_rag_embeddings_doc_intel_environment)

In [None]:
# Read the environment back to confirm it was registered.
# The version is passed as a string, matching how it was created (version="1").
ml_client_registry.environments.get("shoresh_doc_intel", version="1")

### How to create a component

https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-component-pipelines-cli?view=azureml-api-2#understand-the-component-definition-yaml

https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-component-pipeline-python?view=azureml-api-2

https://learn.microsoft.com/en-us/azure/machine-learning/how-to-create-component-pipeline-python?view=azureml-api-2



In [7]:
from azure.ai.ml import MLClient, load_component  
from azure.identity import DefaultAzureCredential 

# Relative path to the component YAML definition that is loaded and registered below.
component_path = "./crack_and_chunk_with_doc_intel/component.yml"  


In [None]:
# HttpResponseError is caught below, so it must be imported here; without this import
# any registration failure would surface as a NameError instead of being reported.
from azure.core.exceptions import HttpResponseError

# Load the component definition from its YAML file.
print("Loading component...")
component = load_component(source=component_path)
print("Component loaded successfully.")

# Register (or update) the component in the registry.
print("Registering component...")
try:
    registered_component = ml_client_registry.components.create_or_update(component)
    print(f"Component registered: {registered_component.name} with version {registered_component.version}")
except HttpResponseError as e:
    # Best-effort: report the service error and let the notebook continue.
    print(f"HttpResponseError: {e}")


In [None]:
# List every component in the registry.
# The loop variable is deliberately not named `component`, so the component object
# loaded in the registration cell is not overwritten by the last listed entry.
components = ml_client_registry.components.list()
for listed_component in components:
    print(f"Component name: {listed_component.name}, version: {listed_component.version}")

In [None]:
# List every environment in the registry.
# Names and the printed label say "environment": the previous version reused the
# component variables and printed "Component name" for environment entries.
environments = ml_client_registry.environments.list()
for environment in environments:
    print(f"Environment name: {environment.name}, version: {environment.version}")