In [0]:
# Databricks notebook source
# ==========================================================
# UDV - CATALOGO_EQUIPOS
# Proyecto: Liga 1 Perú
# Autor: Oscar García Del Águila
# ==========================================================

from env_setup import *
from pyspark.sql import SparkSession
from utils_liga1 import setup_adls, get_dbutils, get_abfss_path, read_parquet_adls, is_dataframe_empty,get_predecesor, get_pipeline_params,get_yaml_from_param,write_delta_udv, log
from m_catalogo_equipos import select_final
import traceback
import sys

# ----------------------------------------------------------
# INITIAL SETUP
# ----------------------------------------------------------
# Entity identifier: passed as the last argument to every log() call and used
# later as the top-level key when reading the YAML configuration.
entity_name = "m_catalogo_equipos"
log("Inicio de ejecución del pipeline UDV", "INFO", entity_name)

# Reuse the active Spark session if one exists, otherwise create it.
spark = SparkSession.builder.getOrCreate()
# Project helper from utils_liga1; presumably configures ADLS access — TODO confirm.
setup_adls()
# Handle used in the following cells for widgets and for notebook.exit().
dbutils = get_dbutils()

In [0]:
# ----------------------------------------------------------
# PARAMETERS AND PREDECESSORS
# ----------------------------------------------------------
# Resolves every pipeline parameter needed by the later cells. Any failure
# (missing widget, missing key, helper error) is logged and the notebook is
# ended through dbutils.notebook.exit() with a "[FAILED]" message.
try:
    # Declare the text widget with an empty default, then read its current value.
    dbutils.widgets.text("prm_pipelineid", "")
    prm_pipelineid = dbutils.widgets.get("prm_pipelineid")

    # Project helpers from utils_liga1; both results are indexed by string key below.
    dict_predecesores = get_predecesor(prm_pipelineid)
    dict_params = get_pipeline_params(prm_pipelineid)

    # Source location: the predecessor's path, passed through get_abfss_path().
    prm_ruta_predecesor = dict_predecesores["Ruta_Predecesor"]
    ruta_abfss_origen = get_abfss_path(prm_ruta_predecesor)

    # Each lookup raises KeyError if the parameter is missing, which is handled
    # by the except clause below.
    # NOTE(review): prm_filesystem is not referenced again in the cells visible
    # here — confirm whether it is still needed.
    prm_filesystem = dict_params["FILESYSTEM"]
    prm_capa_udv = dict_params["CAPA_UDV"]
    prm_ruta_base = dict_params["RUTA_BASE"]
    prm_ruta_tabla = dict_params["RUTA_TABLA"]
    prm_formato = dict_params["FORMATO_SALIDA"]
    prm_schema_tb = dict_params["SCHEMA_TABLA"]
    prm_tabla_output = dict_params["NOMBRE_TABLA"]
    prm_ruta_yaml = dict_params["YAML_PATH"]

    # Destination location: <CAPA_UDV>/<RUTA_BASE>/<RUTA_TABLA>, passed through
    # get_abfss_path() like the source path above.
    prm_ruta_tabla_output = f"{prm_capa_udv}/{prm_ruta_base}/{prm_ruta_tabla}"
    ruta_delta_udv = get_abfss_path(prm_ruta_tabla_output)

    log("Parámetros cargados correctamente", "INFO", entity_name)

except Exception as e:
    # Log the failure, then end the notebook with the error text as exit value.
    log(f"Error al cargar parámetros o predecesores: {e}", "ERROR", entity_name)
    dbutils.notebook.exit(f"[FAILED] Error inicial en {entity_name}: {str(e)}")

In [0]:
# ----------------------------------------------------------
# YAML CONFIGURATION
# ----------------------------------------------------------
# Load the YAML referenced by the pipeline parameters and pull out the settings
# stored under this entity's key. On any failure, log the error and end the
# notebook with a "[FAILED]" exit value.
try:
    yaml_conf = get_yaml_from_param(prm_ruta_yaml)

    # Resolve the entity section once and read the four settings from it.
    entity_conf = yaml_conf[entity_name]
    prm_cols = entity_conf["cols"]
    prm_schema = entity_conf["schema"]
    prm_columns_sql = entity_conf["columns_sql"]
    prm_table_comment = entity_conf["table_comment"]

    log("YAML cargado correctamente", "INFO", entity_name)
except Exception as err:
    log(f"Error al leer YAML {prm_ruta_yaml}: {err}", "ERROR", entity_name)
    dbutils.notebook.exit(f"[FAILED] Error YAML en {entity_name}: {str(err)}")

In [0]:
# ----------------------------------------------------------
# MAIN EXECUTION
# ----------------------------------------------------------
# Reads the predecessor data, applies the entity transformation (select_final)
# and writes the result to the UDV layer with mode="overwrite".
#
# The try/except only decides which exit message to report. The closing log
# line and the single dbutils.notebook.exit() call run afterwards, because:
#   * previously both branches called dbutils.notebook.exit() before the
#     closing log line, so that line was never reached;
#   * keeping the exit call out of the `try` guarantees the generic
#     `except Exception` handler cannot interfere with a successful exit.
try:
    log("Lectura desde RAW/DATA", "INFO", entity_name)
    df = read_parquet_adls(spark, ruta_abfss_origen)

    # An empty source is treated as a failure rather than writing an empty table.
    if is_dataframe_empty(df):
        raise Exception(f"No se encontró data en la ruta origen: {ruta_abfss_origen}")

    log("Procesamiento en UDV", "INFO", entity_name)
    df_final = select_final(df, prm_cols, prm_schema)

    log("Escritura en capa UDV", "INFO", entity_name)
    write_delta_udv(
        spark,
        df_final,
        schema=prm_schema_tb,
        table_name=prm_tabla_output,
        abfss_path=ruta_delta_udv,
        formato=prm_formato,
        mode="overwrite",
        columns_sql=prm_columns_sql,
        table_comment=prm_table_comment
    )

    log("Proceso completado correctamente", "SUCCESS", entity_name)
    exit_message = f"[OK] Ejecución satisfactoria - {entity_name}"

except Exception as e:
    log(f"Error en ejecución: {e}", "ERROR", entity_name)
    # Full traceback goes to the cell output for debugging.
    print(traceback.format_exc())
    exit_message = f"[FAILED] Error en {entity_name}: {str(e)}"

# Emitted on both the success and the failure path, right before leaving.
log("Finalización del pipeline UDV", "INFO", entity_name)
dbutils.notebook.exit(exit_message)