In [1]:
import polars as pl
import pandas as pd
from datetime import date, datetime

### Definición de Rutas

In [2]:
cierre = "202506"
cierre_l = "Jun25"
cierre_d = date(2025,6,30)
cierre_d2 = datetime(2025,7,31)


path = "E:/Users/jhernandezr/DAR/garantias/reporte/fotos/"
wd_data_raw = path + "data/raw/"
wd_data_external = path + "data/external/"

wd_data_processed_dwh = "E:/Users/jhernandezr/DAR/garantias/data_pipeline_garantias/data/processed/dwh/"
wd_data_validations = "E:/Users/jhernandezr/DAR/garantias/data_pipeline_garantias/data/validations/"
wd_data_processed_curvarecup = "E:/Users/jhernandezr/DAR/garantias/data_pipeline_garantias/data/processed/curva_recup/"
wd_data_processed_fotos = "E:/Users/jhernandezr/DAR/garantias/data_pipeline_garantias/data/processed/fotos/"

# Inputs
fl_pagadas_detalle_vf = wd_data_processed_curvarecup + f"PAGADAS_DETALLE_VF_{cierre}.parquet"
fl_recupera_con_pagos_flujos = wd_data_processed_dwh + f"parquet/Recupera_con_Pagos_Flujos_{cierre}.parquet"
fl_pagadas_global_vf = wd_data_processed_dwh + f'parquet/Pagadas_Global_VF_{cierre}.parquet'
fl_db_dwh_r = wd_data_processed_fotos + f"parquet/DB_DWH_R_{cierre}.parquet"
fl_db_dwh_nr = wd_data_processed_fotos + f"parquet/DB_DWH_NR_{cierre}.parquet"
fl_repetidos_tp_base = wd_data_processed_fotos + f"parquet/Repetidos_TP_Base.xlsx"

fl_acumulado_saldos = ""


## Principal

In [3]:
pagadas_detalle_vf = pl.read_parquet(fl_pagadas_detalle_vf)
recuperadas_global_vf = pl.read_parquet(fl_recupera_con_pagos_flujos)
db_dwh_r = pl.read_parquet(fl_db_dwh_r)
db_dwh_nr = pl.read_parquet(fl_db_dwh_nr)

In [4]:
def une_pagadas(db_dwh, pagadas_detalle_vf):
    df_joined = db_dwh.join(
        pagadas_detalle_vf,
        left_on=["INTERMEDIARIO_ID", "NUMERO_CREDITO"],
        right_on=["Intermediario_Id", "Numero_Credito"],
        how="left"
    )

    monto_expr = (
        pl.coalesce([pl.col("Monto_Desembolso_Mn"), pl.lit(0)]) +
        pl.coalesce([pl.col("Interes_Desembolso_Mn"), pl.lit(0)]) +
        pl.coalesce([pl.col("Interes_Moratorios_Mn"), pl.lit(0)])
    )

    # Añadir columnas calculadas
    df_final = df_joined.with_columns([
        (monto_expr / 1_000_000).alias("MPAGADO (MDP)"),
        (monto_expr > 0).cast(pl.Int8).alias("PAGADAS"),
        (monto_expr > 0).cast(pl.Int8).alias("INCUMPLIDO"),
        pl.coalesce([
            pl.col("Fecha_Garantia_Honrada"),
            pl.date(1899,12,30)
        ]).alias("FECHA_PAGO")
    ])

    return df_final

# db_dwh_r_p = une_pagadas(db_dwh_r, pagadas_detalle_vf)
# db_dwh_nr_p = une_pagadas(db_dwh_nr, pagadas_detalle_vf)

In [5]:
def z3_recup(recuperadas_global_vf, db_dwh):
    # === Paso 1: Crear columna Concatenado2 en A ===
    recuperadas_global_vf = recuperadas_global_vf.with_columns(
        (pl.col("Numero_Credito").cast(pl.Utf8) + pl.col("Intermediario_Id").cast(pl.Utf8)).alias("Concatenado")
    )

    # === Paso 2: Crear columna Concatenado2 en B ===
    db_dwh = db_dwh.with_columns(
        (pl.col("NUMERO_CREDITO").cast(pl.Utf8) + pl.col("INTERMEDIARIO_ID").cast(pl.Utf8)).alias("Concatenado2"),
        pl.coalesce([
            pl.col("FECHA_PAGO"),
            pl.date(1899, 12, 30)  # Default fallback
        ]).alias("FECHA_PAGO")
    )

    # === Paso 3: JOIN A y B usando Concatenado ===
    result = recuperadas_global_vf.join(
        db_dwh.select(["Concatenado2", "FECHA_PAGO"]),
        left_on="Concatenado",
        right_on="Concatenado2",
        how="left"
    )

    # === Paso 4: Calcular montos ===
    monto_total_expr = (
        pl.coalesce([pl.col("Monto_Mn"), pl.lit(0)]) +
        pl.coalesce([pl.col("Interes_Mn"), pl.lit(0)]) +
        pl.coalesce([pl.col("Moratorios_Mn"), pl.lit(0)]) +
        pl.coalesce([pl.col("Excedente_Mn"), pl.lit(0)]) -
        pl.coalesce([pl.col("Gastos_Juicio_Mn"), pl.lit(0)])
    ) / 1_000_000

    # === Paso 5: Estatus lógicos ===
    estatus_recup = ["D", "E", "RI", "CR", "RAR", "RAC"]
    estatus_rescat = ["CJ", "CS", "R", "RJ", "RS"]

    # === Paso 6: Agregar columnas finales ===
    result = result.with_columns([
        (pl.col("Fecha") > pl.col("FECHA_PAGO")).cast(pl.Int8).alias("ENTRA_RECUP"),
        pl.when(
            (pl.col("Fecha") > pl.col("FECHA_PAGO")) &
            (pl.col("Estatus").is_in(estatus_recup + estatus_rescat))
        ).then(monto_total_expr).otherwise(0).alias("MONTOTOTAL (MDP)"),
        pl.when(
            (pl.col("Fecha") > pl.col("FECHA_PAGO")) &
            (pl.col("Estatus").is_in(estatus_recup))
        ).then(monto_total_expr).otherwise(0).alias("RECUPERADOS (MDP)"),
        pl.when(
            (pl.col("Fecha") > pl.col("FECHA_PAGO")) &
            (pl.col("Estatus").is_in(estatus_rescat))
        ).then(monto_total_expr).otherwise(0).alias("RESCATADOS (MDP)")
    ])

    return result

# z3_recup_cohort = z3_recup(recuperadas_global_vf, db_dwh_r_p)
# z3_recup(recuperadas_global_vf, db_dwh_nr_p)

In [6]:
def paso_2(z3_recup_cohort):
    result = z3_recup_cohort.group_by([
        "Numero_Credito",
        "Intermediario_Id",
        "NR_R",
        "Producto"
    ]).agg(
        pl.sum("MONTOTOTAL (MDP)"),
        pl.sum("RECUPERADOS (MDP)"),
        pl.sum("RESCATADOS (MDP)")
    ).sort([
        "Numero_Credito",
        "Intermediario_Id",
        "NR_R",
        "Producto"
    ])

    return result

# vf_recuperadas = paso_2(z3_recup_cohort)

In [7]:
def fecha_or_default(col):
    return pl.when(pl.col(col).is_null()) \
             .then(date(1899, 12, 30)) \
             .otherwise(pl.col(col)) \
             .alias(col + "_LIMPIA")

def foto_saldo(db_dwh):
    # === Aplicar transformaciones ===
    result = db_dwh.with_columns([
        # Campos calculados
        (pl.col("Monto _Credito_Mn") * pl.col("CAMBIO")).alias("MCrédito_MM_UDIS"),
        (pl.col("Monto_Garantizado_Mn") / 1_000_000).alias("MGI (MDP)"),
        (pl.col("Monto _Credito_Mn") / 1_000_000).alias("MONTO CREDITO (MDP)"),
        (pl.col("Saldo_Contingente_Mn") / 1_000_000).alias("SALDO (MDP)"),
        (pl.col("Monto_Garantizado_Mn_Original") / 1_000_000).alias("MGI (MDP) Original"),
        
        # Manejo de NULLs
        pl.col("Plazo Días").fill_null(0).alias("PLAZO_DIAS"),
        pl.col("NUMERO_CREDITO").is_not_null().cast(pl.Int8).alias("NUM_GAR"),

        # Cambios de nombre
        pl.col("Fecha de Apertura").alias("FECHA_VALOR"),
        pl.col("INTERMEDIARIO_ID").alias("INTER_CLAVE"),
        pl.col("Razón Social (Intermediario)").alias("BANCO"),
        pl.col("Nombre_v1").alias("NOMBRE"),
        pl.col("RFC Empresa / Acreditado").alias("RFC"),
        pl.col("NUMERO_CREDITO").alias("CLAVE_CREDITO"),
        pl.col("Fecha Registro Alta").alias("FECHA_REGISTRO_GARANTIA"),
        pl.col("Producto ID").alias("CLAVE_TAXO"),
        pl.col("Producto").alias("TAXONOMIA"),
        pl.col("Porcentaje de Comisión Garantia").alias("Porcentaje_Comision_Garantia"),
        pl.col("VALOR_TASA_INTERES").alias("Tasa_Interes"),
        
        # Clasificación de plazo
        pl.when(pl.col("PLAZO") <= 12).then(1)
        .when(pl.col("PLAZO") <= 24).then(2)
        .when(pl.col("PLAZO") <= 36).then(3)
        .otherwise(4)
        .alias("PLAZO_BUCKET"),

        # Manejo de fechas: si es 0 se vuelve NULL, si no se redondea a primer día del mes
        pl.when(pl.col("Fecha de Apertura") == 0).then(None)
        .otherwise(pl.col("Fecha de Apertura").dt.replace(day=1))
        .alias("FECHA_VALOR1"),
        
        pl.when(pl.col("Fecha Registro Alta") == 0).then(None)
        .otherwise(pl.col("Fecha Registro Alta").dt.replace(day=1))
        .alias("FECHA_REGISTRO1"),
        
        # Fecha de primer incumplimiento con valor default si NULL
        fecha_or_default("FECHA_PRIMER_INCUMPLIMIENTO").alias("FECHA_PRIMER_INCUM"),

        # Fecha de pago también reemplazada si NULL
        fecha_or_default("FECHA_PAGO").alias("FECHA_PAGO_LIMPIA")
    ])

    # === Selección final de columnas ===
    result = result.select([
    "BUCKET", "CAMBIO", "MCrédito_MM_UDIS", "MM_UDIS",
    "INTER_CLAVE", "NOMBRE", "RFC", "TIPO_PERSONA", "CLAVE_CREDITO",
    "FECHA_VALOR", "PLAZO_DIAS", "PLAZO", 
    "test", # FVTO_Riesgosd
    "FECHA_REGISTRO_GARANTIA",
    "MGI (MDP)", "Porcentaje Garantizado", "BANCO", "FECHA_PRIMER_INCUM",
    "MONTO CREDITO (MDP)", "SALDO (MDP)", "TPRO_CLAVE", "CLAVE_TAXO", "TAXONOMIA", "NR_R",
    "FECHA_VALOR1", "FECHA_REGISTRO1", "NUM_GAR", "CSG", "PLAZO_BUCKET", "MPAGADO (MDP)",
    "PAGADAS", "INCUMPLIDO", "FECHA_PAGO_LIMPIA",
    "Programa_Original", "Programa_Id", "ESTRATO_ID", "SECTOR_ID", "ESTADO_ID",
    "Tipo_Credito_Id", "Porcentaje_Comision_Garantia", "TASA_ID", "Tasa_Interes",
    "MGI (MDP) Original", "AGRUPAMIENTO_ID", "ESQUEMA", "SUBESQUEMA", "AGRUPAMIENTO",
    "FONDOS_CONTRAGARANTIA", "CONREC_CLAVE", "Describe_Desrec"
    ])

    return result

# vf_pagadas_r = foto_saldo(db_dwh_r_p)

In [8]:
def une_pagos_recuperaciones(vf_pagadas, vf_recuperadas):
    result = vf_pagadas.join(
        vf_recuperadas.select([
            "Intermediario_Id",
            "Numero_Credito",
            "MONTOTOTAL (MDP)", 
            "RECUPERADOS (MDP)", 
            "RESCATADOS (MDP)"
        ]),
        left_on=["INTER_CLAVE", "CLAVE_CREDITO"],
        right_on=["Intermediario_Id", "Numero_Credito"],
        how="left"
    )

    return result

# vf_foto_r = une_pagos_recuperaciones(vf_pagadas_r, vf_recuperadas)

In [9]:
def genera_saldos(vf_foto):
    result = (vf_foto
              .filter(pl.col("SALDO (MDP)") > 0)
              .with_columns(
                  pl.col("SALDO (MDP)").alias("SALDO_MDP"),
                 (pl.col("CLAVE_CREDITO") + pl.col("INTER_CLAVE")).alias("CONCATENAR_SALDOS")
              ).select([
                  "BUCKET",
                  "INTER_CLAVE",
                  "CLAVE_CREDITO",
                  "BANCO",
                  "SALDO_MDP",
                  "CONCATENAR_SALDOS"
              ])
              )
    return result

# genera_saldos(vf_foto_r)

In [10]:
def fotos_principal(db_dwh, pagadas_detalle_vf, recuperadas_global_vf):
    # Pagos
    db_dwh = une_pagadas(db_dwh, pagadas_detalle_vf)
    vf_pagadas = foto_saldo(db_dwh)

    # Recuperaciones
    z3_recup_cohort = z3_recup(recuperadas_global_vf, db_dwh)
    vf_recuperadas = paso_2(z3_recup_cohort)

    # Fotos
    vf_foto = une_pagos_recuperaciones(vf_pagadas, vf_recuperadas)
    #saldos = genera_saldos(vf_foto)

    # return db_dwh, vf_pagadas, vf_recuperadas, vf_foto, saldos
    return vf_foto

# Revolventes
# db_dwh_r, vf_pagadas_r, vf_recuperadas_r, vf_foto_r, saldos_r = fotos_principal(db_dwh_r, pagadas_detalle_vf, recuperadas_global_vf)
#vf_foto_r = fotos_principal(db_dwh_r, pagadas_detalle_vf, recuperadas_global_vf)
vf_foto_nr = fotos_principal(db_dwh_nr, pagadas_detalle_vf, recuperadas_global_vf)

# No Revolventes
# db_dwh_nr, vf_pagadas_nr, vf_recuperadas_nr, vf_foto_nr, saldos_nr = fotos_principal(db_dwh_nr, pagadas_detalle_vf, recuperadas_global_vf)

: 

## Genera TP

In [None]:
#repetidos_tp_base = pl.read_excel(fl_repetidos_tp_base)

In [None]:
#pfpm = (vf_foto_r
# .select("BANCO","NOMBRE","TAXONOMIA","AGRUPAMIENTO","TIPO_PERSONA"
# ).sort("BANCO","NOMBRE","TAXONOMIA","AGRUPAMIENTO","TIPO_PERSONA"
# ))

In [None]:
def revisa_repetidos(pfpm):
    # Generar columnas desplazadas (valores anteriores)
    df_shifted = pfpm.select([
        pl.col("BANCO").shift(1).alias("BANCO_ANT"),
        pl.col("NOMBRE").shift(1).alias("NOMBRE_ANT"),
        pl.col("TAXONOMIA").shift(1).alias("TAXONOMIA_ANT"),
        pl.col("AGRUPAMIENTO").shift(1).alias("AGRUPAMIENTO_ANT"),
        pl.col("TIPO_PERSONA").shift(1).alias("TIPO_PERSONA_ANT")
    ])

    # Unir el DataFrame original con las versiones desplazadas
    df_with_prev = pfpm.hstack(df_shifted)

    # Resultado: duplicados consecutivos
    repetidos_tp = df_with_prev.filter(
        (pl.col("BANCO") == pl.col("BANCO_ANT")) &
        (pl.col("NOMBRE") == pl.col("NOMBRE_ANT")) &
        (pl.col("TAXONOMIA") == pl.col("TAXONOMIA_ANT")) &
        (pl.col("AGRUPAMIENTO") == pl.col("AGRUPAMIENTO_ANT")) &
        (pl.col("TIPO_PERSONA") == pl.col("TIPO_PERSONA_ANT"))
    )
    return repetidos_tp

#repetidos_tp = revisa_repetidos(pfpm)

In [None]:
def marca_registros_sin_repetir(repetidos_tp, repetidos_tp_base):
    result = (repetidos_tp
              .join(repetidos_tp_base,
                    on=[
                        "BANCO",
                        "NOMBRE",
                        "TAXONOMIA",
                        "AGRUPAMIENTO",
                    ], how="left")
                .rename({"TIPO_PERSONA_right": "BANCO_2"})
              )

    return result

#repetidos_tp_concentrado = marca_registros_sin_repetir(repetidos_tp, repetidos_tp_base)

In [None]:
def cuenta_registros(df, variable):
    result = (df
              .select(variable)
              ).count().item()

    return result

#cuenta_registros(repetidos_tp_concentrado, "BANCO_2")
#cuenta_registros(repetidos_tp, "BANCO")

### Test de Genera TP con Regex

In [None]:
import re

In [None]:
def genera_tp(vf_foto):
    result = vf_foto.with_columns(
        pl.when(pl.col("RFC").str.contains(r"^[a-zA-Z0-9&]{4}-"))
        .then(pl.lit("M"))
        .when(pl.col("RFC").str.contains(r"^[a-zA-Z&]{4}\d"))
        .then(pl.lit("M"))
        .when(pl.col("RFC").str.contains(r"^[a-zA-Z0-9&]{3}-"))
        .then(pl.lit("F"))
        .when(pl.col("RFC").str.contains(r"^[a-zA-Z&]{3}\d"))
        .then(pl.lit("F"))
        .otherwise(pl.lit("M")).alias("TIPO_PERSONA")
    )

    return result

#vf_foto_r = genera_tp(vf_foto_r)


In [None]:
#(test_foto
# .filter(pl.col("TP_TEST")=="NULL")
# .select(["NOMBRE","RFC", "TIPO_PERSONA", "TP_TEST"])
# ).write_csv("test_tipo_persona.csv")

## Genera Estrato

In [None]:
def genera_estrato(vf_foto_r):
    linea = [
        "NOMBRE",
        "BANCO",
        "AGRUPAMIENTO",
        "AGRUPAMIENTO_ID",
        "TAXONOMIA",
        "INTER_CLAVE",
        "NR_R",
        "CSG"
    ]
    #linea = [
    #    "INTER_CLAVE",
    #    "CLAVE_CREDITO"
    #]
    #vf_foto_r = vf_foto_r.sample(n=1000)
    temp_estrato = vf_foto_r.group_by(linea).agg(
        pl.col("FECHA_VALOR").max().alias("FECHA_VALOR23")
    ).sort(linea)

    vf_estrato = (temp_estrato
              .join(vf_foto_r,
                    left_on=linea+["FECHA_VALOR23"],
                    right_on=linea+["FECHA_VALOR"])
            )
    
    vf_estrato = (vf_estrato.group_by(linea)
              .agg(
                  pl.col("FECHA_VALOR23").max().alias("Max_Fecha_Valor"),
                  pl.col("ESTRATO_ID").max().alias("Estrato_Id")
              ))
    
    result = (vf_foto_r
            .rename({"ESTRATO_ID": "Estrato_Id_Original"})
            .join(vf_estrato, 
                left_on=linea,
                right_on=linea,
                how="left")
            )

    return result

#vf_foto_r = genera_estrato(vf_foto_r)

## Genera Foto Revolvente

In [None]:
(vf_foto_r
 .group_by("TAXONOMIA")
 .agg(
     pl.col("MGI (MDP)").sum().alias("MGI_MDP"),
     pl.col("SALDO (MDP)").sum().alias("SALDO_MDP"),
     pl.col("MPAGADO (MDP)").sum().alias("MPAGADO_MDP"),
     pl.col("RECUPERADOS (MDP)").sum().alias("MRECUP_MDP"),
     pl.col("RESCATADOS (MDP)").sum().alias("MRESCAT_MDP"),
 )
 )

TAXONOMIA,MGI_MDP,SALDO_MDP,MPAGADO_MDP,MRECUP_MDP,MRESCAT_MDP
str,f64,f64,f64,f64,f64
"""GARANTIA SECTORIAL""",18517.857831,1114.471605,824.682683,276.970352,113.727756
"""GARANTIA MICROCREDITO""",0.048,0.0,0.0,0.0,0.0
"""GARANTIA SELECTIVA""",168.22007,0.0,5.0,0.0,0.0
"""GARANTIAS FEFA""",340.898804,214.796732,9.636996,0.0,0.0
"""GARANTIA ESPECIAL""",77.5,64.603449,0.0,0.0,0.0
"""EMPRESA MEDIANA""",18010.68355,261.94218,1102.159376,125.195081,75.084676
"""GARANTIA EMPRESARIAL""",376733.036457,24742.546599,13898.709355,3346.462545,1012.271716
"""GARANTIA SUBASTA""",4598.109303,0.0,2834.118755,649.766248,333.49744


In [None]:
(vf_foto_nr
 .group_by("TAXONOMIA")
 .agg(
     pl.col("MGI (MDP)").sum().alias("MGI_MDP"),
     pl.col("SALDO (MDP)").sum().alias("SALDO_MDP"),
     pl.col("MPAGADO (MDP)").sum().alias("MPAGADO_MDP"),
     pl.col("RECUPERADOS (MDP)").sum().alias("MRECUP_MDP"),
     pl.col("RESCATADOS (MDP)").sum().alias("MRESCAT_MDP"),
 )
 )

TAXONOMIA,MGI_MDP,SALDO_MDP,MPAGADO_MDP,MRECUP_MDP,MRESCAT_MDP
str,f64,f64,f64,f64,f64
"""GARANTIA SELECTIVA""",824.506595,0.0,2.604453,2.604453,0.0
"""GARANTIA FIANZA""",19391.464936,6516.591616,7.096223,0.0,2.112661
"""GARANTIA MICROCREDITO""",1921.673455,0.0,218.320828,57.541545,59.488832
"""GARANTIA EMPRESARIAL""",399607.567782,70248.342599,25718.87374,6494.690834,1250.922311
"""EMPRESA MEDIANA""",5061.008599,42.095458,461.045934,68.394259,34.131513
"""GARANTIA SECTORIAL""",112945.73856,16593.538016,7890.814287,1861.780878,416.451584
"""GARANTIA EMERGENTES""",19498.346399,471.754144,2220.52283,565.477154,174.369526
"""GARANTIA SUBASTA""",6843.017356,0.402383,5725.696843,1254.737782,629.389588
"""GARANTIAS FEFA""",256.935314,128.858849,66.879565,0.0,0.0


## Quita doble destino recurso

In [None]:
def quita_doble_destino(df):
    dobles = (df
    .group_by(['INTER_CLAVE', 'CLAVE_CREDITO'])
    .agg(pl.col("CONREC_CLAVE").count().alias("Dobles"))
    )
    aux = (df
           .join(dobles, on=["INTER_CLAVE", "CLAVE_CREDITO"], how="left")
           .with_columns(
               pl.when(pl.col("Dobles")>1).then(9999).otherwise("CONREC_CLAVE").alias("CONREC_CLAVE"),
               pl.when(pl.col("Dobles")>1).then(pl.lit("Doble clave de destino recurso")).otherwise("Describe_Desrec").alias("Describe_Desrec")
           ).unique()
           )

    return aux

#vf_foto_r = quita_doble_destino(vf_foto_r)
vf_foto_nr = quita_doble_destino(vf_foto_nr)

In [None]:
(vf_foto_nr
 .group_by("TAXONOMIA")
 .agg(
     pl.col("MGI (MDP)").sum().alias("MGI_MDP"),
     pl.col("SALDO (MDP)").sum().alias("SALDO_MDP"),
     pl.col("MPAGADO (MDP)").sum().alias("MPAGADO_MDP"),
     pl.col("RECUPERADOS (MDP)").sum().alias("MRECUP_MDP"),
     pl.col("RESCATADOS (MDP)").sum().alias("MRESCAT_MDP"),
 )
)

TAXONOMIA,MGI_MDP,SALDO_MDP,MPAGADO_MDP,MRECUP_MDP,MRESCAT_MDP
str,f64,f64,f64,f64,f64
"""GARANTIA SUBASTA""",6842.979856,0.402383,5725.145942,1253.635981,629.389588
"""GARANTIA EMPRESARIAL""",399605.541449,70248.342599,25718.849479,6494.670576,1250.922311
"""EMPRESA MEDIANA""",5061.008599,42.095458,461.045934,68.394259,34.131513
"""GARANTIA FIANZA""",19391.464936,6516.591616,7.096223,0.0,2.112661
"""GARANTIA MICROCREDITO""",1921.673455,0.0,218.320828,57.541545,59.488832
"""GARANTIA SECTORIAL""",112945.73856,16593.538016,7890.814287,1861.780878,416.451584
"""GARANTIAS FEFA""",256.935314,128.858849,66.879565,0.0,0.0
"""GARANTIA EMERGENTES""",19498.346399,471.754144,2220.52283,565.477154,174.369526
"""GARANTIA SELECTIVA""",824.506595,0.0,2.604453,2.604453,0.0


In [None]:
(vf_foto_r
 .group_by("TAXONOMIA")
 .agg(
     pl.col("MGI (MDP)").sum().alias("MGI_MDP"),
     pl.col("SALDO (MDP)").sum().alias("SALDO_MDP"),
     pl.col("MPAGADO (MDP)").sum().alias("MPAGADO_MDP"),
     pl.col("RECUPERADOS (MDP)").sum().alias("MRECUP_MDP"),
     pl.col("RESCATADOS (MDP)").sum().alias("MRESCAT_MDP"),
 )
)

TAXONOMIA,MGI_MDP,SALDO_MDP,MPAGADO_MDP,MRECUP_MDP,MRESCAT_MDP
str,f64,f64,f64,f64,f64
"""GARANTIAS FEFA""",340.898804,214.796732,9.636996,0.0,0.0
"""GARANTIA SUBASTA""",4598.046503,0.0,2831.985916,645.161697,333.49744
"""GARANTIA ESPECIAL""",77.5,64.603449,0.0,0.0,0.0
"""GARANTIA SELECTIVA""",168.22007,0.0,5.0,0.0,0.0
"""GARANTIA MICROCREDITO""",0.048,0.0,0.0,0.0,0.0
"""EMPRESA MEDIANA""",18010.68355,261.94218,1102.159376,125.195081,75.084676
"""GARANTIA EMPRESARIAL""",376732.636457,24742.546599,13898.283396,3346.062545,1012.271716
"""GARANTIA SECTORIAL""",18517.857831,1114.471605,824.682683,276.970352,113.727756


### Foto DWH Revolventes

In [None]:
def agrupa_taxonomia(vf_foto_r, condicion):
    result = (
        vf_foto_r
        # 1. Filtrar TAXONOMIA
        .filter(condicion) #"GARANTIA MICROCREDITO"

        # 2. Agrupar
        .group_by(
            [
                "BANCO", "AGRUPAMIENTO", "AGRUPAMIENTO_ID", "NOMBRE",
                "TAXONOMIA", "CLAVE_TAXO", "NR_R", "INTER_CLAVE", "CSG"
            ]
        )

        # 3. Agregaciones
        .agg([
            pl.min("FECHA_VALOR1").alias("FECHA_VALOR1"),
            pl.min("FECHA_REGISTRO_GARANTIA").alias("FECHA_REGISTRO_GARANTIA"),
            pl.sum("MGI (MDP)").alias("MGI (MDP)"),
            pl.mean("PLAZO").alias("PLAZO"),
            pl.mean("PLAZO_DIAS").alias("PLAZO_DIAS"),
            pl.max("test").alias("FVTO"),
            pl.min("FECHA_PAGO_LIMPIA").alias("FECHA_PAGO"),
            pl.sum("INCUMPLIDO").alias("PAGADAS"),
            pl.sum("MPAGADO (MDP)").alias("MPAGADO (MDP)"),
            pl.sum("MONTO CREDITO (MDP)").alias("MONTO CREDITO (MDP)"),
            pl.min("FECHA_VALOR").alias("FECHA_VALOR"),
            pl.sum("SALDO (MDP)").alias("SALDO (MDP)"),
            pl.min("FECHA_REGISTRO1").alias("FECHA_REGISTRO1"),

            # FECHA_PRIMER_INCUM -> reemplazar nulos con '1899-12-30'
            pl.min("FECHA_PRIMER_INCUM")
            .fill_null(pl.lit("1899-12-30").str.strptime(pl.Date, "%Y-%m-%d"))
            .alias("FECHA_PRIMER_INCUM"),

            pl.max("MM_UDIS").alias("MM_UDIS"),
            pl.count("NUM_GAR").alias("NUM_GAR"),
            pl.max("INCUMPLIDO").alias("INCUMPLIDO"),
            pl.first("ESQUEMA").alias("ESQUEMA"),
            pl.sum("MONTOTOTAL (MDP)").alias("MONTOTOTAL (MDP)"),
            pl.sum("RECUPERADOS (MDP)").alias("TOT RECUP (MDP)"),
            pl.sum("RESCATADOS (MDP)").alias("TOT RESCAT (MDP)"),
        ])

        # 4. Ordenar
        .sort(
            ["BANCO", "AGRUPAMIENTO", "AGRUPAMIENTO_ID", "NOMBRE",
            "TAXONOMIA", "CLAVE_TAXO", "NR_R", "INTER_CLAVE", "CSG"]
        )
    )
    return result

def genera_taxonomia_empresarial(vf_foto_r):
    df_empresarial = (
        vf_foto_r.filter(pl.col("TAXONOMIA") == "GARANTIA EMPRESARIAL")
        .with_columns([
            # Renombrar INCUMPLIDO → PAGADAS
            pl.col("INCUMPLIDO").alias("PAGADAS"),

            # Normalizar FECHA_PRIMER_INCUM:
            # Si es NULL, poner 1899-12-30
            pl.when(pl.col("FECHA_PRIMER_INCUM").is_null())
            .then(pl.lit("1899-12-30").str.strptime(pl.Date, "%Y-%m-%d"))
            .otherwise(pl.col("FECHA_PRIMER_INCUM"))
            .alias("FECHA_PRIMER_INCUM"),

            # Renombrar RECUPERADOS (MDP) → TOT RECUP (MDP)
            pl.col("RECUPERADOS (MDP)").alias("TOT RECUP (MDP)"),

            # Renombrar RESCATADOS (MDP) → TOT RESCAT (MDP)
            pl.col("RESCATADOS (MDP)").alias("TOT RESCAT (MDP)"),

            #
            pl.col("test").alias("FVTO")
        ])
        .select([
            "BANCO", "AGRUPAMIENTO", "AGRUPAMIENTO_ID", "NOMBRE",
            "TAXONOMIA", "CLAVE_TAXO", "NR_R", "INTER_CLAVE", "CSG",
            "FECHA_VALOR1", "FECHA_REGISTRO_GARANTIA", "MGI (MDP)",
            "PLAZO", "PLAZO_DIAS", "FVTO", "FECHA_PAGO_LIMPIA",
            "PAGADAS", "MPAGADO (MDP)", "MONTO CREDITO (MDP)",
            "FECHA_VALOR", "SALDO (MDP)", "FECHA_REGISTRO1",
            "FECHA_PRIMER_INCUM", "MM_UDIS", "NUM_GAR", "INCUMPLIDO",
            "MONTOTOTAL (MDP)", "TOT RECUP (MDP)", "TOT RESCAT (MDP)"
        ])
    )
    return df_empresarial

def agrega_campos_taxonomia(df, fecha_base):
    result = (df.with_columns([
        # 1. Indicador saldo > 0
        pl.when(pl.col("SALDO (MDP)") > 0).then(1).otherwise(0).alias("No_Acreditados_Saldo>0"),

        # 2. Saldo al cuadrado
        (pl.col("SALDO (MDP)") ** 2).alias("Saldo^2"),

        # 3. Conteo condicional
        pl.when(pl.col("SALDO (MDP)") > 0).then(1).otherwise(0).alias("Count"),

        # 4. Antigüedad cliente en meses
        pl.when(pl.col("SALDO (MDP)") > 0)
        .then((pl.lit(fecha_base).dt.year() - pl.col("FECHA_VALOR1").dt.year()) * 12 +
                (pl.lit(fecha_base).dt.month() - pl.col("FECHA_VALOR1").dt.month()))
        .otherwise(0)
        .alias("ANTIG_CLIENTE_MESES")
    ])

        .with_columns([
        # 5. Antigüedad cliente en años (clasificación)
        pl.when(pl.col("SALDO (MDP)") > 0)
        .then(
            pl.when(pl.col("ANTIG_CLIENTE_MESES") <= 12).then(1)
            .when(pl.col("ANTIG_CLIENTE_MESES") <= 24).then(2)
            .when(pl.col("ANTIG_CLIENTE_MESES") <= 36).then(3)
            .otherwise(4)
        )
        .otherwise(0)
        .alias("ANTIG_CLIENTE_AÑOS"),

        # 6. Restante meses
        pl.when(
            pl.when(pl.col("SALDO (MDP)") > 0)
            .then((pl.col("FVTO").dt.year() - pl.lit(fecha_base).dt.year()) * 12 +
                    (pl.col("FVTO").dt.month() - pl.lit(fecha_base).dt.month()))
            .otherwise(0) < 0
        ).then(0)
        .otherwise(
            pl.when(pl.col("SALDO (MDP)") > 0)
            .then((pl.col("FVTO").dt.year() - pl.lit(fecha_base).dt.year()) * 12 +
                    (pl.col("FVTO").dt.month() - pl.lit(fecha_base).dt.month()))
            .otherwise(0)
        ).alias("RESTANTE_MESES")])

        .with_columns([
        # 7. Restante ponderado
        (pl.col("RESTANTE_MESES") * pl.col("SALDO (MDP)")).alias("RESTANTE_POND"),

        # 8. Vigentes
        pl.when(pl.col("FVTO") + pl.duration(days=180) > pl.lit(fecha_base))
        .then(1).otherwise(0).alias("VIGENTES"),

        # 9. Remanente meses
        pl.when(pl.col("SALDO (MDP)") > 0)
        .then(
            pl.when(pl.col("FVTO") > pl.lit(fecha_base))
                .then((pl.col("FVTO").dt.year() - pl.lit(fecha_base).dt.year()) * 12 +
                    (pl.col("FVTO").dt.month() - pl.lit(fecha_base).dt.month()))
                .otherwise(0)
        ).otherwise(0)
        .alias("REMANENTE_MESES")])
        
        .with_columns([
        # 10. Remanente años (clasificación)
        pl.when(pl.col("REMANENTE_MESES") <= 12).then(1)
        .when(pl.col("REMANENTE_MESES") <= 24).then(2)
        .when(pl.col("REMANENTE_MESES") <= 36).then(3)
        .when(pl.col("REMANENTE_MESES") <= 48).then(4)
        .otherwise(5)
        .alias("REMANENTE_AÑOS"),

        # 11. Remanente meses + 180 días
        pl.when(pl.col("SALDO (MDP)") > 0)
        .then(
            pl.when(pl.col("FVTO") + pl.duration(days=180) > pl.lit(fecha_base))
                .then((pl.col("FVTO").dt.year() - pl.lit(fecha_base).dt.year()) * 12 +
                    (pl.col("FVTO").dt.month() - pl.lit(fecha_base).dt.month()) +
                    180/30)
                .otherwise(0)
        ).otherwise(0)
        .alias("REMANENTE_MESES+180")])

        .with_columns([
        # 12. Remanente años + 180
        pl.when(pl.col("REMANENTE_MESES+180") <= 12).then(1)
        .when(pl.col("REMANENTE_MESES+180") <= 24).then(2)
        .when(pl.col("REMANENTE_MESES+180") <= 36).then(3)
        .when(pl.col("REMANENTE_MESES+180") <= 48).then(4)
        .otherwise(5)
        .alias("REMANENTE_AÑOS+180"),

        # 13. Antig cliente meses ponderado
        (pl.col("ANTIG_CLIENTE_MESES") * pl.col("SALDO (MDP)")).alias("Antig_Cliente_Meses_Pond"),

        # 14. Restante días
        pl.when(pl.col("SALDO (MDP)") > 0)
        .then(((pl.col("FVTO") - fecha_base).dt.total_days()))
        .otherwise(0)
        .alias("RESTANTE_DIAS")])

        .with_columns([
        # 15. Restante días ponderado
        (pl.col("RESTANTE_DIAS") * pl.col("SALDO (MDP)")).alias("RESTANTE_DIAS_POND")
    ]))

    return result

In [None]:
def foto_dwh_revolventes(vf_foto_r):
    cond_microcredito = pl.col("TAXONOMIA")=="GARANTIA MICROCREDITO"
    cond_resto = (pl.col("TAXONOMIA")!="GARANTIA EMPRESARIAL") & (pl.col("TAXONOMIA")!="EMPRESA MEDIANA") & (pl.col("TAXONOMIA")!="GARANTIA MICROCREDITO")
    cond_empresarial =(pl.col("TAXONOMIA")=="GARANTIA EMPRESARIAL") | (pl.col("TAXONOMIA")=="EMPRESA MEDIANA")

    # Define tablas
    df_microcredito = agrupa_taxonomia(vf_foto_r, cond_microcredito)
    df_empresarial = agrupa_taxonomia(vf_foto_r, cond_empresarial)
    df_resto = agrupa_taxonomia(vf_foto_r, cond_resto)
    df_empresarial_2 = genera_taxonomia_empresarial(vf_foto_r)

    # Crea nuevas variables
    df_microcredito = agrega_campos_taxonomia(df_microcredito, cierre_d2)
    df_empresarial = agrega_campos_taxonomia(df_empresarial, cierre_d2)
    df_resto = agrega_campos_taxonomia(df_resto, cierre_d2)
    df_empresarial_2 = agrega_campos_taxonomia(df_empresarial_2, cierre_d2)

    # Exporta
    print(df_microcredito.head())
    print(df_empresarial.head())
    print(df_resto.head())
    print(df_empresarial_2.head())
    
foto_dwh_revolventes(vf_foto_r)



shape: (2, 45)
┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
│ BANCO     ┆ AGRUPAMIE ┆ AGRUPAMIE ┆ NOMBRE    ┆ … ┆ REMANENTE ┆ Antig_Cli ┆ RESTANTE_ ┆ RESTANTE │
│ ---       ┆ NTO       ┆ NTO_ID    ┆ ---       ┆   ┆ _AÑOS+180 ┆ ente_Mese ┆ DIAS      ┆ _DIAS_PO │
│ str       ┆ ---       ┆ ---       ┆ str       ┆   ┆ ---       ┆ s_Pond    ┆ ---       ┆ ND       │
│           ┆ str       ┆ f64       ┆           ┆   ┆ i32       ┆ ---       ┆ i64       ┆ ---      │
│           ┆           ┆           ┆           ┆   ┆           ┆ f64       ┆           ┆ f64      │
╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡
│ CREDITOS  ┆ Pronegoci ┆ 1680.0    ┆ GLORIA    ┆ … ┆ 1         ┆ 0.0       ┆ 0         ┆ 0.0      │
│ PRONEGOCI ┆ o         ┆           ┆ LEGARIA   ┆   ┆           ┆           ┆           ┆          │
│ OS        ┆           ┆           ┆ GONZALEZ  ┆   ┆           ┆           

### Foto DWH Simples

In [109]:
    
fecha_fallback = datetime(1899, 12, 30)

def base_simple_taxonomia(df, condicion):
    result = (
        df.filter(condicion)
        .with_columns([
            # FECHA_PRIMER_INCUM: si es null, usar 30/12/1899
            pl.when(pl.col("FECHA_PRIMER_INCUM").is_null())
            .then(pl.lit(fecha_fallback))
            .otherwise(pl.col("FECHA_PRIMER_INCUM"))
            .alias("FECHA_PRIMER_INCUM"),

            # NUM_GAR: si CLAVE_CREDITO es null => 0, si no => 1
            pl.when(pl.col("CLAVE_CREDITO").is_null())
            .then(pl.lit(0))
            .otherwise(pl.lit(1))
            .alias("NUM_GAR"),
        ])
        .select([
            "CLAVE_CREDITO",
            "FECHA_VALOR1",
            "TIPO_PERSONA",
            "NOMBRE",
            "RFC",
            "FECHA_REGISTRO_GARANTIA",
            "MGI (MDP)",
            "PLAZO",
            "PLAZO_DIAS",
            pl.col("test").alias("FVTO"),
            "BANCO",
            pl.col("FECHA_PAGO_LIMPIA").alias("FECHA_PAGO"),
            pl.col("INCUMPLIDO").alias("PAGADAS"),
            "MPAGADO (MDP)",
            "FECHA_REGISTRO1",
            "MONTO CREDITO (MDP)",
            "FECHA_VALOR",
            "INTER_CLAVE",
            "TPRO_CLAVE",
            "NR_R",
            "CSG",
            "SALDO (MDP)",
            "FECHA_PRIMER_INCUM",
            "CLAVE_TAXO",
            "TAXONOMIA",
            "MM_UDIS",
            "NUM_GAR",
            "INCUMPLIDO",
            "ESQUEMA",
            "MONTOTOTAL (MDP)",
            "RECUPERADOS (MDP)",
            "RESCATADOS (MDP)",
            "AGRUPAMIENTO",
            "AGRUPAMIENTO_ID",
            "Porcentaje Garantizado",
            "PLAZO_BUCKET",
            "Programa_Original",
            "Programa_Id"
        ])
    )
    return result


def campos_extras_simples(df, fecha_base):
    df = df.with_columns([
        # VIVOS y CADUCOS
        ((pl.col("FVTO") + pl.duration(days=180) > fecha_base).cast(pl.Int8) * pl.col("MGI (MDP)")).alias("MGI_VIVOS"),
        ((pl.col("FVTO") + pl.duration(days=180) > fecha_base).cast(pl.Int8) * pl.col("MGI (MDP)") * pl.col("INCUMPLIDO")).alias("MGI_MALOS_VIVOS"),
        ((pl.col("FVTO") + pl.duration(days=180) > fecha_base).cast(pl.Int8) * pl.col("MPAGADO (MDP)")).alias("MPAGADO_VIVOS"),
        ((pl.col("FVTO") + pl.duration(days=180) > fecha_base).cast(pl.Int8) * pl.col("RECUPERADOS (MDP)")).alias("MRECUP_VIVOS"),

        ((pl.col("FVTO") + pl.duration(days=180) <= fecha_base).cast(pl.Int8) * pl.col("MGI (MDP)")).alias("MGI_CAD"),
        ((pl.col("FVTO") + pl.duration(days=180) <= fecha_base).cast(pl.Int8) * pl.col("MGI (MDP)") * pl.col("INCUMPLIDO")).alias("MGI_MALOS_CAD"),
        ((pl.col("FVTO") + pl.duration(days=180) <= fecha_base).cast(pl.Int8) * pl.col("MPAGADO (MDP)")).alias("MPAGADO_CAD"),
        ((pl.col("FVTO") + pl.duration(days=180) <= fecha_base).cast(pl.Int8) * pl.col("RECUPERADOS (MDP)")).alias("MRECUP_CAD"),

        ((pl.col("FVTO") + pl.duration(days=180) > fecha_base).cast(pl.Int8) * pl.col("SALDO (MDP)")).alias("SALDO_VIVOS"),
        ((pl.col("FVTO") + pl.duration(days=180) <= fecha_base).cast(pl.Int8) * pl.col("SALDO (MDP)")).alias("SALDO_CADUCOS"),

        # Otras columnas derivadas
        (pl.col("MGI_VIVOS") * pl.col("MGI_VIVOS")).alias("MGI_VIVOS^2"),
        ((pl.col("FVTO") + pl.duration(days=180) > fecha_base).cast(pl.Int8)).alias("#VIVAS"),
        (pl.col("MGI (MDP)") * pl.col("INCUMPLIDO")).alias("MGI_INCMPL"),
        pl.lit(1).alias("Count"),

        # FECHA_PAGO1 (si es 0 => NULL, si no redondea al primer día del mes)
        pl.when(pl.col("FECHA_PAGO") == 0).then(pl.lit(None))
        .otherwise(pl.col("FECHA_PAGO").dt.truncate("1mo"))
        .alias("FECHA_PAGO1"),

        # AñoOtor
        pl.col("FECHA_VALOR1").dt.year().alias("AñoOtor"),

        # PTRANSCURRIDO
        pl.min_horizontal([
            ((fecha_base - pl.col("FECHA_VALOR")) / (pl.col("FVTO") - pl.col("FECHA_VALOR") + pl.duration(days=180))),
            pl.lit(1)
        ]).alias("PTRANSCURRIDO"),

        # PTRANS_PON
        (pl.col("PTRANSCURRIDO") * pl.col("MGI (MDP)")).alias("PTRANS_PON"),

        # Semestre
        pl.when(pl.col("FECHA_VALOR").dt.month() < 7)
        .then(pl.col("FECHA_VALOR").dt.replace(month=1, day=1))
        .otherwise(pl.col("FECHA_VALOR").dt.replace(month=7, day=1))
        .alias("Semestre"),

        # MESES_REM_POND
        ((1 - pl.col("PTRANSCURRIDO")) * (
            (pl.col("FVTO").dt.year() - pl.col("FECHA_VALOR1").dt.year()) * 12 +
            (pl.col("FVTO").dt.month() - pl.col("FECHA_VALOR1").dt.month()) + 6
        ) * pl.col("SALDO (MDP)")).alias("MESES_REM_POND"),

        # Con_Saldo
        (pl.col("SALDO (MDP)") > 0).cast(pl.Int8).alias("Con_Saldo"),

        # RESTANTE_MESES
        pl.when(((pl.col("FVTO").dt.year() - fecha_base.dt.year())*12 + (pl.col("FVTO").dt.month() - fecha_base.dt.month())) > 0)
        .then((pl.col("FVTO").dt.year() - fecha_base.dt.year())*12 + (pl.col("FVTO").dt.month() - fecha_base.dt.month()))
        .otherwise(0)
        .alias("RESTANTE_MESES"),

        # RESTANTE_POND
        (pl.col("RESTANTE_MESES") * pl.col("SALDO (MDP)")).alias("RESTANTE_POND"),

        # SALDO^2
        (pl.col("SALDO (MDP)") * pl.col("SALDO (MDP)")).alias("SALDO^2"),

        # RESTANTE_DIAS
        pl.when(pl.col("SALDO (MDP)") > 0)
        .then(pl.col("FVTO") - fecha_base)
        .otherwise(0)
        .alias("RESTANTE_DIAS"),

        (pl.col("RESTANTE_DIAS") * pl.col("SALDO (MDP)")).alias("RESTANTE_DIAS_POND"),
    ])


In [110]:
cond_microcredito = pl.col("TAXONOMIA")=="GARANTIA MICROCREDITO"
cond_resto = (pl.col("TAXONOMIA")!="GARANTIA EMPRESARIAL") & (pl.col("TAXONOMIA")!="GARANTIA MICROCREDITO")
cond_empresarial =(pl.col("TAXONOMIA")=="GARANTIA EMPRESARIAL") 

base_simple_taxonomia(vf_foto_nr, cond_empresarial)


CLAVE_CREDITO,FECHA_VALOR1,TIPO_PERSONA,NOMBRE,RFC,FECHA_REGISTRO_GARANTIA,MGI (MDP),PLAZO,PLAZO_DIAS,FVTO,BANCO,FECHA_PAGO,PAGADAS,MPAGADO (MDP),FECHA_REGISTRO1,MONTO CREDITO (MDP),FECHA_VALOR,INTER_CLAVE,TPRO_CLAVE,NR_R,CSG,SALDO (MDP),FECHA_PRIMER_INCUM,CLAVE_TAXO,TAXONOMIA,MM_UDIS,NUM_GAR,INCUMPLIDO,ESQUEMA,MONTOTOTAL (MDP),RECUPERADOS (MDP),RESCATADOS (MDP),AGRUPAMIENTO,AGRUPAMIENTO_ID,Porcentaje Garantizado,PLAZO_BUCKET,Programa_Original,Programa_Id
str,datetime[μs],str,str,str,datetime[μs],f64,i32,i32,datetime[μs],str,datetime[μs],i8,f64,datetime[μs],f64,datetime[μs],str,i32,str,str,f64,datetime[μs],i32,str,i32,i32,i8,str,f64,f64,f64,str,f64,f32,i32,i32,i32
"""R7176365025""",2020-12-01 00:00:00,,"""GINA JAJATI YEDID""","""JAYG-730104-4L3""",2021-01-25 11:09:43,0.207064,60,0,2025-12-17 00:00:00,"""BANAMEX""",1899-12-30 00:00:00,0,0.0,2021-01-01 11:09:43,0.591612,2020-12-18 00:00:00,"""10000233""",310008,"""NR""","""SG""",0.037012,1899-12-30 00:00:00,591250,"""GARANTIA EMPRESARIAL""",0,1,0,"""PP""",,,,"""Producto NAFIN""",10000.0,35.0,4,,
"""9617861311""",2008-03-01 00:00:00,,"""DENTAL PLUSSA DE CV""","""DPL-911128-7I8""",2008-03-07 16:40:05,0.105,36,0,2011-03-03 00:00:00,"""BBV BANCOMER""",1899-12-30 00:00:00,0,0.0,2008-03-01 16:40:05,0.15,2008-03-03 00:00:00,"""10040012""",3155,"""NR""","""SG""",0.0,1899-12-30 00:00:00,591250,"""GARANTIA EMPRESARIAL""",0,1,0,"""PP""",,,,"""PyMe""",1690.0,70.0,3,,
"""05009111975""",2019-04-01 00:00:00,"""M""","""PUNTO ESTOCASTICO EN IMPRESION…","""PEI-130704-S8A""",2019-06-27 11:08:44,1.5,36,0,2022-04-11 00:00:00,"""BANCO SANTANDER""",2019-11-27 00:00:00,1,1.469065,2019-06-01 11:08:44,3.0,2019-04-12 00:00:00,"""10000144""",310000,"""NR""","""SG""",0.0,2019-07-12 00:00:00,591250,"""GARANTIA EMPRESARIAL""",0,1,1,"""PP""",0.0,0.0,0.0,"""Producto NAFIN""",10000.0,50.0,3,3999,310000
"""692609""",1999-06-01 00:00:00,,"""PYMZAC SA DE CV""","""PYM-940719-6W5""",2001-08-21 10:20:18,0.121528,36,16,2002-07-02 00:00:00,"""BANCO MERCANTIL DEL NORTE""",1899-12-30 00:00:00,0,0.0,2001-08-01 10:20:18,0.243056,1999-06-17 00:00:00,"""10040072""",3003,"""NR""","""SG""",0.0,1899-12-30 00:00:00,591250,"""GARANTIA EMPRESARIAL""",0,1,0,"""PP""",,,,"""Crediactivo Diana""",1275.0,50.0,3,,
"""28415565012""",2012-04-01 00:00:00,,"""SERVICIOS DE MAQUINADOS Y AFIL…","""SMA-981104-B83""",2012-05-18 14:09:08,0.36,36,0,2015-04-27 00:00:00,"""BANAMEX""",1899-12-30 00:00:00,0,0.0,2012-05-01 14:09:08,0.72,2012-04-27 00:00:00,"""10000233""",31355,"""NR""","""SG""",0.0,1899-12-30 00:00:00,591250,"""GARANTIA EMPRESARIAL""",0,1,0,"""PP""",,,,"""PyMe""",1690.0,50.0,3,,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""05009298647""",2019-08-01 00:00:00,,"""OPERADORA HOTEL CINCO INN SA D""","""OHC-101217-F80""",2019-08-30 14:10:42,0.415532,52,0,2023-12-13 00:00:00,"""BANCO SANTANDER""",1899-12-30 00:00:00,0,0.0,2019-08-01 14:10:42,0.831064,2019-08-14 00:00:00,"""10000144""",310000,"""NR""","""SG""",0.0,1899-12-30 00:00:00,591250,"""GARANTIA EMPRESARIAL""",0,1,0,"""PP""",,,,"""Producto NAFIN""",10000.0,50.0,4,,
"""79383742016""",2020-03-01 00:00:00,,"""OLVERA AZPILCUETA PAULIN""","""OEAP-571010-AKA""",2020-04-29 17:45:02,0.1065,36,0,2023-03-27 00:00:00,"""BANAMEX""",1899-12-30 00:00:00,0,0.0,2020-04-01 17:45:02,0.213,2020-03-27 00:00:00,"""10000233""",310018,"""NR""","""SG""",0.0,1899-12-30 00:00:00,591250,"""GARANTIA EMPRESARIAL""",0,1,0,"""PP""",,,,"""Producto NAFIN""",10000.0,50.0,3,,
"""43036898015""",2020-01-01 00:00:00,,"""JAZMIN ALEJANDRA IBARRA OSORIO""","""IAOJ-970108-2H0""",2020-03-13 14:58:43,0.4,42,0,2023-07-21 00:00:00,"""BANAMEX""",1899-12-30 00:00:00,0,0.0,2020-03-01 14:58:43,0.8,2020-01-20 00:00:00,"""10000233""",310018,"""NR""","""SG""",0.0,1899-12-30 00:00:00,591250,"""GARANTIA EMPRESARIAL""",0,1,0,"""PP""",,,,"""Producto NAFIN""",10000.0,50.0,4,,
"""9817424353""",2008-12-01 00:00:00,,"""FRANCISCO BRIAN ROJAS CANO""","""ROCF-810828-R85""",2009-03-19 12:36:39,0.315,36,0,2011-12-22 00:00:00,"""BBV BANCOMER""",1899-12-30 00:00:00,0,0.0,2009-03-01 12:36:39,0.45,2008-12-22 00:00:00,"""10040012""",3155,"""NR""","""SG""",0.0,1899-12-30 00:00:00,591250,"""GARANTIA EMPRESARIAL""",0,1,0,"""PP""",,,,"""PyMe""",1690.0,70.0,3,,


In [108]:
vf_foto_nr.columns

['BUCKET',
 'CAMBIO',
 'MCrédito_MM_UDIS',
 'MM_UDIS',
 'INTER_CLAVE',
 'NOMBRE',
 'RFC',
 'TIPO_PERSONA',
 'CLAVE_CREDITO',
 'FECHA_VALOR',
 'PLAZO_DIAS',
 'PLAZO',
 'test',
 'FECHA_REGISTRO_GARANTIA',
 'MGI (MDP)',
 'Porcentaje Garantizado',
 'BANCO',
 'FECHA_PRIMER_INCUM',
 'MONTO CREDITO (MDP)',
 'SALDO (MDP)',
 'TPRO_CLAVE',
 'CLAVE_TAXO',
 'TAXONOMIA',
 'NR_R',
 'FECHA_VALOR1',
 'FECHA_REGISTRO1',
 'NUM_GAR',
 'CSG',
 'PLAZO_BUCKET',
 'MPAGADO (MDP)',
 'PAGADAS',
 'INCUMPLIDO',
 'FECHA_PAGO_LIMPIA',
 'Programa_Original',
 'Programa_Id',
 'ESTRATO_ID',
 'SECTOR_ID',
 'ESTADO_ID',
 'Tipo_Credito_Id',
 'Porcentaje_Comision_Garantia',
 'TASA_ID',
 'Tasa_Interes',
 'MGI (MDP) Original',
 'AGRUPAMIENTO_ID',
 'ESQUEMA',
 'SUBESQUEMA',
 'AGRUPAMIENTO',
 'FONDOS_CONTRAGARANTIA',
 'CONREC_CLAVE',
 'Describe_Desrec',
 'MONTOTOTAL (MDP)',
 'RECUPERADOS (MDP)',
 'RESCATADOS (MDP)',
 'Dobles']

### Cierre

In [25]:
pagadas_global_vf = pl.read_parquet(fl_pagadas_global_vf)

In [26]:
# 1) UNION ALL entre VF_Foto_NR y VF_Foto_R
fotos = pl.concat([vf_foto_nr, vf_foto_nr], how="vertical")

In [28]:
# 2) Agrupación de Recupera_con_Pagos_Flujos
recup_agrup = (
    recuperadas_global_vf
    .filter(pl.col("Estatus").is_in(["CR", "D", "E", "RAC", "RAR", "RI"]))
    .group_by(["Intermediario_Id", "Numero_Credito"])
    .agg([
        (pl.col("Monto_Total_Mn").sum() / 1_000_000).alias("recup_aux"),
        pl.col("Fecha").max().alias("fecha_recup"),
        ((pl.col("Monto_Total_Mn") / pl.col("Tipo_Cambio_Cierre")).sum() / 1_000_000).alias("recup_or")
    ])
)

In [29]:
# 3) JOIN con recup_agrup
fotos_recup = fotos.join(
    recup_agrup,
    left_on=["INTER_CLAVE", "CLAVE_CREDITO"],
    right_on=["Intermediario_Id", "Numero_Credito"],
    how="left"
)

In [32]:
# 4) Agrupación de Pagadas_Global_VF
pagos_agrup = (
    pagadas_global_vf
    .group_by(["Intermediario_Id", "Numero_Credito"])
    .agg([
        (pl.col("Monto_Pagado_Mn").sum() / 1_000_000).alias("pagos_aux"),
        pl.col("Moneda_Id").max().alias("monedaux"),
        ((pl.col("Monto_Pagado_Mn") / pl.col("TC")).sum() / 1_000_000).alias("pago_or")
    ])
)

In [33]:
# 5) JOIN con pagos_agrup
foto_vf = fotos_recup.join(
    pagos_agrup,
    left_on=["INTER_CLAVE", "CLAVE_CREDITO"],
    right_on=["Intermediario_Id", "Numero_Credito"],
    how="left"
)

In [41]:
# 6) Selección final de columnas (similar al SELECT de SQL)
foto_vf = foto_vf.select([
    "BUCKET", "CAMBIO", "MCrédito_MM_UDIS", "MM_UDIS",
    "INTER_CLAVE", "NOMBRE", "RFC", "TIPO_PERSONA",
    "CLAVE_CREDITO", "FECHA_VALOR", "PLAZO_DIAS", "PLAZO",
    "test", "FECHA_REGISTRO_GARANTIA", "MGI (MDP)",
    "Porcentaje Garantizado", "BANCO", "FECHA_PRIMER_INCUM",
    "MONTO CREDITO (MDP)", "SALDO (MDP)", "TPRO_CLAVE",
    "CLAVE_TAXO", "TAXONOMIA", "NR_R", "FECHA_VALOR1",
    "FECHA_REGISTRO1", "NUM_GAR", "CSG", "PLAZO_BUCKET",
    pl.col("pagos_aux").alias("MPAGADO (MDP)"),
    "PAGADAS", "INCUMPLIDO", 
    pl.col("FECHA_PAGO_LIMPIA").alias("FECHA_PAGO"), "Programa_Original",
    "Programa_Id", "ESTRATO_ID", "SECTOR_ID", "ESTADO_ID",
    "Tipo_Credito_Id", "Porcentaje_Comision_Garantia",
    "TASA_ID", "Tasa_Interes", "MGI (MDP) Original",
    "AGRUPAMIENTO_ID", "ESQUEMA", "SUBESQUEMA",
    "AGRUPAMIENTO", "FONDOS_CONTRAGARANTIA", "CONREC_CLAVE",
    "Describe_Desrec", "MONTOTOTAL (MDP)",
    pl.col("recup_aux").alias("RECUPERADOS (MDP)"),
    "RESCATADOS (MDP)", "fecha_recup",
    pl.col("monedaux").alias("Moneda_Id"),
    "pago_or", "recup_or"
])

In [42]:
foto_vf

BUCKET,CAMBIO,MCrédito_MM_UDIS,MM_UDIS,INTER_CLAVE,NOMBRE,RFC,TIPO_PERSONA,CLAVE_CREDITO,FECHA_VALOR,PLAZO_DIAS,PLAZO,test,FECHA_REGISTRO_GARANTIA,MGI (MDP),Porcentaje Garantizado,BANCO,FECHA_PRIMER_INCUM,MONTO CREDITO (MDP),SALDO (MDP),TPRO_CLAVE,CLAVE_TAXO,TAXONOMIA,NR_R,FECHA_VALOR1,FECHA_REGISTRO1,NUM_GAR,CSG,PLAZO_BUCKET,MPAGADO (MDP),PAGADAS,INCUMPLIDO,FECHA_PAGO,Programa_Original,Programa_Id,ESTRATO_ID,SECTOR_ID,ESTADO_ID,Tipo_Credito_Id,Porcentaje_Comision_Garantia,TASA_ID,Tasa_Interes,MGI (MDP) Original,AGRUPAMIENTO_ID,ESQUEMA,SUBESQUEMA,AGRUPAMIENTO,FONDOS_CONTRAGARANTIA,CONREC_CLAVE,Describe_Desrec,MONTOTOTAL (MDP),RECUPERADOS (MDP),RESCATADOS (MDP),fecha_recup,Moneda_Id,pago_or,recup_or
i16,f64,f64,i32,str,str,str,str,str,datetime[μs],i32,i32,datetime[μs],datetime[μs],f64,f32,str,datetime[μs],f64,f64,i32,i32,str,str,datetime[μs],datetime[μs],i8,str,i32,f64,i8,i8,datetime[μs],i32,i32,i16,i16,i32,i16,f32,i16,f32,f64,f64,str,str,str,str,i32,str,f64,f64,f64,datetime[μs],i16,f64,f64
,,,0,"""10000144""","""ALEJANDRO CASTOR VERA TREJO""","""VETA-610328-TL8""",,"""5003873746""",2013-03-12 00:00:00,0,48,2017-03-11 00:00:00,2013-05-06 13:54:54,0.04781,70.0,"""BANCO SANTANDER""",1899-12-30 00:00:00,0.0683,0.0,31083,591200,"""GARANTIA SECTORIAL""","""NR""",2013-03-01 00:00:00,2013-05-01 13:54:54,1,"""SG""",4,,0,0,1899-12-30 00:00:00,,,1,10,1015000,,6.3,7,22.0,0.04781,1720.0,"""PP""","""Tradicional""","""Repeco""",,,,,,,,,,
1,,,0,"""10000144""","""JOSE PASTOR DE LOS ANGELES COC…","""COTP-580718-H96""",,"""5003266603""",2012-06-05 00:00:00,0,36,2015-06-05 00:00:00,2012-08-30 14:18:42,0.4,50.0,"""BANCO SANTANDER""",1899-12-30 00:00:00,0.8,0.0,31211,591250,"""GARANTIA EMPRESARIAL""","""NR""",2012-06-01 00:00:00,2012-08-01 14:18:42,1,"""SG""",3,,0,0,1899-12-30 00:00:00,,,2,10,1021000,,2.5,7,8.75,0.4,1690.0,"""PP""","""Tradicional""","""PyMe""",,3009,"""CAPITAL DE TRABAJO""",,,,,,,
,,,0,"""10000144""","""MANCILLA GRUPO DE COMERCIO INT…","""MGC-960130-FZ1""",,"""5003665382""",2012-12-14 00:00:00,0,36,2015-12-14 00:00:00,2013-01-18 15:14:52,0.021,7.0,"""BANCO SANTANDER""",1899-12-30 00:00:00,0.3,0.0,31655,591230,"""GARANTIA SUBASTA""","""NR""",2012-12-01 00:00:00,2013-01-01 15:14:52,1,"""SG""",3,,0,0,1899-12-30 00:00:00,,,2,10,1002000,,0.0,7,8.0,0.021,1885.0,"""1P""","""Subasta""","""Subasta Octubre 2012""",,3010,"""ACTIVOS FIJOS""",,,,,,,
4,,,0,"""10040012""","""ESTRUC MOLDES METALICOS SA DE …","""EMM-131023-KF2""",,"""9626665700""",2017-11-27 00:00:00,3,39,2021-02-28 00:00:00,2017-12-29 10:01:23,0.6,50.0,"""BBV BANCOMER""",1899-12-30 00:00:00,1.2,0.0,33810,591250,"""GARANTIA EMPRESARIAL""","""NR""",2017-11-01 00:00:00,2017-12-01 10:01:23,1,"""SG""",4,,0,0,1899-12-30 00:00:00,,,1,3,1009000,,2.5,7,17.700001,0.6,1690.0,"""PP""","""Tradicional""","""PyMe""",,3009,"""CAPITAL DE TRABAJO""",,,,,,,
,4.87129,9.1751e6,0,"""10040072""","""HG CONSTRUCCION Y DE SARROLLO …","""HCD-990826-FF8""","""M""","""52005238R""",2012-12-27 00:00:00,1,36,2015-12-28 00:00:00,2013-01-30 11:43:53,1.318449,70.0,"""BANCO MERCANTIL DEL NORTE""",2015-02-27 00:00:00,1.883498,0.0,31096,591200,"""GARANTIA SECTORIAL""","""NR""",2012-12-01 00:00:00,2013-01-01 11:43:53,1,"""SG""",3,0.446108,1,1,2015-07-15 00:00:00,3999,31096,1,10,1013000,25,2.0,7,12.0,1.318449,1250.0,"""PP""","""Tradicional""","""Compras de Gobierno""",,,,0.446108,,0.446108,,1,0.446108,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
-1,,,0,"""10040087""","""APICAL SA DE CV""","""API-851021-AJ8""",,"""5000774372""",2004-01-30 00:00:00,0,18,2005-07-31 00:00:00,2004-02-17 17:19:39,0.075,75.0,"""BANCO SERFIN SA""",1899-12-30 00:00:00,0.1,0.0,3033,591250,"""GARANTIA EMPRESARIAL""","""NR""",2004-01-01 00:00:00,2004-02-01 17:19:39,1,"""SG""",2,,0,0,1899-12-30 00:00:00,,,2,10,1009000,,2.0,7,16.18,0.075,1710.0,"""PP""","""Tradicional""","""PyMe-SE""",,,,,,,,,,
-1,,,0,"""465666078""","""LUIS GUILLERMO LOPEZ HERNANDEZ""","""LOHL-490621-""",,"""24143328""",2007-04-30 00:00:00,25,17,2008-10-23 00:00:00,2007-07-13 18:26:09,0.032,80.0,"""CREDITOS PRONEGOCIOS""",1899-12-30 00:00:00,0.04,0.0,3146,591150,"""GARANTIA MICROCREDITO""","""NR""",2007-04-01 00:00:00,2007-07-01 18:26:09,1,"""SG""",2,,0,0,1899-12-30 00:00:00,,,1,6,1012000,,6.61,7,39.0,0.032,1680.0,"""PP""","""Tradicional""","""Pronegocio""",,,,,,,,,,
,,,0,"""476524151""","""CAZAREZ MEDINA ALMA VERONICA""","""CAMA-700828-000""",,"""22819913""",2013-03-13 00:00:00,0,12,2014-03-13 00:00:00,2013-04-03 19:53:54,0.000361,9.64,"""INFONACOT""",1899-12-30 00:00:00,0.003748,0.0,31513,591150,"""GARANTIA MICROCREDITO""","""NR""",2013-03-01 00:00:00,2013-04-01 19:53:54,1,"""SG""",1,,0,0,1899-12-30 00:00:00,,,1,10,1025000,,0.0,7,12.0,0.000361,1260.0,"""1P""","""Subasta""","""Compu Apoyo""",,,,,,,,,,
,,,0,"""279105038""","""SANDRA ADRIANA CRUZ CASTAÑON""","""CUCS-890406-RY2""",,"""308448300019""",2016-06-01 00:00:00,0,48,2020-05-31 00:00:00,2016-07-08 09:40:46,0.006783,1.8,"""NR FINANCE MEXICO SA DE CV SOF…",1899-12-30 00:00:00,0.376857,0.0,33380,591230,"""GARANTIA SUBASTA""","""NR""",2016-06-01 00:00:00,2016-07-01 09:40:46,1,"""CG""",4,,0,0,1899-12-30 00:00:00,,,1,10,1015000,,0.0,7,12.9,0.006783,1910.0,"""1P""","""Subasta""","""Subasta Automotriz Oct 2015""",,3030,"""ADQUISICION DE TRANSPORTE DE C…",,,,,,,
