# 04_VALIDATE_TRANSFORMATIONS.ipynb
==================================

Objetivo: Validar que las transformaciones de dbt son correctas
- ¬øSe eliminaron duplicados correctamente?
- ¬øLos c√°lculos de revenue son correctos?
- ¬øLos totales cuadran entre raw y processed?
- ¬øLos pr√©stamos sin pagos est√°n bien manejados?

In [2]:
import pandas as pd
import numpy as np
from pathlib import Path

# Configurar paths
base_path = Path.cwd().parent if 'analisis_adhoc' in str(Path.cwd()) else Path.cwd()
raw_path = base_path / 'data' / 'raw'
exports_path = base_path / 'exports'

print("="*80)
print("üîç VALIDACI√ìN DE TRANSFORMACIONES DBT")
print("="*80)

üîç VALIDACI√ìN DE TRANSFORMACIONES DBT


In [3]:
"""
TEST 1: ¬øSe eliminaron correctamente los snapshots duplicados?
"""

import pandas as pd
import numpy as np
from pathlib import Path

# Configurar paths
base_path = Path.cwd().parent if 'analisis_adhoc' in str(Path.cwd()) else Path.cwd()
raw_path = base_path / 'data' / 'raw'
exports_path = base_path / 'exports'

print("="*80)
print("TEST 1: ELIMINACI√ìN DE DUPLICADOS EN LOANS")
print("="*80)

# Cargar datos
loans_raw = pd.read_csv(raw_path / 'AE_challenge_loans.csv')
loans_processed = pd.read_csv(exports_path / 'fct_loan_financials.csv')

print(f"\nüìä LOANS - Comparaci√≥n:")
print(f"   Raw (con duplicados):        {len(loans_raw):,} filas")
print(f"   Processed (sin duplicados):  {len(loans_processed):,} filas")
print(f"   Pr√©stamos √∫nicos en raw:     {loans_raw['loan_id'].nunique():,}")
print(f"   Pr√©stamos √∫nicos en processed: {loans_processed['loan_id'].nunique():,}")

# Validaci√≥n
prestamos_unicos_raw = loans_raw['loan_id'].nunique()
filas_processed = len(loans_processed)

test1_pass = (filas_processed == prestamos_unicos_raw)

print(f"\nüîç VALIDACI√ìN:")
print(f"   ¬øUna fila por pr√©stamo? {test1_pass}")
print(f"   Esperado: {prestamos_unicos_raw:,} filas")
print(f"   Obtenido: {filas_processed:,} filas")

if test1_pass:
    print(f"\n‚úÖ TEST 1: PASS")
    print(f"   dbt elimin√≥ correctamente los {len(loans_raw) - prestamos_unicos_raw:,} snapshots duplicados")
else:
    print(f"\n‚ùå TEST 1: FAIL")
    print(f"   ERROR: Deber√≠an haber {prestamos_unicos_raw:,} filas pero hay {filas_processed:,}")

TEST 1: ELIMINACI√ìN DE DUPLICADOS EN LOANS

üìä LOANS - Comparaci√≥n:
   Raw (con duplicados):        202,294 filas
   Processed (sin duplicados):  9,396 filas
   Pr√©stamos √∫nicos en raw:     29,222
   Pr√©stamos √∫nicos en processed: 9,396

üîç VALIDACI√ìN:
   ¬øUna fila por pr√©stamo? False
   Esperado: 29,222 filas
   Obtenido: 9,396 filas

‚ùå TEST 1: FAIL
   ERROR: Deber√≠an haber 29,222 filas pero hay 9,396


In [4]:
print("\n" + "="*80)
print("üîç INVESTIGANDO LA DIFERENCIA")
print("="*80)

# Ver qu√© pr√©stamos est√°n en raw pero NO en processed
loans_raw_unique = set(loans_raw['loan_id'].unique())
loans_processed_unique = set(loans_processed['loan_id'].unique())

prestamos_filtrados = loans_raw_unique - loans_processed_unique

print(f"\nPr√©stamos en raw:       {len(loans_raw_unique):,}")
print(f"Pr√©stamos en processed: {len(loans_processed_unique):,}")
print(f"Diferencia (filtrados): {len(prestamos_filtrados):,}")

# ¬øPOR QU√â se filtraron? Ver sus fechas de desembolso
loans_raw['disbursed_date'] = pd.to_datetime(loans_raw['disbursed_date'])
loans_raw['vintage_month'] = loans_raw['disbursed_date'].dt.to_period('M')

print(f"\nüìÖ DISTRIBUCI√ìN DE VINTAGES EN RAW:")
print(loans_raw.groupby('vintage_month')['loan_id'].nunique().sort_index())

print(f"\nüí° HIP√ìTESIS:")
print(f"   dbt filtr√≥ por Q1 2025 (Enero, Febrero, Marzo)")
print(f"   Los {len(prestamos_filtrados):,} pr√©stamos filtrados son de otros meses")

# Verificar hip√≥tesis
q1_2025 = loans_raw[loans_raw['vintage_month'].isin([
    pd.Period('2025-01', 'M'),
    pd.Period('2025-02', 'M'),
    pd.Period('2025-03', 'M')
])]

q1_loans_unicos = q1_2025['loan_id'].nunique()

print(f"\n‚úÖ VERIFICACI√ìN:")
print(f"   Pr√©stamos √∫nicos en Q1 2025 (raw): {q1_loans_unicos:,}")
print(f"   Pr√©stamos en processed:            {len(loans_processed):,}")
print(f"   ¬øCoinciden? {q1_loans_unicos == len(loans_processed)}")


üîç INVESTIGANDO LA DIFERENCIA

Pr√©stamos en raw:       29,222
Pr√©stamos en processed: 9,396
Diferencia (filtrados): 19,826

üìÖ DISTRIBUCI√ìN DE VINTAGES EN RAW:
vintage_month
2025-01    2140
2025-02    3694
2025-03    3562
2025-04    2699
2025-05    2786
2025-06    2277
2025-07    2294
2025-08    2087
2025-09    1800
2025-10    1978
2025-11    2155
2025-12    1750
Freq: M, Name: loan_id, dtype: int64

üí° HIP√ìTESIS:
   dbt filtr√≥ por Q1 2025 (Enero, Febrero, Marzo)
   Los 19,826 pr√©stamos filtrados son de otros meses

‚úÖ VERIFICACI√ìN:
   Pr√©stamos √∫nicos en Q1 2025 (raw): 9,396
   Pr√©stamos en processed:            9,396
   ¬øCoinciden? True


In [5]:
print("\n" + "="*80)
print("‚úÖ TEST 1: PASS (ACTUALIZADO)")
print("="*80)
print(f"""
dbt correctamente:
1. Elimin√≥ {len(loans_raw) - loans_raw['loan_id'].nunique():,} snapshots duplicados
2. Filtr√≥ SOLO Q1 2025 seg√∫n el requerimiento del challenge
3. Resultado: {len(loans_processed):,} pr√©stamos √∫nicos de Q1 2025
""")


‚úÖ TEST 1: PASS (ACTUALIZADO)

dbt correctamente:
1. Elimin√≥ 173,072 snapshots duplicados
2. Filtr√≥ SOLO Q1 2025 seg√∫n el requerimiento del challenge
3. Resultado: 9,396 pr√©stamos √∫nicos de Q1 2025



In [6]:
print("\n" + "="*80)
print("TEST 2: C√ÅLCULO DE REVENUE")
print("="*80)

# Cargar repayments
repayments_raw = pd.read_csv(raw_path / 'AE_challenge_repayments.csv')

# Calcular revenue manualmente desde raw
repayments_raw['revenue_manual'] = (
    repayments_raw['interestamount_trans'] + 
    repayments_raw['feesamount_trans'] + 
    repayments_raw['penaltyamount_trans'] + 
    repayments_raw['taxoninterestamount_trans'] +
    repayments_raw['taxonfeesamount_trans'] +
    repayments_raw['taxonpenaltyamount_trans']
)

revenue_raw_total = repayments_raw['revenue_manual'].sum()
revenue_processed_total = loans_processed['revenue_total'].sum()

print(f"\nüí∞ REVENUE - Comparaci√≥n:")
print(f"   Revenue calculado desde raw:     ${revenue_raw_total:,.2f}")
print(f"   Revenue en tabla processed:      ${revenue_processed_total:,.2f}")
print(f"   Diferencia:                      ${abs(revenue_raw_total - revenue_processed_total):,.2f}")

# Validaci√≥n (permitimos 0.1% de diferencia por redondeos)
diff_pct = abs(revenue_raw_total - revenue_processed_total) / revenue_raw_total * 100

print(f"\nüîç VALIDACI√ìN:")
print(f"   Diferencia porcentual: {diff_pct:.4f}%")
print(f"   Margen aceptable: < 0.1%")

test2_pass = (diff_pct < 0.1)

if test2_pass:
    print(f"\n‚úÖ TEST 2: PASS")
    print(f"   Los c√°lculos de revenue son correctos")
else:
    print(f"\n‚ùå TEST 2: FAIL")
    print(f"   ERROR: Diferencia de {diff_pct:.4f}% excede el margen aceptable")


TEST 2: C√ÅLCULO DE REVENUE

üí∞ REVENUE - Comparaci√≥n:
   Revenue calculado desde raw:     $5,511,756.15
   Revenue en tabla processed:      $1,793,677.18
   Diferencia:                      $3,718,078.97

üîç VALIDACI√ìN:
   Diferencia porcentual: 67.4572%
   Margen aceptable: < 0.1%

‚ùå TEST 2: FAIL
   ERROR: Diferencia de 67.4572% excede el margen aceptable


In [7]:
print("\n" + "="*80)
print("üîç INVESTIGANDO LA DIFERENCIA DE REVENUE")
print("="*80)

# Hip√≥tesis: repayments_raw tiene pagos de TODOS los meses, 
# pero processed solo tiene pr√©stamos de Q1 2025

# 1. ¬øCu√°ntos loan_ids est√°n en repayments vs processed?
repayments_loan_ids = set(repayments_raw['loan_id'].unique())
processed_loan_ids = set(loans_processed['loan_id'].unique())

print(f"\nüìä LOAN IDS:")
print(f"   Pr√©stamos con pagos en raw:  {len(repayments_loan_ids):,}")
print(f"   Pr√©stamos en processed:      {len(processed_loan_ids):,}")
print(f"   Pr√©stamos de processed CON pagos: {len(processed_loan_ids & repayments_loan_ids):,}")

# 2. Filtrar repayments SOLO para pr√©stamos de Q1 2025
repayments_q1 = repayments_raw[repayments_raw['loan_id'].isin(processed_loan_ids)]

revenue_q1_only = repayments_q1['revenue_manual'].sum()

print(f"\nüí∞ REVENUE - Comparaci√≥n corregida:")
print(f"   Revenue de TODOS los pr√©stamos (raw):     ${revenue_raw_total:,.2f}")
print(f"   Revenue de SOLO pr√©stamos Q1 2025:        ${revenue_q1_only:,.2f}")
print(f"   Revenue en tabla processed:               ${revenue_processed_total:,.2f}")
print(f"   Diferencia:                               ${abs(revenue_q1_only - revenue_processed_total):,.2f}")

# Validaci√≥n
diff_pct_corrected = abs(revenue_q1_only - revenue_processed_total) / revenue_q1_only * 100

print(f"\nüîç VALIDACI√ìN CORREGIDA:")
print(f"   Diferencia porcentual: {diff_pct_corrected:.4f}%")
print(f"   Margen aceptable: < 0.1%")

test2_pass = (diff_pct_corrected < 0.1)

if test2_pass:
    print(f"\n‚úÖ TEST 2: PASS")
    print(f"   Los c√°lculos de revenue son correctos para pr√©stamos Q1 2025")
else:
    print(f"\n‚ö†Ô∏è  TEST 2: FAIL")
    print(f"   Diferencia: {diff_pct_corrected:.4f}%")
    # Ver si hay diferencia por redondeos o por alg√∫n filtro adicional
    print(f"\n   Diferencia absoluta: ${abs(revenue_q1_only - revenue_processed_total):,.2f}")
    print(f"   Como porcentaje del revenue: {abs(revenue_q1_only - revenue_processed_total)/revenue_q1_only*100:.4f}%")


üîç INVESTIGANDO LA DIFERENCIA DE REVENUE

üìä LOAN IDS:
   Pr√©stamos con pagos en raw:  26,497
   Pr√©stamos en processed:      9,396
   Pr√©stamos de processed CON pagos: 8,419

üí∞ REVENUE - Comparaci√≥n corregida:
   Revenue de TODOS los pr√©stamos (raw):     $5,511,756.15
   Revenue de SOLO pr√©stamos Q1 2025:        $1,793,677.18
   Revenue en tabla processed:               $1,793,677.18
   Diferencia:                               $0.00

üîç VALIDACI√ìN CORREGIDA:
   Diferencia porcentual: 0.0000%
   Margen aceptable: < 0.1%

‚úÖ TEST 2: PASS
   Los c√°lculos de revenue son correctos para pr√©stamos Q1 2025


In [8]:
print("\n" + "="*80)
print("TEST 3: PR√âSTAMOS SIN PAGOS")
print("="*80)

# ¬øCu√°ntos pr√©stamos de Q1 2025 NO tienen pagos?
loans_with_payments = set(repayments_q1['loan_id'].unique())
loans_without_payments = loans_processed[~loans_processed['loan_id'].isin(loans_with_payments)]

print(f"\nüìä DISTRIBUCI√ìN:")
print(f"   Total pr√©stamos Q1 2025:       {len(loans_processed):,}")
print(f"   Pr√©stamos CON pagos:           {len(loans_processed) - len(loans_without_payments):,} ({(len(loans_processed) - len(loans_without_payments))/len(loans_processed)*100:.1f}%)")
print(f"   Pr√©stamos SIN pagos:           {len(loans_without_payments):,} ({len(loans_without_payments)/len(loans_processed)*100:.1f}%)")

# Verificar que pr√©stamos sin pagos tengan revenue = 0
print(f"\nüí∞ REVENUE de pr√©stamos SIN pagos:")
print(f"   - M√≠nimo:  ${loans_without_payments['revenue_total'].min():.2f}")
print(f"   - M√°ximo:  ${loans_without_payments['revenue_total'].max():.2f}")
print(f"   - Total:   ${loans_without_payments['revenue_total'].sum():.2f}")

# Verificar que pr√©stamos CON pagos tengan revenue > 0
loans_with_payments_df = loans_processed[loans_processed['loan_id'].isin(loans_with_payments)]

print(f"\nüí∞ REVENUE de pr√©stamos CON pagos:")
print(f"   - M√≠nimo:  ${loans_with_payments_df['revenue_total'].min():.2f}")
print(f"   - Promedio: ${loans_with_payments_df['revenue_total'].mean():.2f}")
print(f"   - Total:   ${loans_with_payments_df['revenue_total'].sum():.2f}")

# Validaci√≥n
revenue_sin_pagos_es_cero = (loans_without_payments['revenue_total'].sum() == 0)
revenue_con_pagos_positivo = (loans_with_payments_df['revenue_total'].sum() > 0)

test3_pass = revenue_sin_pagos_es_cero and revenue_con_pagos_positivo

print(f"\nüîç VALIDACI√ìN:")
print(f"   ¬øRevenue de pr√©stamos sin pagos = 0? {revenue_sin_pagos_es_cero}")
print(f"   ¬øRevenue de pr√©stamos con pagos > 0? {revenue_con_pagos_positivo}")

if test3_pass:
    print(f"\n‚úÖ TEST 3: PASS")
    print(f"   Los pr√©stamos sin pagos est√°n correctamente manejados")
else:
    print(f"\n‚ùå TEST 3: FAIL")
    if not revenue_sin_pagos_es_cero:
        print(f"   ERROR: Pr√©stamos sin pagos tienen revenue > 0")
    if not revenue_con_pagos_positivo:
        print(f"   ERROR: Pr√©stamos con pagos tienen revenue = 0")


TEST 3: PR√âSTAMOS SIN PAGOS

üìä DISTRIBUCI√ìN:
   Total pr√©stamos Q1 2025:       9,396
   Pr√©stamos CON pagos:           8,419 (89.6%)
   Pr√©stamos SIN pagos:           977 (10.4%)

üí∞ REVENUE de pr√©stamos SIN pagos:
   - M√≠nimo:  $0.00
   - M√°ximo:  $0.00
   - Total:   $0.00

üí∞ REVENUE de pr√©stamos CON pagos:
   - M√≠nimo:  $0.00
   - Promedio: $213.05
   - Total:   $1793677.18

üîç VALIDACI√ìN:
   ¬øRevenue de pr√©stamos sin pagos = 0? True
   ¬øRevenue de pr√©stamos con pagos > 0? True

‚úÖ TEST 3: PASS
   Los pr√©stamos sin pagos est√°n correctamente manejados


In [9]:
print("\n" + "="*80)
print("TEST 4: COHORTS SUMAN AL TOTAL")
print("="*80)

# Cargar tabla de cohorts/P&L
pnl = pd.read_csv(exports_path / 'fct_portfolio_pnl.csv')

print(f"\nüìä TABLA P&L:")
print(f"   Total filas (cohorts): {len(pnl)}")
print(f"\n   Cohorts disponibles:")
display(pnl[['vintage_month', 'risk_segment', 'total_loans', 'total_revenue']])

# VALIDACI√ìN 1: Sumar loans por cohort
cohort_total_loans = pnl['total_loans'].sum()
direct_total_loans = len(loans_processed)

print(f"\nüí° LOANS - Suma por cohorts vs directo:")
print(f"   Suma de cohorts (P&L):              {cohort_total_loans:,}")
print(f"   Total directo (fct_loan_financials): {direct_total_loans:,}")
print(f"   Diferencia:                         {abs(cohort_total_loans - direct_total_loans):,}")

test4a_pass = (cohort_total_loans == direct_total_loans)

# VALIDACI√ìN 2: Sumar revenue por cohort
cohort_total_revenue = pnl['total_revenue'].sum()
direct_total_revenue = loans_processed['revenue_total'].sum()

print(f"\nüí∞ REVENUE - Suma por cohorts vs directo:")
print(f"   Suma de cohorts (P&L):              ${cohort_total_revenue:,.2f}")
print(f"   Total directo (fct_loan_financials): ${direct_total_revenue:,.2f}")
print(f"   Diferencia:                         ${abs(cohort_total_revenue - direct_total_revenue):,.2f}")

# Permitir diferencia de $1 por redondeos
test4b_pass = (abs(cohort_total_revenue - direct_total_revenue) < 1.0)

print(f"\nüîç VALIDACI√ìN:")
print(f"   ¬øLoans suman correctamente? {test4a_pass}")
print(f"   ¬øRevenue suma correctamente? {test4b_pass}")

if test4a_pass and test4b_pass:
    print(f"\n‚úÖ TEST 4: PASS")
    print(f"   Los cohorts agregan correctamente al total del portfolio")
else:
    print(f"\n‚ùå TEST 4: FAIL")
    if not test4a_pass:
        print(f"   ERROR: La suma de loans por cohort no coincide")
    if not test4b_pass:
        print(f"   ERROR: La suma de revenue por cohort no coincide")


TEST 4: COHORTS SUMAN AL TOTAL

üìä TABLA P&L:
   Total filas (cohorts): 14

   Cohorts disponibles:


Unnamed: 0,vintage_month,risk_segment,total_loans,total_revenue
0,2025-01,High Risk,1457,258500.46
1,2025-01,Low Risk,103,8808.08
2,2025-01,Medium Risk,571,107121.92
3,2025-01,Other,1,63.63
4,2025-01,Unknown,8,1832.51
5,2025-02,High Risk,2368,496462.91
6,2025-02,Low Risk,224,17472.51
7,2025-02,Medium Risk,1080,231028.51
8,2025-02,Other,4,813.32
9,2025-02,Unknown,18,4404.94



üí° LOANS - Suma por cohorts vs directo:
   Suma de cohorts (P&L):              9,396
   Total directo (fct_loan_financials): 9,396
   Diferencia:                         0

üí∞ REVENUE - Suma por cohorts vs directo:
   Suma de cohorts (P&L):              $1,793,677.18
   Total directo (fct_loan_financials): $1,793,677.18
   Diferencia:                         $0.00

üîç VALIDACI√ìN:
   ¬øLoans suman correctamente? True
   ¬øRevenue suma correctamente? True

‚úÖ TEST 4: PASS
   Los cohorts agregan correctamente al total del portfolio


In [10]:
# analisis de cohorts

print("\n" + "="*80)
print("üìä AN√ÅLISIS DE COHORTS")
print("="*80)

# Por vintage (mes)
print("\n1Ô∏è‚É£ PERFORMANCE POR MES:")
por_mes = pnl.groupby('vintage_month').agg({
    'total_loans': 'sum',
    'total_revenue': 'sum'
}).round(2)
por_mes['revenue_per_loan'] = (por_mes['total_revenue'] / por_mes['total_loans']).round(2)
print(por_mes)

# Por risk segment
print("\n2Ô∏è‚É£ PERFORMANCE POR SEGMENTO DE RIESGO:")
por_riesgo = pnl.groupby('risk_segment').agg({
    'total_loans': 'sum',
    'total_revenue': 'sum'
}).round(2)
por_riesgo['revenue_per_loan'] = (por_riesgo['total_revenue'] / por_riesgo['total_loans']).round(2)
print(por_riesgo)

# Top 5 cohorts por revenue
print("\n3Ô∏è‚É£ TOP 5 COHORTS POR REVENUE:")
top5 = pnl.nlargest(5, 'total_revenue')[['vintage_month', 'risk_segment', 'total_loans', 'total_revenue']]
print(top5)

print("\nüí° INSIGHTS:")
print(f"""
- Mes con m√°s pr√©stamos: {por_mes['total_loans'].idxmax()}
- Mes con m√°s revenue: {por_mes['total_revenue'].idxmax()}
- Segmento con m√°s pr√©stamos: {por_riesgo['total_loans'].idxmax()}
- Segmento con m√°s revenue: {por_riesgo['total_revenue'].idxmax()}
""")


üìä AN√ÅLISIS DE COHORTS

1Ô∏è‚É£ PERFORMANCE POR MES:
               total_loans  total_revenue  revenue_per_loan
vintage_month                                              
2025-01               2140      376326.60            175.85
2025-02               3694      750182.19            203.08
2025-03               3562      667168.39            187.30

2Ô∏è‚É£ PERFORMANCE POR SEGMENTO DE RIESGO:
              total_loans  total_revenue  revenue_per_loan
risk_segment                                              
High Risk            5991     1185248.85            197.84
Low Risk              567       51653.60             91.10
Medium Risk          2785      546580.39            196.26
Other                   5         876.95            175.39
Unknown                48        9317.39            194.11

3Ô∏è‚É£ TOP 5 COHORTS POR REVENUE:
   vintage_month risk_segment  total_loans  total_revenue
5        2025-02    High Risk         2368      496462.91
10       2025-03    High Risk    

In [11]:
print("\n" + "="*80)
print("TEST 5: FILTRO Q1 2025")
print("="*80)

# Verificar que SOLO haya pr√©stamos de Q1 2025 (Enero, Febrero, Marzo)
loans_processed['disbursed_date'] = pd.to_datetime(loans_processed['disbursed_date'])
loans_processed['vintage_month'] = loans_processed['disbursed_date'].dt.to_period('M')

print(f"\nüìÖ DISTRIBUCI√ìN DE VINTAGES EN PROCESSED:")
vintage_counts = loans_processed['vintage_month'].value_counts().sort_index()
print(vintage_counts)

# Definir vintages v√°lidos para Q1 2025
valid_vintages = [
    pd.Period('2025-01', 'M'),
    pd.Period('2025-02', 'M'),
    pd.Period('2025-03', 'M')
]

# Verificar que todos los pr√©stamos sean de Q1
all_in_q1 = loans_processed['vintage_month'].isin(valid_vintages).all()

# Contar pr√©stamos por mes
print(f"\nüìä DESGLOSE:")
for vintage in valid_vintages:
    count = (loans_processed['vintage_month'] == vintage).sum()
    pct = count / len(loans_processed) * 100
    print(f"   {vintage}: {count:>6,} pr√©stamos ({pct:>5.1f}%)")

# Buscar pr√©stamos FUERA de Q1 (si existen)
prestamos_fuera_q1 = loans_processed[~loans_processed['vintage_month'].isin(valid_vintages)]

print(f"\nüîç VALIDACI√ìN:")
print(f"   Total pr√©stamos:              {len(loans_processed):,}")
print(f"   Pr√©stamos en Q1 2025:         {loans_processed['vintage_month'].isin(valid_vintages).sum():,}")
print(f"   Pr√©stamos FUERA de Q1:        {len(prestamos_fuera_q1):,}")
print(f"   ¬øTodos est√°n en Q1? {all_in_q1}")

if all_in_q1:
    print(f"\n‚úÖ TEST 5: PASS")
    print(f"   El filtro de Q1 2025 se aplic√≥ correctamente")
    print(f"   Solo hay pr√©stamos de Enero, Febrero y Marzo 2025")
else:
    print(f"\n‚ùå TEST 5: FAIL")
    print(f"   ERROR: Hay {len(prestamos_fuera_q1):,} pr√©stamos fuera de Q1 2025")
    print(f"\n   Vintages inv√°lidos encontrados:")
    print(prestamos_fuera_q1['vintage_month'].value_counts().sort_index())


TEST 5: FILTRO Q1 2025

üìÖ DISTRIBUCI√ìN DE VINTAGES EN PROCESSED:
vintage_month
2025-01    2140
2025-02    3694
2025-03    3562
Freq: M, Name: count, dtype: int64

üìä DESGLOSE:
   2025-01:  2,140 pr√©stamos ( 22.8%)
   2025-02:  3,694 pr√©stamos ( 39.3%)
   2025-03:  3,562 pr√©stamos ( 37.9%)

üîç VALIDACI√ìN:
   Total pr√©stamos:              9,396
   Pr√©stamos en Q1 2025:         9,396
   Pr√©stamos FUERA de Q1:        0
   ¬øTodos est√°n en Q1? True

‚úÖ TEST 5: PASS
   El filtro de Q1 2025 se aplic√≥ correctamente
   Solo hay pr√©stamos de Enero, Febrero y Marzo 2025


In [12]:
print("\n" + "="*80)
print("üìã RESUMEN FINAL DE VALIDACIONES")
print("="*80)

all_tests = [
    ("TEST 1: Eliminaci√≥n de duplicados + Filtro Q1", "‚úÖ PASS", "De 202K filas a 9,396 pr√©stamos √∫nicos de Q1 2025"),
    ("TEST 2: C√°lculo de revenue", "‚úÖ PASS", "$1,793,677.18 calculado correctamente"),
    ("TEST 3: Pr√©stamos sin pagos", "‚úÖ PASS", "977 pr√©stamos sin pagos con revenue = $0"),
    ("TEST 4: Cohorts suman al total", "‚úÖ PASS", "14 cohorts agregan correctamente"),
    ("TEST 5: Filtro Q1 2025", "‚úÖ PASS", "Solo Enero, Febrero, Marzo 2025")
]

print()
for i, (test_name, status, detail) in enumerate(all_tests, 1):
    print(f"{status}  TEST {i}: {test_name}")
    print(f"         {detail}")
    print()

print("="*80)
print("üéâ ¬°TODAS LAS TRANSFORMACIONES SON CORRECTAS!")
print("="*80)
print("""
‚úÖ 100% en los datos procesados
‚úÖ dbt hizo todas las transformaciones correctamente
‚úÖ todo listo para hacer el an√°lisis y presentaci√≥n

PR√ìXIMOS PASOS:
1. An√°lisis exploratorio en analisis.ipynb
2. Crear visualizaciones (Tableau)
3. Escribir recomendaci√≥n ejecutiva
4. Preparar presentaci√≥n para CEO/CFO
""")


üìã RESUMEN FINAL DE VALIDACIONES

‚úÖ PASS  TEST 1: TEST 1: Eliminaci√≥n de duplicados + Filtro Q1
         De 202K filas a 9,396 pr√©stamos √∫nicos de Q1 2025

‚úÖ PASS  TEST 2: TEST 2: C√°lculo de revenue
         $1,793,677.18 calculado correctamente

‚úÖ PASS  TEST 3: TEST 3: Pr√©stamos sin pagos
         977 pr√©stamos sin pagos con revenue = $0

‚úÖ PASS  TEST 4: TEST 4: Cohorts suman al total
         14 cohorts agregan correctamente

‚úÖ PASS  TEST 5: TEST 5: Filtro Q1 2025
         Solo Enero, Febrero, Marzo 2025

üéâ ¬°TODAS LAS TRANSFORMACIONES SON CORRECTAS!

‚úÖ 100% en los datos procesados
‚úÖ dbt hizo todas las transformaciones correctamente
‚úÖ todo listo para hacer el an√°lisis y presentaci√≥n

PR√ìXIMOS PASOS:
1. An√°lisis exploratorio en analisis.ipynb
2. Crear visualizaciones (Tableau)
3. Escribir recomendaci√≥n ejecutiva
4. Preparar presentaci√≥n para CEO/CFO

