## Validaciones RAW

In [18]:
import json
import great_expectations as gx
from great_expectations.core.expectation_suite import ExpectationSuite
import pandas as pd

# Ruta al archivo JSON de expectations
FILE = "raw/2025-07-12_20-22-34_articulos_espaciales_test.csv"
SUITE_PATH = "expectations/raw_articles.json"

df = pd.read_csv(FILE, delimiter=";")

# Cargar el JSON de expectations

with open(SUITE_PATH, 'r') as f:
    suite_dict = json.load(f)

# Convertir el dict a un objeto suite de GX
suite = ExpectationSuite(**suite_dict)
# Crear un Validator de GX (con contexto de DataFrame)
gx_df = gx.from_pandas(df)

# Validar el DataFrame con las expectativas
results = gx_df.validate(expectation_suite=suite)

# Imprimir los resultados de la validación
for r in results['results']:
    print("Expectativa:", r["expectation_config"])
    print("Parámetros:", r.get("kwargs"))
    print("¿Cumple?:", "Sí" if r["success"] else "No")
    print("Detalles:", r["result"])
    print("-" * 50)

Expectativa: {
  "expectation_type": "expect_column_values_to_not_be_null",
  "kwargs": {
    "column": "id",
    "mostly": 1.0
  },
  "meta": {
    "notes": "El campo ID no debe contener valores nulos",
    "stop_pipeline_on_failure": true
  }
}
Parámetros: None
¿Cumple?: Sí
Detalles: {'element_count': 500, 'unexpected_count': 0, 'unexpected_percent': 0.0, 'unexpected_percent_total': 0.0, 'partial_unexpected_list': []}
--------------------------------------------------
Expectativa: {
  "expectation_type": "expect_column_values_to_be_unique",
  "kwargs": {
    "column": "id"
  },
  "meta": {
    "notes": "El campo ID debe contener valores \u00c3\u00banicos",
    "stop_pipeline_on_failure": true
  }
}
Parámetros: None
¿Cumple?: Sí
Detalles: {'element_count': 500, 'missing_count': 0, 'missing_percent': 0.0, 'unexpected_count': 0, 'unexpected_percent': 0.0, 'unexpected_percent_total': 0.0, 'unexpected_percent_nonmissing': 0.0, 'partial_unexpected_list': []}
-------------------------------

In [24]:
import json
import great_expectations as gx
from great_expectations.core.expectation_suite import ExpectationSuite
import pandas as pd


def validate_csv_with_expectations(file, suite_path):
    df = pd.read_csv(file, delimiter=";")

    # Cargar el JSON de expectations

    with open(suite_path, 'r') as f:
        suite_dict = json.load(f)

    # Convertir el dict a un objeto suite de GX
    suite = ExpectationSuite(**suite_dict)
    # Crear un Validator de GX (con contexto de DataFrame)
    gx_df = gx.from_pandas(df)

    # Validar el DataFrame con las expectativas
    results = gx_df.validate(expectation_suite=suite)
    # Imprimir los resultados de la validación
    for r in results['results']:
        print("Expectativa:", r["expectation_config"])
        print("Parámetros:", r.get("kwargs"))
        print("¿Cumple?:", "Sí" if r["success"] else "No")
        print("Detalles:", r["result"])
        if not r["success"]:
            raise ValueError(f"Validation failed for expectation: {r['expectation_config']}")
        print("-" * 50)

In [26]:
FILE2 = "staging/2025-07-12_20-39-59_articulos_espaciales_test.csv"
SUITE_PATH2 = "expectations/staging_articles.json"


In [28]:
validate_csv_with_expectations(FILE2, SUITE_PATH2)

Expectativa: {
  "expectation_type": "expect_column_values_to_not_be_null",
  "kwargs": {
    "column": "id"
  },
  "meta": {}
}
Parámetros: None
¿Cumple?: Sí
Detalles: {'element_count': 500, 'unexpected_count': 0, 'unexpected_percent': 0.0, 'unexpected_percent_total': 0.0, 'partial_unexpected_list': []}
--------------------------------------------------
Expectativa: {
  "expectation_type": "expect_column_values_to_be_unique",
  "kwargs": {
    "column": "id"
  },
  "meta": {}
}
Parámetros: None
¿Cumple?: Sí
Detalles: {'element_count': 500, 'missing_count': 0, 'missing_percent': 0.0, 'unexpected_count': 0, 'unexpected_percent': 0.0, 'unexpected_percent_total': 0.0, 'unexpected_percent_nonmissing': 0.0, 'partial_unexpected_list': []}
--------------------------------------------------
Expectativa: {
  "expectation_type": "expect_column_values_to_match_strftime_format",
  "kwargs": {
    "column": "published_at",
    "strftime_format": "%Y/%m/%d"
  },
  "meta": {}
}
Parámetros: None
¿Cump

ValueError: Validation failed for expectation: {
  "expectation_type": "expect_column_values_to_match_strftime_format",
  "kwargs": {
    "column": "published_at",
    "strftime_format": "%Y/%m/%d"
  },
  "meta": {}
}