## Real-World Case Studies

### Finance - Fraud Detection Models:
**Description**: Analyze a financial dataset, define SLAs for data accuracy and
completeness, and ensure high data quality for fraud detection models.

In [1]:
# write your code from here
import pandas as pd

def evaluate_data_quality(
    df,
    required_columns,
    value_validators=None,
    duplicate_check_cols=None,
    *,
    max_missing_rate=0.01,
    max_invalid_rate=0.005,
):
    """
    Evaluate data quality against SLAs for fraud detection use case.

    Three checks are run and their metrics collected in one flat dict:
    completeness (share of nulls per required column), accuracy (share of
    values rejected by a per-column validator) and uniqueness (duplicate
    rows over a set of key columns).

    Parameters:
        df (pd.DataFrame): Dataset to evaluate.
        required_columns (list): List of essential columns to check for
            completeness. A column that is absent from ``df`` is reported
            as 100% missing instead of raising ``KeyError``.
        value_validators (dict): Optional dict of {col: validation_function}.
            Each function receives one cell value and returns a truthy
            result for valid values. Columns not present in ``df`` are
            skipped.
        duplicate_check_cols (list): Columns to check for duplicates.
        max_missing_rate (float): Keyword-only. Highest tolerated null rate
            per required column; a strictly greater rate is a violation.
        max_invalid_rate (float): Keyword-only. Highest tolerated invalid
            rate per validated column; a strictly greater rate is a
            violation.

    Returns:
        dict: Dictionary of SLA violations and summary metrics. Keys are
            ``missing_rate_<col>``, ``invalid_rate_<col>`` and
            ``duplicate_count``; a ``SLA_violation_*`` key (value ``True``)
            is added only when the matching check fails. Rates are plain
            ``float`` and the duplicate count is a plain ``int``.
    """
    results = {}
    total_rows = len(df)
    validators = value_validators or {}

    # Completeness check
    for col in required_columns:
        if col not in df.columns:
            # A required column that does not exist is entirely missing.
            rate = 1.0
        elif total_rows == 0:
            # mean() of an empty column is NaN; report a clean zero instead.
            rate = 0.0
        else:
            rate = float(df[col].isnull().mean())
        results[f"missing_rate_{col}"] = rate
        if rate > max_missing_rate:
            results[f"SLA_violation_missing_{col}"] = True

    # Accuracy check (custom validators per column)
    for col, validator in validators.items():
        if col not in df.columns:
            continue
        if total_rows == 0:
            invalid_rate = 0.0
        else:
            # Coerce to bool before inverting: on int or object results `~`
            # is a bitwise NOT (~1 == -2), which corrupts the rate.
            is_valid = df[col].apply(validator).astype(bool)
            invalid_rate = float((~is_valid).mean())
        results[f"invalid_rate_{col}"] = invalid_rate
        if invalid_rate > max_invalid_rate:
            results[f"SLA_violation_invalid_{col}"] = True

    # Duplicate check
    if duplicate_check_cols:
        # duplicated() marks every repeat after the first occurrence.
        dup_count = int(df.duplicated(subset=duplicate_check_cols).sum())
        results["duplicate_count"] = dup_count
        if dup_count > 0:
            results["SLA_violation_duplicates"] = True

    return results
