### Implement Data Quality with ISO 8000 Framework
**Description**: Understand the key elements of ISO 8000 and how to apply them to ensure data quality.

**Conceptual**:
1. Data Governance: Set up policies and procedures to ensure high data quality.
2. Data Profiling: Use profiling tools to understand and monitor data quality.
3. Data Standards: Establish data standards that align with ISO 8000, ensuring consistency.

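**NOTE**: This exercise assumes the data is profiled with `pandas_profiling`. That package has since been renamed `ydata-profiling` (imported as `ydata_profiling`), so the import below fails with `ModuleNotFoundError` unless the older `pandas-profiling` package is installed.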

In [1]:
# Write your code from here

import pandas as pd
from pandas_profiling import ProfileReport

# Sample data simulating customer info.
# Several entries are flawed (presumably on purpose, so the checks below have
# something to report): one CustomerID is None, one Email is None, one Email
# ("david@") has nothing after the "@", and one Age is None.
data = {
    "CustomerID": [1, 2, 3, 4, None, 6],
    "Name": ["Alice", "Bob", "Charlie", "David", "Eve", "Frank"],
    "Email": ["alice@example.com", "bob@example.com", None, "david@", "eve@example.com", "frank@example.com"],
    "Age": [25, 30, 22, 35, None, 28]
}
# One row per customer; each dict key becomes a column.
df = pd.DataFrame(data)

# ----------- Data Governance -----------
def enforce_governance(df, required_columns):
    """
    Apply the governance rules for mandatory columns to ``df``.

    Every name in ``required_columns`` must be a column of ``df``; if any
    are absent a ValueError listing them is raised. For columns that are
    present, a warning is printed for each one holding at least one null,
    followed by a closing summary line. Nothing is returned.
    """
    available = df.columns
    absent = list(filter(lambda name: name not in available, required_columns))
    if absent:
        raise ValueError(f"Missing required columns: {absent}")

    for name in required_columns:
        # Columns without nulls need no message.
        if not df[name].isnull().any():
            continue
        print(f"Warning: Column '{name}' contains null values.")

    print("Governance checks passed or warnings issued.")

# Every column of the sample data is treated as required; a missing column
# raises ValueError, while nulls only produce printed warnings.
required_columns = ["CustomerID", "Name", "Email", "Age"]
enforce_governance(df, required_columns)

# ----------- Data Profiling -----------
# Build a profile of the sample DataFrame (with explorative=True) and write it
# to an HTML file; the path is relative, so it lands in the working directory.
# NOTE(review): this step depends on the pandas_profiling import at the top of
# the cell. The recorded output shows that import failing with
# ModuleNotFoundError, so the package must be installed before this can run.
print("Generating data profiling report...")
profile = ProfileReport(df, title="Data Quality Profiling Report", explorative=True)
profile.to_file("data_quality_report.html")
print("Profiling report saved as 'data_quality_report.html'")

# ----------- Data Standards -----------
def check_data_standards(df):
    """
    Check basic data standards aligned with ISO 8000 and print violations.

    Rules checked:
    - CustomerID: unique and non-null
    - Email: non-null and shaped like ``local@domain.tld`` (a basic format
      check, not full RFC validation)
    - Age: when present, must be a positive whole number

    Args:
        df: DataFrame with "CustomerID", "Email" and "Age" columns.

    Returns:
        None. All findings are reported on stdout.
    """
    # Exactly one "@", a non-empty local part, and a dotted domain. \Z (not $)
    # so a trailing newline is not silently accepted.
    email_pattern = r"[^@\s]+@[^@\s]+\.[^@\s]+\Z"

    # CustomerID unique and not null
    customer_ids = df["CustomerID"]
    if customer_ids.isnull().any():
        print("Standard violation: CustomerID contains nulls.")
    # Nulls are dropped first: duplicated() treats NaNs as equal, which would
    # report several missing IDs as duplicates on top of the null violation.
    if customer_ids.dropna().duplicated().any():
        print("Standard violation: CustomerID contains duplicates.")

    # Email validity; na=False makes missing or non-string values invalid.
    well_formed = df["Email"].str.match(email_pattern, na=False)
    invalid_emails = df[~well_formed]
    if not invalid_emails.empty:
        print(f"Standard violation: Invalid emails found:\n{invalid_emails[['CustomerID', 'Email']]}")

    # Age validity: missing ages are not judged here; present ones must be
    # greater than zero and have no fractional part.
    ages = df["Age"]
    not_positive_integer = (ages <= 0) | (ages % 1 != 0)
    invalid_age = df[ages.notnull() & not_positive_integer]
    if not invalid_age.empty:
        print(f"Standard violation: Invalid ages found:\n{invalid_age[['CustomerID', 'Age']]}")

    print("Data standards validation completed.")

# Run the standards checks on the sample data; violations are printed, not raised.
check_data_standards(df)

ModuleNotFoundError: No module named 'pandas_profiling'