### Task 1: Validate Data with a Custom Expectation in Great Expectations
**Description**: Create a custom expectation and validate data with Great Expectations.

**Load a sample DataFrame**

```python
data = {
    'age': [25, 30, 35, 40, 45],
    'income': [50000, 60000, 75000, None, 100000]
}
```

In [None]:
# Write your code from here
import great_expectations as ge
import pandas as pd

# Sample data: five rows, one of which has no income value
data = {
    'age': [25, 30, 35, 40, 45],
    'income': [50000, 60000, 75000, None, 100000],
}

# Wrap a pandas DataFrame so expectation methods can be called on it
df = ge.from_pandas(pd.DataFrame(data))

# Expectation 1: ages are checked against min_value=18 and max_value=99
age_check = df.expect_column_values_to_be_between(
    "age",
    min_value=18,
    max_value=99,
)

# Expectation 2: income must not be null. This calls the not-null
# expectation method on the wrapped frame; no custom expectation class is
# defined in this cell.
income_check = df.expect_column_values_to_not_be_null("income")

# Report the "success" entry of each result, age first and income second
for label, outcome in (
    ("Age Expectation Result:", age_check),
    ("Income Not Null Result:", income_check),
):
    print(label, outcome["success"])


### Task 2: Implement a Basic Alert System for Data Quality Drops
**Description**: Set up a basic alert system that triggers when a data quality index (DQI) drops below a configurable threshold.

In [None]:
# Write your code from here
def check_data_quality(dqi, threshold=90):
    """Report whether a data quality index has fallen below a threshold.

    Prints an alert message when ``dqi`` is strictly less than ``threshold``
    and an "acceptable" message otherwise.

    Args:
        dqi: Data quality index value; it is printed followed by ``%``.
        threshold: Value that ``dqi`` is compared against. Defaults to 90.

    Returns:
        True if the alert was triggered (``dqi < threshold``), False otherwise.
    """
    # Guard clause: handle the healthy case first and leave early.
    if not dqi < threshold:
        print(f"Data Quality is acceptable. DQI = {dqi}%")
        return False

    print(f"ALERT: Data Quality dropped below threshold! DQI = {dqi}%")
    return True

# Example usage
# A DQI of 87 is below the default threshold of 90, so the alert message is printed.
check_data_quality(87)  # Triggers alert
# A DQI of 95 is not below 90, so the "acceptable" message is printed.
check_data_quality(95)  # Does not trigger alert


### Task 3: Real-time Data Quality Monitoring with Python and Great Expectations
**Description**: Implement a system that monitors data quality in real-time.

In [None]:
# Write your code from here
import pandas as pd
import great_expectations as ge
import time

def simulate_streaming_data():
    """Return a fixed list of sample records that stands in for a data stream.

    Each record is a dict with the keys ``'id'``, ``'age'`` and ``'income'``.
    The sample includes one record with no income and one with a negative
    age. A new list of new dicts is built on every call.
    """
    columns = ('id', 'age', 'income')
    rows = (
        (1, 25, 50000),
        (2, 30, None),       # Income missing
        (3, -5, 60000),      # Invalid age
        (4, 45, 70000),
    )
    return [dict(zip(columns, row)) for row in rows]

def validate_record(record):
    """Validate one record with Great Expectations checks.

    The record is placed in a one-row DataFrame and wrapped with
    ``ge.from_pandas``. Two checks run in order, and the first failure
    decides the returned message:

    1. ``income`` must not be null.
    2. ``age`` is checked with ``min_value=0`` and ``max_value=100``.

    Args:
        record: A single record; it is passed to ``pd.DataFrame`` as the only
            element of a list.

    Returns:
        A ``(is_valid, message)`` tuple: ``(False, "Missing income")``,
        ``(False, "Invalid age range")`` or ``(True, "Record is valid")``.
    """
    batch = ge.from_pandas(pd.DataFrame([record]))

    # Each entry pairs a deferred expectation with its failure message, so a
    # later check is only evaluated when every earlier one has passed.
    checks = (
        (
            lambda: batch.expect_column_values_to_not_be_null('income'),
            "Missing income",
        ),
        (
            lambda: batch.expect_column_values_to_be_between(
                'age', min_value=0, max_value=100
            ),
            "Invalid age range",
        ),
    )

    for run_check, failure_message in checks:
        if not run_check().success:
            return False, failure_message

    return True, "Record is valid"

def monitor():
    """Validate each simulated record in turn and print the outcome.

    Iterates over the records from ``simulate_streaming_data()``, runs
    ``validate_record`` on each, prints the record with its validity flag and
    message, and then pauses for one second before the next record.
    """
    stream = simulate_streaming_data()
    for entry in stream:
        ok, note = validate_record(entry)
        print(f"Record: {entry} → Valid: {ok} → Message: {note}")
        # One-second pause after every record, including the last one.
        time.sleep(1)

# Run the monitoring loop: validates each simulated record and prints the outcome.
monitor()
