## Using AI for Anomalies Detection in Data Quality
**Description**: Implement an AI-based approach to detect anomalies in data quality.

**Steps**:
1. Use an Anomaly Detection Algorithm:
    - Use sklearn's Isolation Forest for anomaly detection.

**Example data:**

data = np.array([[25, 50000], [30, 60000], [35, 75000], [40, None], [45, 100000]])

2. Integrate with Great Expectations:
    - Generate alerts if anomalies are detected:

In [None]:
# Write your code from here

In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import IsolationForest
import great_expectations as ge

# Example data (age, income), replace None with np.nan
data = np.array([[25, 50000], [30, 60000], [35, 75000], [40, None], [45, 100000]], dtype=object)

# Convert to DataFrame and handle missing values (fill or drop)
df = pd.DataFrame(data, columns=["age", "income"])

# Replace None with np.nan for sklearn compatibility
df['income'] = pd.to_numeric(df['income'], errors='coerce')

# Fill missing income with median or a placeholder value (e.g., median)
median_income = df['income'].median()
df['income'] = df['income'].fillna(median_income)

# === Step 2: Fit Isolation Forest ===

# Initialize Isolation Forest
iso_forest = IsolationForest(contamination=0.2, random_state=42)

# Fit model
iso_forest.fit(df)

# Predict anomalies (-1 means anomaly, 1 means normal)
df['anomaly'] = iso_forest.predict(df)

# === Step 3: Integrate with Great Expectations (optional) ===

# Wrap in GE dataframe
ge_df = ge.from_pandas(df)

# Basic check - income should be >= 0
validation_result = ge_df.expect_column_values_to_be_between("income", min_value=0)

# === Step 4: Alert if anomalies detected ===

def alert_anomalies(df):
    anomalies = df[df['anomaly'] == -1]
    if len(anomalies) > 0:
        print(f"ALERT: {len(anomalies)} anomalies detected in data quality!")
        print(anomalies)
    else:
        print("No anomalies detected. Data quality is good.")

def alert_ge(validation_result):
    if not validation_result["success"]:
        print("ALERT: Data validation failed!")
    else:
        print("Data validation passed.")

# Run alerts
alert_anomalies(df)
alert_ge(validation_result)


AttributeError: module 'great_expectations' has no attribute 'from_pandas'