## Real-World Case Studies

### Finance - Fraud Detection Models:
**Description**: Analyze a financial dataset, define SLAs for data accuracy and
completeness, and ensure high data quality for fraud detection models.

In [1]:
# write your code from here

import pandas as pd
import numpy as np
import unittest

# -----------------------------
# 1. Simulate Financial Data
# -----------------------------
def generate_financial_data(n=1000, include_errors=False):
    np.random.seed(42)
    data = {
        'transaction_id': range(1, n + 1),
        'amount': np.random.exponential(100, n),
        'transaction_type': np.random.choice(['withdrawal', 'deposit', 'transfer'], size=n),
        'account_age_days': np.random.randint(1, 2000, n),
        'is_fraud': np.random.choice([0, 1], size=n, p=[0.97, 0.03])
    }
    df = pd.DataFrame(data)
    
    # Introduce errors if requested
    if include_errors:
        df.loc[0:20, 'amount'] = -1  # Negative amounts (invalid)
        df.loc[21:25, 'transaction_type'] = 'unknown'  # Invalid type
        df.loc[30:35, 'is_fraud'] = 2  # Invalid fraud labels
        df.loc[40:50, 'account_age_days'] = None  # Missing values
    return df

# -----------------------------
# 2. SLA Checks
# -----------------------------

def check_data_completeness(df, threshold=0.98):
    total = df.size
    non_null = df.count().sum()
    completeness = non_null / total
    print(f"[SLA: Completeness] Non-null ratio: {completeness:.2%}")
    return completeness >= threshold

def check_data_accuracy(df):
    issues = {}

    # Accuracy: amount should be non-negative
    neg_amounts = df[df['amount'] < 0]
    issues['invalid_amount'] = len(neg_amounts)

    # Accuracy: transaction_type should be valid
    valid_types = {'withdrawal', 'deposit', 'transfer'}
    invalid_types = df[~df['transaction_type'].isin(valid_types)]
    issues['invalid_type'] = len(invalid_types)

    # Accuracy: is_fraud should be 0 or 1
    invalid_fraud = df[~df['is_fraud'].isin([0, 1])]
    issues['invalid_fraud_label'] = len(invalid_fraud)

    # Display issues
    for k, v in issues.items():
        print(f"[SLA: Accuracy] {k} = {v} rows")

    return all(v == 0 for v in issues.values())

# -----------------------------
# 3. Run SLA Evaluation
# -----------------------------
if __name__ == "__main__":
    print("=== Running SLA Checks on Financial Transactions ===\n")

    df = generate_financial_data(include_errors=True)

    completeness_pass = check_data_completeness(df)
    accuracy_pass = check_data_accuracy(df)

    print("\n=== SLA Results ===")
    print(f"✅ Completeness SLA Passed? {'Yes' if completeness_pass else 'No'}")
    print(f"✅ Accuracy SLA Passed? {'Yes' if accuracy_pass else 'No'}")

# -----------------------------
# 4. Unit Tests
# -----------------------------
class TestFinancialSLAs(unittest.TestCase):

    def test_completeness_pass(self):
        df = generate_financial_data(include_errors=False)
        self.assertTrue(check_data_completeness(df))

    def test_completeness_fail(self):
        df = generate_financial_data(include_errors=True)
        self.assertFalse(check_data_completeness(df))

    def test_accuracy_pass(self):
        df = generate_financial_data(include_errors=False)
        self.assertTrue(check_data_accuracy(df))

    def test_accuracy_fail(self):
        df = generate_financial_data(include_errors=True)
        self.assertFalse(check_data_accuracy(df))

if __name__ == '__main__':
    print("\n=== Running SLA Unit Tests ===")
    unittest.main(argv=[''], exit=False)

..F.
FAIL: test_completeness_fail (__main__.TestFinancialSLAs)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/tmp/ipykernel_75955/3820697904.py", line 88, in test_completeness_fail
    self.assertFalse(check_data_completeness(df))
AssertionError: np.True_ is not false

----------------------------------------------------------------------
Ran 4 tests in 0.010s

FAILED (failures=1)


=== Running SLA Checks on Financial Transactions ===

[SLA: Completeness] Non-null ratio: 99.78%
[SLA: Accuracy] invalid_amount = 21 rows
[SLA: Accuracy] invalid_type = 5 rows
[SLA: Accuracy] invalid_fraud_label = 6 rows

=== SLA Results ===
✅ Completeness SLA Passed? Yes
✅ Accuracy SLA Passed? No

=== Running SLA Unit Tests ===
[SLA: Accuracy] invalid_amount = 21 rows
[SLA: Accuracy] invalid_type = 5 rows
[SLA: Accuracy] invalid_fraud_label = 6 rows
[SLA: Accuracy] invalid_amount = 0 rows
[SLA: Accuracy] invalid_type = 0 rows
[SLA: Accuracy] invalid_fraud_label = 0 rows
[SLA: Completeness] Non-null ratio: 99.78%
[SLA: Completeness] Non-null ratio: 100.00%
