<a href="https://colab.research.google.com/github/poisonkissedsk/Production-Grade-AI-QA-Suite/blob/main/CI_CD_Automated_Testing_Scripts_Simulation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# 📒 Notebook 6: CI/CD Automated Testing Scripts Simulation (Google Colab)
# Focus: Bias Audit, Robustness Check, Performance Check

# -- SETUP --
!pip install fairlearn scikit-learn pandas numpy --quiet

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from fairlearn.metrics import demographic_parity_difference

# -- FUNCTION: Load and Prepare Data --
def load_preprocess_data():
    url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data'
    columns = [
        'age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status',
        'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss',
        'hours-per-week', 'native-country', 'income'
    ]
    df = pd.read_csv(url, names=columns, na_values=' ?', skipinitialspace=True)
    df.dropna(inplace=True)
    df['label'] = (df['income'] == '>50K').astype(int)

    categorical = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country']
    numeric = ['age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week']

    for col in categorical:
        df[col] = LabelEncoder().fit_transform(df[col])

    X = df[categorical + numeric]
    y = df['label']

    X[numeric] = StandardScaler().fit_transform(X[numeric])
    return train_test_split(X, y, stratify=y, test_size=0.25, random_state=42)

# -- FUNCTION: Train Model --
def train_model(X_train, y_train):
    model = LogisticRegression(max_iter=500)
    model.fit(X_train, y_train)
    return model

# -- CI/CD TEST 1: Performance Check --
def test_model_performance(model, X_test, y_test, threshold=0.80):
    accuracy = accuracy_score(y_test, model.predict(X_test))
    print(f"[PERFORMANCE CHECK] Accuracy: {accuracy:.4f}")
    if accuracy >= threshold:
        print("[PASS] Model meets performance threshold.")
        return True
    else:
        print("[FAIL] Model below acceptable accuracy threshold!")
        return False

# -- CI/CD TEST 2: Bias Audit (Demographic Parity) --
def test_bias_audit(model, X_test, y_test, feature_name='sex', max_dpd=0.2):
    sensitive_feature = X_test[feature_name]
    preds = model.predict(X_test)
    dpd = demographic_parity_difference(y_test, preds, sensitive_features=sensitive_feature)
    print(f"[BIAS AUDIT] Demographic Parity Difference ({feature_name}): {dpd:.4f}")
    if abs(dpd) <= max_dpd:
        print("[PASS] Bias within acceptable limits.")
        return True
    else:
        print("[FAIL] Detected bias above threshold!")
        return False

# -- CI/CD TEST 3: Robustness Check (Perturbation Stability) --
def test_prediction_robustness(model, X_test, numeric_cols, max_flips=10):
    sample = X_test.sample(n=100, random_state=42)
    orig_preds = model.predict(sample)

    perturbed_sample = sample.copy()
    perturbed_sample[numeric_cols] += np.random.normal(0, 0.02, perturbed_sample[numeric_cols].shape)
    perturbed_preds = model.predict(perturbed_sample)

    flips = (orig_preds != perturbed_preds).sum()
    print(f"[ROBUSTNESS TEST] Prediction flips after perturbation: {flips}/100")

    if flips <= max_flips:
        print("[PASS] Model predictions are stable under small perturbations.")
        return True
    else:
        print("[FAIL] Model unstable under perturbations!")
        return False

# -- RUN ALL TESTS (CI/CD Simulation) --

X_train, X_test, y_train, y_test = load_preprocess_data()
numeric_cols = ['age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week']

model = train_model(X_train, y_train)

results = {
    'Performance Check': test_model_performance(model, X_test, y_test),
    'Bias Audit': test_bias_audit(model, X_test, y_test, feature_name='sex'),
    'Robustness Check': test_prediction_robustness(model, X_test, numeric_cols)
}

# -- FINAL SUMMARY --
print("\n--- CI/CD TEST SUITE RESULT ---")
for test, result in results.items():
    print(f"{test}: {'PASS' if result else 'FAIL'}")

if all(results.values()):
    print("\n[DEPLOYMENT STATUS: APPROVED ✅]")
else:
    print("\n[DEPLOYMENT STATUS: BLOCKED ❌]")


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m240.0/240.0 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25h

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[numeric] = StandardScaler().fit_transform(X[numeric])


[PERFORMANCE CHECK] Accuracy: 0.8261
[PASS] Model meets performance threshold.
[BIAS AUDIT] Demographic Parity Difference (sex): 0.1797
[PASS] Bias within acceptable limits.
[ROBUSTNESS TEST] Prediction flips after perturbation: 1/100
[PASS] Model predictions are stable under small perturbations.

--- CI/CD TEST SUITE RESULT ---
Performance Check: PASS
Bias Audit: PASS
Robustness Check: PASS

[DEPLOYMENT STATUS: APPROVED ✅]
