### Detect Data Drift using Kolmogorov-Smirnov Test
**Description**: Use the two-sample Kolmogorov-Smirnov test to detect data drift (a change in distribution) between a reference dataset and a new dataset.

In [1]:
# write your code from here

import numpy as np
from scipy.stats import ks_2samp

def validate_input(data):
    """
    Validates a dataset before it is passed to the KS test.

    Parameters:
    - data (np.ndarray): Candidate dataset.

    Raises:
        TypeError: if input is not a numpy array.
        ValueError: if input is not one-dimensional, is empty, or contains NaN.
    """
    if not isinstance(data, np.ndarray):
        raise TypeError("Input data must be a numpy array.")
    if data.ndim != 1:
        raise ValueError("Input data must be one-dimensional.")
    if data.size == 0:
        raise ValueError("Input data must not be empty.")
    # NaN has no rank, so the empirical CDFs compared by the KS test are
    # undefined; fail loudly instead of letting a NaN p-value read as
    # "no drift". Only inexact (float/complex) dtypes can hold NaN, and
    # np.isnan is not defined for every other dtype, hence the dtype guard.
    if np.issubdtype(data.dtype, np.inexact) and np.isnan(data).any():
        raise ValueError("Input data must not contain NaN values.")

def detect_data_drift_ks(data_ref, data_new, alpha=0.05):
    """
    Detects data drift between two datasets using the two-sample
    Kolmogorov-Smirnov test.

    Parameters:
    - data_ref (np.ndarray): Reference dataset (e.g., training data)
    - data_new (np.ndarray): New dataset to compare (e.g., production data)
    - alpha (float): Significance level, strictly between 0 and 1
      (default 0.05)

    Returns:
    - ks_statistic (float): KS test statistic
    - p_value (float): p-value from KS test
    - drift_detected (bool): True if drift is detected, False otherwise

    Raises:
        TypeError, ValueError: propagated from validate_input when either
            dataset fails validation.
        ValueError: if alpha is not strictly between 0 and 1.
    """
    # Validate inputs
    validate_input(data_ref)
    validate_input(data_new)

    # A significance level outside (0, 1) makes the decision rule
    # meaningless (never or always "drift"). The negated chained comparison
    # also rejects NaN.
    if not 0 < alpha < 1:
        raise ValueError("alpha must be strictly between 0 and 1.")

    # Run KS test
    ks_statistic, p_value = ks_2samp(data_ref, data_new)

    # Drift is detected if p-value is less than significance level alpha.
    # The comparison yields a numpy.bool_; cast it so callers get the plain
    # Python bool the docstring promises (safe for `is True`, isinstance
    # checks and JSON serialisation).
    drift_detected = bool(p_value < alpha)

    return ks_statistic, p_value, drift_detected


def main():
    """
    Demo: run the KS drift check on simulated data and print the results.

    A seeded standard-normal reference sample is compared first against a
    second sample with the same parameters (no drift), then against a
    sample with a shifted mean and larger spread (drift).
    """
    np.random.seed(42)

    # All three samples are drawn up front, in this order, so the seeded
    # random stream is consumed deterministically.
    reference = np.random.normal(loc=0, scale=1, size=1000)
    same_distribution = np.random.normal(loc=0, scale=1, size=1000)
    shifted_distribution = np.random.normal(loc=0.5, scale=1.5, size=1000)

    # (banner, sample, trailer): the trailer keeps the blank line that
    # separates the first report from the second.
    scenarios = (
        ("=== Testing with NO drift ===", same_distribution, "\n"),
        ("=== Testing WITH drift ===", shifted_distribution, ""),
    )

    for banner, sample, trailer in scenarios:
        print(banner)
        statistic, p, flagged = detect_data_drift_ks(reference, sample)
        print(f"KS Statistic: {statistic:.4f}")
        print(f"p-value: {p:.4f}")
        print(f"Drift detected? {'Yes' if flagged else 'No'}{trailer}")

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

=== Testing with NO drift ===
KS Statistic: 0.0450
p-value: 0.2635
Drift detected? No

=== Testing WITH drift ===
KS Statistic: 0.2360
p-value: 0.0000
Drift detected? Yes
