### Detect Data Drift in ML Models
**Objective**: Monitor and detect changes in data distributions that impact ML model performance.

**Task**: Feature Correlation Drift

**Steps**:
1. Compute the correlation matrix of features in your training dataset.
2. Compute the correlation matrix of the same features in your production data.
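3. Assess changes in the correlation matrix over time (for example, by taking the element-wise absolute difference between the training and production matrices) to identify any significant deviations.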
4. Investigate any significant changes in correlation, as they may indicate issues in the data collection process or model assumptions.

In [None]:
# write your code from here
import pandas as pd
import numpy as np

def make_sample_data(n_rows=100, coupling=0.5, seed=None):
    """Build a synthetic (training, production) pair of feature frames.

    Both frames have the columns ``feature1``, ``feature2`` and ``feature3``.
    In the training frame the three features are drawn independently. In the
    production frame ``feature3`` additionally receives ``coupling`` times the
    production ``feature1``, which injects a correlation that is absent from
    the training data.

    Args:
        n_rows: Number of rows to generate in each frame.
        coupling: Weight of production ``feature1`` added to ``feature3``.
        seed: Optional seed for reproducible output. When ``None`` the global
            NumPy random state is used.

    Returns:
        A ``(train_data, prod_data)`` tuple of DataFrames.
    """
    # Use the global generator when unseeded so that an earlier
    # np.random.seed(...) call in the session is still honoured.
    rng = np.random if seed is None else np.random.RandomState(seed)

    train = pd.DataFrame({
        'feature1': rng.normal(0, 1, n_rows),
        'feature2': rng.normal(5, 2, n_rows),
        'feature3': rng.normal(-2, 1, n_rows),
    })

    # feature1 must exist before feature3 is derived from it; referencing the
    # DataFrame from inside its own constructor cannot work.
    prod_feature1 = rng.normal(0, 1, n_rows)
    prod_feature2 = rng.normal(5, 2, n_rows)
    prod_feature3 = rng.normal(-2, 1, n_rows) + coupling * prod_feature1
    prod = pd.DataFrame({
        'feature1': prod_feature1,
        'feature2': prod_feature2,
        'feature3': prod_feature3,
    })
    return train, prod


def find_drifted_pairs(corr_train, corr_prod, threshold=0.3):
    """Return feature pairs whose correlation changed by more than *threshold*.

    Only features present in both matrices are compared, and each unordered
    pair is visited once, so a drifted pair is never reported twice.

    Args:
        corr_train: Correlation matrix of the training data.
        corr_prod: Correlation matrix of the production data.
        threshold: Minimum absolute change in correlation to report.

    Returns:
        A list of ``(feature_a, feature_b, train_corr, prod_corr, abs_diff)``
        tuples in column order.
    """
    shared = [name for name in corr_train.columns if name in corr_prod.columns]
    pairs = []
    for position, first in enumerate(shared):
        # The matrices are symmetric, so scanning later columns is enough.
        for second in shared[position + 1:]:
            train_value = corr_train.loc[second, first]
            prod_value = corr_prod.loc[second, first]
            change = abs(train_value - prod_value)
            # A NaN correlation (e.g. a constant column) compares False here
            # and is therefore not reported.
            if change > threshold:
                pairs.append((first, second, train_value, prod_value, change))
    return pairs


try:
    # Sample training data and production data with drift in the
    # feature1/feature3 correlation
    train_data, prod_data = make_sample_data()

    # Compute correlation matrices
    corr_train = train_data.corr()
    corr_prod = prod_data.corr()

    # Absolute difference between correlation matrices (kept for inspection)
    corr_diff = (corr_train - corr_prod).abs()

    # Threshold for significant correlation drift (e.g., 0.3)
    threshold = 0.3
    drifted_pairs = find_drifted_pairs(corr_train, corr_prod, threshold)

    if drifted_pairs:
        print("Significant correlation drift detected in feature pairs:")
        for pair in drifted_pairs:
            print(f"Features: {pair[0]} & {pair[1]}, Train Corr: {pair[2]:.3f}, Prod Corr: {pair[3]:.3f}, Diff: {pair[4]:.3f}")
    else:
        print("No significant correlation drift detected.")

except Exception as e:
    print(f"Error: {e}")
