### Using SHAP for Feature Drift Analysis
**Description**: Use SHapley Additive exPlanations (SHAP) values to compare feature
attributions across time periods; a shift in a feature's SHAP values between periods indicates feature drift.

In [None]:
# write your code from here
import shap
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

def select_class_shap_values(shap_values, class_index=1):
    """Return the 2-D (n_samples, n_features) SHAP matrix for one class.

    The container returned by ``explainer.shap_values`` differs between SHAP
    releases, so indexing it with ``[1]`` is only correct for one of them.
    This helper accepts each layout:

    * a ``list`` with one ``(n_samples, n_features)`` array per class;
    * a 3-D array laid out as ``(n_samples, n_features, n_classes)``;
    * a 2-D array (single output), which is returned unchanged.

    Args:
        shap_values: Output of ``explainer.shap_values(...)``.
        class_index: Class whose attributions are wanted (default: 1).

    Returns:
        A 2-D ``numpy.ndarray`` of SHAP values for ``class_index``.

    Raises:
        ValueError: If the array is neither 2-D nor 3-D.
    """
    if isinstance(shap_values, list):
        return np.asarray(shap_values[class_index])

    values = np.asarray(shap_values)
    if values.ndim == 3:
        # Class is the LAST axis here; values[class_index] would pick a sample.
        return values[:, :, class_index]
    if values.ndim == 2:
        return values
    raise ValueError(
        f"Unexpected SHAP values shape {values.shape}; expected 2 or 3 dimensions"
    )


try:
    # Synthetic data: the second half of the rows (period T2) has shifted
    # means for both features, which simulates drift between T1 and T2.
    np.random.seed(42)
    n = 1000
    half = n // 2
    X = pd.DataFrame({
        'feature1': np.concatenate([np.random.normal(0, 1, half), np.random.normal(0.5, 1, half)]),
        'feature2': np.concatenate([np.random.normal(5, 2, half), np.random.normal(6, 2, half)]),
        'time_period': np.concatenate([np.array(['T1'] * half), np.array(['T2'] * half)]),
    })
    y = np.random.randint(0, 2, n)

    # The period label is metadata, not a model input.
    features = X.drop('time_period', axis=1)
    X_train, X_test, y_train, y_test = train_test_split(
        features, y, test_size=0.3, random_state=42
    )

    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X_test)

    # Pick class-1 attributions regardless of which container SHAP returned.
    shap_vals_class1 = select_class_shap_values(shap_values, class_index=1)

    # Rows of the SHAP matrix follow X_test's row order, so the period labels
    # are looked up through X_test's (shuffled) index to stay aligned.
    shap_df = pd.DataFrame(shap_vals_class1, columns=X_test.columns)
    shap_df['time_period'] = X.loc[X_test.index, 'time_period'].to_numpy()

    t1_mean = shap_df[shap_df['time_period'] == 'T1'].mean(numeric_only=True)
    t2_mean = shap_df[shap_df['time_period'] == 'T2'].mean(numeric_only=True)
    diff = t2_mean - t1_mean

    print("Mean SHAP values for T1:", t1_mean.to_dict())
    print("Mean SHAP values for T2:", t2_mean.to_dict())
    print("Difference in SHAP values:", diff.to_dict())

    diff.plot.bar(figsize=(8, 6))
    plt.title("Difference in Mean SHAP Values Between Time Periods")
    plt.ylabel("SHAP Value Difference")
    plt.xlabel("Feature")
    plt.tight_layout()
    plt.show()
except Exception as e:
    # Notebook-level boundary: report the failure instead of a raw traceback.
    print(f"Error: {e}")
