### Detect Data Drift
**Description**: Data drift can occur when the statistical properties of your data change over time. Learn to detect data drift using visualizations.

In [1]:
# Write your code from here
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Assuming you have two pandas DataFrames:
# historical_data: DataFrame containing historical data
# current_data: DataFrame containing the most recent data

# Let's assume these DataFrames have a common column named 'feature_to_analyze'

def detect_numerical_drift(historical_df, current_df, column_name):
    """
    Visualizes the distribution of a numerical feature in historical and current data
    to detect drift.
    """
    plt.figure(figsize=(10, 6))
    sns.histplot(historical_df[column_name], label='Historical Data', kde=True)
    sns.histplot(current_df[column_name], color='orange', label='Current Data', kde=True)
    plt.title(f'Distribution of {column_name}')
    plt.xlabel(column_name)
    plt.ylabel('Density')
    plt.legend()
    plt.show()

    plt.figure(figsize=(8, 6))
    sns.boxplot(data=[historical_df[column_name], current_df[column_name]],
                palette="Set2",
                names=['Historical Data', 'Current Data'])
    plt.title(f'Box Plot of {column_name}')
    plt.ylabel(column_name)
    plt.show()

def detect_categorical_drift(historical_df, current_df, column_name):
    """
    Visualizes the distribution of a categorical feature in historical and current data
    to detect drift.
    """
    historical_counts = historical_df[column_name].value_counts(normalize=True).sort_index()
    current_counts = current_df[column_name].value_counts(normalize=True).sort_index()

    comparison_df = pd.DataFrame({'Historical': historical_counts, 'Current': current_counts})
    comparison_df.plot(kind='bar', figsize=(10, 6))
    plt.title(f'Distribution of {column_name}')
    plt.ylabel('Proportion')
    plt.xlabel(column_name)
    plt.xticks(rotation=45, ha='right')
    plt.legend()
    plt.tight_layout()
    plt.show()

# Example Usage (assuming you have loaded your data into historical_data and current_data)
# Replace 'feature_to_analyze' with the actual name of the column you want to inspect

# For a numerical feature:
if 'numerical_feature' in historical_data.columns and 'numerical_feature' in current_data.columns:
    detect_numerical_drift(historical_data, current_data, 'numerical_feature')

# For a categorical feature:
if 'categorical_feature' in historical_data.columns and 'categorical_feature' in current_data.columns:
    detect_categorical_drift(historical_data, current_data, 'categorical_feature')

NameError: name 'historical_data' is not defined