# In [1]:
import numpy as np
import pandas as pd
from scipy.stats import chi2
from numpy.linalg import inv

def mahalanobis_distance(x, data, cov_inv):
    """
    Return the Mahalanobis distance of one observation from a sample's mean.

    Parameters:
        x: Observation to score; it is centred by subtracting the
            column-wise mean of ``data``.
        data: Sample whose mean along axis 0 is used as the centre.
        cov_inv: Matrix used in the quadratic form (the caller in this file
            passes the inverse of the sample covariance matrix).

    Returns:
        The square root of ``(x - mean)^T @ cov_inv @ (x - mean)``.
    """
    center = np.mean(data, axis=0)
    offset = x - center
    squared_distance = offset.T @ cov_inv @ offset
    return np.sqrt(squared_distance)

def detect_multivariate_outliers(data, threshold=0.99):
    """
    Detect multivariate outliers using the Mahalanobis distance.

    Each row's distance from the sample mean is computed with the inverse of
    the sample covariance matrix and compared against the square root of the
    chi-square quantile at ``threshold`` (degrees of freedom = number of
    numeric columns).

    Note:
        The mean and covariance are estimated from the very rows being
        scored, outliers included. Extreme rows therefore inflate the
        covariance, and with few rows this can keep every distance below
        the cutoff so that nothing is flagged.

    Parameters:
        data (DataFrame): Input frame; non-numeric columns are ignored.
        threshold (float): Chi-square confidence level (default 0.99
            corresponds to the 99% confidence region).

    Returns:
        Tuple[List[int], numpy.ndarray]: The positional indices of the rows
        whose distance exceeds the cutoff, and the Mahalanobis distance of
        every row (in row order).

    Raises:
        numpy.linalg.LinAlgError: If the sample covariance matrix is
            singular and cannot be inverted.
    """
    data = data.select_dtypes(include=[np.number])  # Keep numeric columns only
    values = np.asarray(data.values, dtype=float)

    # np.cov returns a 0-d array for a single column; inv() needs a 2-D matrix.
    cov_matrix = np.atleast_2d(np.cov(values, rowvar=False))
    cov_inv = inv(cov_matrix)

    # Centre once and evaluate the quadratic form for all rows at the same
    # time instead of recomputing the mean for every row.
    centered = values - values.mean(axis=0)
    squared = np.sum((centered @ cov_inv) * centered, axis=1)

    # Rounding can push a near-zero quadratic form slightly negative; clamp
    # it so the square root does not produce NaN.
    distances = np.sqrt(np.maximum(squared, 0.0))

    chi2_threshold = np.sqrt(chi2.ppf(threshold, df=data.shape[1]))
    outliers = np.where(distances > chi2_threshold)[0]

    return outliers.tolist(), distances

# Example usage: score a small two-column frame and print the flagged rows.
df = pd.DataFrame(
    {
        'X': [10, 12, 13, 11, 14, 110],
        'Y': [20, 22, 23, 21, 25, 190],
    }
)

# The function returns the flagged row positions and the per-row distances.
outliers, dists = detect_multivariate_outliers(data=df)
print(f"Outlier indices: {outliers}")

# Output:
# Outlier indices: []
