## Model Introspection

In [1]:
import numpy as np
import pandas as pd

In [3]:
# Full Isolation Forest scores (all rows)
df_if = pd.read_csv("data/processed/if_scores.csv")

# LOF scores (only subsample rows)
df_lof = pd.read_csv("data/processed/lof_scores.csv")

# Full labels (source of truth)
y = pd.read_csv("data/processed/y_train_1.csv")["isFraud"]

# Subsample indices used by LOF
idx = np.load("data/processed/lof_subsample_idx.npy")

print("IF scores:", df_if.shape)
print("LOF scores:", df_lof.shape)
print("Labels:", y.shape)
print("Subsample size:", idx.shape)

IF scores: (590540, 3)
LOF scores: (100000, 3)
Labels: (590540,)
Subsample size: (100000,)


In [4]:
len(df_lof) == len(idx)

True

In [5]:
df_if_sub = df_if.iloc[idx].reset_index(drop=True)
y_sub = y.iloc[idx].reset_index(drop=True)

assert len(df_if_sub) == len(df_lof) == len(y_sub)

In [6]:
df_compare = pd.DataFrame({
    "if_score": df_if_sub["anomaly_score"],
    "lof_score": df_lof["lof_score"],
    "isFraud": y_sub.values
})

df_compare.head()

Unnamed: 0,if_score,lof_score,isFraud
0,0.360532,0.977001,0
1,0.326089,0.984578,0
2,0.33312,0.985754,0
3,0.407142,1.029077,0
4,0.327719,0.985974,0


In [7]:
df = df_compare.copy()

df["if_rank"] = df["if_score"].rank(method="first", ascending=False)
df["lof_rank"] = df["lof_score"].rank(method="first", ascending=False)

df.head()

Unnamed: 0,if_score,lof_score,isFraud,if_rank,lof_rank
0,0.360532,0.977001,0,28491.0,87745.0
1,0.326089,0.984578,0,68753.0,77026.0
2,0.33312,0.985754,0,55218.0,75210.0
3,0.407142,1.029077,0,13562.0,21246.0
4,0.327719,0.985974,0,65095.0,74882.0


In [8]:
df_compare.to_csv("data/processed/if_lof_score_comparison.csv")

In [9]:
K_PCT = 0.5   # try 0.1 or 1.0 later
K = int(len(df) * K_PCT / 100)

print(f"Top-{K_PCT}% review size:", K)

Top-0.5% review size: 500


In [10]:
df["if_top"] = df["if_rank"] <= K
df["lof_top"] = df["lof_rank"] <= K

def assign_bucket(row):
    if row.if_top and row.lof_top:
        return "IF & LOF"
    if row.if_top and not row.lof_top:
        return "IF only"
    if not row.if_top and row.lof_top:
        return "LOF only"
    return "Neither"

df["bucket"] = df.apply(assign_bucket, axis=1)

df["bucket"].value_counts()

bucket
Neither     99150
IF only       350
LOF only      350
IF & LOF      150
Name: count, dtype: int64

### Disagreement Structure at 0.5% Review Capacity

At a 0.5% review threshold, Isolation Forest and LOF exhibit limited agreement, with only 150 transactions flagged by both models. The majority of flagged cases are model-specific, split evenly between IF-only and LOF-only buckets.

This structure indicates that global and local anomaly detectors prioritize distinct subsets of transactions, reinforcing the presence of multiple fraud regimes rather than a single dominant anomaly pattern.

In [11]:
bucket_stats = (
    df.groupby("bucket")["isFraud"]
    .agg(["mean", "count"])
    .rename(columns={"mean": "fraud_rate"})
    .sort_values("fraud_rate", ascending=False)
)

bucket_stats

Unnamed: 0_level_0,fraud_rate,count
bucket,Unnamed: 1_level_1,Unnamed: 2_level_1
LOF only,0.274286,350
IF & LOF,0.26,150
IF only,0.1,350
Neither,0.034352,99150


In [12]:
base_rate = df["isFraud"].mean()

bucket_stats["lift_vs_base"] = bucket_stats["fraud_rate"] / base_rate

bucket_stats

Unnamed: 0_level_0,fraud_rate,count,lift_vs_base
bucket,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
LOF only,0.274286,350,7.670182
IF & LOF,0.26,150,7.270694
IF only,0.1,350,2.796421
Neither,0.034352,99150,0.960626


### Fraud Concentration by Disagreement Bucket

The LOF-only bucket exhibits the highest fraud concentration (27.4%), exceeding even the consensus bucket where both Isolation Forest and LOF agree. This indicates that locally anomalous behavior—while not globally extreme—is a particularly strong indicator of fraud.

In contrast, IF-only cases show substantially lower fraud rates, suggesting that extreme global anomalies often reflect rare but legitimate behavior. Transactions flagged by neither model closely match the baseline fraud rate, validating the alignment and bucket construction.

In [13]:
df[df["bucket"] == "LOF only"].head(10)

Unnamed: 0,if_score,lof_score,isFraud,if_rank,lof_rank,if_top,lof_top,bucket
658,0.571657,1.947422,0,1334.0,154.0,False,True,LOF only
957,0.601896,1.387763,0,692.0,464.0,False,True,LOF only
1440,0.599903,1.580319,0,722.0,297.0,False,True,LOF only
1585,0.604093,2.316285,0,655.0,85.0,False,True,LOF only
1607,0.503738,1.549347,0,4157.0,314.0,False,True,LOF only
1661,0.573125,1.580322,0,1297.0,296.0,False,True,LOF only
1856,0.571744,2.115533,0,1331.0,117.0,False,True,LOF only
2453,0.580749,1.418644,1,1098.0,423.0,False,True,LOF only
2740,0.570716,1.428892,1,1363.0,410.0,False,True,LOF only
3106,0.535091,2.617378,0,2625.0,62.0,False,True,LOF only


In [15]:
for K_PCT in [0.1, 0.5, 1.0]:
    K = int(len(df) * K_PCT / 100)

    df["if_top"] = df["if_rank"] <= K
    df["lof_top"] = df["lof_rank"] <= K

    df["bucket"] = df.apply(assign_bucket, axis=1)

    stats = df.groupby("bucket")["isFraud"].mean()
    print(f"\nK = {K_PCT}%")
    print(stats)


K = 0.1%
bucket
IF & LOF    0.000000
IF only     0.000000
LOF only    0.228261
Neither     0.035618
Name: isFraud, dtype: float64

K = 0.5%
bucket
IF & LOF    0.260000
IF only     0.100000
LOF only    0.274286
Neither     0.034352
Name: isFraud, dtype: float64

K = 1.0%
bucket
IF & LOF    0.371014
IF only     0.184733
LOF only    0.163359
Neither     0.032742
Name: isFraud, dtype: float64


In [16]:
df.groupby("bucket")["isFraud"].sum()

bucket
IF & LOF     128
IF only      121
LOF only     107
Neither     3220
Name: isFraud, dtype: int64

### Stability and Coverage Analysis

Across multiple review capacities, the disagreement structure between Isolation Forest and LOF remains stable. At very tight review budgets, locally anomalous transactions dominate fraud detection, while at broader capacities consensus anomalies become increasingly valuable. Although most fraud instances lie outside the top-ranked anomalies, each model contributes uniquely to fraud coverage, highlighting the necessity of multi-perspective risk ranking under operational constraints.

In [18]:
df.to_csv("data/processed/disagreement_analysis.csv", index=False)
bucket_stats.to_csv("data/processed/disagreement_bucket_stats.csv")