# Proxy Bias Detection
Checks inter-proxy agreement (pairwise Spearman correlation) and flags imbalance in each proxy's share of total variance (the "entropy share").

In [0]:
import pandas as pd, numpy as np
from pathlib import Path
# Proxy bias check: measures how well the sigma proxies agree with each other
# (Spearman rank correlation) and whether one proxy dominates the total
# variance, then writes the results and any warning flags under validation/.

# Long-format input; this script reads the 'id', 'region', 'year' and 'norm'
# columns.
df = pd.read_csv('data/interim/indicators_normalized.csv')
sigma_ids = ['ucdp_conflict','happiness_inverse','diversity_index','capability_deprivation']

# Build one column per proxy, keyed by (region, year).
wide = None
for sid in sigma_ids:
    sub = df[df['id']==sid][['region','year','norm']].rename(columns={'norm':sid})
    wide = sub if wide is None else wide.merge(sub, on=['region','year'], how='outer')
# Keep only rows where every proxy has a value, so all statistics below are
# computed on the same set of observations.
# NOTE(review): if a proxy has several rows for one (region, year), the merge
# multiplies rows — confirm the input is unique per id/region/year.
wide = wide.dropna()

corr = wide[sigma_ids].corr(method='spearman')
var = wide[sigma_ids].var()
# Each proxy's share of the summed variance, largest first (NaN sorts last).
entropy_share = (var / var.sum()).sort_values(ascending=False)

Path('validation').mkdir(exist_ok=True)
# NOTE(review): index=False writes the matrix without row labels; rows are in
# the same order as the header columns (sigma_ids).
corr.to_csv('validation/proxy_corr.csv', index=False)
entropy_share.to_csv('validation/proxy_entropy_share.csv')

flags = []
# Upper triangle (excluding the diagonal) = each unordered proxy pair once.
pair_corr = corr.values[np.triu_indices(len(sigma_ids),1)]
if (np.abs(pair_corr) < 0.5).any():
    flags.append('Low inter-proxy agreement (<0.5).')
if (entropy_share.iloc[0] > 0.6):
    flags.append('Dominant proxy variance (>60%).')
# NaN compares False against both thresholds above, so an empty, single-row or
# constant-valued table would otherwise be reported as "No flags.".
if np.isnan(pair_corr).any() or entropy_share.isna().any():
    flags.append('Undefined proxy statistics (NaN); check for missing, constant, or non-overlapping proxy data.')
Path('validation/proxy_flags.txt').write_text('\n'.join(flags or ['No flags.']), encoding='utf-8')
print('Wrote validation/proxy_corr.csv, proxy_entropy_share.csv, proxy_flags.txt')
