In [None]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import logging
import re
from datetime import datetime
import json
from typing import Dict, List, Tuple, Any
from dataclasses import dataclass, field
from enum import Enum
import warnings
from collections import defaultdict
import argparse


# When the first CLI argument is "-f" (presumably the connection-file flag a
# notebook kernel is launched with -- TODO confirm), drop every argument so
# that argparse in main() only ever sees the program name.
if sys.argv[1:2] == ["-f"]:
    sys.argv = sys.argv[:1]

# Silence every warning category for the rest of the process.
warnings.filterwarnings("ignore")


class AlertLevel(Enum):
    """Named alert severities; each member's value is its own name as a string.

    Not referenced by any other code visible in this file.
    """
    INFO = "INFO"
    WARNING = "WARNING"
    CRITICAL = "CRITICAL"
    EMERGENCY = "EMERGENCY"

@dataclass
class Thresholds:
    """Numeric cut-offs used when labelling districts.

    Only ``load_warning``, ``load_high`` and ``shortfall_severe`` are read by
    ``AadhaarProcessor.classify`` in this file; the remaining fields are not
    referenced by the code visible here.
    """
    # classify(): a load_score above this is labelled "Moderate".
    load_warning: float = 1.2
    # classify(): a load_score above this is labelled "Overloaded" or "Critical".
    load_high: float = 1.8
    # Not referenced in the visible code.
    shortfall_warning: float = 0.35
    # classify(): compared against shortfall_pctile / 100 (a percentile rank).
    shortfall_severe: float = 0.60
    # Not referenced in the visible code.
    critical_district_limit: int = 5
    # Not referenced in the visible code.
    max_load_anomaly: float = 3.0
    # Not referenced in the visible code.
    min_enrolments_for_analysis: int = 100

@dataclass
class Config:
    """Run-time settings for the analysis.

    Instantiating the class creates ``data_dir``, ``output_dir`` and
    ``log_dir`` on disk if they do not exist yet. The three directory fields
    accept ``Path`` objects as well as plain strings / path-like objects;
    they are always stored as ``Path``.
    """
    # Directory scanned for the input CSV files.
    data_dir: Path = Path("../data")
    # Directory the result CSV is written to.
    output_dir: Path = Path("../outputs")
    # Directory the daily log file is written to.
    log_dir: Path = Path("../logs")

    # Cut-offs consumed by the district classification.
    thresholds: Thresholds = field(default_factory=Thresholds)

    # Two-letter state code -> full state name; codes missing here are kept
    # as-is by the caller.
    state_map: Dict[str, str] = field(default_factory=lambda: {
        "TN": "Tamil Nadu", "AP": "Andhra Pradesh", "KA": "Karnataka",
        "MH": "Maharashtra", "DL": "Delhi", "UP": "Uttar Pradesh",
        "WB": "West Bengal", "GJ": "Gujarat", "RJ": "Rajasthan",
        "PB": "Punjab", "KL": "Kerala", "MP": "Madhya Pradesh",
        "BR": "Bihar", "OR": "Odisha", "HR": "Haryana"
    })

    # Columns summed to obtain the enrolment count of a row.
    age_cols: List[str] = field(default_factory=lambda: [
        "age_0_5", "age_5_17", "age_18_greater"
    ])

    # Applied to the file stem: group 1 is the two-letter state code, group 2
    # the district name (an optional trailing "aadhaar..." suffix is dropped).
    filename_regex: str = r"^([A-Z]{2}),\s*(.+?)(?:\s*aadhaar.*)?$"

    def __post_init__(self) -> None:
        """Normalise the directory fields to ``Path`` and create them."""
        # Callers may pass strings; without this coercion .mkdir would fail
        # with AttributeError on a str.
        self.data_dir = Path(self.data_dir)
        self.output_dir = Path(self.output_dir)
        self.log_dir = Path(self.log_dir)

        for directory in (self.data_dir, self.output_dir, self.log_dir):
            directory.mkdir(exist_ok=True, parents=True)


def setup_logging(config: Config) -> logging.Logger:
    """Configure root logging and return the ``"AadhaarHealth"`` logger.

    Messages at INFO and above go to stdout and to a UTF-8 log file named
    after today's date inside ``config.log_dir``.

    Note: ``logging.basicConfig`` leaves the root logger untouched when it
    already has handlers, so calling this twice in one process does not add
    duplicate handlers.

    Args:
        config: Settings object; only ``log_dir`` is read.

    Returns:
        The logger named ``"AadhaarHealth"``.
    """
    log_file = config.log_dir / f"aadhaar_health_{datetime.now():%Y%m%d}.log"

    console_handler = logging.StreamHandler(sys.stdout)
    file_handler = logging.FileHandler(log_file, encoding="utf-8")

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s | %(levelname)-8s | %(message)s",
        handlers=[console_handler, file_handler],
    )

    app_logger = logging.getLogger("AadhaarHealth")
    return app_logger


class AadhaarProcessor:
    """Aggregate per-district Aadhaar CSV files into one scored table.

    The processor reads enrolment and update CSV files from
    ``config.data_dir``, derives the state code and district name from each
    file name, totals the counts per (state, district), and attaches load /
    shortfall metrics plus a priority label to every district.

    Missing or unreadable inputs never raise: they yield empty (but correctly
    shaped) frames so that ``build_master`` always returns a table with the
    full set of columns.

    Attributes:
        config: Settings object supplying ``data_dir``, ``age_cols``,
            ``filename_regex``, ``state_map`` and ``thresholds``.
        logger: Logger used for progress and error messages.
    """

    _KEY_COLS = ["state", "district"]
    _LABEL_COLS = ["priority", "status", "action"]

    def __init__(self, config: "Config", logger: logging.Logger):
        self.config = config
        self.logger = logger

    def parse_filename(self, filename: str) -> Tuple[str, str]:
        """Split a file name into ``(state_code, district)``.

        The configured ``filename_regex`` is matched case-insensitively
        against the file stem. The state code is upper-cased and the district
        title-cased.

        Returns:
            ``("UNK", "Unknown")`` when the stem does not match the pattern.
        """
        stem = Path(filename).stem
        match = re.match(self.config.filename_regex, stem, re.IGNORECASE)
        if match is None:
            return "UNK", "Unknown"
        return match.group(1).upper(), match.group(2).title()

    def read_csv(self, path: Path) -> pd.DataFrame:
        """Read one CSV file; log and return an empty frame on any failure."""
        try:
            return pd.read_csv(path, low_memory=False)
        except Exception as exc:
            # Deliberately best-effort: one broken file must not abort the run.
            self.logger.error("Failed reading %s: %s", path.name, exc)
            return pd.DataFrame()

    @staticmethod
    def _totals_to_frame(totals: Dict[Tuple[str, str], Any], value_col: str) -> pd.DataFrame:
        """Turn ``{(state, district): total}`` into a three-column frame."""
        records = [
            {"state": state, "district": district, value_col: value}
            for (state, district), value in totals.items()
        ]
        frame = pd.DataFrame(records, columns=["state", "district", value_col])
        if frame.empty:
            # An empty frame would otherwise carry an object-dtype value
            # column, which breaks the arithmetic done downstream.
            frame[value_col] = frame[value_col].astype("float64")
        return frame

    @staticmethod
    def _safe_ratio(numerator: pd.Series, denominator: Any) -> pd.Series:
        """Divide, mapping every non-finite result (x/0 and 0/0) to 0."""
        ratio = numerator / denominator
        return ratio.replace([np.inf, -np.inf], np.nan).fillna(0)

    def process_enrolments(self) -> pd.DataFrame:
        """Total the enrolments of every ``*aadhaar_enrolments.csv`` file.

        A row's enrolment count is the sum of the configured age columns;
        when any of them is missing, all numeric columns are summed instead.
        Files that resolve to the same (state, district) are added together,
        so each district appears exactly once.

        Returns:
            Frame with columns ``state``, ``district``, ``total_enrolments``;
            empty (with those columns) when no usable file exists.
        """
        files = list(self.config.data_dir.glob("*aadhaar_enrolments.csv"))
        self.logger.info("Found %d enrolment files", len(files))

        age_cols = self.config.age_cols
        # Plain dict (not defaultdict(float)) so integer totals stay integers.
        totals: Dict[Tuple[str, str], Any] = {}
        for csv_path in files:
            df = self.read_csv(csv_path)
            if df.empty:
                continue

            if all(col in df.columns for col in age_cols):
                per_row = df[age_cols].sum(axis=1)
            else:
                per_row = df.select_dtypes(include=np.number).sum(axis=1)

            key = self.parse_filename(csv_path.name)
            totals[key] = totals.get(key, 0) + per_row.sum()

        if not totals:
            self.logger.warning(
                "No usable enrolment data found in %s", self.config.data_dir
            )
        return self._totals_to_frame(totals, "total_enrolments")

    def process_updates(self, pattern: str, col_name: str) -> pd.DataFrame:
        """Total all numeric cells of the files matching ``pattern``.

        Args:
            pattern: Glob pattern evaluated inside ``config.data_dir``.
            col_name: Name of the resulting totals column.

        Returns:
            Frame with columns ``state``, ``district`` and ``col_name``, one
            row per (state, district); empty (with those columns) when no
            usable file matches.
        """
        totals: Dict[Tuple[str, str], float] = defaultdict(float)

        for csv_path in self.config.data_dir.glob(pattern):
            df = self.read_csv(csv_path)
            if df.empty:
                continue

            # NOTE(review): every numeric column is counted; if the files
            # carry numeric identifier columns they would inflate the total.
            numeric = df.select_dtypes(include=np.number)
            totals[self.parse_filename(csv_path.name)] += numeric.sum().sum()

        return self._totals_to_frame(totals, col_name)

    def calculate_metrics(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with load and shortfall metrics added.

        Expects the columns ``total_updates`` and ``total_enrolments``.
        Added columns:

        * ``updates_norm`` / ``enrol_norm``: value divided by the column mean.
        * ``load_score``: ``updates_norm / enrol_norm``, rounded to 3 places.
        * ``shortfall_pct``: fraction by which enrolments fall below the mean
          (0 when at or above it), rounded to 3 places.
        * ``load_pctile`` / ``shortfall_pctile``: percentile rank (0-100) of
          the two scores.

        Divisions by zero (zero mean, or a district without enrolments)
        yield 0 rather than NaN or infinity.
        """
        result = df.copy()

        avg_updates = result["total_updates"].mean()
        avg_enrol = result["total_enrolments"].mean()

        result["updates_norm"] = self._safe_ratio(result["total_updates"], avg_updates)
        result["enrol_norm"] = self._safe_ratio(result["total_enrolments"], avg_enrol)

        result["load_score"] = self._safe_ratio(
            result["updates_norm"], result["enrol_norm"]
        ).round(3)

        result["shortfall_pct"] = self._safe_ratio(
            avg_enrol - result["total_enrolments"], avg_enrol
        ).clip(lower=0).round(3)

        result["load_pctile"] = (result["load_score"].rank(pct=True) * 100).round(1)
        result["shortfall_pctile"] = (result["shortfall_pct"].rank(pct=True) * 100).round(1)

        return result

    def classify(self, row) -> Tuple[int, str, str]:
        """Map one metrics row to ``(priority, status, action)``.

        ``row`` must provide ``load_score`` and ``shortfall_pctile``.
        Priority runs from 5 (most urgent) down to 1.
        """
        limits = self.config.thresholds
        overloaded = row["load_score"] > limits.load_high
        # NOTE(review): this compares the shortfall *percentile rank* with
        # shortfall_severe, not shortfall_pct itself, so the districts ranked
        # highest are flagged even when their absolute shortfall is small.
        # Confirm this relative ranking is intended.
        under_covered = row["shortfall_pctile"] / 100 > limits.shortfall_severe

        if overloaded and under_covered:
            return 5, "Critical", "Emergency response: mobile vans + staff surge"
        if overloaded:
            return 4, "Overloaded", "Increase capacity and staffing"
        if under_covered:
            return 3, "Under-covered", "Outreach and awareness campaigns"
        if row["load_score"] > limits.load_warning:
            return 2, "Moderate", "Monitor closely"
        return 1, "Healthy", "Maintain operations"

    def build_master(self) -> pd.DataFrame:
        """Build the scored per-district table, most urgent districts first.

        Enrolment totals are left-joined with the combined demographic and
        biometric update totals (missing values become 0), metrics and labels
        are attached, and ``state_name`` is resolved through
        ``config.state_map`` (unknown codes are kept unchanged).

        Returns:
            Frame sorted by ``priority`` descending with a fresh index. It is
            empty, but has all columns, when no enrolment data is available.
        """
        enrol = self.process_enrolments()
        demo = self.process_updates("*aadhaar_demographic_updates.csv", "demo_updates")
        bio = self.process_updates("*aadhaar_biometric_updates.csv", "bio_updates")

        updates = demo.merge(bio, on=self._KEY_COLS, how="outer").fillna(0)
        updates["total_updates"] = updates["demo_updates"] + updates["bio_updates"]

        master = enrol.merge(updates, on=self._KEY_COLS, how="left").fillna(0)
        master = self.calculate_metrics(master)

        # Built row by row instead of via DataFrame.apply, which does not
        # return a Series of tuples for an empty frame.
        labels = [self.classify(row) for _, row in master.iterrows()]
        label_frame = pd.DataFrame(labels, columns=self._LABEL_COLS, index=master.index)
        for col in self._LABEL_COLS:
            master[col] = label_frame[col]

        master["state_name"] = master["state"].map(self.config.state_map).fillna(master["state"])

        return master.sort_values("priority", ascending=False).reset_index(drop=True)



def main():
    """Run the full analysis: parse CLI args, build the table, report, save.

    Prints a count of districts per status and the first ten rows of the
    priority-sorted table to stdout, and writes the complete table as a
    timestamped CSV into the configured output directory.
    """
    cli = argparse.ArgumentParser(description="Aadhaar System Health Monitor")
    cli.add_argument("--format", choices=["csv"], default="csv")
    # Parsed only for validation / --help; the single supported value is not
    # consulted afterwards.
    cli.parse_args()

    config = Config()
    logger = setup_logging(config)
    logger.info("Starting Aadhaar Health Analysis")

    master = AadhaarProcessor(config, logger).build_master()

    print("\nAADHAAR SYSTEM HEALTH SUMMARY")
    status_counts = master["status"].value_counts()
    print(status_counts.to_string())

    out = config.output_dir / f"aadhaar_health_{datetime.now():%Y%m%d_%H%M}.csv"
    master.to_csv(out, index=False)
    logger.info(f"CSV saved to {out}")

    print("\nTop Priority Districts:")
    summary_columns = [
        "state_name", "district", "status", "priority", "load_score", "shortfall_pct",
    ]
    top_rows = master.head(10)[summary_columns]
    print(top_rows.to_string(index=False))

# Run the analysis only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()


2026-01-17 00:11:55,467 | INFO    | Starting Aadhaar Health Analysis
2026-01-17 00:11:55,468 | INFO    | Found 6 enrolment files

AADHAAR SYSTEM HEALTH SUMMARY
status
Under-covered    3
Healthy          3
2026-01-17 00:11:55,581 | INFO    | CSV saved to ..\outputs\aadhaar_health_20260117_0011.csv

Top Priority Districts:
    state_name      district        status  priority  load_score  shortfall_pct
Andhra Pradesh       Krishna Under-covered         3       1.611          0.050
Andhra Pradesh    Srikakulam Under-covered         3       1.618          0.398
    Tamil Nadu    Dharmapuri Under-covered         3       0.228          0.234
Andhra Pradesh Visakhapatnam       Healthy         1       1.149          0.000
    Tamil Nadu       Chennai       Healthy         1       1.068          0.000
    Tamil Nadu    Coimbatore       Healthy         1       0.478          0.000
