In [None]:
# state_level_prioritization.py
import pandas as pd
import numpy as np

# Substring -> canonical column name. Order matters: for a given source column
# the first matching substring decides its canonical name.
_COLUMN_ALIASES = (
    ("Cumulative", "Cumulative_Expenditure_RsCr"),
    ("Latest Revised Cost", "Latest_Revised_Cost_RsCr"),
    ("Project Count", "Project_Count"),
    ("Original Cost", "Original_Cost_RsCr"),
)


def _standardize_columns(frame):
    """Return a copy of *frame* with known columns renamed to canonical names.

    Each canonical name is assigned at most once: if it already exists in the
    frame, or an earlier column has already been mapped to it, later matching
    columns keep their original name. This avoids duplicate column labels.
    """
    assigned = {target for _, target in _COLUMN_ALIASES if target in frame.columns}
    mapping = {}
    for col in frame.columns:
        for needle, target in _COLUMN_ALIASES:
            if needle in col:
                if target not in assigned:
                    mapping[col] = target
                    assigned.add(target)
                break  # first matching alias wins for this column
    return frame.rename(columns=mapping)


def _numeric_column(frame, name):
    """Return column *name* coerced to numeric; all-NaN if the column is absent."""
    if name not in frame.columns:
        return pd.Series(np.nan, index=frame.index, dtype="float64")
    return pd.to_numeric(frame[name], errors="coerce")


def prepare_projects(frame):
    """Standardize column names, force numeric types and derive escalation.

    Parameters:
        frame: raw project-level DataFrame (column names already stripped).

    Returns:
        A new DataFrame in which ``Project_Count``,
        ``Cumulative_Expenditure_RsCr`` and ``Latest_Revised_Cost_RsCr`` are
        numeric with missing/unparseable values replaced by 0, and
        ``Cost_Escalation_Pct`` is numeric. The input frame is not modified.
    """
    frame = _standardize_columns(frame)

    # Keep the un-filled revised cost: a missing value must not be read as a
    # revised cost of 0 (which would look like a -100% escalation).
    revised = _numeric_column(frame, "Latest_Revised_Cost_RsCr")

    frame["Project_Count"] = _numeric_column(frame, "Project_Count").fillna(0)
    frame["Cumulative_Expenditure_RsCr"] = _numeric_column(
        frame, "Cumulative_Expenditure_RsCr"
    ).fillna(0)
    frame["Latest_Revised_Cost_RsCr"] = revised.fillna(0)

    if "Cost_Escalation_Pct" in frame.columns:
        frame["Cost_Escalation_Pct"] = pd.to_numeric(
            frame["Cost_Escalation_Pct"], errors="coerce"
        )
    else:
        original = _numeric_column(frame, "Original_Cost_RsCr")
        # Escalation is only defined when both costs are known and the
        # original cost is non-zero.
        defined = original.notna() & (original != 0) & revised.notna()
        frame["Cost_Escalation_Pct"] = (
            (revised - original) / original * 100
        ).where(defined)

    return frame


def aggregate_by_state(frame):
    """Aggregate prepared project rows per state.

    Groups by ``"STATE NAME"`` if present, otherwise by ``"State"``; rows with
    a missing state are kept as their own group. The result is sorted by
    ``Total_Latest_Revised_Cost_RsCr`` in descending order.

    Raises:
        KeyError: if neither state column exists in *frame*.
    """
    state_col = next(
        (name for name in ("STATE NAME", "State") if name in frame.columns), None
    )
    if state_col is None:
        raise KeyError("expected a 'STATE NAME' or 'State' column")

    return (
        frame.groupby(state_col, dropna=False)
        .agg(
            Total_Projects=("Project_Count", "sum"),
            Total_Cumulative_Expenditure_RsCr=("Cumulative_Expenditure_RsCr", "sum"),
            Total_Latest_Revised_Cost_RsCr=("Latest_Revised_Cost_RsCr", "sum"),
            Avg_Escalation_Pct=("Cost_Escalation_Pct", "mean"),
        )
        .reset_index()
        .sort_values("Total_Latest_Revised_Cost_RsCr", ascending=False)
    )


if __name__ == "__main__":
    # Assigned at module level so `file_path`, `df` and `state_agg` remain
    # available to later notebook cells / interactive use.
    file_path = "ongoing_projects2_clean.csv"
    df = prepare_projects(
        pd.read_csv(file_path).rename(columns=lambda c: c.strip())
    )
    state_agg = aggregate_by_state(df)

    print("Top 20 states by Total Latest Revised Cost:\n", state_agg.head(20).to_string(index=False))
    state_agg.to_csv("state_level_aggregation.csv", index=False)
    print("\nSaved state_level_aggregation.csv")

Top 20 states by Total Latest Revised Cost:
                            STATE NAME  Total_Projects  Total_Cumulative_Expenditure_RsCr  Total_Latest_Revised_Cost_RsCr  Avg_Escalation_Pct
                          Maharashtra             108                          357609.40                       527681.32           19.115681
                              Gujarat              73                          394515.63                       525463.04           17.766339
                        Uttar Pradesh              62                          266754.07                       336421.92           15.825256
                            Rajasthan              54                          237371.17                       325116.26           15.902185
                            PAN India               6                           69165.67                       264213.00           86.207548
                              Haryana              18                          171957.43                     