In [1]:
import pandas as pd
import numpy as np


In [4]:
# Load the three cleaned tables (enrolment, demographic updates, biometric
# updates) in one pass; the unpacking order matches the path order below.
enrol, demo, bio = [
    pd.read_csv(csv_path)
    for csv_path in (
        "../output/tables/enrolment_clean_5states_25districts_2025.csv",
        "../output/tables/demographic_clean_5states_25districts_2025.csv",
        "../output/tables/biometric_clean_5states_25districts_2025.csv",
    )
]


In [5]:
# Convert each table's `date` column to datetime and derive a monthly Period
# column (`month`) from it. Values that cannot be parsed become NaT
# (errors="coerce"); the format is inferred per value with day-first
# precedence for ambiguous strings.
for df in (enrol, demo, bio):
    parsed_dates = pd.to_datetime(
        df["date"], format="mixed", dayfirst=True, errors="coerce"
    )
    df["date"] = parsed_dates
    df["month"] = parsed_dates.dt.to_period("M")


In [6]:
# Number of dates that failed to parse (NaT) in enrol, demo and bio, in that order.
tuple(frame["date"].isna().sum() for frame in (enrol, demo, bio))


(np.int64(0), np.int64(0), np.int64(0))

In [7]:
# Monthly enrolment counts per (state, district, pincode): sum each age-band
# column within every pincode-month.
enrol_monthly = (
    enrol
    .groupby(["state", "district_final", "pincode", "month"], as_index=False)[
        ["age_0_5", "age_5_17", "age_18_greater"]
    ]
    .sum()
)


In [8]:
# Monthly demographic-update counts per (state, district, pincode): sum both
# age-band columns within every pincode-month.
demo_monthly = (
    demo
    .groupby(["state", "district_final", "pincode", "month"], as_index=False)[
        ["demo_age_5_17", "demo_age_17_"]
    ]
    .sum()
)


In [9]:
# Monthly biometric-update counts per (state, district, pincode): sum both
# age-band columns within every pincode-month.
bio_monthly = (
    bio
    .groupby(["state", "district_final", "pincode", "month"], as_index=False)[
        ["bio_age_5_17", "bio_age_17_"]
    ]
    .sum()
)


In [10]:
# Build one row per (state, district, pincode, month) holding the enrolment
# counts with the demographic- and biometric-update counts joined alongside.
#
# All three inputs come from a groupby on exactly these keys, so each key
# combination should appear at most once per table. `validate="one_to_one"`
# makes pandas raise MergeError if that ever stops being true, instead of
# silently multiplying rows and inflating every downstream total.
#
# NOTE(review): both joins are `how="left"` starting from the enrolment table,
# so a pincode-month that has demographic/biometric updates but no enrolment
# row is dropped here. Confirm that is intended before treating the monthly
# update totals derived from this table as complete.
monthly = (
    enrol_monthly
    .merge(
        demo_monthly,
        on=["state", "district_final", "pincode", "month"],
        how="left",
        validate="one_to_one",
    )
    .merge(
        bio_monthly,
        on=["state", "district_final", "pincode", "month"],
        how="left",
        validate="one_to_one",
    )
)


In [11]:
# Pincode-months with no matching demographic/biometric row come out of the
# left joins as NaN; treat those as zero updates. Only the four update-count
# columns are filled — every other column is left untouched.
monthly = monthly.fillna(
    value=dict.fromkeys(
        ["demo_age_5_17", "demo_age_17_", "bio_age_5_17", "bio_age_17_"],
        0,
    )
)


In [12]:
# Total updates per row = sum of the two age bands, computed separately for
# demographic and biometric updates.
monthly["total_demo_updates"] = monthly["demo_age_5_17"].add(monthly["demo_age_17_"])
monthly["total_bio_updates"] = monthly["bio_age_5_17"].add(monthly["bio_age_17_"])


In [13]:
# (rows, columns) of the combined pincode-month table.
monthly.shape


(5916, 13)

In [14]:
# Preview the first five rows of the combined pincode-month table.
monthly.head(n=5)


Unnamed: 0,state,district_final,pincode,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,total_demo_updates,total_bio_updates
0,chhattisgarh,bilaspur,495001,2025-09,296,44,0,154.0,1664.0,444.0,1236.0,1818.0,1680.0
1,chhattisgarh,bilaspur,495001,2025-10,201,26,0,106.0,1479.0,1327.0,929.0,1585.0,2256.0
2,chhattisgarh,bilaspur,495001,2025-11,210,49,3,238.0,2426.0,2763.0,2065.0,2664.0,4828.0
3,chhattisgarh,bilaspur,495001,2025-12,132,12,1,460.0,3684.0,3509.0,2475.0,4144.0,5984.0
4,chhattisgarh,bilaspur,495004,2025-09,96,10,0,51.0,432.0,185.0,424.0,483.0,609.0


In [15]:
# Collapse the pincode-level table to one row per month, summing biometric
# and demographic update totals across all pincodes.
monthly_totals = (
    monthly
    .groupby("month", as_index=False)
    .agg({
        "total_bio_updates": "sum",
        "total_demo_updates": "sum",
    })
)

monthly_totals


Unnamed: 0,month,total_bio_updates,total_demo_updates
0,2025-03,10543.0,13606.0
1,2025-04,87206.0,54159.0
2,2025-05,53469.0,57128.0
3,2025-06,43135.0,32220.0
4,2025-07,158781.0,95918.0
5,2025-09,671973.0,481858.0
6,2025-10,402184.0,372973.0
7,2025-11,760106.0,838582.0
8,2025-12,849166.0,869839.0


In [16]:
# Share of each month's updates that are biometric:
# bio / (bio + demo). A month with zero updates of both kinds yields NaN.
monthly_totals["bio_share"] = monthly_totals["total_bio_updates"].div(
    monthly_totals["total_bio_updates"].add(monthly_totals["total_demo_updates"])
)

monthly_totals.loc[:, ["month", "bio_share"]]


Unnamed: 0,month,bio_share
0,2025-03,0.436581
1,2025-04,0.616885
2,2025-05,0.483458
3,2025-06,0.572424
4,2025-07,0.623406
5,2025-09,0.582384
6,2025-10,0.518842
7,2025-11,0.475456
8,2025-12,0.493987


In [17]:
# Count the distinct pincodes that appear in the combined table each month.
active_pincodes = (
    monthly
    .groupby("month", as_index=False)
    .agg(active_pincodes=("pincode", "nunique"))
)

active_pincodes


Unnamed: 0,month,active_pincodes
0,2025-03,5
1,2025-04,65
2,2025-05,41
3,2025-06,29
4,2025-07,98
5,2025-09,1436
6,2025-10,1407
7,2025-11,1423
8,2025-12,1396


In [18]:
# Roll pincode-level update totals up to one row per (month, state, district).
district_monthly = (
    monthly
    .groupby(["month", "state", "district_final"], as_index=False)[
        ["total_bio_updates", "total_demo_updates"]
    ]
    .sum()
)


In [19]:
# Top five districts by biometric updates within each month: order months
# ascending and biometric totals descending, then keep the first five rows
# of every month group.
(
    district_monthly
    .sort_values(by=["month", "total_bio_updates"], ascending=[True, False])
    .groupby("month")
    .head(n=5)
)


Unnamed: 0,month,state,district_final,total_bio_updates,total_demo_updates
3,2025-03,uttar pradesh,kanpur nagar,2728.0,4109.0
4,2025-03,uttar pradesh,lucknow,2618.0,2953.0
0,2025-03,chhattisgarh,raipur,2616.0,2145.0
1,2025-03,maharashtra,thane,1630.0,2077.0
2,2025-03,uttar pradesh,gorakhpur,951.0,2322.0
10,2025-04,maharashtra,thane,20866.0,11979.0
12,2025-04,rajasthan,jaipur,11203.0,12753.0
18,2025-04,uttar pradesh,kanpur nagar,10987.0,4928.0
19,2025-04,uttar pradesh,lucknow,10598.0,4589.0
13,2025-04,rajasthan,jodhpur,7668.0,3223.0
