In [None]:
import numpy as np
import pandas as pd
from glob import glob
import matplotlib.pyplot as plt

In [265]:
pd.set_option("display.max_rows", 2000)
pd.set_option("display.max_columns", None)

In [None]:
enrol_df = pd.read_csv(r"D:\UIDAI hackathon\North\delhi\enrollment.csv")
demo_df = pd.read_csv(r"D:\UIDAI hackathon\North\delhi\demographic.csv")
bio_df = pd.read_csv(r"D:\UIDAI hackathon\North\delhi\biometric.csv")

In [267]:
all_dfs = [enrol_df, demo_df, bio_df]

In [268]:
enrol_df.head()

Unnamed: 0,date,state,district,pincode,age_0_5,age_5_17,age_18_greater
0,01-09-2025,Delhi,Central Delhi,110005,4,5,0
1,01-09-2025,Delhi,East Delhi,110031,9,3,1
2,01-09-2025,Delhi,East Delhi,110053,2,1,0
3,01-09-2025,Delhi,New Delhi,110001,2,0,0
4,01-09-2025,Delhi,North Delhi,110007,10,2,0


In [269]:
enrol_df["date"] = pd.to_datetime(enrol_df["date"], dayfirst=True)
demo_df["date"] = pd.to_datetime(demo_df["date"], dayfirst=True)
bio_df["date"] = pd.to_datetime(bio_df["date"], dayfirst=True)

In [270]:
print(enrol_df["district"].nunique(), demo_df["district"].nunique(), bio_df["district"].nunique())

14 13 14


In [271]:
enrol_df["district"].value_counts().sort_index()

district
Central Delhi        579
East Delhi           683
Najafgarh             30
New Delhi            128
North Delhi          340
North East           252
North East   *         1
North East Delhi     364
North West Delhi    1410
Shahdara             242
South Delhi         1554
South East Delhi      40
South West Delhi    1646
West Delhi           814
Name: count, dtype: int64

In [272]:
bio_df["district"].value_counts().sort_index()

district
Central Delhi       2138
East Delhi          2106
Najafgarh            219
New Delhi            733
North Delhi          896
North East           670
North East   *         3
North East Delhi    1023
North West Delhi    4057
Shahdara            1010
South Delhi         4606
South East Delhi     235
South West Delhi    5811
West Delhi          2504
Name: count, dtype: int64

In [273]:
demo_df["district"].value_counts().sort_index()

district
Central Delhi        904
East Delhi           932
Najafgarh             81
New Delhi            331
North Delhi          413
North East           300
North East Delhi     435
North West Delhi    1849
Shahdara             461
South Delhi         2077
South East Delhi      95
South West Delhi    2572
West Delhi          1142
Name: count, dtype: int64

In [274]:
cleanup_map = {
    "North East": "North East Delhi",
    "North East   *": "North East Delhi",
    "North East Delhi *": "North East Delhi"
}

for df in all_dfs:
    df["district"] = df["district"].replace(cleanup_map)
    
print(enrol_df["district"].nunique(), demo_df["district"].nunique(), bio_df["district"].nunique())

12 12 12


In [275]:
enrol_df["district"] = enrol_df["district"].str.lower()
demo_df["district"] = demo_df["district"].str.lower()
bio_df["district"] = bio_df["district"].str.lower()

In [276]:
print(enrol_df["pincode"].nunique(), demo_df["pincode"].nunique(), bio_df["pincode"].nunique())

93 93 94


In [277]:
# Analyze pincode differences
enrol_pincodes = set(enrol_df["pincode"].dropna())
demo_pincodes = set(demo_df["pincode"].dropna())
bio_pincodes = set(bio_df["pincode"].dropna())

print("Enrollment pincodes:", len(enrol_pincodes))
print("Demographic pincodes:", len(demo_pincodes))
print("Biometric pincodes:", len(bio_pincodes))
print()

# Find differences
only_in_enrol = enrol_pincodes - demo_pincodes - bio_pincodes
only_in_demo = demo_pincodes - enrol_pincodes - bio_pincodes
only_in_bio = bio_pincodes - enrol_pincodes - demo_pincodes
in_all = enrol_pincodes & demo_pincodes & bio_pincodes

print(f"Pincodes only in enrollment: {len(only_in_enrol)}")
print(f"Pincodes only in demographic: {len(only_in_demo)}")
print(f"Pincodes only in biometric: {len(only_in_bio)}")
print(f"Pincodes in all three: {len(in_all)}")
print()

# Check for NULL/NaN values
print("NULL pincodes:")
print(f"Enrollment: {enrol_df["pincode"].isna().sum()}")
print(f"Demographic: {demo_df["pincode"].isna().sum()}")
print(f"Biometric: {bio_df["pincode"].isna().sum()}")
print()

# Show some examples of unique pincodes
print("Sample pincodes only in enrollment:", list(only_in_enrol))
print("Sample pincodes only in demographic:", list(only_in_demo))
print("Sample pincodes only in biometric:", list(only_in_bio))

Enrollment pincodes: 93
Demographic pincodes: 93
Biometric pincodes: 94

Pincodes only in enrollment: 0
Pincodes only in demographic: 0
Pincodes only in biometric: 1
Pincodes in all three: 93

NULL pincodes:
Enrollment: 0
Demographic: 0
Biometric: 0

Sample pincodes only in enrollment: []
Sample pincodes only in demographic: []
Sample pincodes only in biometric: [110069]


In [278]:
for df in all_dfs:
    df["month"] = df["date"].dt.month

In [279]:
enrol_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8083 entries, 0 to 8082
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   date            8083 non-null   datetime64[ns]
 1   state           8083 non-null   object        
 2   district        8083 non-null   object        
 3   pincode         8083 non-null   int64         
 4   age_0_5         8083 non-null   int64         
 5   age_5_17        8083 non-null   int64         
 6   age_18_greater  8083 non-null   int64         
 7   month           8083 non-null   int32         
dtypes: datetime64[ns](1), int32(1), int64(4), object(2)
memory usage: 473.7+ KB


In [280]:
demo_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11592 entries, 0 to 11591
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   date           11592 non-null  datetime64[ns]
 1   state          11592 non-null  object        
 2   district       11592 non-null  object        
 3   pincode        11592 non-null  int64         
 4   demo_age_5_17  11592 non-null  int64         
 5   demo_age_17_   11592 non-null  int64         
 6   month          11592 non-null  int32         
dtypes: datetime64[ns](1), int32(1), int64(3), object(2)
memory usage: 588.8+ KB


In [281]:
bio_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26011 entries, 0 to 26010
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   date          26011 non-null  datetime64[ns]
 1   state         26011 non-null  object        
 2   district      26011 non-null  object        
 3   pincode       26011 non-null  int64         
 4   bio_age_5_17  26011 non-null  int64         
 5   bio_age_17_   26011 non-null  int64         
 6   month         26011 non-null  int32         
dtypes: datetime64[ns](1), int32(1), int64(3), object(2)
memory usage: 1.3+ MB


In [282]:
enrol_df.head()

Unnamed: 0,date,state,district,pincode,age_0_5,age_5_17,age_18_greater,month
0,2025-09-01,Delhi,central delhi,110005,4,5,0,9
1,2025-09-01,Delhi,east delhi,110031,9,3,1,9
2,2025-09-01,Delhi,east delhi,110053,2,1,0,9
3,2025-09-01,Delhi,new delhi,110001,2,0,0,9
4,2025-09-01,Delhi,north delhi,110007,10,2,0,9


In [283]:
demo_df.head()

Unnamed: 0,date,state,district,pincode,demo_age_5_17,demo_age_17_,month
0,2025-03-01,Delhi,north west delhi,110088,78,1319,3
1,2025-03-01,Delhi,east delhi,110053,22,182,3
2,2025-03-01,Delhi,south west delhi,110072,45,497,3
3,2025-03-01,Delhi,south west delhi,110038,60,417,3
4,2025-03-01,Delhi,north delhi,110006,196,2469,3


In [284]:
bio_df.head()

Unnamed: 0,date,state,district,pincode,bio_age_5_17,bio_age_17_,month
0,2026-01-02,Delhi,central delhi,110008,21,24,1
1,2026-01-02,Delhi,east delhi,110051,24,35,1
2,2026-01-02,Delhi,north delhi,110054,11,20,1
3,2026-01-02,Delhi,north west delhi,110033,34,78,1
4,2026-01-02,Delhi,north west delhi,110042,69,111,1


In [285]:
enrol_agg = enrol_df.groupby(["state", "district", "month"])[["age_0_5", "age_5_17", "age_18_greater"]].sum().reset_index()
demo_agg = demo_df.groupby(["state", "district", "month"])[["demo_age_5_17", "demo_age_17_"]].sum().reset_index()
bio_agg = bio_df.groupby(["state", "district", "month"])[["bio_age_5_17", "bio_age_17_"]].sum().reset_index()

combined_df = enrol_agg.merge(demo_agg, on = ["state", "district", "month"], how = "left").merge(bio_agg, on = ["state", "district", "month"], how = "left")
combined_df.fillna(0, inplace=True)
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_
0,Delhi,central delhi,1,79,27,3,26.0,566.0,533,732
1,Delhi,central delhi,4,80,41,12,932.0,8203.0,5628,5728
2,Delhi,central delhi,5,208,105,20,681.0,6032.0,5276,8060
3,Delhi,central delhi,6,325,156,24,262.0,2850.0,2423,4540
4,Delhi,central delhi,7,773,328,51,473.0,3346.0,2125,4285


In [286]:
combined_df["E"] = combined_df["age_0_5"] + combined_df["age_5_17"] + combined_df["age_18_greater"]
combined_df["DU"] = combined_df["demo_age_5_17"] + combined_df["demo_age_17_"]
combined_df["BU"] = combined_df["bio_age_5_17"] + combined_df["bio_age_17_"]
combined_df["U"] = combined_df["DU"] + combined_df["BU"]
combined_df["T"] = combined_df["E"] + combined_df["U"]
combined_df.head(20)

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T
0,Delhi,central delhi,1,79,27,3,26.0,566.0,533,732,109,592.0,1265,1857.0,1966.0
1,Delhi,central delhi,4,80,41,12,932.0,8203.0,5628,5728,133,9135.0,11356,20491.0,20624.0
2,Delhi,central delhi,5,208,105,20,681.0,6032.0,5276,8060,333,6713.0,13336,20049.0,20382.0
3,Delhi,central delhi,6,325,156,24,262.0,2850.0,2423,4540,505,3112.0,6963,10075.0,10580.0
4,Delhi,central delhi,7,773,328,51,473.0,3346.0,2125,4285,1152,3819.0,6410,10229.0,11381.0
5,Delhi,central delhi,9,494,195,36,509.0,3109.0,2571,3836,725,3618.0,6407,10025.0,10750.0
6,Delhi,central delhi,10,295,80,18,164.0,1995.0,1673,2768,393,2159.0,4441,6600.0,6993.0
7,Delhi,central delhi,11,442,128,35,366.0,4404.0,1779,2534,605,4770.0,4313,9083.0,9688.0
8,Delhi,central delhi,12,356,67,8,515.0,5768.0,2468,2699,431,6283.0,5167,11450.0,11881.0
9,Delhi,east delhi,1,257,95,4,121.0,1300.0,1144,1218,356,1421.0,2362,3783.0,4139.0


In [287]:
district_monthly_counts = combined_df.groupby(["district", "month"]).agg(total_months = ("month", "count"), active_months = ("T", lambda x : (x > 0).sum())).reset_index()
district_monthly_counts.head()

Unnamed: 0,district,month,total_months,active_months
0,central delhi,1,1,1
1,central delhi,4,1,1
2,central delhi,5,1,1
3,central delhi,6,1,1
4,central delhi,7,1,1


In [288]:
district_monthly_counts["zero_months"] = district_monthly_counts["total_months"] - district_monthly_counts["active_months"]
district_monthly_counts["activity_ratio"] = district_monthly_counts["active_months"] / district_monthly_counts["total_months"]
district_monthly_counts["zero_month_ratio"] = district_monthly_counts["zero_months"] / district_monthly_counts["total_months"]

combined_df = combined_df.merge(district_monthly_counts[["district", "month", "activity_ratio", "zero_month_ratio"]], on = ["district", "month"], how = "left")
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio,zero_month_ratio
0,Delhi,central delhi,1,79,27,3,26.0,566.0,533,732,109,592.0,1265,1857.0,1966.0,1.0,0.0
1,Delhi,central delhi,4,80,41,12,932.0,8203.0,5628,5728,133,9135.0,11356,20491.0,20624.0,1.0,0.0
2,Delhi,central delhi,5,208,105,20,681.0,6032.0,5276,8060,333,6713.0,13336,20049.0,20382.0,1.0,0.0
3,Delhi,central delhi,6,325,156,24,262.0,2850.0,2423,4540,505,3112.0,6963,10075.0,10580.0,1.0,0.0
4,Delhi,central delhi,7,773,328,51,473.0,3346.0,2125,4285,1152,3819.0,6410,10229.0,11381.0,1.0,0.0


In [289]:
combined_df = combined_df.merge(district_monthly_counts[["district", "month", "activity_ratio", "zero_month_ratio"]], on = ["district", "month"], how = "left")
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio_x,zero_month_ratio_x,activity_ratio_y,zero_month_ratio_y
0,Delhi,central delhi,1,79,27,3,26.0,566.0,533,732,109,592.0,1265,1857.0,1966.0,1.0,0.0,1.0,0.0
1,Delhi,central delhi,4,80,41,12,932.0,8203.0,5628,5728,133,9135.0,11356,20491.0,20624.0,1.0,0.0,1.0,0.0
2,Delhi,central delhi,5,208,105,20,681.0,6032.0,5276,8060,333,6713.0,13336,20049.0,20382.0,1.0,0.0,1.0,0.0
3,Delhi,central delhi,6,325,156,24,262.0,2850.0,2423,4540,505,3112.0,6963,10075.0,10580.0,1.0,0.0,1.0,0.0
4,Delhi,central delhi,7,773,328,51,473.0,3346.0,2125,4285,1152,3819.0,6410,10229.0,11381.0,1.0,0.0,1.0,0.0


In [290]:
district_volume_metrics = combined_df.groupby(["state", "district"]).agg(avg_monthly_enrolment = ("E", "mean"),
                                               monthly_valatility = ("T", lambda x: x.std(ddof=0) / x.mean() if x.mean() > 0 else 0),
                                               peak_load_ratio = ("T", lambda x: x.max() / x.mean() if x.mean() > 0 else 0)).reset_index()


combined_df = combined_df.merge(district_volume_metrics, on=["state", "district"], how="left")
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio_x,zero_month_ratio_x,activity_ratio_y,zero_month_ratio_y,avg_monthly_enrolment,monthly_valatility,peak_load_ratio
0,Delhi,central delhi,1,79,27,3,26.0,566.0,533,732,109,592.0,1265,1857.0,1966.0,1.0,0.0,1.0,0.0,487.333333,0.47995,1.780575
1,Delhi,central delhi,4,80,41,12,932.0,8203.0,5628,5728,133,9135.0,11356,20491.0,20624.0,1.0,0.0,1.0,0.0,487.333333,0.47995,1.780575
2,Delhi,central delhi,5,208,105,20,681.0,6032.0,5276,8060,333,6713.0,13336,20049.0,20382.0,1.0,0.0,1.0,0.0,487.333333,0.47995,1.780575
3,Delhi,central delhi,6,325,156,24,262.0,2850.0,2423,4540,505,3112.0,6963,10075.0,10580.0,1.0,0.0,1.0,0.0,487.333333,0.47995,1.780575
4,Delhi,central delhi,7,773,328,51,473.0,3346.0,2125,4285,1152,3819.0,6410,10229.0,11381.0,1.0,0.0,1.0,0.0,487.333333,0.47995,1.780575


In [291]:
district_update_burden = combined_df.groupby(["state", "district"]).agg(avg_monthly_enrollments = ("E", "sum"), avg_monthly_demo_updates = ("DU", "sum"), avg_monthly_bio_updates = ("BU", "sum")).reset_index()

district_update_burden["U"] = district_update_burden["avg_monthly_demo_updates"] + district_update_burden["avg_monthly_bio_updates"]
district_update_burden["biometric_burden"] = district_update_burden["avg_monthly_bio_updates"] / (district_update_burden["avg_monthly_bio_updates"] + district_update_burden["avg_monthly_demo_updates"])
district_update_burden["update_dominant"] = np.where(district_update_burden["U"] > district_update_burden["avg_monthly_enrollments"], 1, 0)
district_update_burden["enrollment_update_balance"] = district_update_burden["avg_monthly_enrollments"] / (district_update_burden["avg_monthly_enrollments"] + district_update_burden["U"])

combined_df = combined_df.merge(district_update_burden[["state", "district", "biometric_burden", "update_dominant", "enrollment_update_balance"]], on=["state", "district"], how="left")
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio_x,zero_month_ratio_x,activity_ratio_y,zero_month_ratio_y,avg_monthly_enrolment,monthly_valatility,peak_load_ratio,biometric_burden,update_dominant,enrollment_update_balance
0,Delhi,central delhi,1,79,27,3,26.0,566.0,533,732,109,592.0,1265,1857.0,1966.0,1.0,0.0,1.0,0.0,487.333333,0.47995,1.780575,0.597422,1,0.042074
1,Delhi,central delhi,4,80,41,12,932.0,8203.0,5628,5728,133,9135.0,11356,20491.0,20624.0,1.0,0.0,1.0,0.0,487.333333,0.47995,1.780575,0.597422,1,0.042074
2,Delhi,central delhi,5,208,105,20,681.0,6032.0,5276,8060,333,6713.0,13336,20049.0,20382.0,1.0,0.0,1.0,0.0,487.333333,0.47995,1.780575,0.597422,1,0.042074
3,Delhi,central delhi,6,325,156,24,262.0,2850.0,2423,4540,505,3112.0,6963,10075.0,10580.0,1.0,0.0,1.0,0.0,487.333333,0.47995,1.780575,0.597422,1,0.042074
4,Delhi,central delhi,7,773,328,51,473.0,3346.0,2125,4285,1152,3819.0,6410,10229.0,11381.0,1.0,0.0,1.0,0.0,487.333333,0.47995,1.780575,0.597422,1,0.042074


In [292]:
combined_df.drop(["activity_ratio_y", "zero_month_ratio_y"], axis=1, inplace=True)
combined_df.rename(columns={"activity_ratio_x": "activity_ratio", "zero_month_ratio_x": "zero_month_ratio"}, inplace=True)

In [293]:
combined_df = combined_df.groupby(["state", "district"], as_index = False).first()
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio,zero_month_ratio,avg_monthly_enrolment,monthly_valatility,peak_load_ratio,biometric_burden,update_dominant,enrollment_update_balance
0,Delhi,central delhi,1,79,27,3,26.0,566.0,533,732,109,592.0,1265,1857.0,1966.0,1.0,0.0,487.333333,0.47995,1.780575,0.597422,1,0.042074
1,Delhi,east delhi,1,257,95,4,121.0,1300.0,1144,1218,356,1421.0,2362,3783.0,4139.0,1.0,0.0,1334.888889,0.424628,1.690609,0.509042,1,0.04631
2,Delhi,najafgarh,1,0,3,0,0.0,6.0,0,10,3,6.0,10,16.0,19.0,1.0,0.0,6.166667,0.455196,1.47644,0.69326,1,0.048429
3,Delhi,new delhi,1,6,1,0,18.0,34.0,55,39,7,52.0,94,146.0,153.0,1.0,0.0,240.142857,0.423448,1.564781,0.600401,1,0.219108
4,Delhi,north delhi,1,164,52,1,26.0,193.0,140,172,217,219.0,312,531.0,748.0,1.0,0.0,719.428571,0.446617,1.430921,0.529053,1,0.062664


In [294]:
def normalize(x):
    maxx, minx = x.max(), x.min()
    if maxx == minx:
        return x * 0 + 0.5
    normalized = (x - minx) / (maxx - minx)
    return normalized

def inverse_normalize(x):
    inversed = 1 - normalize(x)
    return inversed

In [295]:
combined_df["access"] = (combined_df["activity_ratio"] + normalize(combined_df["avg_monthly_enrolment"])) / 2
combined_df["responsiveness"] = normalize(combined_df["U"] / (combined_df["E"] + combined_df["U"]))
combined_df["inclusion"] = normalize((combined_df["age_0_5"] + combined_df["age_5_17"]) / combined_df["E"])
combined_df["stability"] = (inverse_normalize(combined_df["monthly_valatility"]) + inverse_normalize(combined_df["peak_load_ratio"])) / 2
combined_df["visibility"] = combined_df["activity_ratio"]

combined_df["DEI"] = (combined_df["access"] + combined_df["responsiveness"] + combined_df["inclusion"] + combined_df["stability"] + combined_df["visibility"]) / 5
combined_df["ASS"] = (inverse_normalize(combined_df["activity_ratio"]) + inverse_normalize(combined_df["avg_monthly_enrolment"])) / 2
combined_df["UBS"] = (normalize(combined_df["biometric_burden"]) + normalize(combined_df["update_dominant"])) / 2
combined_df["SRS"] = (normalize(combined_df["monthly_valatility"]) + normalize(combined_df["zero_month_ratio"])) / 2

combined_df.head()


Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio,zero_month_ratio,avg_monthly_enrolment,monthly_valatility,peak_load_ratio,biometric_burden,update_dominant,enrollment_update_balance,access,responsiveness,inclusion,stability,visibility,DEI,ASS,UBS,SRS
0,Delhi,central delhi,1,79,27,3,26.0,566.0,533,732,109,592.0,1265,1857.0,1966.0,1.0,0.0,487.333333,0.47995,1.780575,0.597422,1,0.042074,0.609514,0.960341,0.688073,0.48026,1.0,0.747638,0.640486,0.642942,0.495392
1,Delhi,east delhi,1,257,95,4,121.0,1300.0,1144,1218,356,1421.0,2362,3783.0,4139.0,1.0,0.0,1334.888889,0.424628,1.690609,0.509042,1,0.04631,0.802418,0.835242,0.872659,0.725095,1.0,0.847083,0.447582,0.544214,0.308513
2,Delhi,najafgarh,1,0,3,0,0.0,6.0,0,10,3,6.0,10,16.0,19.0,1.0,0.0,6.166667,0.455196,1.47644,0.69326,1,0.048429,0.5,0.541065,1.0,0.759802,1.0,0.760173,0.75,0.75,0.411774
3,Delhi,new delhi,1,6,1,0,18.0,34.0,55,39,7,52.0,94,146.0,153.0,1.0,0.0,240.142857,0.423448,1.564781,0.600401,1,0.219108,0.553253,1.0,1.0,0.81014,1.0,0.872679,0.696747,0.646269,0.304527
4,Delhi,north delhi,1,164,52,1,26.0,193.0,140,172,217,219.0,312,531.0,748.0,1.0,0.0,719.428571,0.446617,1.430921,0.529053,1,0.062664,0.662339,0.0,0.947773,0.818105,1.0,0.685643,0.587661,0.566568,0.382794


In [296]:
combined_df.drop(["access", "responsiveness", "inclusion", "stability", "visibility"], axis=1, inplace=True)
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio,zero_month_ratio,avg_monthly_enrolment,monthly_valatility,peak_load_ratio,biometric_burden,update_dominant,enrollment_update_balance,DEI,ASS,UBS,SRS
0,Delhi,central delhi,1,79,27,3,26.0,566.0,533,732,109,592.0,1265,1857.0,1966.0,1.0,0.0,487.333333,0.47995,1.780575,0.597422,1,0.042074,0.747638,0.640486,0.642942,0.495392
1,Delhi,east delhi,1,257,95,4,121.0,1300.0,1144,1218,356,1421.0,2362,3783.0,4139.0,1.0,0.0,1334.888889,0.424628,1.690609,0.509042,1,0.04631,0.847083,0.447582,0.544214,0.308513
2,Delhi,najafgarh,1,0,3,0,0.0,6.0,0,10,3,6.0,10,16.0,19.0,1.0,0.0,6.166667,0.455196,1.47644,0.69326,1,0.048429,0.760173,0.75,0.75,0.411774
3,Delhi,new delhi,1,6,1,0,18.0,34.0,55,39,7,52.0,94,146.0,153.0,1.0,0.0,240.142857,0.423448,1.564781,0.600401,1,0.219108,0.872679,0.696747,0.646269,0.304527
4,Delhi,north delhi,1,164,52,1,26.0,193.0,140,172,217,219.0,312,531.0,748.0,1.0,0.0,719.428571,0.446617,1.430921,0.529053,1,0.062664,0.685643,0.587661,0.566568,0.382794


In [None]:
combined_df.to_csv(r"D:\UIDAI hackathon\North\delhi\delhi_district_analysis.csv", index=False)
final_df = combined_df[["state", "district", "DEI", "ASS", "UBS", "SRS"]]
final_df.to_csv(r"D:\UIDAI hackathon\North\delhi\delhi_district_final_scores.csv", index=False)