In [None]:
import numpy as np
import pandas as pd
from glob import glob
import matplotlib.pyplot as plt

In [20]:
pd.set_option("display.max_rows", 2000)
pd.set_option("display.max_columns", None)

In [None]:
enrol_df = pd.read_csv(r"D:\UIDAI hackathon\North\UK\enrollment.csv")
demo_df = pd.read_csv(r"D:\UIDAI hackathon\North\UK\demographic.csv")
bio_df = pd.read_csv(r"D:\UIDAI hackathon\North\UK\biometric.csv")

In [22]:
all_dfs = [enrol_df, demo_df, bio_df]

In [23]:
enrol_df.head()

Unnamed: 0,date,state,district,pincode,age_0_5,age_5_17,age_18_greater
0,15-03-2025,Uttarakhand,Haridwar,247667,17,15,28
1,03-09-2025,Uttarakhand,Chamoli,246427,2,0,0
2,03-09-2025,Uttarakhand,Dehradun,248008,5,0,0
3,03-09-2025,Uttarakhand,Dehradun,248009,2,0,0
4,03-09-2025,Uttarakhand,Dehradun,248121,1,0,0


In [24]:
enrol_df["date"] = pd.to_datetime(enrol_df["date"], dayfirst=True)
demo_df["date"] = pd.to_datetime(demo_df["date"], dayfirst=True)
bio_df["date"] = pd.to_datetime(bio_df["date"], dayfirst=True)

In [25]:
print(enrol_df["district"].nunique(), demo_df["district"].nunique(), bio_df["district"].nunique())

15 15 15


In [26]:
enrol_df["district"].value_counts().sort_index()

district
Almora                965
Bageshwar             492
Chamoli               777
Champawat             388
Dehradun             1801
Garhwal                51
Hardwar               138
Haridwar             1283
Nainital              895
Pauri Garhwal        1180
Pithoragarh           827
Rudraprayag           542
Tehri Garhwal        1113
Udham Singh Nagar     938
Uttarkashi            512
Name: count, dtype: int64

In [27]:
bio_df["district"].value_counts().sort_index()

district
Almora               6648
Bageshwar            2821
Chamoli              5106
Champawat            1883
Dehradun             8281
Garhwal               651
Hardwar              1937
Haridwar             4444
Nainital             4459
Pauri Garhwal        9166
Pithoragarh          5136
Rudraprayag          2654
Tehri Garhwal        5276
Udham Singh Nagar    3563
Uttarkashi           2302
Name: count, dtype: int64

In [28]:
demo_df["district"].value_counts().sort_index()

district
Almora               2839
Bageshwar            1171
Chamoli              2049
Champawat             814
Dehradun             3504
Garhwal               215
Hardwar               737
Haridwar             2044
Nainital             1894
Pauri Garhwal        3489
Pithoragarh          2208
Rudraprayag          1095
Tehri Garhwal        2136
Udham Singh Nagar    1504
Uttarkashi            950
Name: count, dtype: int64

In [29]:
cleanup_map = {
    "Hardwar": "Haridwar",
    "Garhwal": "Pauri Garhwal"
}

for df in all_dfs:
    df["district"] = df["district"].replace(cleanup_map)
    
print(enrol_df["district"].nunique(), demo_df["district"].nunique(), bio_df["district"].nunique())

13 13 13


In [30]:
enrol_df["district"] = enrol_df["district"].str.lower()
demo_df["district"] = demo_df["district"].str.lower()
bio_df["district"] = bio_df["district"].str.lower()

In [31]:
print(enrol_df["pincode"].nunique(), demo_df["pincode"].nunique(), bio_df["pincode"].nunique())

292 296 300


In [32]:
# Analyze pincode differences
enrol_pincodes = set(enrol_df["pincode"].dropna())
demo_pincodes = set(demo_df["pincode"].dropna())
bio_pincodes = set(bio_df["pincode"].dropna())

print("Enrollment pincodes:", len(enrol_pincodes))
print("Demographic pincodes:", len(demo_pincodes))
print("Biometric pincodes:", len(bio_pincodes))
print()

# Find differences
only_in_enrol = enrol_pincodes - demo_pincodes - bio_pincodes
only_in_demo = demo_pincodes - enrol_pincodes - bio_pincodes
only_in_bio = bio_pincodes - enrol_pincodes - demo_pincodes
in_all = enrol_pincodes & demo_pincodes & bio_pincodes

print(f"Pincodes only in enrollment: {len(only_in_enrol)}")
print(f"Pincodes only in demographic: {len(only_in_demo)}")
print(f"Pincodes only in biometric: {len(only_in_bio)}")
print(f"Pincodes in all three: {len(in_all)}")
print()

# Check for NULL/NaN values
print("NULL pincodes:")
print(f"Enrollment: {enrol_df["pincode"].isna().sum()}")
print(f"Demographic: {demo_df["pincode"].isna().sum()}")
print(f"Biometric: {bio_df["pincode"].isna().sum()}")
print()

# Show some examples of unique pincodes
print("Sample pincodes only in enrollment:", list(only_in_enrol))
print("Sample pincodes only in demographic:", list(only_in_demo))
print("Sample pincodes only in biometric:", list(only_in_bio))

Enrollment pincodes: 292
Demographic pincodes: 296
Biometric pincodes: 300

Pincodes only in enrollment: 0
Pincodes only in demographic: 0
Pincodes only in biometric: 4
Pincodes in all three: 292

NULL pincodes:
Enrollment: 0
Demographic: 0
Biometric: 0

Sample pincodes only in enrollment: []
Sample pincodes only in demographic: []
Sample pincodes only in biometric: [263144, 251001, 246445, 263686]


In [33]:
for df in all_dfs:
    df["month"] = df["date"].dt.month

In [34]:
enrol_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11902 entries, 0 to 11901
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   date            11902 non-null  datetime64[ns]
 1   state           11902 non-null  object        
 2   district        11902 non-null  object        
 3   pincode         11902 non-null  int64         
 4   age_0_5         11902 non-null  int64         
 5   age_5_17        11902 non-null  int64         
 6   age_18_greater  11902 non-null  int64         
 7   month           11902 non-null  int32         
dtypes: datetime64[ns](1), int32(1), int64(4), object(2)
memory usage: 697.5+ KB


In [35]:
demo_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26649 entries, 0 to 26648
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   date           26649 non-null  datetime64[ns]
 1   state          26649 non-null  object        
 2   district       26649 non-null  object        
 3   pincode        26649 non-null  int64         
 4   demo_age_5_17  26649 non-null  int64         
 5   demo_age_17_   26649 non-null  int64         
 6   month          26649 non-null  int32         
dtypes: datetime64[ns](1), int32(1), int64(3), object(2)
memory usage: 1.3+ MB


In [36]:
bio_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64327 entries, 0 to 64326
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   date          64327 non-null  datetime64[ns]
 1   state         64327 non-null  object        
 2   district      64327 non-null  object        
 3   pincode       64327 non-null  int64         
 4   bio_age_5_17  64327 non-null  int64         
 5   bio_age_17_   64327 non-null  int64         
 6   month         64327 non-null  int32         
dtypes: datetime64[ns](1), int32(1), int64(3), object(2)
memory usage: 3.2+ MB


In [37]:
enrol_df.head()

Unnamed: 0,date,state,district,pincode,age_0_5,age_5_17,age_18_greater,month
0,2025-03-15,Uttarakhand,haridwar,247667,17,15,28,3
1,2025-09-03,Uttarakhand,chamoli,246427,2,0,0,9
2,2025-09-03,Uttarakhand,dehradun,248008,5,0,0,9
3,2025-09-03,Uttarakhand,dehradun,248009,2,0,0,9
4,2025-09-03,Uttarakhand,dehradun,248121,1,0,0,9


In [38]:
demo_df.head()

Unnamed: 0,date,state,district,pincode,demo_age_5_17,demo_age_17_,month
0,2025-03-01,Uttarakhand,haridwar,249403,55,402,3
1,2025-03-01,Uttarakhand,udham singh nagar,244716,11,60,3
2,2025-03-01,Uttarakhand,udham singh nagar,263160,80,586,3
3,2025-03-01,Uttarakhand,dehradun,248171,71,578,3
4,2025-03-01,Uttarakhand,haridwar,247665,31,502,3


In [39]:
bio_df.head()

Unnamed: 0,date,state,district,pincode,bio_age_5_17,bio_age_17_,month
0,2025-03-01,Uttarakhand,chamoli,246444,43,193,3
1,2025-03-01,Uttarakhand,dehradun,248145,37,75,3
2,2025-03-01,Uttarakhand,champawat,262580,89,165,3
3,2025-03-01,Uttarakhand,nainital,263134,26,53,3
4,2025-03-01,Uttarakhand,bageshwar,263639,26,57,3


In [40]:
enrol_agg = enrol_df.groupby(["state", "district", "month"])[["age_0_5", "age_5_17", "age_18_greater"]].sum().reset_index()
demo_agg = demo_df.groupby(["state", "district", "month"])[["demo_age_5_17", "demo_age_17_"]].sum().reset_index()
bio_agg = bio_df.groupby(["state", "district", "month"])[["bio_age_5_17", "bio_age_17_"]].sum().reset_index()

combined_df = enrol_agg.merge(demo_agg, on = ["state", "district", "month"], how = "left").merge(bio_agg, on = ["state", "district", "month"], how = "left")
combined_df.fillna(0, inplace=True)
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_
0,Uttarakhand,almora,1,72,9,0,22.0,202.0,584,460
1,Uttarakhand,almora,6,191,33,0,0.0,0.0,2083,2770
2,Uttarakhand,almora,9,355,45,3,393.0,2448.0,2511,2740
3,Uttarakhand,almora,10,251,29,2,185.0,1670.0,1614,1859
4,Uttarakhand,almora,11,381,34,3,337.0,3221.0,4093,3363


In [41]:
combined_df["E"] = combined_df["age_0_5"] + combined_df["age_5_17"] + combined_df["age_18_greater"]
combined_df["DU"] = combined_df["demo_age_5_17"] + combined_df["demo_age_17_"]
combined_df["BU"] = combined_df["bio_age_5_17"] + combined_df["bio_age_17_"]
combined_df["U"] = combined_df["DU"] + combined_df["BU"]
combined_df["T"] = combined_df["E"] + combined_df["U"]
combined_df.head(20)

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T
0,Uttarakhand,almora,1,72,9,0,22.0,202.0,584,460,81,224.0,1044,1268.0,1349.0
1,Uttarakhand,almora,6,191,33,0,0.0,0.0,2083,2770,224,0.0,4853,4853.0,5077.0
2,Uttarakhand,almora,9,355,45,3,393.0,2448.0,2511,2740,403,2841.0,5251,8092.0,8495.0
3,Uttarakhand,almora,10,251,29,2,185.0,1670.0,1614,1859,282,1855.0,3473,5328.0,5610.0
4,Uttarakhand,almora,11,381,34,3,337.0,3221.0,4093,3363,418,3558.0,7456,11014.0,11432.0
5,Uttarakhand,almora,12,259,20,0,410.0,4107.0,5607,4470,279,4517.0,10077,14594.0,14873.0
6,Uttarakhand,bageshwar,1,39,2,0,8.0,129.0,172,237,41,137.0,409,546.0,587.0
7,Uttarakhand,bageshwar,6,76,15,0,0.0,0.0,732,2004,91,0.0,2736,2736.0,2827.0
8,Uttarakhand,bageshwar,9,219,22,0,239.0,892.0,1167,1546,241,1131.0,2713,3844.0,4085.0
9,Uttarakhand,bageshwar,10,133,13,0,84.0,463.0,750,1095,146,547.0,1845,2392.0,2538.0


In [42]:
district_monthly_counts = combined_df.groupby(["district", "month"]).agg(total_months = ("month", "count"), active_months = ("T", lambda x : (x > 0).sum())).reset_index()
district_monthly_counts.head()

Unnamed: 0,district,month,total_months,active_months
0,almora,1,1,1
1,almora,6,1,1
2,almora,9,1,1
3,almora,10,1,1
4,almora,11,1,1


In [43]:
district_monthly_counts["zero_months"] = district_monthly_counts["total_months"] - district_monthly_counts["active_months"]
district_monthly_counts["activity_ratio"] = district_monthly_counts["active_months"] / district_monthly_counts["total_months"]
district_monthly_counts["zero_month_ratio"] = district_monthly_counts["zero_months"] / district_monthly_counts["total_months"]

combined_df = combined_df.merge(district_monthly_counts[["district", "month", "activity_ratio", "zero_month_ratio"]], on = ["district", "month"], how = "left")
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio,zero_month_ratio
0,Uttarakhand,almora,1,72,9,0,22.0,202.0,584,460,81,224.0,1044,1268.0,1349.0,1.0,0.0
1,Uttarakhand,almora,6,191,33,0,0.0,0.0,2083,2770,224,0.0,4853,4853.0,5077.0,1.0,0.0
2,Uttarakhand,almora,9,355,45,3,393.0,2448.0,2511,2740,403,2841.0,5251,8092.0,8495.0,1.0,0.0
3,Uttarakhand,almora,10,251,29,2,185.0,1670.0,1614,1859,282,1855.0,3473,5328.0,5610.0,1.0,0.0
4,Uttarakhand,almora,11,381,34,3,337.0,3221.0,4093,3363,418,3558.0,7456,11014.0,11432.0,1.0,0.0


In [44]:
combined_df = combined_df.merge(district_monthly_counts[["district", "month", "activity_ratio", "zero_month_ratio"]], on = ["district", "month"], how = "left")
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio_x,zero_month_ratio_x,activity_ratio_y,zero_month_ratio_y
0,Uttarakhand,almora,1,72,9,0,22.0,202.0,584,460,81,224.0,1044,1268.0,1349.0,1.0,0.0,1.0,0.0
1,Uttarakhand,almora,6,191,33,0,0.0,0.0,2083,2770,224,0.0,4853,4853.0,5077.0,1.0,0.0,1.0,0.0
2,Uttarakhand,almora,9,355,45,3,393.0,2448.0,2511,2740,403,2841.0,5251,8092.0,8495.0,1.0,0.0,1.0,0.0
3,Uttarakhand,almora,10,251,29,2,185.0,1670.0,1614,1859,282,1855.0,3473,5328.0,5610.0,1.0,0.0,1.0,0.0
4,Uttarakhand,almora,11,381,34,3,337.0,3221.0,4093,3363,418,3558.0,7456,11014.0,11432.0,1.0,0.0,1.0,0.0


In [45]:
district_volume_metrics = combined_df.groupby(["state", "district"]).agg(avg_monthly_enrolment = ("E", "mean"),
                                               monthly_valatility = ("T", lambda x: x.std(ddof=0) / x.mean() if x.mean() > 0 else 0),
                                               peak_load_ratio = ("T", lambda x: x.max() / x.mean() if x.mean() > 0 else 0)).reset_index()


combined_df = combined_df.merge(district_volume_metrics, on=["state", "district"], how="left")
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio_x,zero_month_ratio_x,activity_ratio_y,zero_month_ratio_y,avg_monthly_enrolment,monthly_valatility,peak_load_ratio
0,Uttarakhand,almora,1,72,9,0,22.0,202.0,584,460,81,224.0,1044,1268.0,1349.0,1.0,0.0,1.0,0.0,281.166667,0.566978,1.905329
1,Uttarakhand,almora,6,191,33,0,0.0,0.0,2083,2770,224,0.0,4853,4853.0,5077.0,1.0,0.0,1.0,0.0,281.166667,0.566978,1.905329
2,Uttarakhand,almora,9,355,45,3,393.0,2448.0,2511,2740,403,2841.0,5251,8092.0,8495.0,1.0,0.0,1.0,0.0,281.166667,0.566978,1.905329
3,Uttarakhand,almora,10,251,29,2,185.0,1670.0,1614,1859,282,1855.0,3473,5328.0,5610.0,1.0,0.0,1.0,0.0,281.166667,0.566978,1.905329
4,Uttarakhand,almora,11,381,34,3,337.0,3221.0,4093,3363,418,3558.0,7456,11014.0,11432.0,1.0,0.0,1.0,0.0,281.166667,0.566978,1.905329


In [46]:
district_update_burden = combined_df.groupby(["state", "district"]).agg(avg_monthly_enrollments = ("E", "sum"), avg_monthly_demo_updates = ("DU", "sum"), avg_monthly_bio_updates = ("BU", "sum")).reset_index()

district_update_burden["U"] = district_update_burden["avg_monthly_demo_updates"] + district_update_burden["avg_monthly_bio_updates"]
district_update_burden["biometric_burden"] = district_update_burden["avg_monthly_bio_updates"] / (district_update_burden["avg_monthly_bio_updates"] + district_update_burden["avg_monthly_demo_updates"])
district_update_burden["update_dominant"] = np.where(district_update_burden["U"] > district_update_burden["avg_monthly_enrollments"], 1, 0)
district_update_burden["enrollment_update_balance"] = district_update_burden["avg_monthly_enrollments"] / (district_update_burden["avg_monthly_enrollments"] + district_update_burden["U"])

combined_df = combined_df.merge(district_update_burden[["state", "district", "biometric_burden", "update_dominant", "enrollment_update_balance"]], on=["state", "district"], how="left")
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio_x,zero_month_ratio_x,activity_ratio_y,zero_month_ratio_y,avg_monthly_enrolment,monthly_valatility,peak_load_ratio,biometric_burden,update_dominant,enrollment_update_balance
0,Uttarakhand,almora,1,72,9,0,22.0,202.0,584,460,81,224.0,1044,1268.0,1349.0,1.0,0.0,1.0,0.0,281.166667,0.566978,1.905329,0.712175,1,0.036019
1,Uttarakhand,almora,6,191,33,0,0.0,0.0,2083,2770,224,0.0,4853,4853.0,5077.0,1.0,0.0,1.0,0.0,281.166667,0.566978,1.905329,0.712175,1,0.036019
2,Uttarakhand,almora,9,355,45,3,393.0,2448.0,2511,2740,403,2841.0,5251,8092.0,8495.0,1.0,0.0,1.0,0.0,281.166667,0.566978,1.905329,0.712175,1,0.036019
3,Uttarakhand,almora,10,251,29,2,185.0,1670.0,1614,1859,282,1855.0,3473,5328.0,5610.0,1.0,0.0,1.0,0.0,281.166667,0.566978,1.905329,0.712175,1,0.036019
4,Uttarakhand,almora,11,381,34,3,337.0,3221.0,4093,3363,418,3558.0,7456,11014.0,11432.0,1.0,0.0,1.0,0.0,281.166667,0.566978,1.905329,0.712175,1,0.036019


In [47]:
combined_df.drop(["activity_ratio_y", "zero_month_ratio_y"], axis=1, inplace=True)
combined_df.rename(columns={"activity_ratio_x": "activity_ratio", "zero_month_ratio_x": "zero_month_ratio"}, inplace=True)

In [48]:
combined_df = combined_df.groupby(["state", "district"], as_index = False).first()
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio,zero_month_ratio,avg_monthly_enrolment,monthly_valatility,peak_load_ratio,biometric_burden,update_dominant,enrollment_update_balance
0,Uttarakhand,almora,1,72,9,0,22.0,202.0,584,460,81,224.0,1044,1268.0,1349.0,1.0,0.0,281.166667,0.566978,1.905329,0.712175,1,0.036019
1,Uttarakhand,bageshwar,1,39,2,0,8.0,129.0,172,237,41,137.0,409,546.0,587.0,1.0,0.0,146.333333,0.570887,1.868827,0.758957,1,0.037835
2,Uttarakhand,chamoli,1,51,2,0,12.0,159.0,441,395,53,171.0,836,1007.0,1060.0,1.0,0.0,225.833333,0.550499,1.960495,0.751169,1,0.039769
3,Uttarakhand,champawat,1,37,3,1,19.0,131.0,360,261,41,150.0,621,771.0,812.0,1.0,0.0,175.166667,0.550855,1.723925,0.738154,1,0.032383
4,Uttarakhand,dehradun,1,252,49,0,107.0,844.0,2510,1688,301,951.0,4198,5149.0,5450.0,1.0,0.0,1054.2,0.40974,1.622943,0.626248,1,0.03751


In [49]:
def normalize(x):
    maxx, minx = x.max(), x.min()
    if maxx == minx:
        return x * 0 + 0.5
    normalized = (x - minx) / (maxx - minx)
    return normalized

def inverse_normalize(x):
    inversed = 1 - normalize(x)
    return inversed

In [50]:
combined_df["access"] = (combined_df["activity_ratio"] + normalize(combined_df["avg_monthly_enrolment"])) / 2
combined_df["responsiveness"] = normalize(combined_df["U"] / (combined_df["E"] + combined_df["U"]))
combined_df["inclusion"] = normalize((combined_df["age_0_5"] + combined_df["age_5_17"]) / combined_df["E"])
combined_df["stability"] = (inverse_normalize(combined_df["monthly_valatility"]) + inverse_normalize(combined_df["peak_load_ratio"])) / 2
combined_df["visibility"] = combined_df["activity_ratio"]

combined_df["DEI"] = (combined_df["access"] + combined_df["responsiveness"] + combined_df["inclusion"] + combined_df["stability"] + combined_df["visibility"]) / 5
combined_df["ASS"] = (inverse_normalize(combined_df["activity_ratio"]) + inverse_normalize(combined_df["avg_monthly_enrolment"])) / 2
combined_df["UBS"] = (normalize(combined_df["biometric_burden"]) + normalize(combined_df["update_dominant"])) / 2
combined_df["SRS"] = (normalize(combined_df["monthly_valatility"]) + normalize(combined_df["zero_month_ratio"])) / 2

combined_df.head()


Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio,zero_month_ratio,avg_monthly_enrolment,monthly_valatility,peak_load_ratio,biometric_burden,update_dominant,enrollment_update_balance,access,responsiveness,inclusion,stability,visibility,DEI,ASS,UBS,SRS
0,Uttarakhand,almora,1,72,9,0,22.0,202.0,584,460,81,224.0,1044,1268.0,1349.0,1.0,0.0,281.166667,0.566978,1.905329,0.712175,1,0.036019,0.565337,0.773646,1.0,0.64642,1.0,0.797081,0.684663,0.480093,0.46908
1,Uttarakhand,bageshwar,1,39,2,0,8.0,129.0,172,237,41,137.0,409,546.0,587.0,1.0,0.0,146.333333,0.570887,1.868827,0.758957,1,0.037835,0.5,0.629198,1.0,0.65836,1.0,0.757512,0.75,0.605365,0.474526
2,Uttarakhand,chamoli,1,51,2,0,12.0,159.0,441,395,53,171.0,836,1007.0,1060.0,1.0,0.0,225.833333,0.550499,1.960495,0.751169,1,0.039769,0.538524,0.921664,1.0,0.643104,1.0,0.820658,0.711476,0.58451,0.44612
3,Uttarakhand,champawat,1,37,3,1,19.0,131.0,360,261,41,150.0,621,771.0,812.0,1.0,0.0,175.166667,0.550855,1.723925,0.738154,1,0.032383,0.513972,0.914405,0.0,0.755287,1.0,0.636733,0.736028,0.54966,0.446615
4,Uttarakhand,dehradun,1,252,49,0,107.0,844.0,2510,1688,301,951.0,4198,5149.0,5450.0,1.0,0.0,1054.2,0.40974,1.622943,0.626248,1,0.03751,0.939929,0.844603,1.0,1.0,1.0,0.956906,0.310071,0.25,0.25


In [51]:
combined_df.drop(["access", "responsiveness", "inclusion", "stability", "visibility"], axis=1, inplace=True)
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio,zero_month_ratio,avg_monthly_enrolment,monthly_valatility,peak_load_ratio,biometric_burden,update_dominant,enrollment_update_balance,DEI,ASS,UBS,SRS
0,Uttarakhand,almora,1,72,9,0,22.0,202.0,584,460,81,224.0,1044,1268.0,1349.0,1.0,0.0,281.166667,0.566978,1.905329,0.712175,1,0.036019,0.797081,0.684663,0.480093,0.46908
1,Uttarakhand,bageshwar,1,39,2,0,8.0,129.0,172,237,41,137.0,409,546.0,587.0,1.0,0.0,146.333333,0.570887,1.868827,0.758957,1,0.037835,0.757512,0.75,0.605365,0.474526
2,Uttarakhand,chamoli,1,51,2,0,12.0,159.0,441,395,53,171.0,836,1007.0,1060.0,1.0,0.0,225.833333,0.550499,1.960495,0.751169,1,0.039769,0.820658,0.711476,0.58451,0.44612
3,Uttarakhand,champawat,1,37,3,1,19.0,131.0,360,261,41,150.0,621,771.0,812.0,1.0,0.0,175.166667,0.550855,1.723925,0.738154,1,0.032383,0.636733,0.736028,0.54966,0.446615
4,Uttarakhand,dehradun,1,252,49,0,107.0,844.0,2510,1688,301,951.0,4198,5149.0,5450.0,1.0,0.0,1054.2,0.40974,1.622943,0.626248,1,0.03751,0.956906,0.310071,0.25,0.25


In [None]:
combined_df.to_csv(r"D:\UIDAI hackathon\North\UK\uk_district_analysis.csv", index=False)
final_df = combined_df[["state", "district", "DEI", "ASS", "UBS", "SRS"]]
final_df.to_csv(r"D:\UIDAI hackathon\North\UK\uk_district_final_scores.csv", index=False)