In [None]:
import numpy as np
import pandas as pd
from glob import glob
import matplotlib.pyplot as plt

In [13]:
pd.set_option("display.max_rows", 2000)
pd.set_option("display.max_columns", None)

In [None]:
enrol_df = pd.read_csv(r"D:\UIDAI hackathon\North\HP\enrollment.csv")
demo_df = pd.read_csv(r"D:\UIDAI hackathon\North\HP\demographic.csv")
bio_df = pd.read_csv(r"D:\UIDAI hackathon\North\HP\biometric.csv")

In [15]:
all_dfs = [enrol_df, demo_df, bio_df]

In [16]:
enrol_df.head()

Unnamed: 0,date,state,district,pincode,age_0_5,age_5_17,age_18_greater
0,01-09-2025,Himachal Pradesh,Bilaspur,174021,2,0,0
1,01-09-2025,Himachal Pradesh,Bilaspur,174033,1,0,0
2,01-09-2025,Himachal Pradesh,Bilaspur,174035,1,0,0
3,01-09-2025,Himachal Pradesh,Hamirpur,177001,3,0,0
4,01-09-2025,Himachal Pradesh,Hamirpur,177020,4,0,0


In [17]:
enrol_df["date"] = pd.to_datetime(enrol_df["date"], dayfirst=True)
demo_df["date"] = pd.to_datetime(demo_df["date"], dayfirst=True)
bio_df["date"] = pd.to_datetime(bio_df["date"], dayfirst=True)

In [18]:
print(enrol_df["district"].nunique(), demo_df["district"].nunique(), bio_df["district"].nunique())

14 14 14


In [19]:
enrol_df["district"].value_counts().sort_index()

district
Bilaspur             738
Chamba               963
Hamirpur            1034
Kangra              2958
Kinnaur              179
Kullu                811
Lahaul and Spiti       8
Lahul & Spiti         38
Lahul and Spiti       40
Mandi               1762
Shimla              1338
Sirmaur              741
Solan                792
Una                  969
Name: count, dtype: int64

In [20]:
bio_df["district"].value_counts().sort_index()

district
Bilaspur             5091
Chamba               4756
Hamirpur             9136
Kangra              19609
Kinnaur              2389
Kullu                4953
Lahaul and Spiti      281
Lahul & Spiti         851
Lahul and Spiti       874
Mandi               11787
Shimla              10639
Sirmaur              3335
Solan                5373
Una                  6836
Name: count, dtype: int64

In [21]:
demo_df["district"].value_counts().sort_index()

district
Bilaspur            1915
Chamba              2130
Hamirpur            3089
Kangra              7899
Kinnaur              731
Kullu               2062
Lahaul and Spiti      86
Lahul & Spiti        226
Lahul and Spiti      218
Mandi               4491
Shimla              4163
Sirmaur             1420
Solan               1982
Una                 2554
Name: count, dtype: int64

In [22]:
cleanup_map = {
    "Lahul & Spiti": "Lahaul and Spiti",
    "Lahul and Spiti": "Lahaul and Spiti"
}

for df in all_dfs:
    df["district"] = df["district"].replace(cleanup_map)
    
print(enrol_df["district"].nunique(), demo_df["district"].nunique(), bio_df["district"].nunique())

12 12 12


In [23]:
enrol_df["district"] = enrol_df["district"].str.lower()
demo_df["district"] = demo_df["district"].str.lower()
bio_df["district"] = bio_df["district"].str.lower()

In [24]:
print(enrol_df["pincode"].nunique(), demo_df["pincode"].nunique(), bio_df["pincode"].nunique())

444 449 450


In [25]:
# Analyze pincode differences
enrol_pincodes = set(enrol_df["pincode"].dropna())
demo_pincodes = set(demo_df["pincode"].dropna())
bio_pincodes = set(bio_df["pincode"].dropna())

print("Enrollment pincodes:", len(enrol_pincodes))
print("Demographic pincodes:", len(demo_pincodes))
print("Biometric pincodes:", len(bio_pincodes))
print()

# Find differences
only_in_enrol = enrol_pincodes - demo_pincodes - bio_pincodes
only_in_demo = demo_pincodes - enrol_pincodes - bio_pincodes
only_in_bio = bio_pincodes - enrol_pincodes - demo_pincodes
in_all = enrol_pincodes & demo_pincodes & bio_pincodes

print(f"Pincodes only in enrollment: {len(only_in_enrol)}")
print(f"Pincodes only in demographic: {len(only_in_demo)}")
print(f"Pincodes only in biometric: {len(only_in_bio)}")
print(f"Pincodes in all three: {len(in_all)}")
print()

# Check for NULL/NaN values
print("NULL pincodes:")
print(f"Enrollment: {enrol_df["pincode"].isna().sum()}")
print(f"Demographic: {demo_df["pincode"].isna().sum()}")
print(f"Biometric: {bio_df["pincode"].isna().sum()}")
print()

# Show some examples of unique pincodes
print("Sample pincodes only in enrollment:", list(only_in_enrol))
print("Sample pincodes only in demographic:", list(only_in_demo))
print("Sample pincodes only in biometric:", list(only_in_bio))

Enrollment pincodes: 444
Demographic pincodes: 449
Biometric pincodes: 450

Pincodes only in enrollment: 0
Pincodes only in demographic: 0
Pincodes only in biometric: 1
Pincodes in all three: 444

NULL pincodes:
Enrollment: 0
Demographic: 0
Biometric: 0

Sample pincodes only in enrollment: []
Sample pincodes only in demographic: []
Sample pincodes only in biometric: [171014]


In [26]:
for df in all_dfs:
    df["month"] = df["date"].dt.month

In [27]:
enrol_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12371 entries, 0 to 12370
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   date            12371 non-null  datetime64[ns]
 1   state           12371 non-null  object        
 2   district        12371 non-null  object        
 3   pincode         12371 non-null  int64         
 4   age_0_5         12371 non-null  int64         
 5   age_5_17        12371 non-null  int64         
 6   age_18_greater  12371 non-null  int64         
 7   month           12371 non-null  int32         
dtypes: datetime64[ns](1), int32(1), int64(4), object(2)
memory usage: 725.0+ KB


In [28]:
demo_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32966 entries, 0 to 32965
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   date           32966 non-null  datetime64[ns]
 1   state          32966 non-null  object        
 2   district       32966 non-null  object        
 3   pincode        32966 non-null  int64         
 4   demo_age_5_17  32966 non-null  int64         
 5   demo_age_17_   32966 non-null  int64         
 6   month          32966 non-null  int32         
dtypes: datetime64[ns](1), int32(1), int64(3), object(2)
memory usage: 1.6+ MB


In [29]:
bio_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 85910 entries, 0 to 85909
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   date          85910 non-null  datetime64[ns]
 1   state         85910 non-null  object        
 2   district      85910 non-null  object        
 3   pincode       85910 non-null  int64         
 4   bio_age_5_17  85910 non-null  int64         
 5   bio_age_17_   85910 non-null  int64         
 6   month         85910 non-null  int32         
dtypes: datetime64[ns](1), int32(1), int64(3), object(2)
memory usage: 4.3+ MB


In [30]:
enrol_df.head()

Unnamed: 0,date,state,district,pincode,age_0_5,age_5_17,age_18_greater,month
0,2025-09-01,Himachal Pradesh,bilaspur,174021,2,0,0,9
1,2025-09-01,Himachal Pradesh,bilaspur,174033,1,0,0,9
2,2025-09-01,Himachal Pradesh,bilaspur,174035,1,0,0,9
3,2025-09-01,Himachal Pradesh,hamirpur,177001,3,0,0,9
4,2025-09-01,Himachal Pradesh,hamirpur,177020,4,0,0,9


In [31]:
demo_df.head()

Unnamed: 0,date,state,district,pincode,demo_age_5_17,demo_age_17_,month
0,2025-11-16,Himachal Pradesh,bilaspur,174029,0,1,11
1,2025-11-16,Himachal Pradesh,chamba,176319,1,0,11
2,2025-11-16,Himachal Pradesh,chamba,176320,0,2,11
3,2025-11-16,Himachal Pradesh,hamirpur,177048,0,1,11
4,2025-11-16,Himachal Pradesh,kangra,176038,0,1,11


In [32]:
bio_df.head()

Unnamed: 0,date,state,district,pincode,bio_age_5_17,bio_age_17_,month
0,2025-03-01,Himachal Pradesh,kullu,172023,63,60,3
1,2025-03-01,Himachal Pradesh,shimla,171004,26,30,3
2,2025-03-01,Himachal Pradesh,kinnaur,172115,13,28,3
3,2025-03-01,Himachal Pradesh,kangra,176122,39,47,3
4,2025-03-01,Himachal Pradesh,una,174302,18,30,3


In [33]:
enrol_agg = enrol_df.groupby(["state", "district", "month"])[["age_0_5", "age_5_17", "age_18_greater"]].sum().reset_index()
demo_agg = demo_df.groupby(["state", "district", "month"])[["demo_age_5_17", "demo_age_17_"]].sum().reset_index()
bio_agg = bio_df.groupby(["state", "district", "month"])[["bio_age_5_17", "bio_age_17_"]].sum().reset_index()

combined_df = enrol_agg.merge(demo_agg, on = ["state", "district", "month"], how = "left").merge(bio_agg, on = ["state", "district", "month"], how = "left")
combined_df.fillna(0, inplace=True)
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_
0,Himachal Pradesh,bilaspur,1,68,0,0,9.0,104.0,61,103
1,Himachal Pradesh,bilaspur,6,113,8,0,19.0,124.0,898,2639
2,Himachal Pradesh,bilaspur,9,254,3,0,217.0,1441.0,1353,1891
3,Himachal Pradesh,bilaspur,10,232,5,0,132.0,592.0,3111,1694
4,Himachal Pradesh,bilaspur,11,270,4,0,160.0,1075.0,2471,1599


In [34]:
combined_df["E"] = combined_df["age_0_5"] + combined_df["age_5_17"] + combined_df["age_18_greater"]
combined_df["DU"] = combined_df["demo_age_5_17"] + combined_df["demo_age_17_"]
combined_df["BU"] = combined_df["bio_age_5_17"] + combined_df["bio_age_17_"]
combined_df["U"] = combined_df["DU"] + combined_df["BU"]
combined_df["T"] = combined_df["E"] + combined_df["U"]
combined_df.head(20)

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T
0,Himachal Pradesh,bilaspur,1,68,0,0,9.0,104.0,61,103,68,113.0,164,277.0,345.0
1,Himachal Pradesh,bilaspur,6,113,8,0,19.0,124.0,898,2639,121,143.0,3537,3680.0,3801.0
2,Himachal Pradesh,bilaspur,9,254,3,0,217.0,1441.0,1353,1891,257,1658.0,3244,4902.0,5159.0
3,Himachal Pradesh,bilaspur,10,232,5,0,132.0,592.0,3111,1694,237,724.0,4805,5529.0,5766.0
4,Himachal Pradesh,bilaspur,11,270,4,0,160.0,1075.0,2471,1599,274,1235.0,4070,5305.0,5579.0
5,Himachal Pradesh,bilaspur,12,200,2,0,212.0,1670.0,1406,1976,202,1882.0,3382,5264.0,5466.0
6,Himachal Pradesh,chamba,1,101,8,0,48.0,222.0,104,156,109,270.0,260,530.0,639.0
7,Himachal Pradesh,chamba,6,282,39,2,0.0,0.0,1299,4141,323,0.0,5440,5440.0,5763.0
8,Himachal Pradesh,chamba,9,360,23,1,365.0,4650.0,2486,2140,384,5015.0,4626,9641.0,10025.0
9,Himachal Pradesh,chamba,10,372,14,2,347.0,1951.0,3634,1914,388,2298.0,5548,7846.0,8234.0


In [35]:
district_monthly_counts = combined_df.groupby(["district", "month"]).agg(total_months = ("month", "count"), active_months = ("T", lambda x : (x > 0).sum())).reset_index()
district_monthly_counts.head()

Unnamed: 0,district,month,total_months,active_months
0,bilaspur,1,1,1
1,bilaspur,6,1,1
2,bilaspur,9,1,1
3,bilaspur,10,1,1
4,bilaspur,11,1,1


In [36]:
district_monthly_counts["zero_months"] = district_monthly_counts["total_months"] - district_monthly_counts["active_months"]
district_monthly_counts["activity_ratio"] = district_monthly_counts["active_months"] / district_monthly_counts["total_months"]
district_monthly_counts["zero_month_ratio"] = district_monthly_counts["zero_months"] / district_monthly_counts["total_months"]

combined_df = combined_df.merge(district_monthly_counts[["district", "month", "activity_ratio", "zero_month_ratio"]], on = ["district", "month"], how = "left")
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio,zero_month_ratio
0,Himachal Pradesh,bilaspur,1,68,0,0,9.0,104.0,61,103,68,113.0,164,277.0,345.0,1.0,0.0
1,Himachal Pradesh,bilaspur,6,113,8,0,19.0,124.0,898,2639,121,143.0,3537,3680.0,3801.0,1.0,0.0
2,Himachal Pradesh,bilaspur,9,254,3,0,217.0,1441.0,1353,1891,257,1658.0,3244,4902.0,5159.0,1.0,0.0
3,Himachal Pradesh,bilaspur,10,232,5,0,132.0,592.0,3111,1694,237,724.0,4805,5529.0,5766.0,1.0,0.0
4,Himachal Pradesh,bilaspur,11,270,4,0,160.0,1075.0,2471,1599,274,1235.0,4070,5305.0,5579.0,1.0,0.0


In [37]:
combined_df = combined_df.merge(district_monthly_counts[["district", "month", "activity_ratio", "zero_month_ratio"]], on = ["district", "month"], how = "left")
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio_x,zero_month_ratio_x,activity_ratio_y,zero_month_ratio_y
0,Himachal Pradesh,bilaspur,1,68,0,0,9.0,104.0,61,103,68,113.0,164,277.0,345.0,1.0,0.0,1.0,0.0
1,Himachal Pradesh,bilaspur,6,113,8,0,19.0,124.0,898,2639,121,143.0,3537,3680.0,3801.0,1.0,0.0,1.0,0.0
2,Himachal Pradesh,bilaspur,9,254,3,0,217.0,1441.0,1353,1891,257,1658.0,3244,4902.0,5159.0,1.0,0.0,1.0,0.0
3,Himachal Pradesh,bilaspur,10,232,5,0,132.0,592.0,3111,1694,237,724.0,4805,5529.0,5766.0,1.0,0.0,1.0,0.0
4,Himachal Pradesh,bilaspur,11,270,4,0,160.0,1075.0,2471,1599,274,1235.0,4070,5305.0,5579.0,1.0,0.0,1.0,0.0


In [38]:
district_volume_metrics = combined_df.groupby(["state", "district"]).agg(avg_monthly_enrolment = ("E", "mean"),
                                               monthly_valatility = ("T", lambda x: x.std(ddof=0) / x.mean() if x.mean() > 0 else 0),
                                               peak_load_ratio = ("T", lambda x: x.max() / x.mean() if x.mean() > 0 else 0)).reset_index()


combined_df = combined_df.merge(district_volume_metrics, on=["state", "district"], how="left")
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio_x,zero_month_ratio_x,activity_ratio_y,zero_month_ratio_y,avg_monthly_enrolment,monthly_valatility,peak_load_ratio
0,Himachal Pradesh,bilaspur,1,68,0,0,9.0,104.0,61,103,68,113.0,164,277.0,345.0,1.0,0.0,1.0,0.0,193.166667,0.437489,1.324705
1,Himachal Pradesh,bilaspur,6,113,8,0,19.0,124.0,898,2639,121,143.0,3537,3680.0,3801.0,1.0,0.0,1.0,0.0,193.166667,0.437489,1.324705
2,Himachal Pradesh,bilaspur,9,254,3,0,217.0,1441.0,1353,1891,257,1658.0,3244,4902.0,5159.0,1.0,0.0,1.0,0.0,193.166667,0.437489,1.324705
3,Himachal Pradesh,bilaspur,10,232,5,0,132.0,592.0,3111,1694,237,724.0,4805,5529.0,5766.0,1.0,0.0,1.0,0.0,193.166667,0.437489,1.324705
4,Himachal Pradesh,bilaspur,11,270,4,0,160.0,1075.0,2471,1599,274,1235.0,4070,5305.0,5579.0,1.0,0.0,1.0,0.0,193.166667,0.437489,1.324705


In [39]:
district_update_burden = combined_df.groupby(["state", "district"]).agg(avg_monthly_enrollments = ("E", "sum"), avg_monthly_demo_updates = ("DU", "sum"), avg_monthly_bio_updates = ("BU", "sum")).reset_index()

district_update_burden["U"] = district_update_burden["avg_monthly_demo_updates"] + district_update_burden["avg_monthly_bio_updates"]
district_update_burden["biometric_burden"] = district_update_burden["avg_monthly_bio_updates"] / (district_update_burden["avg_monthly_bio_updates"] + district_update_burden["avg_monthly_demo_updates"])
district_update_burden["update_dominant"] = np.where(district_update_burden["U"] > district_update_burden["avg_monthly_enrollments"], 1, 0)
district_update_burden["enrollment_update_balance"] = district_update_burden["avg_monthly_enrollments"] / (district_update_burden["avg_monthly_enrollments"] + district_update_burden["U"])

combined_df = combined_df.merge(district_update_burden[["state", "district", "biometric_burden", "update_dominant", "enrollment_update_balance"]], on=["state", "district"], how="left")
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio_x,zero_month_ratio_x,activity_ratio_y,zero_month_ratio_y,avg_monthly_enrolment,monthly_valatility,peak_load_ratio,biometric_burden,update_dominant,enrollment_update_balance
0,Himachal Pradesh,bilaspur,1,68,0,0,9.0,104.0,61,103,68,113.0,164,277.0,345.0,1.0,0.0,1.0,0.0,193.166667,0.437489,1.324705,0.769403,1,0.044379
1,Himachal Pradesh,bilaspur,6,113,8,0,19.0,124.0,898,2639,121,143.0,3537,3680.0,3801.0,1.0,0.0,1.0,0.0,193.166667,0.437489,1.324705,0.769403,1,0.044379
2,Himachal Pradesh,bilaspur,9,254,3,0,217.0,1441.0,1353,1891,257,1658.0,3244,4902.0,5159.0,1.0,0.0,1.0,0.0,193.166667,0.437489,1.324705,0.769403,1,0.044379
3,Himachal Pradesh,bilaspur,10,232,5,0,132.0,592.0,3111,1694,237,724.0,4805,5529.0,5766.0,1.0,0.0,1.0,0.0,193.166667,0.437489,1.324705,0.769403,1,0.044379
4,Himachal Pradesh,bilaspur,11,270,4,0,160.0,1075.0,2471,1599,274,1235.0,4070,5305.0,5579.0,1.0,0.0,1.0,0.0,193.166667,0.437489,1.324705,0.769403,1,0.044379


In [40]:
combined_df.drop(["activity_ratio_y", "zero_month_ratio_y"], axis=1, inplace=True)
combined_df.rename(columns={"activity_ratio_x": "activity_ratio", "zero_month_ratio_x": "zero_month_ratio"}, inplace=True)

In [41]:
combined_df = combined_df.groupby(["state", "district"], as_index = False).first()
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio,zero_month_ratio,avg_monthly_enrolment,monthly_valatility,peak_load_ratio,biometric_burden,update_dominant,enrollment_update_balance
0,Himachal Pradesh,bilaspur,1,68,0,0,9.0,104.0,61,103,68,113.0,164,277.0,345.0,1.0,0.0,193.166667,0.437489,1.324705,0.769403,1,0.044379
1,Himachal Pradesh,chamba,1,101,8,0,48.0,222.0,104,156,109,270.0,260,530.0,639.0,1.0,0.0,343.166667,0.484176,1.594573,0.647032,1,0.044065
2,Himachal Pradesh,hamirpur,1,57,1,0,6.0,105.0,98,168,58,111.0,266,377.0,435.0,1.0,0.0,234.0,0.429342,1.311785,0.78701,1,0.042136
3,Himachal Pradesh,kangra,1,232,5,2,35.0,465.0,463,590,239,500.0,1053,1553.0,1792.0,1.0,0.0,785.0,0.443914,1.36956,0.751453,1,0.040104
4,Himachal Pradesh,kinnaur,1,9,0,0,1.0,26.0,7,29,9,27.0,36,63.0,72.0,1.0,0.0,33.333333,0.502404,1.618738,0.770579,1,0.034194


In [42]:
def normalize(x):
    maxx, minx = x.max(), x.min()
    if maxx == minx:
        return x * 0 + 0.5
    normalized = (x - minx) / (maxx - minx)
    return normalized

def inverse_normalize(x):
    inversed = 1 - normalize(x)
    return inversed

In [43]:
combined_df["access"] = (combined_df["activity_ratio"] + normalize(combined_df["avg_monthly_enrolment"])) / 2
combined_df["responsiveness"] = normalize(combined_df["U"] / (combined_df["E"] + combined_df["U"]))
combined_df["inclusion"] = normalize((combined_df["age_0_5"] + combined_df["age_5_17"]) / combined_df["E"])
combined_df["stability"] = (inverse_normalize(combined_df["monthly_valatility"]) + inverse_normalize(combined_df["peak_load_ratio"])) / 2
combined_df["visibility"] = combined_df["activity_ratio"]

combined_df["DEI"] = (combined_df["access"] + combined_df["responsiveness"] + combined_df["inclusion"] + combined_df["stability"] + combined_df["visibility"]) / 5
combined_df["ASS"] = (inverse_normalize(combined_df["activity_ratio"]) + inverse_normalize(combined_df["avg_monthly_enrolment"])) / 2
combined_df["UBS"] = (normalize(combined_df["biometric_burden"]) + normalize(combined_df["update_dominant"])) / 2
combined_df["SRS"] = (normalize(combined_df["monthly_valatility"]) + normalize(combined_df["zero_month_ratio"])) / 2

combined_df.head()


Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio,zero_month_ratio,avg_monthly_enrolment,monthly_valatility,peak_load_ratio,biometric_burden,update_dominant,enrollment_update_balance,access,responsiveness,inclusion,stability,visibility,DEI,ASS,UBS,SRS
0,Himachal Pradesh,bilaspur,1,68,0,0,9.0,104.0,61,103,68,113.0,164,277.0,345.0,1.0,0.0,193.166667,0.437489,1.324705,0.769403,1,0.044379,0.614943,0.0,1.0,0.86777,1.0,0.696543,0.635057,0.635006,0.364097
1,Himachal Pradesh,chamba,1,101,8,0,48.0,222.0,104,156,109,270.0,260,530.0,639.0,1.0,0.0,343.166667,0.484176,1.594573,0.647032,1,0.044065,0.712535,0.218918,1.0,0.211468,1.0,0.628584,0.537465,0.306855,0.641639
2,Himachal Pradesh,hamirpur,1,57,1,0,6.0,105.0,98,168,58,111.0,266,377.0,435.0,1.0,0.0,234.0,0.429342,1.311785,0.78701,1,0.042136,0.641509,0.526348,1.0,0.93433,1.0,0.820437,0.608491,0.68222,0.31567
3,Himachal Pradesh,kangra,1,232,5,2,35.0,465.0,463,590,239,500.0,1053,1553.0,1792.0,1.0,0.0,785.0,0.443914,1.36956,0.751453,1,0.040104,1.0,0.526041,0.889819,0.766619,1.0,0.836496,0.25,0.586869,0.402294
4,Himachal Pradesh,kinnaur,1,9,0,0,1.0,26.0,7,29,9,27.0,36,63.0,72.0,1.0,0.0,33.333333,0.502404,1.618738,0.770579,1,0.034194,0.510952,0.595132,1.0,0.069191,1.0,0.635055,0.739048,0.638158,0.75


In [44]:
combined_df.drop(["access", "responsiveness", "inclusion", "stability", "visibility"], axis=1, inplace=True)
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio,zero_month_ratio,avg_monthly_enrolment,monthly_valatility,peak_load_ratio,biometric_burden,update_dominant,enrollment_update_balance,DEI,ASS,UBS,SRS
0,Himachal Pradesh,bilaspur,1,68,0,0,9.0,104.0,61,103,68,113.0,164,277.0,345.0,1.0,0.0,193.166667,0.437489,1.324705,0.769403,1,0.044379,0.696543,0.635057,0.635006,0.364097
1,Himachal Pradesh,chamba,1,101,8,0,48.0,222.0,104,156,109,270.0,260,530.0,639.0,1.0,0.0,343.166667,0.484176,1.594573,0.647032,1,0.044065,0.628584,0.537465,0.306855,0.641639
2,Himachal Pradesh,hamirpur,1,57,1,0,6.0,105.0,98,168,58,111.0,266,377.0,435.0,1.0,0.0,234.0,0.429342,1.311785,0.78701,1,0.042136,0.820437,0.608491,0.68222,0.31567
3,Himachal Pradesh,kangra,1,232,5,2,35.0,465.0,463,590,239,500.0,1053,1553.0,1792.0,1.0,0.0,785.0,0.443914,1.36956,0.751453,1,0.040104,0.836496,0.25,0.586869,0.402294
4,Himachal Pradesh,kinnaur,1,9,0,0,1.0,26.0,7,29,9,27.0,36,63.0,72.0,1.0,0.0,33.333333,0.502404,1.618738,0.770579,1,0.034194,0.635055,0.739048,0.638158,0.75


In [None]:
combined_df.to_csv(r"D:\UIDAI hackathon\North\HP\hp_district_analysis.csv", index=False)
final_df = combined_df[["state", "district", "DEI", "ASS", "UBS", "SRS"]]
final_df.to_csv(r"D:\UIDAI hackathon\North\HP\hp_district_final_scores.csv", index=False)