In [None]:
import numpy as np
import pandas as pd
from glob import glob
import matplotlib.pyplot as plt

In [12]:
pd.set_option("display.max_rows", 2000)
pd.set_option("display.max_columns", None)

In [None]:
enrol_df = pd.read_csv(r"D:\UIDAI hackathon\North\CH\enrollment.csv")
demo_df = pd.read_csv(r"D:\UIDAI hackathon\North\CH\demographic.csv")
bio_df = pd.read_csv(r"D:\UIDAI hackathon\North\CH\biometric.csv")

In [14]:
all_dfs = [enrol_df, demo_df, bio_df]

In [15]:
enrol_df.head()

Unnamed: 0,date,state,district,pincode,age_0_5,age_5_17,age_18_greater
0,01-09-2025,Chandigarh,Chandigarh,160002,1,0,0
1,01-09-2025,Chandigarh,Chandigarh,160030,2,0,0
2,01-09-2025,Chandigarh,Chandigarh,160036,2,0,0
3,02-09-2025,Chandigarh,Chandigarh,160022,2,0,0
4,02-09-2025,Chandigarh,Chandigarh,160047,2,0,0


In [16]:
enrol_df["date"] = pd.to_datetime(enrol_df["date"], dayfirst=True)
demo_df["date"] = pd.to_datetime(demo_df["date"], dayfirst=True)
bio_df["date"] = pd.to_datetime(bio_df["date"], dayfirst=True)

In [17]:
print(enrol_df["district"].nunique(), demo_df["district"].nunique(), bio_df["district"].nunique())

3 3 3


In [18]:
enrol_df["district"].value_counts().sort_index()

district
Chandigarh    1019
Mohali           1
Rupnagar         3
Name: count, dtype: int64

In [19]:
bio_df["district"].value_counts().sort_index()

district
Chandigarh    5025
Mohali          25
Rupnagar       122
Name: count, dtype: int64

In [20]:
demo_df["district"].value_counts().sort_index()

district
Chandigarh    2188
Mohali           5
Rupnagar        30
Name: count, dtype: int64

In [21]:
cleanup_map = {
    "Ambedkar Nagar *": "Ambedkar Nagar",
    "Gautam Buddh Nagar": "Gautam Buddha Nagar",
    "Auraiya *": "Auraiya",
    "Chandauli *": "Chandauli",
    "Chitrakoot *": "Chitrakoot",
    "Gautam Buddha Nagar *": "Gautam Buddh Nagar",
    "Jyotiba Phule Nagar *": "Amroha",
    "Mahoba *": "Mahoba",
    "Sant Kabir Nagar *": "Sant Kabir Nagar",
    "Baghpat *": "Baghpat",
    "Chandauli *": "Chandauli",
    "Allahabad": "Prayagraj",
    "Faizabad": "Ayodhya",
    "Jyotiba Phule Nagar": "Amroha",
    "Bara Banki": "Barabanki",
    "Bulandshahar": "Bulandshahr",
    "Kushi Nagar": "Kushinagar",
    "Kushinagar *": "Kushinagar",
    "Rae Bareli": "Raebareli",
    "Siddharth Nagar": "Siddharthnagar",
    "Shravasti": "Shrawasti",
    "Mahrajganj": "Maharajganj",
    "Bagpat": "Baghpat",
    "Sant Ravidas Nagar": "Bhadohi",
    "Sant Ravidas Nagar Bhadohi": "Bhadohi"
}

for df in all_dfs:
    df["district"] = df["district"].replace(cleanup_map)
    
print(enrol_df["district"].nunique(), demo_df["district"].nunique(), bio_df["district"].nunique())

3 3 3


In [22]:
enrol_df["district"] = enrol_df["district"].str.lower()
demo_df["district"] = demo_df["district"].str.lower()
bio_df["district"] = bio_df["district"].str.lower()

In [23]:
print(enrol_df["pincode"].nunique(), demo_df["pincode"].nunique(), bio_df["pincode"].nunique())

25 26 27


In [24]:
# Analyze pincode differences
enrol_pincodes = set(enrol_df["pincode"].dropna())
demo_pincodes = set(demo_df["pincode"].dropna())
bio_pincodes = set(bio_df["pincode"].dropna())

print("Enrollment pincodes:", len(enrol_pincodes))
print("Demographic pincodes:", len(demo_pincodes))
print("Biometric pincodes:", len(bio_pincodes))
print()

# Find differences
only_in_enrol = enrol_pincodes - demo_pincodes - bio_pincodes
only_in_demo = demo_pincodes - enrol_pincodes - bio_pincodes
only_in_bio = bio_pincodes - enrol_pincodes - demo_pincodes
in_all = enrol_pincodes & demo_pincodes & bio_pincodes

print(f"Pincodes only in enrollment: {len(only_in_enrol)}")
print(f"Pincodes only in demographic: {len(only_in_demo)}")
print(f"Pincodes only in biometric: {len(only_in_bio)}")
print(f"Pincodes in all three: {len(in_all)}")
print()

# Check for NULL/NaN values
print("NULL pincodes:")
print(f"Enrollment: {enrol_df["pincode"].isna().sum()}")
print(f"Demographic: {demo_df["pincode"].isna().sum()}")
print(f"Biometric: {bio_df["pincode"].isna().sum()}")
print()

# Show some examples of unique pincodes
print("Sample pincodes only in enrollment:", list(only_in_enrol))
print("Sample pincodes only in demographic:", list(only_in_demo))
print("Sample pincodes only in biometric:", list(only_in_bio))

Enrollment pincodes: 25
Demographic pincodes: 26
Biometric pincodes: 27

Pincodes only in enrollment: 0
Pincodes only in demographic: 0
Pincodes only in biometric: 1
Pincodes in all three: 25

NULL pincodes:
Enrollment: 0
Demographic: 0
Biometric: 0

Sample pincodes only in enrollment: []
Sample pincodes only in demographic: []
Sample pincodes only in biometric: [140308]


In [25]:
for df in all_dfs:
    df["month"] = df["date"].dt.month

In [26]:
enrol_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1023 entries, 0 to 1022
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   date            1023 non-null   datetime64[ns]
 1   state           1023 non-null   object        
 2   district        1023 non-null   object        
 3   pincode         1023 non-null   int64         
 4   age_0_5         1023 non-null   int64         
 5   age_5_17        1023 non-null   int64         
 6   age_18_greater  1023 non-null   int64         
 7   month           1023 non-null   int32         
dtypes: datetime64[ns](1), int32(1), int64(4), object(2)
memory usage: 60.1+ KB


In [27]:
demo_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2223 entries, 0 to 2222
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   date           2223 non-null   datetime64[ns]
 1   state          2223 non-null   object        
 2   district       2223 non-null   object        
 3   pincode        2223 non-null   int64         
 4   demo_age_5_17  2223 non-null   int64         
 5   demo_age_17_   2223 non-null   int64         
 6   month          2223 non-null   int32         
dtypes: datetime64[ns](1), int32(1), int64(3), object(2)
memory usage: 113.0+ KB


In [28]:
bio_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5172 entries, 0 to 5171
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   date          5172 non-null   datetime64[ns]
 1   state         5172 non-null   object        
 2   district      5172 non-null   object        
 3   pincode       5172 non-null   int64         
 4   bio_age_5_17  5172 non-null   int64         
 5   bio_age_17_   5172 non-null   int64         
 6   month         5172 non-null   int32         
dtypes: datetime64[ns](1), int32(1), int64(3), object(2)
memory usage: 262.8+ KB


In [29]:
enrol_df.head()

Unnamed: 0,date,state,district,pincode,age_0_5,age_5_17,age_18_greater,month
0,2025-09-01,Chandigarh,chandigarh,160002,1,0,0,9
1,2025-09-01,Chandigarh,chandigarh,160030,2,0,0,9
2,2025-09-01,Chandigarh,chandigarh,160036,2,0,0,9
3,2025-09-02,Chandigarh,chandigarh,160022,2,0,0,9
4,2025-09-02,Chandigarh,chandigarh,160047,2,0,0,9


In [30]:
demo_df.head()

Unnamed: 0,date,state,district,pincode,demo_age_5_17,demo_age_17_,month
0,2025-03-01,Chandigarh,chandigarh,160103,12,78,3
1,2025-03-01,Chandigarh,chandigarh,160003,31,149,3
2,2025-04-01,Chandigarh,chandigarh,160002,246,641,4
3,2025-04-01,Chandigarh,chandigarh,160102,382,697,4
4,2025-04-01,Chandigarh,chandigarh,160022,71,321,4


In [31]:
bio_df.head()

Unnamed: 0,date,state,district,pincode,bio_age_5_17,bio_age_17_,month
0,2025-03-01,Chandigarh,chandigarh,160025,135,244,3
1,2025-03-01,Chandigarh,chandigarh,160022,80,108,3
2,2025-03-01,Chandigarh,chandigarh,160055,87,49,3
3,2025-03-01,Chandigarh,chandigarh,160009,26,27,3
4,2025-05-01,Chandigarh,chandigarh,160018,16,13,5


In [32]:
enrol_agg = enrol_df.groupby(["state", "district", "month"])[["age_0_5", "age_5_17", "age_18_greater"]].sum().reset_index()
demo_agg = demo_df.groupby(["state", "district", "month"])[["demo_age_5_17", "demo_age_17_"]].sum().reset_index()
bio_agg = bio_df.groupby(["state", "district", "month"])[["bio_age_5_17", "bio_age_17_"]].sum().reset_index()

combined_df = enrol_agg.merge(demo_agg, on = ["state", "district", "month"], how = "left").merge(bio_agg, on = ["state", "district", "month"], how = "left")
combined_df.fillna(0, inplace=True)
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_
0,Chandigarh,chandigarh,1,94,13,2,57.0,566.0,226,285
1,Chandigarh,chandigarh,4,86,21,10,2731.0,7773.0,4587,3334
2,Chandigarh,chandigarh,6,376,64,7,663.0,5313.0,2259,3764
3,Chandigarh,chandigarh,9,775,79,1,676.0,4939.0,3098,3513
4,Chandigarh,chandigarh,10,446,51,0,330.0,3288.0,2425,2792


In [33]:
combined_df["E"] = combined_df["age_0_5"] + combined_df["age_5_17"] + combined_df["age_18_greater"]
combined_df["DU"] = combined_df["demo_age_5_17"] + combined_df["demo_age_17_"]
combined_df["BU"] = combined_df["bio_age_5_17"] + combined_df["bio_age_17_"]
combined_df["U"] = combined_df["DU"] + combined_df["BU"]
combined_df["T"] = combined_df["E"] + combined_df["U"]
combined_df.head(20)

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T
0,Chandigarh,chandigarh,1,94,13,2,57.0,566.0,226,285,109,623.0,511,1134.0,1243.0
1,Chandigarh,chandigarh,4,86,21,10,2731.0,7773.0,4587,3334,117,10504.0,7921,18425.0,18542.0
2,Chandigarh,chandigarh,6,376,64,7,663.0,5313.0,2259,3764,447,5976.0,6023,11999.0,12446.0
3,Chandigarh,chandigarh,9,775,79,1,676.0,4939.0,3098,3513,855,5615.0,6611,12226.0,13081.0
4,Chandigarh,chandigarh,10,446,51,0,330.0,3288.0,2425,2792,497,3618.0,5217,8835.0,9332.0
5,Chandigarh,chandigarh,11,638,39,16,629.0,6819.0,2553,3118,693,7448.0,5671,13119.0,13812.0
6,Chandigarh,chandigarh,12,504,33,8,784.0,8617.0,2250,3107,545,9401.0,5357,14758.0,15303.0
7,Chandigarh,mohali,6,0,1,0,0.0,0.0,1,1,1,0.0,2,2.0,3.0
8,Chandigarh,rupnagar,9,1,0,0,0.0,6.0,1,16,1,6.0,17,23.0,24.0
9,Chandigarh,rupnagar,11,2,0,0,1.0,8.0,0,13,2,9.0,13,22.0,24.0


In [34]:
district_monthly_counts = combined_df.groupby(["district", "month"]).agg(total_months = ("month", "count"), active_months = ("T", lambda x : (x > 0).sum())).reset_index()
district_monthly_counts.head()

Unnamed: 0,district,month,total_months,active_months
0,chandigarh,1,1,1
1,chandigarh,4,1,1
2,chandigarh,6,1,1
3,chandigarh,9,1,1
4,chandigarh,10,1,1


In [35]:
district_monthly_counts["zero_months"] = district_monthly_counts["total_months"] - district_monthly_counts["active_months"]
district_monthly_counts["activity_ratio"] = district_monthly_counts["active_months"] / district_monthly_counts["total_months"]
district_monthly_counts["zero_month_ratio"] = district_monthly_counts["zero_months"] / district_monthly_counts["total_months"]

combined_df = combined_df.merge(district_monthly_counts[["district", "month", "activity_ratio", "zero_month_ratio"]], on = ["district", "month"], how = "left")
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio,zero_month_ratio
0,Chandigarh,chandigarh,1,94,13,2,57.0,566.0,226,285,109,623.0,511,1134.0,1243.0,1.0,0.0
1,Chandigarh,chandigarh,4,86,21,10,2731.0,7773.0,4587,3334,117,10504.0,7921,18425.0,18542.0,1.0,0.0
2,Chandigarh,chandigarh,6,376,64,7,663.0,5313.0,2259,3764,447,5976.0,6023,11999.0,12446.0,1.0,0.0
3,Chandigarh,chandigarh,9,775,79,1,676.0,4939.0,3098,3513,855,5615.0,6611,12226.0,13081.0,1.0,0.0
4,Chandigarh,chandigarh,10,446,51,0,330.0,3288.0,2425,2792,497,3618.0,5217,8835.0,9332.0,1.0,0.0


In [36]:
combined_df = combined_df.merge(district_monthly_counts[["district", "month", "activity_ratio", "zero_month_ratio"]], on = ["district", "month"], how = "left")
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio_x,zero_month_ratio_x,activity_ratio_y,zero_month_ratio_y
0,Chandigarh,chandigarh,1,94,13,2,57.0,566.0,226,285,109,623.0,511,1134.0,1243.0,1.0,0.0,1.0,0.0
1,Chandigarh,chandigarh,4,86,21,10,2731.0,7773.0,4587,3334,117,10504.0,7921,18425.0,18542.0,1.0,0.0,1.0,0.0
2,Chandigarh,chandigarh,6,376,64,7,663.0,5313.0,2259,3764,447,5976.0,6023,11999.0,12446.0,1.0,0.0,1.0,0.0
3,Chandigarh,chandigarh,9,775,79,1,676.0,4939.0,3098,3513,855,5615.0,6611,12226.0,13081.0,1.0,0.0,1.0,0.0
4,Chandigarh,chandigarh,10,446,51,0,330.0,3288.0,2425,2792,497,3618.0,5217,8835.0,9332.0,1.0,0.0,1.0,0.0


In [37]:
district_volume_metrics = combined_df.groupby(["state", "district"]).agg(avg_monthly_enrolment = ("E", "mean"),
                                               monthly_valatility = ("T", lambda x: x.std(ddof=0) / x.mean() if x.mean() > 0 else 0),
                                               peak_load_ratio = ("T", lambda x: x.max() / x.mean() if x.mean() > 0 else 0)).reset_index()


combined_df = combined_df.merge(district_volume_metrics, on=["state", "district"], how="left")
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio_x,zero_month_ratio_x,activity_ratio_y,zero_month_ratio_y,avg_monthly_enrolment,monthly_valatility,peak_load_ratio
0,Chandigarh,chandigarh,1,94,13,2,57.0,566.0,226,285,109,623.0,511,1134.0,1243.0,1.0,0.0,1.0,0.0,466.142857,0.425181,1.549613
1,Chandigarh,chandigarh,4,86,21,10,2731.0,7773.0,4587,3334,117,10504.0,7921,18425.0,18542.0,1.0,0.0,1.0,0.0,466.142857,0.425181,1.549613
2,Chandigarh,chandigarh,6,376,64,7,663.0,5313.0,2259,3764,447,5976.0,6023,11999.0,12446.0,1.0,0.0,1.0,0.0,466.142857,0.425181,1.549613
3,Chandigarh,chandigarh,9,775,79,1,676.0,4939.0,3098,3513,855,5615.0,6611,12226.0,13081.0,1.0,0.0,1.0,0.0,466.142857,0.425181,1.549613
4,Chandigarh,chandigarh,10,446,51,0,330.0,3288.0,2425,2792,497,3618.0,5217,8835.0,9332.0,1.0,0.0,1.0,0.0,466.142857,0.425181,1.549613


In [38]:
district_update_burden = combined_df.groupby(["state", "district"]).agg(avg_monthly_enrollments = ("E", "sum"), avg_monthly_demo_updates = ("DU", "sum"), avg_monthly_bio_updates = ("BU", "sum")).reset_index()

district_update_burden["U"] = district_update_burden["avg_monthly_demo_updates"] + district_update_burden["avg_monthly_bio_updates"]
district_update_burden["biometric_burden"] = district_update_burden["avg_monthly_bio_updates"] / (district_update_burden["avg_monthly_bio_updates"] + district_update_burden["avg_monthly_demo_updates"])
district_update_burden["update_dominant"] = np.where(district_update_burden["U"] > district_update_burden["avg_monthly_enrollments"], 1, 0)
district_update_burden["enrollment_update_balance"] = district_update_burden["avg_monthly_enrollments"] / (district_update_burden["avg_monthly_enrollments"] + district_update_burden["U"])

combined_df = combined_df.merge(district_update_burden[["state", "district", "biometric_burden", "update_dominant", "enrollment_update_balance"]], on=["state", "district"], how="left")
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio_x,zero_month_ratio_x,activity_ratio_y,zero_month_ratio_y,avg_monthly_enrolment,monthly_valatility,peak_load_ratio,biometric_burden,update_dominant,enrollment_update_balance
0,Chandigarh,chandigarh,1,94,13,2,57.0,566.0,226,285,109,623.0,511,1134.0,1243.0,1.0,0.0,1.0,0.0,466.142857,0.425181,1.549613,0.463514,1,0.038957
1,Chandigarh,chandigarh,4,86,21,10,2731.0,7773.0,4587,3334,117,10504.0,7921,18425.0,18542.0,1.0,0.0,1.0,0.0,466.142857,0.425181,1.549613,0.463514,1,0.038957
2,Chandigarh,chandigarh,6,376,64,7,663.0,5313.0,2259,3764,447,5976.0,6023,11999.0,12446.0,1.0,0.0,1.0,0.0,466.142857,0.425181,1.549613,0.463514,1,0.038957
3,Chandigarh,chandigarh,9,775,79,1,676.0,4939.0,3098,3513,855,5615.0,6611,12226.0,13081.0,1.0,0.0,1.0,0.0,466.142857,0.425181,1.549613,0.463514,1,0.038957
4,Chandigarh,chandigarh,10,446,51,0,330.0,3288.0,2425,2792,497,3618.0,5217,8835.0,9332.0,1.0,0.0,1.0,0.0,466.142857,0.425181,1.549613,0.463514,1,0.038957


In [39]:
combined_df.drop(["activity_ratio_y", "zero_month_ratio_y"], axis=1, inplace=True)
combined_df.rename(columns={"activity_ratio_x": "activity_ratio", "zero_month_ratio_x": "zero_month_ratio"}, inplace=True)

In [40]:
combined_df = combined_df.groupby(["state", "district"], as_index = False).first()
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio,zero_month_ratio,avg_monthly_enrolment,monthly_valatility,peak_load_ratio,biometric_burden,update_dominant,enrollment_update_balance
0,Chandigarh,chandigarh,1,94,13,2,57.0,566.0,226,285,109,623.0,511,1134.0,1243.0,1.0,0.0,466.142857,0.425181,1.549613,0.463514,1,0.038957
1,Chandigarh,mohali,6,0,1,0,0.0,0.0,1,1,1,0.0,2,2.0,3.0,1.0,0.0,1.0,0.0,1.0,1.0,1,0.333333
2,Chandigarh,rupnagar,9,1,0,0,0.0,6.0,1,16,1,6.0,17,23.0,24.0,1.0,0.0,1.5,0.0,1.0,0.666667,1,0.0625


In [41]:
def normalize(x):
    maxx, minx = x.max(), x.min()
    if maxx == minx:
        return x * 0 + 0.5
    normalized = (x - minx) / (maxx - minx)
    return normalized

def inverse_normalize(x):
    inversed = 1 - normalize(x)
    return inversed

In [42]:
combined_df["access"] = (combined_df["activity_ratio"] + normalize(combined_df["avg_monthly_enrolment"])) / 2
combined_df["responsiveness"] = normalize(combined_df["U"] / (combined_df["E"] + combined_df["U"]))
combined_df["inclusion"] = normalize((combined_df["age_0_5"] + combined_df["age_5_17"]) / combined_df["E"])
combined_df["stability"] = (inverse_normalize(combined_df["monthly_valatility"]) + inverse_normalize(combined_df["peak_load_ratio"])) / 2
combined_df["visibility"] = combined_df["activity_ratio"]

combined_df["DEI"] = (combined_df["access"] + combined_df["responsiveness"] + combined_df["inclusion"] + combined_df["stability"] + combined_df["visibility"]) / 5
combined_df["ASS"] = (inverse_normalize(combined_df["activity_ratio"]) + inverse_normalize(combined_df["avg_monthly_enrolment"])) / 2
combined_df["UBS"] = (normalize(combined_df["biometric_burden"]) + normalize(combined_df["update_dominant"])) / 2
combined_df["SRS"] = (normalize(combined_df["monthly_valatility"]) + normalize(combined_df["zero_month_ratio"])) / 2

combined_df.head()


Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio,zero_month_ratio,avg_monthly_enrolment,monthly_valatility,peak_load_ratio,biometric_burden,update_dominant,enrollment_update_balance,access,responsiveness,inclusion,stability,visibility,DEI,ASS,UBS,SRS
0,Chandigarh,chandigarh,1,94,13,2,57.0,566.0,226,285,109,623.0,511,1134.0,1243.0,1.0,0.0,466.142857,0.425181,1.549613,0.463514,1,0.038957,1.0,0.842202,0.0,0.0,1.0,0.56844,0.25,0.25,0.75
1,Chandigarh,mohali,6,0,1,0,0.0,0.0,1,1,1,0.0,2,2.0,3.0,1.0,0.0,1.0,0.0,1.0,1.0,1,0.333333,0.5,0.0,1.0,1.0,1.0,0.7,0.75,0.75,0.25
2,Chandigarh,rupnagar,9,1,0,0,0.0,6.0,1,16,1,6.0,17,23.0,24.0,1.0,0.0,1.5,0.0,1.0,0.666667,1,0.0625,0.500537,1.0,1.0,1.0,1.0,0.900107,0.749463,0.439337,0.25


In [43]:
combined_df.drop(["access", "responsiveness", "inclusion", "stability", "visibility"], axis=1, inplace=True)
combined_df.head()

Unnamed: 0,state,district,month,age_0_5,age_5_17,age_18_greater,demo_age_5_17,demo_age_17_,bio_age_5_17,bio_age_17_,E,DU,BU,U,T,activity_ratio,zero_month_ratio,avg_monthly_enrolment,monthly_valatility,peak_load_ratio,biometric_burden,update_dominant,enrollment_update_balance,DEI,ASS,UBS,SRS
0,Chandigarh,chandigarh,1,94,13,2,57.0,566.0,226,285,109,623.0,511,1134.0,1243.0,1.0,0.0,466.142857,0.425181,1.549613,0.463514,1,0.038957,0.56844,0.25,0.25,0.75
1,Chandigarh,mohali,6,0,1,0,0.0,0.0,1,1,1,0.0,2,2.0,3.0,1.0,0.0,1.0,0.0,1.0,1.0,1,0.333333,0.7,0.75,0.75,0.25
2,Chandigarh,rupnagar,9,1,0,0,0.0,6.0,1,16,1,6.0,17,23.0,24.0,1.0,0.0,1.5,0.0,1.0,0.666667,1,0.0625,0.900107,0.749463,0.439337,0.25


In [None]:
combined_df.to_csv(r"D:\UIDAI hackathon\North\CH\ch_district_analysis.csv", index=False)
final_df = combined_df[["state", "district", "DEI", "ASS", "UBS", "SRS"]]
final_df.to_csv(r"D:\UIDAI hackathon\North\CH\ch_district_final_scores.csv", index=False)