In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Plot settings
plt.rcParams["figure.figsize"] = (10,5)
sns.set(style="whitegrid")

# Create outputs folder
os.makedirs("../outputs", exist_ok=True)


In [5]:
enrol = pd.read_csv("../data/processed/enrolment_clean.csv")
demo = pd.read_csv("../data/processed/demographic_clean.csv")
bio = pd.read_csv("../data/processed/biometric_clean.csv")

# Ensure date column is datetime
for df in [enrol, demo, bio]:
    df['date'] = pd.to_datetime(df['date'], dayfirst=True, errors='coerce')

print("Enrolment:", enrol.shape)
print("Demographic:", demo.shape)
print("Biometric:", bio.shape)


Enrolment: (983000, 8)
Demographic: (1598012, 7)
Biometric: (1766159, 7)


In [6]:
# --- Enrolment aggregation ---
enrol_agg = enrol.groupby(['state','district'])[['total_enrol']].sum().reset_index()

# --- Demographic updates aggregation ---
demo_agg = demo.groupby(['state','district'])[['total_demo_updates']].sum().reset_index()

# --- Biometric updates aggregation ---
bio_agg = bio.groupby(['state','district'])[['total_bio_updates']].sum().reset_index()


In [7]:
combined_agg = enrol_agg.merge(demo_agg, on=['state','district'], how='outer')
combined_agg = combined_agg.merge(bio_agg, on=['state','district'], how='outer')

# Fill missing values
combined_agg.fillna(0, inplace=True)

# Total updates and service ratio
combined_agg['total_updates'] = combined_agg['total_demo_updates'] + combined_agg['total_bio_updates']
combined_agg['service_ratio'] = combined_agg['total_updates'] / (combined_agg['total_enrol'] + 1)

combined_agg.head()


Unnamed: 0,state,district,total_enrol,total_demo_updates,total_bio_updates,total_updates,service_ratio
0,100000,100000,214.0,2.0,0.0,2.0,0.009302
1,Andaman & Nicobar Islands,Andamans,73.0,555.0,1935.0,2490.0,33.648649
2,Andaman & Nicobar Islands,Nicobars,1.0,3.0,2.0,5.0,2.5
3,Andaman & Nicobar Islands,South Andaman,37.0,236.0,324.0,560.0,14.736842
4,Andaman And Nicobar Islands,Nicobar,74.0,500.0,1755.0,2255.0,30.066667


In [8]:
top_states_updates = combined_agg.groupby('state')['total_updates'].sum().sort_values(ascending=False).head(10)

# Plot
top_states_updates.plot(kind='bar')
plt.title("Top 10 States by Aadhaar Updates (Demographic + Biometric)")
plt.ylabel("Total Updates")
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig("../outputs/top_states_updates.png")
plt.clf()


<Figure size 1000x500 with 0 Axes>

In [9]:
# Top 10 districts with highest service ratio
district_ratio = combined_agg.sort_values('service_ratio', ascending=False).head(10)

# Heatmap
plt.figure(figsize=(12,6))
sns.heatmap(district_ratio[['total_enrol','total_updates','service_ratio']], annot=True, fmt=".0f", cmap="YlOrRd")
plt.title("Top 10 Districts by Service Load")
plt.tight_layout()
plt.savefig("../outputs/districts_service_load.png")
plt.clf()


<Figure size 1200x600 with 0 Axes>

In [10]:
# Aggregate age-wise counts across all states
age_summary = pd.DataFrame({
    'Enrolment_0_5': enrol['age_0_5'].sum(),
    'Enrolment_5_17': enrol['age_5_17'].sum(),
    'Enrolment_18+': enrol['age_18_greater'].sum(),
    'Demographic_Updates_5_17': demo['demo_age_5_17'].sum(),
    'Demographic_Updates_18+': demo['demo_age_17_'].sum(),
    'Biometric_Updates_5_17': bio['bio_age_5_17'].sum(),
    'Biometric_Updates_18+': bio['bio_age_17_'].sum()
}, index=[0])

# Transpose for plotting
age_summary.T.plot(kind='bar')
plt.title("Age-wise Enrolment & Updates")
plt.ylabel("Count")
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig("../outputs/age_wise_summary.png")
plt.clf()


<Figure size 1000x500 with 0 Axes>

In [11]:
print("üìå Top insights graphs saved in outputs folder:")
print("1Ô∏è‚É£ top_states_updates.png ‚Üí Top 10 states by updates")
print("2Ô∏è‚É£ districts_service_load.png ‚Üí Districts with high service ratio")
print("3Ô∏è‚É£ age_wise_summary.png ‚Üí Age-wise enrolment & updates comparison")
print("\nAll ready for your PDF report and hackathon submission!")


üìå Top insights graphs saved in outputs folder:
1Ô∏è‚É£ top_states_updates.png ‚Üí Top 10 states by updates
2Ô∏è‚É£ districts_service_load.png ‚Üí Districts with high service ratio
3Ô∏è‚É£ age_wise_summary.png ‚Üí Age-wise enrolment & updates comparison

All ready for your PDF report and hackathon submission!
