In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from google.colab import files
import io

In [None]:
def load_and_clean(filepath):
    """Read a CSV into a DataFrame and normalise its column headers.

    Every header is stripped of surrounding whitespace and lower-cased so
    that later code can address columns with one consistent spelling.

    Args:
        filepath: Whatever ``pd.read_csv`` accepts as its first argument.

    Returns:
        The loaded DataFrame with normalised column names.
    """
    frame = pd.read_csv(filepath)
    trimmed_headers = frame.columns.str.strip()
    frame.columns = trimmed_headers.str.lower()
    return frame

# Define file paths.
# The three CSVs share one Drive folder, so the folder is spelled out once.
DATA_DIR = '/content/drive/MyDrive/UDAI/Clean_DataSet/structured_Unified_DataSet'
bio_filepath = DATA_DIR + '/Aadhar_biometric_Str.csv'
demo_filepath = DATA_DIR + '/Aadhar_demographic_Str.csv'
enrol_filepath = DATA_DIR + '/Aadhar_enrolment_Str.csv'

# Load and clean DataFrames
try:
    df_bio = load_and_clean(bio_filepath)
    df_demo = load_and_clean(demo_filepath)
    df_enrol = load_and_clean(enrol_filepath)
except FileNotFoundError:
    print("Files not found. Please ensure the CSVs are in the specified drive paths.")
    # Re-raise so execution stops here. Swallowing the error would leave
    # df_bio / df_demo / df_enrol undefined and make the next cell fail with
    # a NameError that hides the real cause.
    raise
else:
    print("Datasets loaded and cleaned successfully.")

Datasets loaded and cleaned successfully.


In [None]:
# 2. CALCULATION ENGINE (Digital Vulnerability Index)
def process_vulnerability_data(df_e, df_d, df_b):
    """Aggregate the three datasets per pincode and derive a 0-100 DVI score.

    Each input is summed per (state, district, pincode). The demographic and
    biometric totals are left-joined onto the enrolment totals, and groups
    with no match in either are treated as 0.

    Derived columns (the ``+ 1`` in each denominator avoids division by zero
    for groups with no enrolments):
        BRR       total_bio  / (total_enrol + 1)   -- Biometric Refresh Rate
        MI        total_demo / (total_enrol + 1)   -- Migration Intensity
        NL        shortfall of the group's 0-5 enrolment share below the
                  mean share across all groups, floored at 0 -- Newborn Lag
        DVI_Raw   0.4 * BRR + 0.4 * NL + 0.2 * MI
        DVI_Score DVI_Raw min-max scaled to 0-100

    Args:
        df_e: Enrolment frame with columns state, district, pincode,
            age_0_5, age_5_17, age_18_greater.
        df_d: Demographic frame with columns state, district, pincode,
            demo_age_5_17, demo_age_17_.
        df_b: Biometric frame with columns state, district, pincode,
            bio_age_5_17, bio_age_17_.

    Returns:
        One row per (state, district, pincode) present in ``df_e``, carrying
        the summed inputs, the totals and the derived metric columns.

    Raises:
        KeyError: If any of the columns listed above is missing.
    """
    # Grouping by State, District, and Pincode
    cols = ['state', 'district', 'pincode']

    e_grp = df_e.groupby(cols).agg({'age_0_5': 'sum', 'age_5_17': 'sum', 'age_18_greater': 'sum'}).reset_index()
    e_grp['total_enrol'] = e_grp['age_0_5'] + e_grp['age_5_17'] + e_grp['age_18_greater']

    # The adult bucket is named "..._age_17_" in the demographic and
    # biometric files (unlike "age_18_greater" in the enrolment file).
    d_grp = df_d.groupby(cols).agg({'demo_age_5_17': 'sum', 'demo_age_17_': 'sum'}).reset_index()
    d_grp['total_demo'] = d_grp['demo_age_5_17'] + d_grp['demo_age_17_']

    b_grp = df_b.groupby(cols).agg({'bio_age_5_17': 'sum', 'bio_age_17_': 'sum'}).reset_index()
    b_grp['total_bio'] = b_grp['bio_age_5_17'] + b_grp['bio_age_17_']

    # Merge all: enrolment is the spine; unmatched demo/bio totals become 0.
    df = e_grp.merge(d_grp, on=cols, how='left').merge(b_grp, on=cols, how='left').fillna(0)

    # Metrics
    enrol_base = df['total_enrol'] + 1
    df['BRR'] = df['total_bio'] / enrol_base  # Biometric Refresh Rate
    # Newborn Lag: compare like with like. The reference is the mean *share*
    # of 0-5 enrolments, not the mean raw count; subtracting a share from a
    # count made the clip a no-op and the metric meaningless.
    newborn_share = df['age_0_5'] / enrol_base
    df['NL'] = (newborn_share.mean() - newborn_share).clip(lower=0)
    df['MI'] = df['total_demo'] / enrol_base  # Migration Intensity

    # DVI Score (0-100)
    df['DVI_Raw'] = (df['BRR'] * 0.4) + (df['NL'] * 0.4) + (df['MI'] * 0.2)
    raw_min = df['DVI_Raw'].min()
    raw_span = df['DVI_Raw'].max() - raw_min
    if raw_span > 0:
        df['DVI_Score'] = (df['DVI_Raw'] - raw_min) / raw_span * 100
    else:
        # All groups share one raw value (or there is a single/no group):
        # min-max scaling would divide by zero and yield NaN, so report 0.
        df['DVI_Score'] = 0.0
    return df

# Score every (state, district, pincode) group from the three loaded frames.
master_df = process_vulnerability_data(df_e=df_enrol, df_d=df_demo, df_b=df_bio)

In [None]:
import plotly.express as px

# 3. VISUALIZATION: PROOF OF CONCEPT CHARTS

# Number of highest-scoring districts shown in the treemap (Chart 2).
TOP_N_DISTRICTS = 30

# --- CHART 1: STATE-WISE VULNERABILITY ---
# Mean pincode-level DVI per state, highest first.
state_summary = (
    master_df.groupby('state')['DVI_Score']
    .mean()
    .reset_index()
    .sort_values('DVI_Score', ascending=False)
)
fig_state = px.bar(state_summary, x='DVI_Score', y='state', orientation='h',
                   title='<b>State-Level Digital Vulnerability Profile</b>',
                   color='DVI_Score', color_continuous_scale='Reds')
fig_state.show()

# --- CHART 2: TOP 30 DISTRICT HOTSPOTS ---
# Mean pincode-level DVI per district; only the TOP_N_DISTRICTS highest are drawn.
dist_summary = (
    master_df.groupby(['state', 'district'])['DVI_Score']
    .mean()
    .reset_index()
    .sort_values('DVI_Score', ascending=False)
)
top_districts = dist_summary.head(TOP_N_DISTRICTS)
fig_dist = px.treemap(top_districts, path=['state', 'district'], values='DVI_Score',
                      color='DVI_Score', color_continuous_scale='Viridis',
                      title='<b>District Hotspots: Biometric & Enrolment Risk Distribution</b>')
fig_dist.show()

# --- CHART 3: PINCODE-WISE DRILL DOWN (Scatter Analysis) ---
# One marker per pincode: enrolment volume on x, biometric updates on y,
# marker size driven by DVI_Score (high update frequency vs low enrolment).
fig_pin = px.scatter(master_df, x='total_enrol', y='total_bio',
                     size='DVI_Score', color='state', hover_data=['district', 'pincode'],
                     title='<b>Pincode-Level Analysis: Biometric Decay vs Enrolment Volume</b>',
                     labels={'total_enrol':'New Enrolments', 'total_bio':'Biometric Updates'})
fig_pin.update_layout(template='plotly_dark')
fig_pin.show()

print("Analysis Complete. Use the interactive charts above to identify exclusion zones.")

Analysis Complete. Use the interactive charts above to identify exclusion zones.
