In [1]:
# Colab-only step: open the browser upload prompt. The recorded output of this
# cell shows the three enrolment CSV shards being saved under their own names.
from google.colab import files

# NOTE(review): `uploaded` is not referenced again in the cells visible here;
# the later cells read the saved files from disk by file name instead.
uploaded = files.upload()

Saving api_data_aadhar_enrolment_0_500000.csv to api_data_aadhar_enrolment_0_500000.csv
Saving api_data_aadhar_enrolment_500000_1000000.csv to api_data_aadhar_enrolment_500000_1000000.csv
Saving api_data_aadhar_enrolment_1000000_1006029.csv to api_data_aadhar_enrolment_1000000_1006029.csv


In [2]:
!pip install prophet plotly gradio



In [3]:


import pandas as pd
import plotly.express as px

# CSV shards saved into the working directory by the upload step.
FILES = [
    "api_data_aadhar_enrolment_0_500000.csv",
    "api_data_aadhar_enrolment_500000_1000000.csv",
    "api_data_aadhar_enrolment_1000000_1006029.csv"
]

# Per-age-bucket count columns that are added up into one total per row.
AGE_COLUMNS = ["age_0_5", "age_5_17", "age_18_greater"]

# Stack all shards into a single frame with a fresh 0..n-1 index.
df = pd.concat([pd.read_csv(f) for f in FILES], ignore_index=True)

# Dates are parsed as day-month-year; values that do not match become NaT.
df["date"] = pd.to_datetime(df["date"], format="%d-%m-%Y", errors="coerce")

# Report how many rows are about to be discarded, so that a date-format
# mismatch in one shard cannot silently shrink the dataset.
n_bad_dates = int(df["date"].isna().sum())
if n_bad_dates:
    print(
        f"Warning: dropping {n_bad_dates:,} of {len(df):,} rows "
        "with a missing or unparseable date"
    )
df = df.dropna(subset=["date"])

# Row-wise sum that skips missing buckets. With `+`, a single NaN bucket turns
# the whole row total into NaN, and the later groupby sums would then ignore
# the row's other buckets as well.
df["total_enrolments"] = df[AGE_COLUMNS].sum(axis=1)

# Month bucket, stored as the timestamp of the first day of that month.
df["year_month"] = df["date"].dt.to_period("M").dt.to_timestamp()


# Total enrolments per state and calendar month. groupby sorts by its keys, so
# rows come out ordered by state and then month - the order pct_change relies on.
state_metrics = (
    df.groupby(["state", "year_month"])["total_enrolments"]
    .sum()
    .reset_index()
)


# Mean of the monthly totals for each state.
avg_volume = (
    state_metrics.groupby("state")["total_enrolments"]
    .mean()
    .reset_index(name="avg_monthly_enrolments")
)


# Month-over-month relative change within each state. A state's first month
# has no predecessor and is NaN. A month that follows a zero-enrolment month
# divides by zero and yields +/-inf; one such value would make the state's
# average growth infinite (fillna below does not touch inf), so it is turned
# into NaN and skipped by the mean.
state_metrics["growth"] = (
    state_metrics.groupby("state")["total_enrolments"]
    .pct_change()
    .replace([float("inf"), float("-inf")], float("nan"))
)

# Mean of the defined growth values for each state (NaN values are skipped).
avg_growth = (
    state_metrics.groupby("state")["growth"]
    .mean()
    .reset_index(name="avg_growth_rate")
)

# One row per state: average volume plus average growth. States with no
# defined growth value (for example a single month of data) get 0.
state_summary = avg_volume.merge(avg_growth, on="state").fillna(0)


# Bubble chart with one bubble per state: average monthly volume on x (also
# the bubble size), average growth rate on y (also the colour).
scatter_options = dict(
    x="avg_monthly_enrolments",
    y="avg_growth_rate",
    size="avg_monthly_enrolments",
    color="avg_growth_rate",
    hover_name="state",
    title="UIDAI Enrolment Resource Allocation Priority Map",
    labels={
        "avg_monthly_enrolments": "Average Monthly Enrolments",
        "avg_growth_rate": "Average Growth Rate",
    },
    color_continuous_scale="Turbo",
)
fig_resource = px.scatter(state_summary, **scatter_options)

# Dashed horizontal reference line at zero growth.
fig_resource.add_hline(y=0, line_dash="dash")
fig_resource.show()


# The twelve states with the smallest average monthly enrolment volume,
# in ascending order.
low_volume_states = state_summary.sort_values(by="avg_monthly_enrolments").iloc[:12]

infra_axis_labels = {
    "avg_monthly_enrolments": "Average Monthly Enrolments",
    "state": "State",
}

# Horizontal bar chart of those states.
fig_infra = px.bar(
    low_volume_states,
    x="avg_monthly_enrolments",
    y="state",
    orientation="h",
    title="States Potentially Requiring Digital Infrastructure Upgrades",
    labels=infra_axis_labels,
)

# Flip the y axis so the first row of the frame is drawn at the top.
fig_infra.update_layout(yaxis_autorange="reversed")
fig_infra.show()

# Total enrolments per (state, district) pair over the whole loaded period.
district_summary = (
    df.groupby(["state", "district"])["total_enrolments"]
    .sum()
    .reset_index()
)

# The twenty (state, district) pairs with the smallest totals, ascending.
infra_districts = district_summary.sort_values(
    "total_enrolments"
).head(20)

fig_district_infra = px.bar(
    infra_districts,
    x="total_enrolments",
    y="district",
    color="state",
    orientation="h",
    title="Districts with Persistent Low Aadhaar Activity (Infra Risk)",
    labels={
        "total_enrolments": "Total Enrolments",
        "district": "District"
    }
)

# Colouring by state splits the bars into one trace per state, and a
# categorical axis then lists categories in trace order rather than in the
# sorted row order. Pin the order to the ascending ranking explicitly; with
# the reversed axis the smallest district is drawn at the top.
# NOTE(review): a district name that occurs in two states shares one axis
# category, so those bars stack - confirm whether that can happen here.
fig_district_infra.update_layout(
    yaxis=dict(
        autorange="reversed",
        categoryorder="array",
        categoryarray=infra_districts["district"].drop_duplicates().tolist(),
    )
)
fig_district_infra.show()

In [4]:


import pandas as pd
import numpy as np
import plotly.graph_objects as go

# CSV shards saved into the working directory by the upload step. This cell
# reads them again itself rather than reusing a frame left in the kernel.
FILES = [
    "api_data_aadhar_enrolment_0_500000.csv",
    "api_data_aadhar_enrolment_500000_1000000.csv",
    "api_data_aadhar_enrolment_1000000_1006029.csv"
]

# Per-age-bucket count columns that are added up into one total per row.
AGE_COLUMNS = ["age_0_5", "age_5_17", "age_18_greater"]

# Stack all shards into a single frame with a fresh 0..n-1 index.
df = pd.concat([pd.read_csv(f) for f in FILES], ignore_index=True)

# Dates are parsed as day-month-year; values that do not match become NaT.
df["date"] = pd.to_datetime(df["date"], format="%d-%m-%Y", errors="coerce")

# Report how many rows are about to be discarded, so that a date-format
# mismatch in one shard cannot silently shrink the dataset.
n_bad_dates = int(df["date"].isna().sum())
if n_bad_dates:
    print(
        f"Warning: dropping {n_bad_dates:,} of {len(df):,} rows "
        "with a missing or unparseable date"
    )
df = df.dropna(subset=["date"])

# Row-wise sum that skips missing buckets. With `+`, a single NaN bucket turns
# the whole row total into NaN, and the monthly sum below would then ignore
# the row's other buckets as well.
df["total_enrolments"] = df[AGE_COLUMNS].sum(axis=1)

# Month bucket, stored as the timestamp of the first day of that month.
df["year_month"] = df["date"].dt.to_period("M").dt.to_timestamp()

# National total per month, as a two-column frame: "date" and "enrolments".
monthly = (
    df.groupby("year_month")["total_enrolments"]
    .agg("sum")
    .rename("enrolments")
    .rename_axis("date")
    .reset_index()
)

# Centre and spread of the monthly totals (pandas' default sample std).
mean = monthly["enrolments"].mean()
std = monthly["enrolments"].std()

# Standard score of every month relative to the whole series.
monthly["z_score"] = monthly["enrolments"].sub(mean).div(std)

# A month is flagged when it lies more than 2.5 standard deviations from the mean.
monthly["anomaly"] = monthly["z_score"].abs().gt(2.5)


# Rows of `monthly` that were flagged as anomalies.
anomalous_months = monthly.loc[monthly["anomaly"]]

# Full monthly series drawn as a line with markers.
series_trace = go.Scatter(
    x=monthly["date"],
    y=monthly["enrolments"],
    mode="lines+markers",
    name="Monthly Enrolments",
    line=dict(color="royalblue"),
)

# Flagged months drawn on top as red crosses.
anomaly_trace = go.Scatter(
    x=anomalous_months["date"],
    y=anomalous_months["enrolments"],
    mode="markers",
    name="Detected Anomalies",
    marker=dict(color="red", size=10, symbol="x"),
)

fig = go.Figure()
fig.add_trace(series_trace)
fig.add_trace(anomaly_trace)

layout_options = {
    "title": "Anomaly Detection in Aadhaar Enrolment Volumes",
    "xaxis_title": "Month",
    "yaxis_title": "Total Enrolments",
    "template": "plotly_white",
}
fig.update_layout(**layout_options)

fig.show()