# 03. Analysis

Comprehensive analysis of Aadhaar enrolment and update patterns.

In [None]:
import sys
sys.path.insert(0, '..')

import pandas as pd
import numpy as np
from src.data_loader import load_all_datasets
from src.preprocessing import preprocess_all
from src.analysis import (
    temporal_trends, state_aggregations, district_aggregations,
    age_group_analysis, monthly_patterns, detect_anomalies_iqr,
    growth_rate_analysis, comparative_state_metrics, identify_hotspots,
    identify_coldspots, youth_transition_analysis, weekly_pattern_analysis
)

In [None]:
# Load the three raw datasets, then pass them together through preprocess_all.
# The resulting enrolment, demographic and biometric objects are the inputs to
# the analysis cells below.
enrolment_raw, demographic_raw, biometric_raw = load_all_datasets()
enrolment, demographic, biometric = preprocess_all(
    enrolment_raw, demographic_raw, biometric_raw
)

## A. Temporal Analysis

In [None]:
# Build the enrolment trend table for 'total_enrolments'.
# enrol_trends is reused by the growth-rate and anomaly-detection cells, which
# pass 'date' and 'total' to their helpers - presumably column names of this
# result; confirm against src.analysis.temporal_trends.
enrol_trends = temporal_trends(enrolment, 'total_enrolments')
# Preview the first 10 rows.
enrol_trends.head(10)

In [None]:
# Summarise growth over the enrolment trend ('date' and 'total' are handed to
# growth_rate_analysis) and print every statistic that has a value, formatted
# with thousands separators and two decimals.
# NOTE(review): the format spec assumes every non-None statistic is numeric -
# confirm against growth_rate_analysis.
growth_stats = growth_rate_analysis(enrol_trends, 'date', 'total')
print("Enrolment Growth Statistics:")
for key, value in growth_stats.items():
    # Statistics reported as None are left out of the listing.
    if value is None:
        continue
    print(f"  {key}: {value:,.2f}")

In [None]:
# monthly_patterns returns two results for 'total_enrolments'; only the second
# one (enrol_month_avg) is displayed in this cell.
# NOTE(review): presumably a per-month series plus per-calendar-month
# averages - confirm against src.analysis.monthly_patterns.
enrol_monthly, enrol_month_avg = monthly_patterns(enrolment, 'total_enrolments')
enrol_month_avg

In [None]:
# Weekly pattern analysis of 'total_enrolments'; the result is displayed as-is.
# NOTE(review): presumably a day-of-week breakdown, going by the variable
# name - confirm against src.analysis.weekly_pattern_analysis.
dow_patterns = weekly_pattern_analysis(enrolment, 'total_enrolments')
dow_patterns

## B. Geographic Analysis

In [None]:
# Aggregate 'total_enrolments' by state. state_enrol is reused by the hotspot
# and coldspot cells below, which rank it on 'total'.
state_enrol = state_aggregations(enrolment, 'total_enrolments')
# Preview the first 15 rows.
state_enrol.head(15)

In [None]:
# Aggregate 'total_enrolments' by district and preview the first 20 rows.
district_enrol = district_aggregations(enrolment, 'total_enrolments')
district_enrol.head(20)

In [None]:
# Pick hotspot states from the state aggregation using 'total' and a threshold
# of 90. The printed heading hard-codes the same number, so update both
# together if the threshold changes.
hotspots = identify_hotspots(state_enrol, 'total', 90)
print("Hotspot States (90th percentile):")
# Show only the state, its total and its share of the overall total.
hotspots[['state', 'total', 'pct_of_total']]

In [None]:
# Pick coldspot states from the state aggregation using 'total' and a threshold
# of 10. The printed heading hard-codes the same number, so update both
# together if the threshold changes.
coldspots = identify_coldspots(state_enrol, 'total', 10)
print("Coldspot States (10th percentile):")
# Show only the state, its total and its share of the overall total.
coldspots[['state', 'total', 'pct_of_total']]

## C. Demographic Analysis

In [None]:
# Age-group analysis of the preprocessed enrolment data; the result is
# displayed as-is.
age_dist = age_group_analysis(enrolment)
age_dist

## D. Anomaly Detection

In [None]:
# Run detect_anomalies_iqr over the 'total' values of the enrolment trend.
enrol_anomalies = detect_anomalies_iqr(enrol_trends, 'total')
# Keep only the rows whose 'is_anomaly' flag is set.
anomaly_days = enrol_anomalies.loc[enrol_anomalies['is_anomaly']]
print(f"Anomalous days detected: {len(anomaly_days)}")
# Display when each anomaly occurred, its value and how it was classified.
anomaly_days.loc[:, ['date', 'total', 'anomaly_type']]

In [None]:
# Same trend -> anomaly pipeline as used for enrolments, applied to the
# demographic data's 'total_updates'.
demo_trends = temporal_trends(demographic, 'total_updates')
demo_anomalies = detect_anomalies_iqr(demo_trends, 'total')
# Summing the 'is_anomaly' flag column counts the flagged rows.
print(f"Demographic update anomalies: {demo_anomalies['is_anomaly'].sum()}")

## E. Comparative Analysis

In [None]:
# Combine the enrolment, demographic and biometric data into one comparative
# table. The next cell reads its 'state', 'enrolments', 'demo_updates' and
# 'demo_to_enrol_ratio' columns.
comparative = comparative_state_metrics(enrolment, demographic, biometric)
# Preview the first 15 rows.
comparative.head(15)

In [None]:
# Top 10 states by demographic-update-to-enrolment ratio, shown next to the
# counts the ratio relates to.
# NOTE(review): if the ratio can be non-finite (e.g. zero enrolments), such
# rows would rank first here - confirm how comparative_state_metrics handles it.
comparative.nlargest(10, 'demo_to_enrol_ratio')[['state', 'enrolments', 'demo_updates', 'demo_to_enrol_ratio']]

## F. Youth Transition Analysis

In [None]:
# Youth transition analysis built from the enrolment and biometric data.
# The cells below rank it by 'transition_ratio' after removing rows where that
# ratio is missing.
transitions = youth_transition_analysis(enrolment, biometric)
# Preview the first 15 rows.
transitions.head(15)

In [None]:
# Ten highest youth biometric transition ratios; rows with no ratio are
# dropped before ranking.
transitions.dropna(subset=['transition_ratio']).nlargest(10, 'transition_ratio')

In [None]:
# Ten lowest youth biometric transition ratios; rows with no ratio are
# dropped before ranking.
transitions.dropna(subset=['transition_ratio']).nsmallest(10, 'transition_ratio')