# SECTION 1: IMPORTS & SETUP

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Plot styling shared by every figure in this notebook.
sns.set_style("whitegrid")
sns.set_palette("husl")
plt.rcParams.update({
    'figure.figsize': (14, 8),
    'font.size': 11,
})

# Banner: an 80-character rule above and below the notebook title.
print("="*80, "LAST MILE CONNECT - COMPREHENSIVE VISUALIZATIONS", "="*80, sep="\n")

# Input and output directories, given relative to the working directory.
# The output directory is created up front so later savefig calls cannot
# fail on a missing folder.
VIZ_PATH = Path("../docs/visualizations")
DATA_PATH = Path("../data/processed")
VIZ_PATH.mkdir(parents=True, exist_ok=True)

LAST MILE CONNECT - COMPREHENSIVE VISUALIZATIONS


# SECTION 2: DATA LOADING

In [2]:
# Load the three processed CSV tables that the visualisation cells read.
# DATA_PATH is defined in the setup cell.
gap_df = pd.read_csv(DATA_PATH / "district_gap_analysis.csv")
state_df = pd.read_csv(DATA_PATH / "state_gap_analysis.csv")
camps_df = pd.read_csv(DATA_PATH / "mobile_camp_locations.csv")

# Report the row count of each table as a quick sanity check on the load.
print("\n✅ Data loaded:")
print(f"   Districts: {len(gap_df)}")
print(f"   States: {len(state_df)}")
print(f"   Camps: {len(camps_df)}")


✅ Data loaded:
   Districts: 1045
   States: 49
   Camps: 200


# VISUALIZATION 1: EXECUTIVE DASHBOARD

In [3]:
# Executive dashboard: five panels on a 3x3 grid, written to VIZ_PATH as a PNG.
print("\n[VIZ 1] Creating executive dashboard...")

fig = plt.figure(figsize=(18, 12))
gs = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3)

# Priority label -> colour. Used by the pie, the scatter and the camp bar chart
# in this cell, and read again by the camp deployment cell, so it stays a
# notebook-level name. Labels missing from this dict are drawn in gray.
colors = {'CRITICAL': '#d32f2f', 'HIGH': '#f57c00', 'MEDIUM': '#fbc02d', 'LOW': '#388e3c'}

# 1. Coverage Rate Distribution
ax1 = fig.add_subplot(gs[0, :2])
ax1.hist(gap_df['coverage_rate'], bins=50, color='steelblue', edgecolor='black', alpha=0.7)
# Summed enrolment over summed population, so large districts weigh more;
# this is not the plain mean of the per-district rates shown in the histogram.
national_avg = gap_df['total_enrolment'].sum() / gap_df['population_2025'].sum() * 100
ax1.axvline(national_avg, color='red', linestyle='--', linewidth=2,
            label=f'National Avg: {national_avg:.1f}%')
ax1.set_xlabel('Coverage Rate (%)', fontweight='bold')
ax1.set_ylabel('Number of Districts', fontweight='bold')
ax1.set_title('Distribution of Aadhaar Coverage Across Districts',
              fontsize=14, fontweight='bold')
ax1.legend()
ax1.grid(alpha=0.3)

# 2. Priority Distribution (Pie)
ax2 = fig.add_subplot(gs[0, 2])
priority_counts = gap_df['priority_level'].value_counts()
pie_colors = [colors.get(x, 'gray') for x in priority_counts.index]
ax2.pie(priority_counts.values, labels=priority_counts.index, autopct='%1.1f%%',
        colors=pie_colors, startangle=90)
ax2.set_title('District Priority Levels', fontsize=12, fontweight='bold')

# 3. Top 15 States by Unreached
ax3 = fig.add_subplot(gs[1, :])
top_states = state_df.nlargest(15, 'unreached')
unreached_millions = top_states['unreached'] / 10**6
bars = ax3.barh(range(len(top_states)), unreached_millions, color='coral')
ax3.set_yticks(range(len(top_states)))
ax3.set_yticklabels(top_states['state'])
ax3.set_xlabel('Unreached Population (Millions)', fontweight='bold')
ax3.set_title('Top 15 States by Unreached Population', fontsize=14, fontweight='bold')
ax3.grid(axis='x', alpha=0.3)

# Add value labels at the end of each bar
for i, millions in enumerate(unreached_millions):
    ax3.text(millions, i, f" {millions:.1f}M", va='center', fontsize=9)

# 4. Coverage vs Unreached Scatter
ax4 = fig.add_subplot(gs[2, :2])
# Look colours up with a gray fallback (same rule as the pie). A plain
# Series.map(colors) yields NaN for any unmapped or missing label, which
# scatter cannot interpret as a colour.
district_colors = [colors.get(p, 'gray') for p in gap_df['priority_level']]
scatter = ax4.scatter(gap_df['coverage_rate'], gap_df['unreached_population']/10**3,
                       c=district_colors, s=50, alpha=0.6,
                       edgecolors='black', linewidth=0.5)
ax4.set_xlabel('Coverage Rate (%)', fontweight='bold')
ax4.set_ylabel('Unreached Population (Thousands)', fontweight='bold')
ax4.set_title('Coverage Rate vs Unreached Population by District',
              fontsize=14, fontweight='bold')
ax4.grid(alpha=0.3)

# 5. Camp Distribution
ax5 = fig.add_subplot(gs[2, 2])
camp_priority = camps_df['camp_priority'].value_counts()
# value_counts() orders by frequency, so a fixed colour list applied by
# position would colour bars by rank rather than by priority. Put the known
# priorities in the order they appear in `colors`, append any other labels,
# and colour each bar from its own label.
known_levels = [p for p in colors if p in camp_priority.index]
other_levels = [p for p in camp_priority.index if p not in colors]
camp_priority = camp_priority.reindex(known_levels + other_levels)
ax5.bar(camp_priority.index, camp_priority.values,
        color=[colors.get(p, 'gray') for p in camp_priority.index])
ax5.set_xlabel('Camp Priority', fontweight='bold')
ax5.set_ylabel('Number of Camps', fontweight='bold')
ax5.set_title('Mobile Camp\nPriority Levels', fontsize=12, fontweight='bold')
ax5.grid(axis='y', alpha=0.3)

fig.suptitle('Last Mile Connect - Executive Dashboard',
             fontsize=18, fontweight='bold', y=0.995)
fig.savefig(VIZ_PATH / 'executive_dashboard.png', dpi=300, bbox_inches='tight')
print("✅ Saved: executive_dashboard.png")
# Close this figure explicitly rather than whichever one happens to be current.
plt.close(fig)


[VIZ 1] Creating executive dashboard...
✅ Saved: executive_dashboard.png


# VISUALIZATION 2: STATE COMPARISON

In [4]:
# State comparison: two grouped-bar panels for the 20 most populous states.
print("\n[VIZ 2] Creating state comparison chart...")


def _paired_bars(ax, positions, tick_labels, left, right, bar_width, ylabel, title):
    """Draw two side-by-side bar series on ``ax`` and label the axes.

    ``left`` and ``right`` are ``(values, legend_label, color)`` tuples. The
    left series is shifted half a bar width to the left of each position and
    the right series half a bar width to the right.

    Returns the two bar containers as ``(left_bars, right_bars)``.
    """
    containers = []
    for direction, (values, legend_label, color) in ((-1, left), (1, right)):
        containers.append(
            ax.bar(positions + direction * bar_width / 2, values, bar_width,
                   label=legend_label, color=color, alpha=0.8)
        )
    ax.set_xlabel('State', fontweight='bold')
    ax.set_ylabel(ylabel, fontweight='bold')
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.set_xticks(positions)
    ax.set_xticklabels(tick_labels, rotation=45, ha='right')
    ax.legend()
    ax.grid(axis='y', alpha=0.3)
    return tuple(containers)


fig, axes = plt.subplots(2, 1, figsize=(16, 12))

# Top 20 states by population
top_20_states = state_df.nlargest(20, 'population')
x = np.arange(len(top_20_states))
width = 0.35

# Chart 1: Coverage vs Gap
bars1, bars2 = _paired_bars(
    axes[0], x, top_20_states['state'],
    left=(top_20_states['coverage_rate'], 'Coverage Rate (%)', 'seagreen'),
    right=(top_20_states['gap_pct'], 'Gap (%)', 'crimson'),
    bar_width=width,
    ylabel='Percentage',
    title='Coverage Rate vs Gap - Top 20 States by Population',
)

# Chart 2: Enrolled vs Unreached (both converted to millions)
bars3, bars4 = _paired_bars(
    axes[1], x, top_20_states['state'],
    left=(top_20_states['enrolled']/10**6, 'Enrolled (M)', 'steelblue'),
    right=(top_20_states['unreached']/10**6, 'Unreached (M)', 'orange'),
    bar_width=width,
    ylabel='Population (Millions)',
    title='Enrolled vs Unreached Population - Top 20 States',
)

fig.tight_layout()
fig.savefig(VIZ_PATH / 'state_comparison.png', dpi=300, bbox_inches='tight')
print("✅ Saved: state_comparison.png")
# Close this figure explicitly rather than whichever one happens to be current.
plt.close(fig)


[VIZ 2] Creating state comparison chart...
✅ Saved: state_comparison.png


# VISUALIZATION 3: AGE GROUP ANALYSIS

In [5]:
# Age group analysis: the same three column totals shown as a pie (shares)
# and as a bar chart (absolute values in millions).
print("\n[VIZ 3] Creating age group analysis...")

fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Age group totals: column sums over all rows of gap_df
age_totals = gap_df[['age_0_5', 'age_5_17', 'age_18_greater']].sum()
age_totals_millions = age_totals / 10**6
labels = ['0-5 years', '5-17 years', '18+ years']
# One palette for both panels so each age group keeps its colour.
age_colors = ['#ff9999', '#66b3ff', '#99ff99']

# Pie chart
axes[0].pie(age_totals, labels=labels, autopct='%1.1f%%',
            colors=age_colors, startangle=90,
            textprops={'fontsize': 12, 'fontweight': 'bold'})
axes[0].set_title('Enrollment Distribution by Age Group',
                   fontsize=14, fontweight='bold')

# Bar chart of the same totals
axes[1].bar(labels, age_totals_millions, color=age_colors,
            alpha=0.8, edgecolor='black')
axes[1].set_ylabel('Enrollments (Millions)', fontweight='bold')
axes[1].set_title('Total Enrollments by Age Group', fontsize=14, fontweight='bold')
axes[1].grid(axis='y', alpha=0.3)

# Value label just above each bar
for i, v in enumerate(age_totals_millions):
    axes[1].text(i, v, f'{v:.1f}M', ha='center', va='bottom',
                  fontweight='bold', fontsize=11)

fig.tight_layout()
fig.savefig(VIZ_PATH / 'age_group_analysis.png', dpi=300, bbox_inches='tight')
print("✅ Saved: age_group_analysis.png")
# Close this figure explicitly rather than whichever one happens to be current.
plt.close(fig)


[VIZ 3] Creating age group analysis...
✅ Saved: age_group_analysis.png


# VISUALIZATION 4: CAMP DEPLOYMENT ANALYSIS

In [6]:
# Camp deployment analysis: 2x2 grid covering per-camp coverage, per-camp cost,
# camp counts by state, and coverage against estimated operational days.
print("\n[VIZ 4] Creating camp deployment analysis...")

fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# 1. Coverage per camp distribution
mean_coverage = camps_df['coverage_population'].mean()
axes[0, 0].hist(camps_df['coverage_population'], bins=40,
                 color='steelblue', edgecolor='black', alpha=0.7)
axes[0, 0].axvline(mean_coverage, color='red',
                    linestyle='--', linewidth=2,
                    label=f'Mean: {mean_coverage:.0f}')
axes[0, 0].set_xlabel('Coverage Population per Camp', fontweight='bold')
axes[0, 0].set_ylabel('Number of Camps', fontweight='bold')
axes[0, 0].set_title('Distribution of Camp Coverage', fontsize=12, fontweight='bold')
axes[0, 0].legend()
axes[0, 0].grid(alpha=0.3)

# 2. Cost per camp, shown in lakhs (1 lakh = 10**5)
mean_cost_lakhs = camps_df['total_cost'].mean()/10**5
axes[0, 1].hist(camps_df['total_cost']/10**5, bins=40,
                 color='coral', edgecolor='black', alpha=0.7)
axes[0, 1].axvline(mean_cost_lakhs, color='red',
                    linestyle='--', linewidth=2,
                    label=f'Mean: ₹{mean_cost_lakhs:.1f}L')
axes[0, 1].set_xlabel('Cost per Camp (Lakhs)', fontweight='bold')
axes[0, 1].set_ylabel('Number of Camps', fontweight='bold')
axes[0, 1].set_title('Distribution of Camp Costs', fontsize=12, fontweight='bold')
axes[0, 1].legend()
axes[0, 1].grid(alpha=0.3)

# 3. Top 15 states by camp count
camp_by_state = camps_df.groupby('state').size().nlargest(15)
axes[1, 0].barh(range(len(camp_by_state)), camp_by_state.values, color='seagreen')
axes[1, 0].set_yticks(range(len(camp_by_state)))
axes[1, 0].set_yticklabels(camp_by_state.index)
axes[1, 0].set_xlabel('Number of Camps', fontweight='bold')
axes[1, 0].set_title('Top 15 States by Camp Count', fontsize=12, fontweight='bold')
axes[1, 0].grid(axis='x', alpha=0.3)

# 4. Coverage vs Days
# `colors` is the priority -> colour dict defined in the executive dashboard
# cell, so that cell must have run first. Look colours up with a gray
# fallback: Series.map(colors) yields NaN for any unmapped or missing label,
# which scatter cannot interpret as a colour.
camp_colors = [colors.get(p, 'gray') for p in camps_df['camp_priority']]
axes[1, 1].scatter(camps_df['coverage_population'], camps_df['estimated_days'],
                    alpha=0.6, s=50, c=camp_colors,
                    edgecolors='black', linewidth=0.5)
axes[1, 1].set_xlabel('Coverage Population', fontweight='bold')
axes[1, 1].set_ylabel('Estimated Operational Days', fontweight='bold')
axes[1, 1].set_title('Coverage vs Operational Timeline', fontsize=12, fontweight='bold')
axes[1, 1].grid(alpha=0.3)

fig.suptitle('Mobile Camp Deployment Analysis', fontsize=16, fontweight='bold', y=0.995)
fig.tight_layout()
fig.savefig(VIZ_PATH / 'camp_deployment_analysis.png', dpi=300, bbox_inches='tight')
print("✅ Saved: camp_deployment_analysis.png")
# Close this figure explicitly rather than whichever one happens to be current.
plt.close(fig)



[VIZ 4] Creating camp deployment analysis...
✅ Saved: camp_deployment_analysis.png


# VISUALIZATION 5: BUDGET BREAKDOWN

In [7]:
# Budget breakdown: cost split by component (pie) and by camp priority (bars).
print("\n[VIZ 5] Creating budget visualization...")

fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Budget by component: column sums over all camps
cost_components = {
    'Setup Costs': camps_df['setup_cost'].sum(),
    'Operational Costs': camps_df['operational_cost'].sum(),
    'Enrollment Costs': camps_df['enrollment_cost'].sum()
}

# Pass concrete lists instead of dict views so the call does not rely on
# matplotlib converting the views internally.
axes[0].pie(list(cost_components.values()), labels=list(cost_components.keys()),
            autopct='%1.1f%%', startangle=90,
            colors=['#ff9999', '#66b3ff', '#99ff99'],
            textprops={'fontsize': 12, 'fontweight': 'bold'})
axes[0].set_title('Budget Breakdown by Component', fontsize=14, fontweight='bold')

# Budget by priority level, shown in crores (1 crore = 10**7)
budget_by_priority = camps_df.groupby('camp_priority')['total_cost'].sum() / 10**7
priority_order = ['CRITICAL', 'HIGH', 'MEDIUM', 'LOW']
# A priority with no camps gets a zero-height bar instead of raising KeyError.
budget_ordered = [budget_by_priority.get(p, 0) for p in priority_order]
priority_colors_list = ['#d32f2f', '#f57c00', '#fbc02d', '#388e3c']

bars = axes[1].bar(priority_order, budget_ordered, color=priority_colors_list,
                    alpha=0.8, edgecolor='black')
axes[1].set_ylabel('Budget (Crores)', fontweight='bold')
axes[1].set_title('Budget Allocation by Camp Priority', fontsize=14, fontweight='bold')
axes[1].grid(axis='y', alpha=0.3)

# Value label just above each bar
for i, v in enumerate(budget_ordered):
    axes[1].text(i, v, f'₹{v:.1f}Cr', ha='center', va='bottom',
                  fontweight='bold', fontsize=11)

fig.tight_layout()
fig.savefig(VIZ_PATH / 'budget_breakdown.png', dpi=300, bbox_inches='tight')
print("✅ Saved: budget_breakdown.png")
# Close this figure explicitly rather than whichever one happens to be current.
plt.close(fig)


[VIZ 5] Creating budget visualization...
✅ Saved: budget_breakdown.png


# SUMMARY

In [8]:
# Closing summary: lists the five figures produced above and where they were saved.
print("\n" + "="*80)
print("✨ VISUALIZATION SUITE COMPLETED!")
print("="*80)

print("\n📊 Created 5 comprehensive visualizations:")
print("   1. Executive Dashboard - Overview of key metrics")
print("   2. State Comparison - Coverage analysis by state")
print("   3. Age Group Analysis - Demographic breakdown")
print("   4. Camp Deployment - Mobile camp statistics")
print("   5. Budget Breakdown - Cost allocation")

print(f"\n📁 All visualizations saved to: {VIZ_PATH}/")


✨ VISUALIZATION SUITE COMPLETED!

📊 Created 5 comprehensive visualizations:
   1. Executive Dashboard - Overview of key metrics
   2. State Comparison - Coverage analysis by state
   3. Age Group Analysis - Demographic breakdown
   4. Camp Deployment - Mobile camp statistics
   5. Budget Breakdown - Cost allocation

📁 All visualizations saved to: ..\docs\visualizations/
