# 08: Pilot Design

Select treatment and control districts for a simulated pilot evaluation.

In [None]:
import pandas as pd
import numpy as np
import os

# Normalise the working directory so the relative 'outputs/...' paths resolve.
# NOTE: '__file__' is a quoted string here, not the module attribute, so the
# first chdir resolves relative to the current working directory.
_start_dir = os.path.dirname(os.path.abspath('__file__'))
os.chdir(_start_dir)

# When launched from inside the notebooks folder, step up one level.
_cwd_name = os.path.basename(os.getcwd())
if _cwd_name == 'notebooks':
    os.chdir('..')

# Load the per-district priority table used by every later cell.
priority = pd.read_csv('outputs/priority_scores.csv')
print(f"Loaded {len(priority)} districts")

## Select Treatment Districts
Top 5 priority districts, drawn from the 20 highest-ranked districts with at most two per bottleneck type so the set covers diverse bottlenecks.

In [None]:
# Build the treatment group in three steps: take the 20 best-ranked districts,
# keep at most two per bottleneck label, then keep the first five rows.
top_ranked = priority.nsmallest(20, 'priority_rank')
capped_per_label = top_ranked.groupby('bottleneck_label').head(2)
treatment_cols = ['state', 'district', 'priority_rank', 'priority_score', 'bottleneck_label']
treatment = capped_per_label.head(5)[treatment_cols].copy()

# Tag the rows so they can be told apart from controls after concatenation.
treatment['group'] = 'treatment'
print("Treatment Districts:")
treatment

## Match Control Districts
For each treatment district, select a control district from a lower priority tier (ranks 100–200), preferring one in the same state.

In [None]:
# Control: similar states, lower priority (rank 100-200)
control_pool = priority[(priority['priority_rank'] >= 100) & (priority['priority_rank'] <= 200)]

# Fail early with a clear message; otherwise the fallback below would divide
# by zero when there is nothing to choose from.
if control_pool.empty and not treatment.empty:
    raise ValueError(
        "No districts with priority_rank between 100 and 200; "
        "cannot select control districts"
    )

pool_size = len(control_pool)
pool_states = control_pool['state'].tolist()
# Row positions in control_pool already assigned, so that each treatment
# district gets its own control instead of several sharing the same one.
used_positions = set()

control_list = []
for _, t_row in treatment.iterrows():
    available = [pos for pos in range(pool_size) if pos not in used_positions]
    if not available:
        # More treatment districts than eligible controls: reuse is
        # unavoidable, so start a fresh pass over the pool and say so.
        print("Warning: control pool exhausted; control districts will be reused")
        used_positions.clear()
        available = list(range(pool_size))

    # Match by state if possible (first unused district in the same state)
    same_state = [pos for pos in available if pool_states[pos] == t_row['state']]
    if same_state:
        pick = same_state[0]
    else:
        # No unused same-state district is left. Start at the slot derived
        # from how many controls were already chosen and take the first
        # unused district at or after it, wrapping around the pool.
        start = len(control_list) % pool_size
        pick = min(available, key=lambda pos: (pos - start) % pool_size)

    used_positions.add(pick)
    match = control_pool.iloc[pick]

    control_list.append({
        'state': match['state'],
        'district': match['district'],
        'priority_rank': match['priority_rank'],
        'priority_score': match['priority_score'],
        'bottleneck_label': match.get('bottleneck_label', 'NORMAL'),
        'group': 'control',
        'matched_to': t_row['district']
    })

# Explicit columns keep the frame's schema stable even when no rows were matched.
control = pd.DataFrame(
    control_list,
    columns=[
        'state', 'district', 'priority_rank', 'priority_score',
        'bottleneck_label', 'group', 'matched_to',
    ],
)
print("Control Districts:")
control

## Create Pilot Regions File

In [None]:
# Treatment rows reference themselves in 'matched_to', the column that on
# control rows names the treatment district they were paired with.
treatment['matched_to'] = treatment['district']  # Self-reference for treatment

# Stack treatment rows above control rows under a fresh index, then persist.
pilot_groups = (treatment, control)
pilot_regions = pd.concat(pilot_groups, ignore_index=True)
pilot_regions.to_csv('outputs/pilot_regions.csv', index=False)

print(f"Saved {len(pilot_regions)} pilot regions")
pilot_regions

## Pilot KPIs
Primary and secondary metrics to track during the pilot.

In [None]:
# KPI catalogue grouped by tier; each entry is (metric name, description, target).
kpis = {
    'primary': [
        ('bio_update_child', 'Weekly biometric updates for children', 'Increase 30%'),
        ('completion_rate_child', 'Updates / Demand ratio', 'Increase to >0.8'),
        ('update_backlog_child', 'Pending updates queue', 'Decrease 50%'),
    ],
    'secondary': [
        ('demo_update_child', 'Demographic updates', 'Stable or increase'),
        ('failure_rate', 'Biometric capture failures', 'Decrease 10%'),
    ],
}

# Banner, then one bulleted section per tier in a fixed order.
banner = "=" * 50
print(banner)
print("PILOT KPIs")
print(banner)
for heading, tier in (("\nPrimary:", 'primary'), ("\nSecondary:", 'secondary')):
    print(heading)
    for name, desc, target in kpis[tier]:
        print(f"  • {name}: {desc} (Target: {target})")