# Microplan Excel Validator

Run every cell in order, from top to bottom, to start (Shift+Enter runs the selected cell and moves to the next one).

In [1]:
# Setup and imports
import subprocess, sys, os, shutil, importlib

# Install the notebook's dependencies when a requirements file sits next to it.
# pip's output is captured to keep the cell quiet, so a failure has to be
# reported explicitly -- otherwise it would only surface later as a confusing
# ImportError on one of the imports below.
if os.path.exists('requirements.txt'):
    _pip_result = subprocess.run(
        [sys.executable, '-m', 'pip', 'install', '-q', '-r', 'requirements.txt'],
        capture_output=True, text=True)
    if _pip_result.returncode != 0:
        print("WARNING: 'pip install -r requirements.txt' failed:")
        print(_pip_result.stderr.strip())

import pandas as pd
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
from collections import defaultdict
import base64

import validator as validator_module
importlib.reload(validator_module)
from validator import Validator

# Start each session from a clean slate: anything left in the working
# folders by a previous run is deleted, then the folders are recreated empty.
for workdir in ('uploads', 'error'):
    if os.path.exists(workdir):
        shutil.rmtree(workdir)
    os.makedirs(workdir, exist_ok=True)

# One validator instance, shared by every cell below.
validator = Validator()
print("Ready!")

Ready!


In [2]:
# Column Configuration

# Shared configuration filled in by save_config() and read by the
# validation and transformation cells. 'configured' stays False until the
# user has saved at least once.
config_state = {
    'level_columns': [], 'target_columns': [], 'num_targets': 0,
    'facility_col': '', 'district_col': '', 'state_col': '',
    'alignment_mapping': {}, 'configured': False
}

# Text widgets currently shown for boundary levels / target columns.
# These lists are mutated in place by add_level() and update_targets().
level_boxes = []
target_boxes = []
# Output area that save_config() writes its confirmation or error into.
out_config = widgets.Output()

# Boundary config widgets
# The form always starts with a single "Level 1" box; more are added on demand.
first_level = widgets.Text(value='', placeholder='e.g., COUNTRY', description='Level 1:',
                           style={'description_width': '80px'}, layout=widgets.Layout(width='400px'))
level_boxes.append(first_level)
level_container = widgets.VBox([first_level])

btn_add_level = widgets.Button(description='+ Add Level', button_style='info', layout=widgets.Layout(width='120px'))

def add_level(btn):
    """Append one more "Level N" text box to the boundary-level form.

    ``btn`` is the clicked button handed over by ``on_click``; it is unused.
    """
    next_number = len(level_boxes) + 1
    new_box = widgets.Text(
        value='',
        placeholder='e.g., District',
        description=f'Level {next_number}:',
        style={'description_width': '80px'},
        layout=widgets.Layout(width='400px'),
    )
    level_boxes.append(new_box)
    # Give the container a fresh copy of the list so it shows every box.
    level_container.children = list(level_boxes)

# Each click on "+ Add Level" adds one more level text box.
btn_add_level.on_click(add_level)

# Number of target columns; changing it rebuilds the boxes inside
# target_container (see the observer registered on this widget).
num_targets = widgets.IntText(value=0, description='# Targets:', style={'description_width': '80px'},
                              layout=widgets.Layout(width='200px'))
target_container = widgets.VBox([])

def update_targets(change):
    """Rebuild the target-name boxes so their count matches the new value.

    ``change`` is the change dict delivered by ``observe``; only its
    ``'new'`` entry (the requested number of targets) is read. Every box is
    recreated with its default name ``target_<n>``.
    """
    # Mutate the shared list in place: save_config() reads this same object.
    target_boxes.clear()
    target_boxes.extend(
        widgets.Text(
            value=f'target_{position + 1}',
            description=f'Target {position + 1}:',
            style={'description_width': '80px'},
            layout=widgets.Layout(width='400px'),
        )
        for position in range(change['new'])
    )
    target_container.children = target_boxes

# Rebuild the target boxes whenever the "# Targets" number changes.
num_targets.observe(update_targets, names='value')

# Facility config with mapping
# Each row pairs a facility-file column name (left box) with the name it
# "maps to" (right box); save_config() stores the pairs as
# alignment_mapping = {left value: right value}.
facility_col = widgets.Text(value='Facility Name', description='Facility:',
                            style={'description_width': '100px'}, layout=widgets.Layout(width='280px'))
facility_map = widgets.Text(value='Unidade Sanitaria', description='Maps to:',
                            style={'description_width': '70px'}, layout=widgets.Layout(width='220px'))

district_col = widgets.Text(value='District', description='District:',
                            style={'description_width': '100px'}, layout=widgets.Layout(width='280px'))
district_map = widgets.Text(value='Distrito', description='Maps to:',
                            style={'description_width': '70px'}, layout=widgets.Layout(width='220px'))

state_col = widgets.Text(value='State', description='State:',
                         style={'description_width': '100px'}, layout=widgets.Layout(width='280px'))
state_map = widgets.Text(value='Provincia', description='Maps to:',
                         style={'description_width': '70px'}, layout=widgets.Layout(width='220px'))

btn_save = widgets.Button(description='Save Config', button_style='success', layout=widgets.Layout(width='150px'))

def save_config(btn):
    """Read the configuration form, store it, and confirm it to the user.

    Blank level/target boxes are ignored. At least one level is required;
    otherwise an error is shown and nothing is saved. On success the values
    are written into ``config_state`` (with ``configured=True``), pushed to
    the validator via ``set_columns``, and echoed in ``out_config``.

    ``btn`` is the clicked button handed over by ``on_click``; it is unused.
    """
    def filled_values(boxes):
        # Stripped text of every box that is not blank.
        stripped = (box.value.strip() for box in boxes)
        return [text for text in stripped if text]

    with out_config:
        clear_output(wait=True)
        levels = filled_values(level_boxes)
        targets = filled_values(target_boxes)

        if not levels:
            display(HTML("<p style='color:red'>Enter at least one level!</p>"))
            return

        facility_name = facility_col.value.strip()
        district_name = district_col.value.strip()
        state_name = state_col.value.strip()

        # A pair enters the mapping only when both of its boxes are filled.
        mapping = {}
        pairs = (
            (facility_name, facility_map),
            (district_name, district_map),
            (state_name, state_map),
        )
        for source_name, target_widget in pairs:
            mapped_name = target_widget.value.strip()
            if source_name and mapped_name:
                mapping[source_name] = mapped_name

        config_state.update(
            level_columns=levels,
            target_columns=targets,
            num_targets=len(targets),
            facility_col=facility_name,
            district_col=district_name,
            state_col=state_name,
            alignment_mapping=mapping,
            configured=True,
        )

        validator.set_columns(
            boundary_cols=levels,
            facility_cols=[facility_name] if facility_name else [],
            target_cols=targets,
            num_targets=len(targets),
        )

        map_str = ', '.join(f'{src}→{dst}' for src, dst in mapping.items())
        display(HTML(f"""<div style='padding:10px; background:#d4edda; border-radius:5px;'>
            <b>Saved!</b><br>Levels: {', '.join(levels)}<br>Targets: {', '.join(targets) or 'none'}
            <br>Mapping: {map_str or 'none'}</div>"""))

btn_save.on_click(save_config)

# Assemble the configuration form: boundary levels and targets on top,
# the facility-to-boundary mapping rows below, then the save button and
# the area where save_config() reports its result.
display(widgets.VBox([
    widgets.HTML("<h3>Boundary File</h3>"),
    level_container, btn_add_level,
    widgets.HTML("<b>Targets:</b>"), num_targets, target_container,
    widgets.HTML("<h3>Facility File (with mapping to boundary)</h3>"),
    widgets.HBox([facility_col, facility_map]),
    widgets.HBox([district_col, district_map]),
    widgets.HBox([state_col, state_map]),
    btn_save, out_config
]))

VBox(children=(HTML(value='<h3>Boundary File</h3>'), VBox(children=(Text(value='', description='Level 1:', lay…

In [3]:
# File Upload & Validation

# Paths (under uploads/) of the two files saved by save_upload();
# None until a file has been uploaded, and again after "Clear".
file_state = {'boundary_file': None, 'facility_file': None}
# Separate output areas for the status line, the results table and the
# download links, so each can be cleared and redrawn independently.
out_status = widgets.Output()
out_results = widgets.Output()
out_downloads = widgets.Output()


# upload1 receives the boundary file, upload2 the facility file
# (see the observers registered further down in this cell).
upload1 = widgets.FileUpload(accept='.xlsx,.xls,.csv', multiple=False)
upload2 = widgets.FileUpload(accept='.xlsx,.xls,.csv', multiple=False)

btn_validate = widgets.Button(description='VALIDATE', button_style='primary', layout=widgets.Layout(width='120px'))
btn_clear = widgets.Button(description='Clear', button_style='warning', layout=widgets.Layout(width='80px'))

def show_status(msg, color='black'):
    """Replace the status line with ``msg``, rendered bold in ``color``.

    ``msg`` is inserted into the HTML as-is, so any markup in it is rendered.
    """
    markup = f'<p style="color:{color}; font-weight:bold">{msg}</p>'
    with out_status:
        clear_output(wait=True)
        display(HTML(markup))

def save_upload(uploader, key):
    """Write the file held by ``uploader`` into ``uploads/`` and remember its path.

    Args:
        uploader: a ``FileUpload`` widget. Its ``value`` may be a tuple of
            entries or a dict of entries, and an entry may expose ``name`` /
            ``content`` as attributes or as dict keys; all of these shapes
            are handled.
        key: the ``file_state`` slot to store the saved path under
            (``'boundary_file'`` or ``'facility_file'``).

    Does nothing when the widget holds no file. The file name comes from the
    client, so only its final path component is used: a name such as
    ``../../x.xlsx`` must not be able to write outside ``uploads/``.
    """
    if not uploader.value:
        return
    files = uploader.value
    info = files[0] if isinstance(files, tuple) else list(files.values())[0]
    name = info.name if hasattr(info, 'name') else info['name']
    content = info.content if hasattr(info, 'content') else info['content']

    # Normalise Windows separators first so basename() strips them as well.
    safe_name = os.path.basename(str(name).replace('\\', '/'))
    if safe_name in ('', '.', '..'):
        show_status('Uploaded file has no usable name!', 'red')
        return

    # The folder is created by the setup cell, but recreate it if it was
    # removed in the meantime instead of failing on open().
    os.makedirs('uploads', exist_ok=True)
    path = os.path.join('uploads', safe_name)
    with open(path, 'wb') as f:
        f.write(content)
    file_state[key] = path
    show_status(f'Loaded: {safe_name}', 'green')

def on_validate(b):
    """Validate both uploaded files, check their alignment, and show the results.

    Triggered by the VALIDATE button (``b`` is the button; unused). Requires a
    saved column configuration and both files; otherwise a red status message
    is shown and nothing runs.

    Steps:
      1. validate the boundary file with the configured level/target columns;
      2. validate the facility file, treating its facility/district/state
         columns as the columns to check;
      3. if a mapping was configured, run the alignment check between the
         first sheet of each file and copy the resulting row status into the
         facility data held by the validator;
      4. save the annotated files into ``error/`` and render the results
         table and the download links.

    If any step raises, the status line reports the failure (instead of
    staying on "Validating...") and the exception is re-raised.
    """
    from html import escape  # function-scope import; `html` is not imported at file level

    if not config_state['configured']:
        show_status('Configure columns first!', 'red')
        return
    if not file_state['boundary_file']:
        show_status('Upload boundary file!', 'red')
        return
    if not file_state['facility_file']:
        show_status('Upload facility file!', 'red')
        return

    boundary_path = file_state['boundary_file']
    facility_path = file_state['facility_file']
    facility_cols = [config_state['facility_col']] if config_state['facility_col'] else []

    all_issues = []
    summary = {'total': 0, 'errors': 0, 'warnings': 0, 'by_rule': defaultdict(int)}

    def absorb(file_summary):
        # Fold one validate_file() summary into the running totals.
        for counter in ('total', 'errors', 'warnings'):
            summary[counter] += file_summary[counter]
        for rule, count in file_summary['by_rule'].items():
            summary['by_rule'][rule] += count

    validator.reset()
    show_status('Validating...', 'blue')

    try:
        # Validate boundary file
        validator.set_columns(boundary_cols=config_state['level_columns'],
                              facility_cols=facility_cols,
                              target_cols=config_state['target_columns'],
                              num_targets=config_state['num_targets'])
        issues, s = validator.validate_file(boundary_path)
        all_issues.extend(issues)
        absorb(s)

        # Get boundary data for alignment (first sheet only)
        b_sheets = validator.read_file(boundary_path)
        b_sheet = list(b_sheets.keys())[0]
        b_df = b_sheets[b_sheet]

        # Validate facility file: its facility/district/state columns take
        # the place of the boundary columns, and it carries no targets.
        fac_cols = [c for c in [config_state['facility_col'], config_state['district_col'],
                                config_state['state_col']] if c]
        validator.set_columns(boundary_cols=fac_cols,
                              facility_cols=facility_cols,
                              target_cols=[], num_targets=0)
        validator.set_alignment_mapping(config_state['alignment_mapping'])

        issues2, s2 = validator.validate_file(facility_path)
        all_issues.extend(issues2)
        absorb(s2)

        # Run alignment check
        if config_state['alignment_mapping']:
            f_sheets = validator.read_file(facility_path)
            f_sheet = list(f_sheets.keys())[0]
            f_df = f_sheets[f_sheet]
            f_label = os.path.basename(facility_path)

            if f_label not in validator.row_status:
                validator.init_row_status(f_df, f_label)

            align_issues = validator.check_alignment(b_df, f_df, b_sheet, f_label)
            all_issues.extend(align_issues)
            summary['total'] += len(align_issues)
            summary['errors'] += sum(1 for i in align_issues if i['severity'] == 'error')
            # Warnings used to be left out here, so the "Warnings" figure
            # undercounted whenever the alignment check produced any.
            # NOTE(review): assumes the validator labels them 'warning' -- confirm.
            summary['warnings'] += sum(1 for i in align_issues if i['severity'] == 'warning')
            for i in align_issues:
                summary['by_rule'][i['rule']] += 1

            # Copy the (possibly updated) row status into the facility data
            # the validator holds, so the saved file reflects alignment results.
            if facility_path in validator.file_data and f_label in validator.row_status:
                for df in validator.file_data[facility_path].values():
                    for idx, info in validator.row_status[f_label].items():
                        if idx in df.index:
                            df.loc[idx, 'VALIDATION_STATUS'] = info['status']
                            df.loc[idx, 'VALIDATION_ERRORS'] = '; '.join(info['errors'])

        output_files = validator.save_validated_files('error')
        display_results(all_issues, summary)
        display_downloads(output_files)
    except Exception as ex:
        # Surface the failure in the UI, then let it propagate as before.
        show_status(f'Validation failed: {escape(str(ex))}', 'red')
        raise
    show_status('Done!', 'green')

def on_clear(b):
    """Forget both uploaded files, reset the validator, and blank the outputs.

    ``b`` is the clicked button handed over by ``on_click``; it is unused.
    """
    for slot in ('boundary_file', 'facility_file'):
        file_state[slot] = None
    validator.reset()
    for area in (out_results, out_downloads):
        with area:
            clear_output()
    show_status('Cleared', 'orange')

def display_results(issues, summary):
    """Render the validation outcome into ``out_results``.

    Shows a banner (green when ``summary['errors']`` is 0, red otherwise) with
    the validator's pass/fail row counts and the warning count, the per-rule
    issue counts, and a table of at most 50 issues.

    Args:
        issues: list of issue dicts; the keys 'severity', 'rule', 'sheet',
            'column', 'row', 'value' and 'message' are read.
        summary: dict with 'errors', 'warnings' and 'by_rule' (rule -> count).

    Issue fields originate from the uploaded spreadsheets, so every one of
    them is HTML-escaped before being placed in the table.
    NOTE(review): this assumes the validator's messages are plain text, not
    intentional markup -- confirm against validator.py.
    """
    from html import escape  # function-scope import; `html` is not imported at file level

    max_rows = 50  # rendering cap for the issue table

    def cell(value, limit=None):
        # Stringify, optionally truncate, and only then escape, so that an
        # HTML entity is never cut in half by the truncation.
        text = str(value)
        if limit is not None:
            text = text[:limit]
        return escape(text)

    with out_results:
        clear_output(wait=True)
        passed, failed = validator.get_stats()
        color = '#27ae60' if summary['errors'] == 0 else '#e74c3c'

        parts = [f'''<div style="padding:10px; background:#f0f0f0; border-left:4px solid {color}; margin:10px 0;">
            <b style="color:{color}">{'All Passed!' if summary['errors']==0 else 'Issues Found'}</b>
            | PASS: {passed} | FAIL: {failed} | Warnings: {summary['warnings']}</div>''']

        if summary['by_rule']:
            parts.append('<b>By Rule:</b> ' + ', '.join(
                f'{cell(rule)}: {count}' for rule, count in summary['by_rule'].items()))

        if issues:
            parts.append('<div style="max-height:250px; overflow-y:auto; margin-top:10px;">')
            parts.append('<table style="width:100%; font-size:11px; border-collapse:collapse;">')
            parts.append('<tr style="background:#333; color:white;"><th>Sev</th><th>Rule</th><th>Sheet</th>'
                         '<th>Col</th><th>Row</th><th>Value</th><th>Message</th></tr>')
            for issue in issues[:max_rows]:
                sev_color = '#c00' if issue['severity'] == 'error' else '#d80'
                sev_label = escape(str(issue['severity'])[:3].upper())
                parts.append(
                    f'<tr><td style="color:{sev_color}">{sev_label}</td><td>{cell(issue["rule"])}</td>'
                    f'<td>{cell(issue["sheet"], 20)}</td><td>{cell(issue["column"], 12)}</td>'
                    f'<td>{cell(issue["row"])}</td>'
                    f'<td>{cell(issue["value"], 15)}</td><td>{cell(issue["message"])}</td></tr>')
            parts.append('</table></div>')
            if len(issues) > max_rows:
                # Make the cap visible instead of silently dropping rows.
                parts.append(f'<p style="font-size:11px; color:#555;">'
                             f'Showing the first {max_rows} of {len(issues)} issues.</p>')
        display(HTML(''.join(parts)))

def display_downloads(files):
    """Show one download link per file in ``files`` inside ``out_downloads``.

    Each file is read in full and embedded in the link as a base64 ``data:``
    URI, so the link works without a file server.

    Args:
        files: iterable of paths to existing files.

    The file name ends up inside an HTML attribute and in the link text. It
    derives from the uploaded file's name, so it is escaped (quotes included)
    to keep a name containing ``"`` or ``<`` from breaking the markup.
    """
    from html import escape  # function-scope import; `html` is not imported at file level

    with out_downloads:
        clear_output(wait=True)
        for fp in files:
            name = escape(os.path.basename(fp), quote=True)
            with open(fp, 'rb') as fh:
                b64 = base64.b64encode(fh.read()).decode()
            display(HTML(f'<a href="data:application/octet-stream;base64,{b64}" download="{name}" '
                         f'style="display:inline-block; padding:8px 15px; background:#4caf50; color:white; '
                         f'text-decoration:none; border-radius:4px; margin:5px 0;">Download {name}</a>'))

# Save a file to disk as soon as it is picked; the change payload itself is
# ignored because save_upload() reads the widget's current value.
upload1.observe(lambda c: save_upload(upload1, 'boundary_file'), names='value')
upload2.observe(lambda c: save_upload(upload2, 'facility_file'), names='value')
btn_validate.on_click(on_validate)
btn_clear.on_click(on_clear)

# Assemble the upload/validate panel: the two upload widgets, the status
# line, the action buttons, then the results table and download links.
display(widgets.VBox([
    widgets.HTML('<h3>Upload & Validate</h3>'),
    widgets.HTML('<b>Boundary File:</b>'), upload1,
    widgets.HTML('<b>Facility File:</b>'), upload2,
    out_status,
    widgets.HBox([btn_validate, btn_clear]),
    out_results, out_downloads
]))

VBox(children=(HTML(value='<h3>Upload & Validate</h3>'), HTML(value='<b>Boundary File:</b>'), FileUpload(value…

## Transformation

After validation passes (0 errors), configure and run the transformation below to generate a SQLite database.

In [None]:
# ── Cell 5: Transformation UI ──────────────────────────────────
import importlib, os, sys, base64, time, datetime
import openpyxl
from openpyxl.utils import get_column_letter
from IPython.display import display, HTML, clear_output
import ipywidgets as widgets

# out_transform receives the output of a transformation run;
# out_transform_status receives the "blocked" notice from the gate check.
out_transform = widgets.Output()
out_transform_status = widgets.Output()

# ── Live progress log widget ───────────────────────────────────
# An HTML widget (rather than an Output) so that lines can be appended by
# assigning to its .value -- see log_progress().
progress_log = widgets.HTML(value='')

def log_progress(msg):
    """Add ``msg`` as a new line of the live progress log.

    The line is prefixed with the current local time as ``[HH:MM:SS]``.
    ``msg`` is inserted into the HTML as-is.
    """
    stamp = time.strftime('%H:%M:%S')
    entry = f"<div style='font-family:monospace; font-size:12px; color:#555;'>[{stamp}] {msg}</div>"
    progress_log.value = progress_log.value + entry

# ── Gate check ─────────────────────────────────────────────────
# Evaluated once, when this cell runs: transformation is allowed only if the
# validator reports no failed rows and at least one passed row. Re-run the
# cell after a new validation to refresh the gate.
pass_count, fail_count = validator.get_stats()
gate_ok = (fail_count == 0 and pass_count > 0)

if not gate_ok:
    with out_transform_status:
        display(HTML(
            "<div style='padding:12px; background:#ffeeba; border-left:4px solid #e74c3c; margin:10px 0;'>"
            f"<b style='color:#e74c3c'>Transformation blocked.</b> "
            f"Validation has {fail_count} failed row(s) and {pass_count} passed row(s).<br>"
            "Fix all validation errors first, then re-run this cell."
            "</div>"
        ))

# ── Auto-populated info (read-only display) ────────────────────
# Snapshot of config_state / file_state taken when this cell runs; shown
# only if a configuration has been saved.
info_html = "<h3>Transformation Configuration</h3>"
if config_state.get('configured'):
    levels = config_state.get('level_columns', [])
    targets = config_state.get('target_columns', [])
    fac = config_state.get('facility_col', '')
    mapping = config_state.get('alignment_mapping', {})
    # A missing or empty path is rendered as 'N/A' in the f-string below.
    b_file = file_state.get('boundary_file', 'N/A')
    f_file = file_state.get('facility_file', 'N/A')
    info_html += f"""
    <div style='padding:10px; background:#e8f4fd; border-radius:5px; margin-bottom:10px;'>
        <b>From Validation Config (auto-populated):</b><br>
        Boundary levels: {', '.join(levels)}<br>
        Target columns: {', '.join(targets) or 'none'}<br>
        Facility column: {fac or 'none'}<br>
        Mapping: {', '.join(f'{k}&rarr;{v}' for k,v in mapping.items()) or 'none'}<br>
        Boundary file: {os.path.basename(b_file) if b_file else 'N/A'}<br>
        Facility file: {os.path.basename(f_file) if f_file else 'N/A'}<br>
        <em>Column letters and province name will be auto-detected from uploaded files.</em>
    </div>"""

# ── Minimal settings widgets ───────────────────────────────────
# Shared description width so the labels of all settings line up.
style = {'description_width': '140px'}
w_db_name = widgets.Text(
    value='microplan.db', description='DB Filename:',
    style=style, layout=widgets.Layout(width='400px'))
w_country_code = widgets.Text(
    value='mz', description='Country Code:',
    style=style, layout=widgets.Layout(width='400px'))
w_project_name = widgets.Text(
    value='IRS', description='Project Name:',
    style=style, layout=widgets.Layout(width='400px'))

# ── Campaign date pickers ──────────────────────────────────────
# Defaults: a campaign starting today and ending 30 days later.
today = datetime.date.today()
w_campaign_start = widgets.DatePicker(
    description='Campaign Start:',
    value=today,
    style=style, layout=widgets.Layout(width='300px'))
w_campaign_end = widgets.DatePicker(
    description='Campaign End:',
    value=today + datetime.timedelta(days=30),
    style=style, layout=widgets.Layout(width='300px'))

# Row numbers of the header rows in the two files (default: row 1).
w_boundary_start_row = widgets.IntText(
    value=1, description='Boundary Header Row:',
    style=style, layout=widgets.Layout(width='250px'))
w_facility_start_row = widgets.IntText(
    value=1, description='Facility Header Row:',
    style=style, layout=widgets.Layout(width='250px'))

# ── Checklist upload (optional) ────────────────────────────────
w_checklist = widgets.FileUpload(
    accept='.xlsx,.xls', multiple=False, description='Checklist File')

# The button starts disabled when the gate check above failed.
btn_transform = widgets.Button(
    description='TRANSFORM', button_style='success',
    layout=widgets.Layout(width='150px', height='40px'),
    disabled=not gate_ok)


def detect_columns(file_path, header_row=1):
    """Read one header row of an Excel workbook into ``{column_name: column_letter}``.

    The first worksheet whose state is ``'visible'`` is used; if there is
    none, the workbook's active sheet. Cells whose value is ``None`` are
    skipped. Names are converted with ``str()`` and stripped; if two cells
    yield the same name, the one read later overwrites the earlier one.

    Args:
        file_path: path of the workbook to open with openpyxl.
        header_row: row number of the header row. Excel rows are 1-based,
            so it must be an integer >= 1.

    Returns:
        dict mapping each header name to its column letter (e.g. ``'C'``).

    Raises:
        ValueError: if ``header_row`` is not an integer >= 1.
    """
    if not isinstance(header_row, int) or header_row < 1:
        raise ValueError(f"header_row must be an integer >= 1, got {header_row!r}")

    wb = openpyxl.load_workbook(file_path, data_only=True, read_only=True)
    try:
        # Pick the first visible sheet, falling back to the active one.
        ws = next((s for s in wb.worksheets if s.sheet_state == 'visible'), None)
        if ws is None:
            ws = wb.active
        mapping = {}
        for row in ws.iter_rows(min_row=header_row, max_row=header_row):
            for cell in row:
                if cell.value is not None:
                    mapping[str(cell.value).strip()] = get_column_letter(cell.column)
        return mapping
    finally:
        # Close the workbook even when reading fails, so the file handle
        # held by the read-only workbook is released.
        wb.close()


def _first_visible_sheet(workbook):
    """Return the first worksheet whose state is 'visible', else the active sheet."""
    for sheet in workbook.worksheets:
        if sheet.sheet_state == 'visible':
            return sheet
    return workbook.active


def _read_province_name(file_path, data_row, column_letter):
    """Return the stripped text in column ``column_letter`` of row ``data_row``, or ''."""
    workbook = openpyxl.load_workbook(file_path, data_only=True, read_only=True)
    try:
        sheet = _first_visible_sheet(workbook)
        for row in sheet.iter_rows(min_row=data_row, max_row=data_row):
            for cell in row:
                # Test the value BEFORE touching cell.column: read-only
                # worksheets fill gaps in a row with placeholder cells, which
                # have no value and may not expose a .column attribute.
                if cell.value and get_column_letter(cell.column) == column_letter:
                    return str(cell.value).strip()
        return ''
    finally:
        workbook.close()


def on_transform(b):
    """Run the transformation for the configured files and show the outcome.

    Triggered by the TRANSFORM button (``b`` is the button; unused). The
    button is disabled and relabelled while the run is in progress and is
    always restored afterwards, whatever the outcome.

    Steps:
      1. check that a configuration is saved, both files are uploaded and the
         campaign dates are set and ordered;
      2. map every configured level (except level 1, which has no Excel
         column) and every target column to its column letter, using the
         boundary file's header row;
      3. read the province name from the level-2 column of the first data row;
      4. reload the project modules, build a ``TransformConfig`` and apply it;
      5. call ``run_transform`` and show its counts plus a download link for
         the generated database.

    Any exception is caught, logged to the progress log, and shown with its
    traceback in ``out_transform``. Text taken from files or exceptions is
    HTML-escaped before display; in particular a raw traceback contains
    ``<module>``, which a browser would otherwise swallow as an unknown tag.
    """
    from html import escape  # function-scope import; `html` is not imported at file level
    import traceback

    def show_error(inner_html):
        # One red paragraph in the transform output area.
        display(HTML(f"<p style='color:red'>{inner_html}</p>"))

    # Reset progress log and disable button during run
    progress_log.value = ''
    btn_transform.disabled = True
    btn_transform.description = 'RUNNING...'

    with out_transform:
        clear_output(wait=True)

        # Everything, early exits included, runs inside one try/finally so
        # the button is restored in exactly one place.
        try:
            if not config_state.get('configured'):
                show_error("<b>Column configuration not saved!</b>")
                return
            if not file_state.get('boundary_file') or not file_state.get('facility_file'):
                show_error("<b>Boundary and Facility files required!</b>")
                return

            # Validate dates
            if not w_campaign_start.value or not w_campaign_end.value:
                show_error("<b>Campaign start and end dates are required!</b>")
                return
            if w_campaign_end.value < w_campaign_start.value:
                show_error("<b>Campaign end date must be after start date!</b>")
                return

            log_progress("Starting transformation...")
            level_columns = config_state.get('level_columns', [])
            target_columns = config_state.get('target_columns', [])
            header_row = w_boundary_start_row.value

            # ── Auto-detect column letters from boundary file header ──
            log_progress("Detecting column letters from Excel headers...")
            header_map = detect_columns(file_state['boundary_file'], header_row)

            boundary_columns = {}   # {level_num: col_letter}
            missing = []
            for level_num, level_name in enumerate(level_columns, start=1):
                if level_num == 1:  # Level 1 (Country) - no Excel column
                    continue
                if level_name in header_map:
                    boundary_columns[level_num] = header_map[level_name]
                else:
                    missing.append(level_name)

            target_column_letters = {}
            for t_name in target_columns:
                if t_name in header_map:
                    target_column_letters[t_name] = header_map[t_name]
                else:
                    missing.append(t_name)

            if missing:
                show_error(
                    f"<b>Columns not found in boundary file header row {header_row}:</b> "
                    f"{escape(', '.join(missing))}<br>"
                    f"Found headers: {escape(', '.join(header_map.keys()))}")
                return

            log_progress(f"Detected {len(boundary_columns)} boundary columns, {len(target_column_letters)} target columns.")

            # ── Auto-detect province name from the first data row ──
            log_progress("Detecting province name from data...")
            province_col = boundary_columns.get(2)
            province_name = ''
            if province_col:
                province_name = _read_province_name(
                    file_state['boundary_file'], header_row + 1, province_col)

            if not province_name:
                show_error("<b>Could not detect province name from boundary file.</b>")
                return

            province_html = escape(province_name)
            log_progress(f"Province detected: {province_html}")

            # ── Format campaign dates ──
            date_fmt = '%d/%m/%Y'
            campaign_start = w_campaign_start.value.strftime(date_fmt)
            campaign_end = w_campaign_end.value.strftime(date_fmt)
            log_progress(f"Campaign: {campaign_start} to {campaign_end}")

            # ── Show detected mapping ──
            detect_parts = [
                "<div style='padding:8px; background:#f0f8e8; border-radius:4px; margin:8px 0; font-size:12px;'>",
                f"<b>Auto-detected:</b> Province = <b>{province_html}</b><br>",
                "Columns: " + ", ".join(
                    f"{escape(name)}={boundary_columns.get(i + 1, '?')}"
                    for i, name in enumerate(level_columns) if i > 0
                ),
            ]
            if target_column_letters:
                detect_parts.append("<br>Targets: " + ", ".join(
                    f"{escape(n)}={c}" for n, c in target_column_letters.items()
                ))
            detect_parts.append(f"<br>Campaign: {campaign_start} &rarr; {campaign_end}")
            detect_parts.append("</div>")
            display(HTML(''.join(detect_parts)))

            # ── Build user inputs ──
            user_inputs = {
                'db_name': w_db_name.value.strip() or 'microplan.db',
                'country_code': w_country_code.value.strip() or 'mz',
                'province_name': province_name,
                'province_code': '',  # auto-generated
                'project_name': w_project_name.value.strip() or 'IRS',
                'boundary_start_row': header_row,
                'facility_start_row': w_facility_start_row.value,
                'campaign_start_date': campaign_start,
                'campaign_end_date': campaign_end,
            }

            config_with_cols = dict(config_state)
            config_with_cols['boundary_columns'] = boundary_columns
            config_with_cols['target_column_letters'] = target_column_letters

            # ── Reload modules (clears stale state) ──
            # Only modules that are already imported are reloaded, in the
            # listed order.
            log_progress("Reloading modules...")
            _modules_to_reload = [
                'constants.constants',
                'models.db.Base',
                'models.db.Boundary',
                'models.db.Facility',
                'models.db',
                'models',
                'utils.common',
                'utils.boundary',
                'utils.facility',
                'utils',
                'transform',
            ]
            for mod_name in _modules_to_reload:
                if mod_name in sys.modules:
                    importlib.reload(sys.modules[mod_name])

            # ── Build and apply config AFTER reload ──
            log_progress("Applying configuration...")
            from constants.constants import TransformConfig
            cfg = TransformConfig.from_notebook(config_with_cols, user_inputs)
            cfg.apply_to_module()

            # ── Import and run ──
            from transform import run_transform

            # Checklist: an uploaded file wins; otherwise fall back to
            # checklist_targets.xlsx in the working directory, if present.
            checklist_path = None
            if w_checklist.value:
                files = w_checklist.value
                info = files[0] if isinstance(files, tuple) else list(files.values())[0]
                name = info.name if hasattr(info, 'name') else info['name']
                content = info.content if hasattr(info, 'content') else info['content']
                # Keep only the final path component of the client-supplied
                # name so it cannot point outside uploads/.
                safe_name = os.path.basename(str(name).replace('\\', '/'))
                if safe_name in ('', '.', '..'):
                    raise ValueError(f"Unusable checklist file name: {name!r}")
                os.makedirs('uploads', exist_ok=True)
                checklist_path = os.path.join('uploads', safe_name)
                with open(checklist_path, 'wb') as f:
                    f.write(content)
            elif os.path.exists('checklist_targets.xlsx'):
                checklist_path = 'checklist_targets.xlsx'

            log_progress("Starting data transformation...")
            result = run_transform(
                boundary_file=file_state['boundary_file'],
                facility_file=file_state['facility_file'],
                checklist_file=checklist_path,
                progress=log_progress,
            )

            # ── Show results + download ──
            db_path = result['db_path']
            result_html = f"""
            <div style='padding:12px; background:#d4edda; border-left:4px solid #27ae60; margin:10px 0;'>
                <b style='color:#27ae60; font-size:16px;'>Transformation Complete!</b><br><br>
                <table style='font-size:13px;'>
                    <tr><td><b>Database:</b></td><td>{escape(str(db_path))}</td></tr>
                    <tr><td><b>Province:</b></td><td>{province_html}</td></tr>
                    <tr><td><b>Campaign:</b></td><td>{campaign_start} &rarr; {campaign_end}</td></tr>
                    <tr><td><b>Boundaries created:</b></td><td>{result['boundaries_count']}</td></tr>
                    <tr><td><b>Facilities created:</b></td><td>{result['facilities_count']}</td></tr>
                </table>
            </div>"""

            if os.path.exists(db_path):
                # Embed the database in the link as a base64 data URI.
                with open(db_path, 'rb') as f:
                    b64 = base64.b64encode(f.read()).decode()
                db_label = escape(os.path.basename(db_path), quote=True)
                result_html += (
                    f'<a href="data:application/octet-stream;base64,{b64}" '
                    f'download="{db_label}" '
                    f'style="display:inline-block; padding:10px 20px; background:#2196F3; color:white; '
                    f'text-decoration:none; border-radius:4px; margin:8px 4px; font-weight:bold;">'
                    f'Download {db_label}</a>'
                )

            display(HTML(result_html))

        except Exception as ex:
            log_progress(f"ERROR: {escape(str(ex))}")
            display(HTML(
                f"<div style='padding:10px; background:#ffc7ce; border-radius:5px;'>"
                f"<b style='color:red'>Transformation Error:</b><br>"
                f"<pre>{escape(traceback.format_exc())}</pre></div>"
            ))
        finally:
            btn_transform.disabled = False
            btn_transform.description = 'TRANSFORM'


btn_transform.on_click(on_transform)

# Assemble the transformation panel: the read-only summary and gate notice
# on top, the editable settings in the middle, then the run button, the live
# progress log and the output area for results and errors.
display(widgets.VBox([
    widgets.HTML(info_html),
    out_transform_status,
    widgets.HTML("<b>Settings:</b>"),
    w_db_name, w_country_code, w_project_name,
    widgets.HTML("<b>Campaign Dates:</b>"),
    w_campaign_start, w_campaign_end,
    widgets.HTML("<b>Excel Row Settings:</b>"),
    w_boundary_start_row, w_facility_start_row,
    widgets.HTML("<b>Optional Checklist File:</b>"), w_checklist,
    widgets.HTML("<br>"),
    btn_transform,
    widgets.HTML("<b>Progress:</b>"),
    progress_log,
    out_transform,
]))

In [5]:
# ── Cell 6: CSV Export & Downloads ─────────────────────────────
import sqlite3, csv, os, base64
from IPython.display import display, HTML, clear_output
import ipywidgets as widgets

# Output area for the export result (download links or an error message).
out_csv = widgets.Output()

btn_export_csv = widgets.Button(
    description='Export CSVs', button_style='info',
    layout=widgets.Layout(width='150px', height='35px'))


def export_data_to_csv(db_file, output_folder):
    """Export every table of a SQLite database to ``<output_folder>/<table>.csv``.

    Each CSV starts with a header row holding the table's column names,
    followed by one line per row. Files are written as UTF-8.

    Args:
        db_file: path of the SQLite database file.
        output_folder: folder to write the CSV files into; created if missing.

    Returns:
        List of the CSV paths written, in the order the tables are listed in
        ``sqlite_master``.
    """
    conn = sqlite3.connect(db_file)
    try:
        cursor = conn.cursor()

        # Get all table names
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = [row[0] for row in cursor.fetchall()]

        os.makedirs(output_folder, exist_ok=True)
        exported = []

        for table in tables:
            csv_path = os.path.join(output_folder, f'{table}.csv')
            # Standard SQL identifier quoting: wrap in double quotes and
            # double any embedded quote. The previous [name] form cannot
            # express a table name that itself contains ']'.
            quoted_table = '"' + table.replace('"', '""') + '"'
            with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
                writer = csv.writer(csvfile)
                cursor.execute(f'SELECT * FROM {quoted_table}')
                writer.writerow([desc[0] for desc in cursor.description])
                writer.writerows(cursor)
            exported.append(csv_path)

        return exported
    finally:
        # Release the database even when a query or a file write fails.
        conn.close()


def on_export_csv(b):
    """Export the generated database to CSV files and show download links.

    Triggered by the "Export CSVs" button (``b`` is the button; unused). The
    database file name is taken from the "DB Filename" widget of the
    transformation cell when that cell has been run, and defaults to
    ``microplan.db`` otherwise. The CSVs are written to
    ``csv_export_<db name without extension>`` and each one is offered as a
    base64 ``data:`` link.

    Any exception is caught and shown, with its traceback, in ``out_csv``.
    """
    from html import escape  # function-scope import; `html` is not imported at file level
    import traceback

    with out_csv:
        clear_output(wait=True)

        # Find the DB file from the transform widget. The lookup must go
        # through globals(): a bare dir() inside a function lists only the
        # function's local names, so it can never see the module-level widget
        # and the widget's value would always be ignored.
        db_widget = globals().get('w_db_name')
        db_name = db_widget.value.strip() if db_widget is not None else ''
        if not db_name:
            db_name = 'microplan.db'

        if not os.path.exists(db_name):
            display(HTML(
                "<p style='color:red'><b>Database file not found.</b> "
                "Run the transformation first.</p>"
            ))
            return

        try:
            output_folder = f"csv_export_{os.path.splitext(db_name)[0]}"
            csv_files = export_data_to_csv(db_name, output_folder)

            parts = ["<div style='padding:12px; background:#e8f4fd; border-left:4px solid #2196F3; "
                     "margin:10px 0;'>"
                     f"<b style='color:#2196F3;'>Exported {len(csv_files)} CSV file(s)</b><br><br>"]

            for fp in csv_files:
                # File names derive from table names; escape them for use in
                # an attribute and in the link text.
                fname = escape(os.path.basename(fp), quote=True)
                with open(fp, 'rb') as f:
                    b64 = base64.b64encode(f.read()).decode()
                parts.append(
                    f'<a href="data:text/csv;base64,{b64}" download="{fname}" '
                    f'style="display:inline-block; padding:8px 15px; background:#4caf50; color:white; '
                    f'text-decoration:none; border-radius:4px; margin:4px 2px;">'
                    f'Download {fname}</a> '
                )

            parts.append("</div>")
            display(HTML(''.join(parts)))

        except Exception:
            # Escape the traceback: it contains text such as "<module>" that
            # a browser would otherwise treat as a tag and drop.
            display(HTML(
                f"<div style='padding:10px; background:#ffc7ce; border-radius:5px;'>"
                f"<b style='color:red'>CSV Export Error:</b><br>"
                f"<pre>{escape(traceback.format_exc())}</pre></div>"
            ))


btn_export_csv.on_click(on_export_csv)

# Assemble the export panel: heading, short explanation, the export button,
# and the area where download links or errors appear.
display(widgets.VBox([
    widgets.HTML("<h3>Export to CSV</h3>"),
    widgets.HTML("<p>Export all database tables to individual CSV files for inspection.</p>"),
    btn_export_csv,
    out_csv,
]))

VBox(children=(HTML(value='<h3>Export to CSV</h3>'), HTML(value='<p>Export all database tables to individual C…