# Microplan Excel Validation Tool

Run all cells below in order (Shift+Enter runs the selected cell and moves to the next one) to start the validator.

**Important:** If you see errors about missing methods, restart the kernel (Kernel > Restart) and run all cells again.

In [None]:
# Cell 1: Install Requirements & Setup
import subprocess, sys, os, shutil, importlib

# Install the packages listed in requirements.txt. This is best effort: a
# missing file, a failing pip run, or an exception is reported and the
# notebook carries on.
print("Installing requirements from requirements.txt...")
if not os.path.exists('requirements.txt'):
    print("Warning: requirements.txt not found!")
else:
    try:
        # Use the kernel's own interpreter so packages land in its environment.
        result = subprocess.run(
            [sys.executable, '-m', 'pip', 'install', '-r', 'requirements.txt'],
            capture_output=True,
            text=True,
        )
        if result.returncode != 0:
            print("Some packages may have failed. Continuing anyway...")
            # Only the last 500 characters of pip's stderr are shown.
            print(result.stderr[-500:] if result.stderr else "")
        else:
            print("Requirements installed!")
    except Exception as e:
        print(f"Warning: {e}. Continuing anyway...")

# Imports
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
from collections import defaultdict
import base64

# Import validator with forced reload to get latest changes
import validator as validator_module
importlib.reload(validator_module)
from validator import Validator

# Clear and recreate folders
# Each run of this cell starts from empty working folders: an existing
# 'uploads' or 'error' folder is deleted with everything in it, then recreated.
for folder in ['uploads', 'error']:
    if os.path.exists(folder):
        shutil.rmtree(folder)
    os.makedirs(folder, exist_ok=True)

# Initialize validator
# Single shared instance used by the cells below.
validator = Validator()

# Verify the new methods exist
# The configuration cell calls validator.set_columns(); if the loaded class
# lacks it, tell the user to restart the kernel instead of failing later.
if hasattr(validator, 'set_columns'):
    print("Setup complete! Validator loaded with set_columns method.")
else:
    print("ERROR: Validator is outdated. Please restart the kernel (Kernel > Restart) and run all cells again.")

In [None]:
# Cell 2: Column Configuration UI

# ==================== STATE ====================
# Column configuration shared between the configuration UI and the validation
# UI. Every key that the validation step reads is declared here so a lookup
# can never fail with KeyError, even before the configuration has been saved.
config_state = {
    'level_columns': [],      # List of boundary/hierarchy column names
    'facility_columns': [],   # Facility column name as a 0- or 1-item list
    'target_columns': [],     # List of target column names
    'num_targets': 0,         # Number of targets
    'configured': False       # Whether config is complete
}

# ==================== WIDGETS ====================
# These lists are mutated in place by the handlers below and read back when
# the configuration is saved.
level_boxes = []  # Dynamic list of level input boxes
target_boxes = [] # Dynamic list of target input boxes
facility_box = None  # Placeholder; rebound to a Text widget in the facility section below

# Output area that receives the "saved" summary or the validation error.
out_config_status = widgets.Output()

# Title
config_title = widgets.HTML("<h2 style='color:#2c3e50; border-bottom:2px solid #3498db;'>Step 1: Configure Excel Columns</h2>")

# ==================== LEVEL CONFIGURATION ====================
level_section = widgets.HTML("<h4>Boundary/Hierarchy Levels</h4><p style='color:#666;'>Enter the exact column header names from your Excel file (in order from highest to lowest level)</p>")

# First level input
# Always present, so the form starts with exactly one level box.
first_level = widgets.Text(
    value='',
    placeholder='e.g., Province or COUNTRY',
    description='Level 1:',
    style={'description_width': '80px'},
    layout=widgets.Layout(width='400px')
)
level_boxes.append(first_level)

# Container for level inputs
# Its children are replaced each time a level is added.
level_container = widgets.VBox([first_level])

# Add Level button
btn_add_level = widgets.Button(
    description='+ Add Level',
    button_style='info',
    icon='plus',
    layout=widgets.Layout(width='120px')
)

def add_level(btn):
    """Add one more hierarchy-level text box underneath the existing ones.

    Click handler for the "+ Add Level" button. ``btn`` is the clicked button
    and is not used. The new box is numbered one higher than the current
    number of level boxes.
    """
    next_number = 1 + len(level_boxes)
    level_boxes.append(
        widgets.Text(
            value='',
            placeholder='e.g., District or Locality',
            description=f'Level {next_number}:',
            style={'description_width': '80px'},
            layout=widgets.Layout(width='400px'),
        )
    )
    # Hand the container a copy of the list so it holds every box, new one included.
    level_container.children = list(level_boxes)

# Each click on the button appends one more level box.
btn_add_level.on_click(add_level)

# ==================== FACILITY CONFIGURATION ====================
facility_section = widgets.HTML("<h4>Facility Column (Optional)</h4><p style='color:#666;'>Enter the facility/site column header name if your data has facilities</p>")

# Optional: when left empty, no facility column is configured on save.
facility_box = widgets.Text(
    value='',
    placeholder='e.g., Facility Name or Unidade Sanitaria',
    description='Facility:',
    style={'description_width': '80px'},
    layout=widgets.Layout(width='400px')
)

# ==================== TARGET CONFIGURATION ====================
target_section = widgets.HTML("<h4>Target Columns</h4><p style='color:#666;'>Enter the number of target columns, then specify each column name</p>")

# Number of target boxes to show; starts at 0, i.e. no target boxes.
num_targets_input = widgets.IntText(
    value=0,
    description='# Targets:',
    style={'description_width': '80px'},
    layout=widgets.Layout(width='200px')
)

# Container for target column inputs
# Starts empty; its children are set by the num_targets_input observer.
target_container = widgets.VBox([])

def update_target_inputs(change):
    """Resize the list of target-name boxes to the requested count.

    Observer for ``num_targets_input``. ``change['new']`` is the new count;
    zero or a negative number removes every box.

    Boxes that already exist are kept, so names the user has typed survive a
    change of the count. Only surplus boxes are dropped from the end, and only
    missing ones are created. ``target_boxes`` is modified in place because
    other handlers read that same list object.
    """
    requested = max(change['new'], 0)

    # Shrink: discard boxes beyond the requested count.
    del target_boxes[requested:]

    # Grow: add boxes for the positions that do not exist yet.
    for i in range(len(target_boxes), requested):
        target_boxes.append(
            widgets.Text(
                value='',
                placeholder=f'e.g., target_{i+1} or population',
                description=f'Target {i+1}:',
                style={'description_width': '80px'},
                layout=widgets.Layout(width='400px')
            )
        )

    target_container.children = list(target_boxes)

# Rebuild the target boxes whenever the count field changes value.
num_targets_input.observe(update_target_inputs, names='value')

# ==================== SAVE CONFIG BUTTON ====================
# Clicking this button runs save_config (registered after its definition).
btn_save_config = widgets.Button(
    description='Save Configuration',
    button_style='success',
    icon='check',
    layout=widgets.Layout(width='180px', height='40px')
)

def save_config(btn):
    """Read the configuration form, store it, and configure the validator.

    Click handler for the "Save Configuration" button; ``btn`` is unused.

    Blank inputs are ignored. At least one level column is required; without
    one an error message is shown and nothing is saved. On success the values
    are written to ``config_state`` (including ``'facility_columns'``, which
    the validation step reads back), passed to ``validator.set_columns`` and
    summarised in ``out_config_status``.
    """
    # Local import: column names are free text and must be escaped for HTML.
    from html import escape

    with out_config_status:
        clear_output(wait=True)

        # Collect level columns (non-empty)
        level_values = (box.value.strip() for box in level_boxes)
        levels = [name for name in level_values if name]

        # Collect facility column as a 0- or 1-item list
        facility_name = facility_box.value.strip()
        facility = [facility_name] if facility_name else []

        # Collect target columns (non-empty)
        target_values = (box.value.strip() for box in target_boxes)
        targets = [name for name in target_values if name]

        # Validate
        if not levels:
            display(HTML("<p style='color:red; font-weight:bold;'>Please enter at least one boundary/hierarchy level column!</p>"))
            return

        # Save to state
        config_state['level_columns'] = levels
        config_state['facility_columns'] = facility
        config_state['target_columns'] = targets
        config_state['num_targets'] = len(targets)
        config_state['configured'] = True

        # Configure validator
        validator.set_columns(
            boundary_cols=levels,
            facility_cols=facility,
            target_cols=targets,
            num_targets=len(targets)
        )

        # Show summary (user-entered names are escaped so characters such as
        # '<' or '&' in a column header are displayed literally)
        levels_text = escape(', '.join(levels))
        facility_text = escape(facility[0]) if facility else '(none)'
        targets_text = escape(', '.join(targets)) if targets else '(none)'
        summary_html = f"""
        <div style='padding:15px; background:#d4edda; border-radius:8px; border:2px solid #28a745;'>
            <h4 style='color:#155724; margin:0 0 10px 0;'>Configuration Saved!</h4>
            <p><b>Boundary Levels:</b> {levels_text}</p>
            <p><b>Facility Column:</b> {facility_text}</p>
            <p><b>Target Columns ({len(targets)}):</b> {targets_text}</p>
            <p style='color:#155724; margin-top:10px;'>Proceed to Step 2 to upload and validate your file.</p>
        </div>
        """
        display(HTML(summary_html))

btn_save_config.on_click(save_config)

# ==================== BUILD CONFIG UI ====================
# Sections are listed top to bottom in display order. The facility heading
# and input are included here; they were defined above but were previously
# left out of the layout, so the facility column could never be entered.
config_ui = widgets.VBox([
    config_title,
    level_section,
    level_container,
    btn_add_level,
    facility_section,
    facility_box,
    target_section,
    num_targets_input,
    target_container,
    widgets.HTML("<br>"),
    btn_save_config,
    out_config_status
], layout=widgets.Layout(padding='15px'))

display(config_ui)

In [None]:
# Cell 3: File Upload & Validation UI

# ==================== STATE ====================
# Paths of the most recently saved uploads; None until a file is uploaded
# and again after "Clear".
file_state = {
    'boundary_file': None,
    'facility_file': None
}

# ==================== WIDGETS ====================
# Separate output areas for the status line, the results, and the downloads.
out_status = widgets.Output()
out_results = widgets.Output()
out_downloads = widgets.Output()

# Handlers test for the substrings 'Single' / 'Two' in the selected option,
# so those words must stay in the option labels.
mode = widgets.RadioButtons(
    options=['Single File (Excel with multiple sheets or CSV)',
             'Two Files (separate Boundary & Facility files - second file optional)'],
    value='Single File (Excel with multiple sheets or CSV)',
    description='', layout=widgets.Layout(width='100%')
)

# upload1 feeds 'boundary_file' and upload2 feeds 'facility_file' (see the
# upload handlers below).
upload1 = widgets.FileUpload(accept='.xlsx,.xls,.csv', multiple=False)
upload2 = widgets.FileUpload(accept='.xlsx,.xls,.csv', multiple=False)
label1 = widgets.HTML('<b>Upload File (Excel or CSV):</b>')
label2 = widgets.HTML('<b>Upload Facility File (Optional):</b>')
# Hidden until the two-file mode is selected.
box2 = widgets.VBox([label2, upload2], layout=widgets.Layout(display='none'))

# Sheet pickers; '(auto-detect)' is translated to None before validation.
dd_boundary = widgets.Dropdown(options=['(auto-detect)'], value='(auto-detect)', description='Boundary Sheet:')
dd_facility = widgets.Dropdown(options=['(auto-detect)'], value='(auto-detect)', description='Facility Sheet:')
# Hidden until an Excel file has been loaded.
sheet_box = widgets.VBox([dd_boundary, dd_facility], layout=widgets.Layout(display='none'))

btn_validate = widgets.Button(description='VALIDATE', button_style='primary', icon='check',
                               layout=widgets.Layout(width='150px', height='40px'))
btn_clear = widgets.Button(description='Clear', button_style='warning', icon='trash',
                            layout=widgets.Layout(width='100px', height='40px'))

# ==================== HELPER FUNCTIONS ====================

def show_status(msg, color='black'):
    """Replace the status area's content with ``msg`` in bold 14px text.

    ``color`` is placed in the CSS ``color`` property and ``msg`` in the
    paragraph body; both are inserted as given, without escaping.
    """
    with out_status:
        clear_output(wait=True)
        markup = f'<p style="color:{color}; font-weight:bold; font-size:14px;">{msg}</p>'
        display(HTML(markup))

def is_csv(filename):
    """Return True when ``filename`` ends in ``.csv``, ignoring letter case."""
    suffix = '.csv'
    return filename.lower()[-len(suffix):] == suffix

def create_download_link(filepath):
    """Return an HTML snippet with a download link for the file at ``filepath``.

    The file is read in full and embedded in the link as a base64 ``data:``
    URI, so the download needs no server route.

    The MIME type is chosen from the extension, ignoring letter case:
    ``.csv`` -> ``text/csv``, ``.xls`` -> ``application/vnd.ms-excel``,
    anything else -> the ``.xlsx`` spreadsheet type. The path and file name
    are HTML-escaped before being placed in the markup.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # Local import keeps this helper usable without touching the cell's imports.
    from html import escape

    filename = os.path.basename(filepath)
    with open(filepath, 'rb') as f:
        b64 = base64.b64encode(f.read()).decode()

    lowered = filepath.lower()
    if lowered.endswith('.csv'):
        mime = 'text/csv'
    elif lowered.endswith('.xls'):
        mime = 'application/vnd.ms-excel'
    else:
        mime = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'

    safe_path = escape(filepath)
    safe_name = escape(filename)
    return f'''
    <div style="margin:10px 0; padding:15px; background:#e8f5e9; border-radius:8px; border:2px solid #4caf50;">
        <div style="margin-bottom:10px;">
            <b style="color:#2e7d32;">Saved:</b> <code>{safe_path}</code>
        </div>
        <a href="data:{mime};base64,{b64}" download="{safe_name}"
           style="display:inline-block; padding:12px 25px; background:#4caf50; color:white; 
                  text-decoration:none; border-radius:5px; font-weight:bold;">
            DOWNLOAD: {safe_name}
        </a>
    </div>'''

# ==================== EVENT HANDLERS ====================

def on_mode_change(change):
    """Show or hide the second upload box to match the selected mode.

    Observer for the mode radio buttons. When the new value contains 'Two',
    the facility upload box is shown and the first upload is labelled as the
    boundary file; otherwise the box is hidden and the generic label is used.
    """
    two_files = 'Two' in change['new']
    box2.layout.display = 'block' if two_files else 'none'
    label1.value = (
        '<b>Upload Boundary File:</b>'
        if two_files
        else '<b>Upload File (Excel or CSV):</b>'
    )

def save_upload(uploader, key):
    """Save the uploaded file under ``uploads/`` and refresh the sheet pickers.

    Args:
        uploader: the ``FileUpload`` widget whose value changed.
        key: the ``file_state`` key to set ('boundary_file' or 'facility_file').

    Only the first uploaded file is used. For a CSV the sheet pickers are
    hidden. For an Excel file the sheet names are loaded into the boundary
    and/or facility dropdown, depending on ``key`` and the selected mode.
    Any failure while saving or reading is shown in the status area instead
    of being raised.
    """
    if not uploader.value:
        return

    files = uploader.value
    if isinstance(files, tuple):
        # Tuple form: one entry per file, exposing name/content as
        # attributes or as dict keys.
        file_info = files[0]
        name = file_info.name if hasattr(file_info, 'name') else file_info['name']
    else:
        # Dict form: keyed by file name. The entry itself may not carry a
        # top-level 'name', so fall back to the dictionary key.
        dict_key, file_info = next(iter(files.items()))
        name = file_info['name'] if 'name' in file_info else dict_key
    content = file_info.content if hasattr(file_info, 'content') else file_info['content']

    # Keep only the final path component so the browser-supplied name can
    # never place the file outside the uploads folder.
    name = os.path.basename(name.replace('\\', '/'))
    path = os.path.join('uploads', name)

    try:
        os.makedirs('uploads', exist_ok=True)
        with open(path, 'wb') as f:
            f.write(content)
        file_state[key] = path

        if is_csv(name):
            sheet_box.layout.display = 'none'
            show_status(f'Loaded CSV: {name}', 'green')
        else:
            # Context manager closes the workbook handle once names are read.
            with pd.ExcelFile(path) as xls:
                sheet_names = list(xls.sheet_names)
            sheets = ['(auto-detect)'] + sheet_names
            if 'boundary' in key:
                dd_boundary.options = sheets
                if 'Single' in mode.value:
                    # One workbook serves both roles in single-file mode.
                    dd_facility.options = sheets
            else:
                dd_facility.options = sheets
            sheet_box.layout.display = 'block'
            show_status(f'Loaded Excel: {name} ({len(sheet_names)} sheets)', 'green')
    except Exception as e:
        show_status(f'Error: {e}', 'red')

def on_upload1(change):
    """Observer for the first upload widget: store its file as the boundary file."""
    save_upload(upload1, 'boundary_file')


def on_upload2(change):
    """Observer for the second upload widget: store its file as the facility file."""
    save_upload(upload2, 'facility_file')

def on_clear(b):
    """Forget the uploaded files and wipe the results.

    Click handler for the "Clear" button; ``b`` is unused. Resets both
    ``file_state`` entries, restores the sheet dropdowns to their single
    '(auto-detect)' option, hides them, resets the validator, empties the
    results and downloads areas, and reports 'Cleared'.
    """
    for slot in ('boundary_file', 'facility_file'):
        file_state[slot] = None

    default_options = ['(auto-detect)']
    dd_boundary.options = default_options
    dd_facility.options = default_options
    sheet_box.layout.display = 'none'

    validator.reset()

    for area in (out_results, out_downloads):
        with area:
            clear_output()
    show_status('Cleared', 'orange')

def on_validate(b):
    """Validate the uploaded file(s) and show results and download links.

    Click handler for the "VALIDATE" button; ``b`` is unused.

    Steps:
      1. Refuse to run until the column configuration has been saved.
      2. Reset the validator and re-apply the saved column configuration.
      3. Pick the files to check according to the selected mode.
      4. Validate each file and merge the per-file summaries.
      5. In two-file mode with both files present, also run the
         boundary/facility alignment check.
      6. Save the validated files into 'error' and render the output.

    A failure of the alignment check does not abort the run; it is reported
    in the final status message instead of being silently ignored.
    """
    # Check if configuration is done
    if not config_state['configured']:
        show_status('Please complete Step 1 (Configure Excel Columns) first!', 'red')
        return

    validator.reset()

    # Re-apply column configuration (in case validator was reset).
    # .get() is used for the facility columns because that key is not
    # guaranteed to be present in config_state.
    validator.set_columns(
        boundary_cols=config_state['level_columns'],
        facility_cols=config_state.get('facility_columns', []),
        target_cols=config_state['target_columns'],
        num_targets=config_state['num_targets']
    )

    two_file_mode = 'Two' in mode.value
    boundary_path = file_state['boundary_file']
    facility_path = file_state['facility_file']

    if two_file_mode:
        files = [p for p in (boundary_path, facility_path) if p]
        if not files:
            show_status('Please upload at least one file!', 'red')
            return
    else:
        if not boundary_path:
            show_status('Please upload a file first!', 'red')
            return
        files = [boundary_path]

    show_status('Validating...', 'blue')
    b_sheet = None if dd_boundary.value == '(auto-detect)' else dd_boundary.value
    f_sheet = None if dd_facility.value == '(auto-detect)' else dd_facility.value

    all_issues = []
    total_summary = {'total': 0, 'errors': 0, 'warnings': 0, 'by_rule': defaultdict(int)}

    for fp in files:
        issues, summary = validator.validate_file(fp, b_sheet, f_sheet)
        all_issues.extend(issues)
        for counter in ('total', 'errors', 'warnings'):
            total_summary[counter] += summary[counter]
        for r, c in summary['by_rule'].items():
            total_summary['by_rule'][r] += c

    def read_table(path, sheet):
        # CSV files have no sheets; for Excel fall back to the first sheet.
        if is_csv(path):
            return pd.read_csv(path)
        return pd.read_excel(path, sheet_name=sheet or 0)

    alignment_error = None
    if two_file_mode and boundary_path and facility_path:
        try:
            b_df = read_table(boundary_path, b_sheet)
            f_df = read_table(facility_path, f_sheet)
            align_issues = validator.check_alignment(
                b_df, f_df,
                os.path.basename(boundary_path),
                os.path.basename(facility_path)
            )
            all_issues.extend(align_issues)
            for i in align_issues:
                total_summary['total'] += 1
                total_summary['errors' if i['severity'] == 'error' else 'warnings'] += 1
                total_summary['by_rule'][i['rule']] += 1
        except Exception as e:
            # Best effort: keep the per-file results, but remember the failure
            # so the user is told the alignment check did not run.
            alignment_error = e

    output_files = validator.save_validated_files('error')
    display_results(all_issues, total_summary)
    display_downloads(output_files)

    if alignment_error is None:
        show_status('Validation Complete!', 'green')
    else:
        show_status(
            'Validation complete, but the boundary/facility alignment check '
            f'could not be run: {alignment_error}',
            'orange'
        )

# ==================== DISPLAY FUNCTIONS ====================

def display_results(issues, summary):
    """Render the validation outcome into the results area.

    Args:
        issues: list of issue dicts; the keys read here are 'severity',
            'rule', 'sheet', 'column', 'row', 'value' and 'message'.
        summary: dict with 'errors', 'warnings' and 'by_rule' (rule -> count).

    Shows the configured columns, a coloured headline with PASS/FAIL counts
    from ``validator.get_stats()``, the issue count per rule, and a table of
    the first 50 issues. Every value that originates from the configuration
    or from the uploaded file is HTML-escaped, so spreadsheet content cannot
    inject markup into the notebook output.
    """
    # Local import; the markup is collected in ``parts`` so the name ``html``
    # stays free for the standard-library module.
    from html import escape

    def esc(value):
        # Escape any value for safe inclusion in HTML text.
        return escape(str(value))

    with out_results:
        clear_output(wait=True)

        no_errors = summary['errors'] == 0
        color = '#27ae60' if no_errors else '#e74c3c'
        if no_errors and summary['warnings'] == 0:
            status = 'All Passed!'
        elif no_errors:
            status = 'Warnings Only'
        else:
            status = 'Issues Found'
        pass_count, fail_count = validator.get_stats()

        # Show configured columns. The facility key is read with .get()
        # because it is not guaranteed to be present in config_state.
        facility_cols = config_state.get('facility_columns') or []
        levels_text = esc(', '.join(config_state['level_columns']))
        facility_text = esc(facility_cols[0]) if facility_cols else '(none)'
        target_cols = config_state['target_columns']
        targets_text = esc(', '.join(target_cols)) if target_cols else '(none)'

        parts = [f'''
        <div style='padding:10px; background:#e3f2fd; border-radius:5px; margin-bottom:15px;'>
            <b>Validating columns:</b><br>
            Levels: {levels_text}<br>
            Facility: {facility_text}<br>
            Targets: {targets_text}
        </div>
        <div style="padding:15px; background:#f0f0f0; border-radius:8px; border-left:5px solid {color};">
            <h3 style="color:{color}; margin:0 0 10px 0;">{status}</h3>
            <div style="display:flex; gap:15px; flex-wrap:wrap;">
                <span style="padding:8px 15px; background:#27ae60; color:white; border-radius:4px;"><b>PASS:</b> {pass_count}</span>
                <span style="padding:8px 15px; background:#c0392b; color:white; border-radius:4px;"><b>FAIL:</b> {fail_count}</span>
                <span style="padding:8px 15px; background:#d68910; color:white; border-radius:4px;"><b>Warnings:</b> {summary['warnings']}</span>
            </div>
        </div>
        <h4>Issues by Rule:</h4>''']

        # Only open a list when there is something to list, so the markup
        # never contains an unclosed <ul>.
        if summary['by_rule']:
            parts.append('<ul>')
            for r, c in summary['by_rule'].items():
                parts.append(f'<li><b>{esc(r)}:</b> {esc(c)}</li>')
            parts.append('</ul>')
        else:
            parts.append('<p style="color:green;">No issues!</p>')

        if issues:
            parts.append('''<h4>Details (first 50):</h4>
            <div style="max-height:300px; overflow-y:auto;">
            <table style="width:100%; border-collapse:collapse; font-size:11px;">
            <tr style="background:#2c3e50; color:white;">
                <th style="padding:6px;">Sev</th><th>Rule</th><th>Sheet</th><th>Column</th><th>Row</th><th>Value</th><th>Message</th>
            </tr>''')
            for i in issues[:50]:
                sev_color = '#c0392b' if i['severity'] == 'error' else '#d68910'
                # Long values are truncated first, then escaped.
                sev = esc(i['severity'][:3].upper())
                rule = esc(i['rule'])
                sheet = esc(str(i['sheet'])[:25])
                column = esc(str(i['column'])[:15])
                row = esc(i['row'])
                value = esc(str(i['value'])[:20])
                message = esc(i['message'])
                parts.append(f'''<tr>
                    <td style="padding:4px; border:1px solid #ddd; color:{sev_color}; font-weight:bold;">{sev}</td>
                    <td style="border:1px solid #ddd;">{rule}</td>
                    <td style="border:1px solid #ddd; font-size:10px;">{sheet}</td>
                    <td style="border:1px solid #ddd;">{column}</td>
                    <td style="border:1px solid #ddd;">{row}</td>
                    <td style="border:1px solid #ddd;">{value}</td>
                    <td style="border:1px solid #ddd;">{message}</td>
                </tr>''')
            parts.append('</table></div>')

        display(HTML(''.join(parts)))

def display_downloads(output_files):
    """Show one download link per path in ``output_files``.

    The downloads area is cleared first. When ``output_files`` is empty or
    None the area is left empty; otherwise a heading is shown, followed by a
    link built by ``create_download_link`` for each path.
    """
    with out_downloads:
        clear_output(wait=True)
        if not output_files:
            return
        display(HTML('<h4 style="color:#1565c0;">Download Validated Files:</h4>'))
        for saved_path in output_files:
            link_markup = create_download_link(saved_path)
            display(HTML(link_markup))

# ==================== CONNECT EVENTS ====================
# Observers fire when a widget's value changes; on_click fires on button press.
mode.observe(on_mode_change, names='value')
upload1.observe(on_upload1, names='value')
upload2.observe(on_upload2, names='value')
btn_validate.on_click(on_validate)
btn_clear.on_click(on_clear)

# ==================== BUILD UI ====================
# Children are listed in display order: mode, upload(s), status line, sheet
# pickers, action buttons, results, and downloads.
validation_ui = widgets.VBox([
    widgets.HTML('<h2 style="color:#2c3e50; border-bottom:2px solid #3498db;">Step 2: Upload & Validate</h2>'),
    widgets.HTML('<p style="color:#666;">Supports: .xlsx, .xls, .csv</p>'),
    widgets.HTML('<h4>Select Mode</h4>'), mode,
    widgets.HTML('<h4>Upload File(s)</h4>'), widgets.VBox([label1, upload1]), box2, out_status, sheet_box,
    widgets.HTML('<h4>Run Validation</h4>'), widgets.HBox([btn_validate, btn_clear]),
    widgets.HTML('<h4>Results:</h4>'), out_results,
    out_downloads
], layout=widgets.Layout(padding='15px'))

display(validation_ui)