# 🚀 User Ingestion Application
### CSV Validation & API Upload Tool

**Workflow:**
1. ✅ Validates CSV against schema rules
2. ✅ **If ALL pass** → Auto-uploads to API
3. ❌ **If ANY fail** → Shows error report (downloadable)

---
### ⚡ Quick Start: Run all cells below (Cell → Run All)

In [None]:
# Import required libraries
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
import pandas as pd
import os
from datetime import datetime
import sys
import glob
import time

# Import our custom modules
from utils.validators import CSVValidator
from utils.api_client import APIClient

# Make sure the uploads folder exists (the processing cell writes into it),
# then remove any regular files left over from a previous session.
# Subdirectories inside uploads/ are left untouched.
os.makedirs("uploads", exist_ok=True)
for file in glob.glob("uploads/*"):
    if os.path.isfile(file):
        os.remove(file)

print("‚úì All libraries loaded successfully!")
print("‚úì Uploads folder cleared!")

In [None]:
# ============================================
# MAIN APPLICATION UI & LOGIC
# ============================================

# Create UI widgets
# The three text inputs below are read by the process-button handler each time
# it runs, so edits made in the UI take effect on the next click.
api_url_input = widgets.Text(
    value='http://hcm-moz-impl.abuja:8080/hcm-moz-impl/v1/dhis2/users/ingest?source=EXCEL',
    placeholder='Enter API URL',
    description='API URL:',
    style={'description_width': '120px'},
    layout=widgets.Layout(width='800px')
)

tenant_id_input = widgets.Text(
    value='bi',
    placeholder='Enter Tenant ID',
    description='Tenant ID:',
    style={'description_width': '120px'},
    layout=widgets.Layout(width='300px')
)

# Password widget instead of Text: the token is masked on screen while
# `.value` still returns the plain string to the handler.
# SECURITY NOTE(review): a token is hard-coded as the default value below and is
# therefore stored in the notebook source. Kept for backward compatibility;
# consider rotating it and loading it from the environment instead.
auth_token_input = widgets.Password(
    value='ee36fdd7-64e7-4583-9c16-998479ff53c0',
    placeholder='Enter Auth Token',
    description='Auth Token:',
    style={'description_width': '120px'},
    layout=widgets.Layout(width='500px')
)

# Single-file CSV picker; the handler reads the selected file from `.value`.
file_upload = widgets.FileUpload(
    accept='.csv',
    multiple=False,
    description='Upload CSV:',
    style={'description_width': '120px'}
)

process_button = widgets.Button(
    description='üöÄ Process & Upload',
    button_style='primary',
    icon='upload',
    layout=widgets.Layout(width='250px', height='50px')
)

# Output for processing logs ONLY
process_output = widgets.Output()
# One-line headline shown under the button; the handler overwrites `.value`.
status_label = widgets.HTML(value="<h3 style='color: #3498DB;'>üì§ Ready to process CSV</h3>")

# Global variables - MUST be initialized here
# Holds the result dict of the most recent run for the summary cell;
# None until a run has completed.
summary_data = None


# ============================================
# HELPER FUNCTION - Clear uploads folder
# ============================================
def clear_uploads_folder():
    """Delete every regular file directly inside ``uploads/``.

    Only entries matched by the ``uploads/*`` glob are considered, and only
    those that are regular files are removed; subdirectories (and anything
    inside them) are left in place. A missing ``uploads/`` folder simply
    yields no matches, so nothing happens.
    """
    import glob

    # filter() is lazy, so each entry is checked right before it is removed.
    for leftover in filter(os.path.isfile, glob.glob("uploads/*")):
        os.remove(leftover)


# ============================================
# EVENT HANDLER
# ============================================

def on_process_click(b):
    """
    Handle the process button: validate the uploaded CSV, then upload it
    row by row if every validation passed.

    The uploaded file is saved under ``uploads/`` and passed to
    ``CSVValidator.validate_csv``. If the header or any row is reported as
    an error, the failing rows are written to ``uploads/<name>_errors.csv``.
    Otherwise each row is sent through ``APIClient.upload_file`` as a
    single-row CSV and the per-row outcome is written to
    ``uploads/<name>_result.csv``.

    Progress is printed into ``process_output``, the headline is written to
    ``status_label``, and the outcome is stored in the module-level
    ``summary_data`` dict for the summary cell. Any exception is caught,
    printed with its traceback into ``process_output`` and reported in
    ``status_label``; in that case ``summary_data`` is left as ``None``.

    Args:
        b: The button instance supplied by ``Button.on_click`` (unused).
    """
    global summary_data

    # Local imports keep this handler independent of other cells' imports.
    from html import escape
    import traceback

    # Seconds to wait between two consecutive API calls.
    request_delay_seconds = 5

    # Clear process output
    with process_output:
        clear_output()

    # Check if file uploaded
    if not file_upload.value:
        status_label.value = "<h3 style='color: red;'>‚ùå Please upload a CSV file first!</h3>"
        return

    # Forget the previous run's result so that a failure below cannot leave
    # a stale summary for the summary cell to display.
    summary_data = None

    # Disable button during processing
    process_button.disabled = True
    status_label.value = "<h3 style='color: orange;'>‚è≥ Processing...</h3>"

    try:
        # The folder may not exist on a fresh checkout; create it before
        # clearing and writing into it.
        os.makedirs("uploads", exist_ok=True)

        # Clear uploads folder before processing
        clear_uploads_folder()

        # Save uploaded file
        # NOTE(review): positional indexing assumes `file_upload.value` is a
        # sequence of dicts with 'name' and 'content' keys - confirm against
        # the installed ipywidgets version.
        uploaded_file = file_upload.value[0]
        # Keep only the final path component so a crafted name cannot write
        # outside the uploads folder.
        uploaded_filename = os.path.basename(uploaded_file['name'])
        content = uploaded_file['content']

        upload_path = f"uploads/{uploaded_filename}"
        with open(upload_path, 'wb') as f:
            f.write(content)

        # ============================================
        # PHASE 1: VALIDATION
        # ============================================
        with process_output:
            print("=" * 70)
            print("[PHASE 1] CSV VALIDATION")
            print("=" * 70)
            print(f"\n[INFO] File uploaded: {uploaded_filename}")
            print(f"[INFO] Upload path: {upload_path}")

        # Initialize validator
        validator = CSVValidator()

        with process_output:
            print(f"\n[VALIDATING] Running validation checks...")

        # Validate CSV
        validated_df, summary = validator.validate_csv(upload_path)

        with process_output:
            print(f"\n{'=' * 70}")
            print("[VALIDATION SUMMARY]")
            print(f"{'=' * 70}")
            print(f"[HEADER STATUS] {summary['header_status']}")
            if summary['header_status'] == 'ERROR':
                print(f"[HEADER ERROR] {summary['header_message']}")
            print(f"[TOTAL USERS] {summary['total_users']}")
            print(f"[‚úì VALID USERS] {summary['correct_users']}")
            print(f"[‚úó INVALID USERS] {summary['error_users']}")
            print(f"{'=' * 70}\n")

        # Check if ALL validations passed
        if summary['error_users'] > 0 or summary['header_status'] == 'ERROR':
            # VALIDATION FAILED
            base_name = uploaded_filename.rsplit('.', 1)[0]
            error_report_path = f"uploads/{base_name}_errors.csv"

            # Get only error rows
            # NOTE(review): assumes the validator always returns a DataFrame
            # with a 'validation_status' column, including on header errors -
            # confirm against CSVValidator.
            error_df = validated_df[validated_df['validation_status'] == 'ERROR']
            error_df.to_csv(error_report_path, index=False)

            with process_output:
                print(f"[FAILED] ‚ùå Validation failed with {summary['error_users']} errors\n")
                print(f"üìù Updating error report: {error_report_path}")
                print(f"   ‚úÖ Error report saved successfully!")

            status_label.value = f"<h3 style='color: red;'>‚ùå Validation Failed: {summary['error_users']} errors. Run the Summary cell below!</h3>"

            # Store summary data for the summary cell
            summary_data = {
                'status': 'FAILED',
                'summary': summary,
                'error_report': error_report_path
            }

        else:
            # ALL VALIDATIONS PASSED
            with process_output:
                print(f"[SUCCESS] ‚úÖ All validations passed!\n")
                print(f"{'=' * 70}")
                print("[PHASE 2] API UPLOAD")
                print(f"{'=' * 70}\n")

            # Save validated file in uploads (overwrite original)
            validated_df.to_csv(upload_path, index=False)

            # Initialize API client
            api_url = api_url_input.value
            tenant_id = tenant_id_input.value
            auth_token = auth_token_input.value

            with process_output:
                print(f"[UPLOADING] User Data")
                print(f"   Total Users: {summary['total_users']}")
                print(f"   API URL: {api_url}")
                print(f"   Tenant ID: {tenant_id}")
                print(f"{'=' * 70}\n")

            client = APIClient(api_url, tenant_id, auth_token)

            # Process and upload row by row
            base_name = uploaded_filename.rsplit('.', 1)[0]
            final_report_path = f"uploads/{base_name}_result.csv"

            # Read validated CSV
            df = pd.read_csv(upload_path)

            # Initialize API response columns. Explicit object dtype so that
            # non-string values (e.g. numeric status codes) can be stored
            # next to the string placeholders.
            for api_column in ('api_status', 'api_status_code', 'api_message'):
                df[api_column] = pd.Series('', index=df.index, dtype=object)

            success_count = 0
            error_count = 0
            skipped_count = 0
            uploads_attempted = 0

            # Only process rows with validation_status = 'CORRECT'
            for idx, row in df.iterrows():
                if row.get('validation_status') != 'CORRECT':
                    # Skip rows with validation errors
                    df.at[idx, 'api_status'] = 'SKIPPED'
                    df.at[idx, 'api_status_code'] = 'N/A'
                    df.at[idx, 'api_message'] = 'Validation failed'
                    skipped_count += 1
                    continue

                # Delay between requests only; no wait before the first
                # upload or after the last one.
                if uploads_attempted:
                    time.sleep(request_delay_seconds)
                uploads_attempted += 1

                with process_output:
                    print(f"[PROCESSING] Row {idx + 1}/{len(df)}: {row.get('username', 'N/A')}", end='')

                # Create temp CSV for single row
                # NOTE(review): the row still carries the api_* columns (and
                # any validator-added columns), so they are part of the
                # uploaded CSV - confirm the API ignores them.
                temp_file = f"temp_upload_{idx}.csv"
                try:
                    single_row_df = pd.DataFrame([row])
                    single_row_df.to_csv(temp_file, index=False)

                    # Upload
                    result = client.upload_file(temp_file)
                finally:
                    # Clean up temp file even when the upload raises
                    if os.path.exists(temp_file):
                        os.remove(temp_file)

                # Update DataFrame with API response
                df.at[idx, 'api_status'] = result['status']
                df.at[idx, 'api_status_code'] = result['status_code']
                df.at[idx, 'api_message'] = result['message']

                if result['status'] == 'SUCCESS':
                    success_count += 1
                    with process_output:
                        print(f" ‚Üí ‚úÖ SUCCESS (Status: {result['status_code']})")
                else:
                    error_count += 1
                    with process_output:
                        print(f" ‚Üí ‚ùå ERROR (Status: {result['status_code']})")

            # Save output
            df.to_csv(final_report_path, index=False)

            with process_output:
                print(f"\n{'=' * 70}")
                print("[API UPLOAD SUMMARY]")
                print(f"{'=' * 70}")
                print(f"[TOTAL UPLOADED] {len(df)}")
                print(f"[‚úì SUCCESSFUL] {success_count}")
                print(f"[‚úó FAILED] {error_count}")
                print(f"[‚äò SKIPPED] {skipped_count}")
                print(f"{'=' * 70}\n")
                print(f"üìù Updating result file: {final_report_path}")
                print(f"   ‚úÖ Result file updated successfully!")
                print(f"   üìä Updated {len(df)} rows\n")
                print("üéâ DATA UPLOAD COMPLETED!")

            status_label.value = f"<h3 style='color: green;'>‚úÖ Complete! {success_count} successful, {error_count} failed. Run Summary cell below!</h3>"

            # Store summary data for the summary cell
            summary_data = {
                'status': 'SUCCESS',
                'summary': summary,
                'success_count': success_count,
                'failed_count': error_count,
                'final_report': final_report_path
            }

    except Exception as e:
        # Top-level boundary of the UI callback: report instead of crashing.
        with process_output:
            print(f"\n‚ùå Error: {str(e)}")
            traceback.print_exc()
        # The message is rendered as HTML, so escape it first.
        status_label.value = f"<h3 style='color: red;'>‚ùå Process failed: {escape(str(e))}</h3>"

    finally:
        # Re-enable button
        process_button.disabled = False


# Wire the button to its click handler
process_button.on_click(on_process_click)

# ============================================
# DISPLAY UI
# ============================================

# Each section is a heading followed by the widgets rendered beneath it,
# listed in display order.
ui_sections = (
    (
        "<hr><h2 style='color: #2E86C1;'>üîß Configuration</h2>",
        (
            widgets.VBox([
                api_url_input,
                widgets.HBox([tenant_id_input, auth_token_input]),
            ]),
        ),
    ),
    (
        "<hr><h2 style='color: #2E86C1;'>üìÅ Upload & Process</h2>",
        (file_upload, process_button, status_label),
    ),
    (
        "<hr><h2 style='color: #2E86C1;'>üìã Processing Logs</h2>",
        (process_output,),
    ),
)

for heading_markup, section_widgets in ui_sections:
    display(HTML(heading_markup))
    for section_widget in section_widgets:
        display(section_widget)

print("\n‚úì Application ready! Upload a CSV and click 'Process & Upload'")

---
## 📊 Summary Report & Downloads

**Run the cell below after processing to see results:**

In [None]:
# Display Summary and Downloads
#
# Renders the result stored in `summary_data` by the processing cell:
#   - status 'FAILED'  -> validation counts plus a download link to the error report
#   - status 'SUCCESS' -> upload counts plus a download link to the result file
# When no result is available yet, a hint is printed instead.
from html import escape
from urllib.parse import quote

if 'summary_data' in globals() and summary_data:
    timestamp_str = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    
    if summary_data['status'] == 'FAILED':
        # Validation Failed
        summary = summary_data['summary']
        error_report = summary_data['error_report']
        
        # The report name derives from the uploaded file name, so it is
        # URL-quoted for the link target and HTML-escaped for attribute/text use.
        error_report_href = quote(error_report)
        error_report_name = escape(os.path.basename(error_report))
        
        display(HTML(f"""
        <div style='background-color: #FADBD8; padding: 20px; border-radius: 10px; border-left: 5px solid #E74C3C; margin: 20px 0;'>
            <h2 style='color: #E74C3C; margin-top: 0;'>‚ùå VALIDATION FAILED</h2>
            <table style='width: 100%; border-collapse: collapse; background-color: white; margin-top: 15px;'>
                <tr style='background-color: #E74C3C; color: white;'>
                    <th style='padding: 10px; text-align: left;'>Metric</th>
                    <th style='padding: 10px; text-align: center;'>Count</th>
                </tr>
                <tr>
                    <td style='padding: 8px; border: 1px solid #ddd;'>Total Users</td>
                    <td style='padding: 8px; border: 1px solid #ddd; text-align: center; font-weight: bold;'>{summary['total_users']}</td>
                </tr>
                <tr>
                    <td style='padding: 8px; border: 1px solid #ddd;'>‚úì Valid Users</td>
                    <td style='padding: 8px; border: 1px solid #ddd; text-align: center; color: green; font-weight: bold;'>{summary['correct_users']}</td>
                </tr>
                <tr>
                    <td style='padding: 8px; border: 1px solid #ddd;'>‚úó Invalid Users</td>
                    <td style='padding: 8px; border: 1px solid #ddd; text-align: center; color: red; font-weight: bold;'>{summary['error_users']}</td>
                </tr>
            </table>
            <p style='margin-top: 15px; font-weight: bold;'>‚ùå Fix errors and re-upload</p>
        </div>
        <h3 style='color: #E74C3C;'>üì• Download Error Report:</h3>
        <div style="padding: 10px; background-color: white; border-radius: 5px; border: 1px solid #E74C3C; display: inline-block; margin-top: 10px;">
            <a href="{error_report_href}" download="{error_report_name}"
               style="display: inline-block; padding: 10px 20px; background-color: #E74C3C; color: white;
                      text-decoration: none; border-radius: 5px; font-weight: bold; font-size: 14px;">
                ‚¨áÔ∏è Download Error Report ({error_report_name})
            </a>
        </div>
        """))
        
    elif summary_data['status'] == 'SUCCESS':
        # API Upload Success
        summary = summary_data['summary']
        success_count = summary_data['success_count']
        failed_count = summary_data['failed_count']
        final_report = summary_data['final_report']
        
        # Same treatment as the error report: quote for the URL, escape for HTML.
        final_report_href = quote(final_report)
        final_report_name = escape(os.path.basename(final_report))
        
        # NOTE(review): the "Already Exists" figures below are hard-coded to 0;
        # nothing in this notebook computes them.
        display(HTML(f"""
        <div style="font-family: Arial, sans-serif; padding: 20px; border: 2px solid #007bff; border-radius: 10px; background-color: #f8f9fa; margin: 20px 0;">
            <h2 style="color: #007bff; margin-top: 0;">üìä Data Upload Summary Report</h2>
            <p style="color: #666; margin-bottom: 20px;">Generated: {timestamp_str}</p>
            
            <table style="width: 100%; border-collapse: collapse; margin-bottom: 20px; background-color: white;">
                <thead>
                    <tr style="background-color: #007bff; color: white;">
                        <th style="padding: 12px; border: 1px solid #ddd; text-align: left;">Module</th>
                        <th style="padding: 12px; border: 1px solid #ddd; text-align: center;">‚úÖ Created</th>
                        <th style="padding: 12px; border: 1px solid #ddd; text-align: center;">‚ö†Ô∏è Already Exists</th>
                        <th style="padding: 12px; border: 1px solid #ddd; text-align: center;">‚ùå Failed</th>
                        <th style="padding: 12px; border: 1px solid #ddd; text-align: center;">üìä Total</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td style="padding: 8px; border: 1px solid #ddd;">Users (DHIS2)</td>
                        <td style="padding: 8px; border: 1px solid #ddd; text-align: center; color: green; font-weight: bold;">{success_count}</td>
                        <td style="padding: 8px; border: 1px solid #ddd; text-align: center; color: orange; font-weight: bold;">0</td>
                        <td style="padding: 8px; border: 1px solid #ddd; text-align: center; color: red; font-weight: bold;">{failed_count}</td>
                        <td style="padding: 8px; border: 1px solid #ddd; text-align: center; font-weight: bold;">{summary['total_users']}</td>
                    </tr>
                    <tr style="background-color: #e9ecef; font-weight: bold;">
                        <td style="padding: 12px; border: 1px solid #ddd;">TOTAL</td>
                        <td style="padding: 12px; border: 1px solid #ddd; text-align: center; color: green;">{success_count}</td>
                        <td style="padding: 12px; border: 1px solid #ddd; text-align: center; color: orange;">0</td>
                        <td style="padding: 12px; border: 1px solid #ddd; text-align: center; color: red;">{failed_count}</td>
                        <td style="padding: 12px; border: 1px solid #ddd; text-align: center;">{summary['total_users']}</td>
                    </tr>
                </tbody>
            </table>
            
            <div style="display: flex; justify-content: space-around; margin: 20px 0; flex-wrap: wrap;">
                <div style="text-align: center; padding: 15px; background-color: #d4edda; border-radius: 5px; flex: 1; margin: 5px; min-width: 150px;">
                    <div style="font-size: 32px; font-weight: bold; color: #155724;">{success_count}</div>
                    <div style="color: #155724;">Created</div>
                </div>
                <div style="text-align: center; padding: 15px; background-color: #fff3cd; border-radius: 5px; flex: 1; margin: 5px; min-width: 150px;">
                    <div style="font-size: 32px; font-weight: bold; color: #856404;">0</div>
                    <div style="color: #856404;">Already Exists</div>
                </div>
                <div style="text-align: center; padding: 15px; background-color: #f8d7da; border-radius: 5px; flex: 1; margin: 5px; min-width: 150px;">
                    <div style="font-size: 32px; font-weight: bold; color: #721c24;">{failed_count}</div>
                    <div style="color: #721c24;">Failed</div>
                </div>
            </div>
        </div>
        """))
        
        # Badge text and colour depend only on whether any upload failed.
        status_badge = '‚úÖ ALL SUCCESS' if failed_count == 0 else '‚ö†Ô∏è HAS ERRORS'
        badge_color = '#28a745' if failed_count == 0 else '#dc3545'
        
        display(HTML(f"""
        <div style="margin: 20px 0; padding: 15px; background-color: #e7f3ff; border-left: 4px solid #007bff; border-radius: 5px;">
            <h3 style="margin-top: 0; color: #004085;">üì• Updated CSV File with API Status</h3>
            <p style="color: #004085; margin-bottom: 15px;">
                The CSV file has been updated with <b>api_status</b>, <b>api_status_code</b>, and <b>api_message</b> columns:
            </p>
            <div style="padding: 10px; background-color: white; border-radius: 5px; border: 1px solid #ddd;">
                <div style="display: flex; justify-content: space-between; align-items: center; flex-wrap: wrap;">
                    <div style="flex: 1; min-width: 300px;">
                        <strong style="color: #007bff;">üìÑ {final_report_name}</strong>
                        <span style="background-color: {badge_color}; color: white; padding: 3px 8px; border-radius: 3px; font-size: 11px; margin-left: 10px;">{status_badge}</span>
                        <br>
                        <span style="font-size: 12px; color: #666;">Module: User Ingestion (DHIS2)</span>
                    </div>
                    <a href="{final_report_href}" download="{final_report_name}"
                       style="display: inline-block; padding: 8px 16px; background-color: #007bff; color: white;
                              text-decoration: none; border-radius: 5px; font-weight: bold; font-size: 14px; margin-top: 5px;">
                        ‚¨áÔ∏è Download Result File
                    </a>
                </div>
            </div>
            <p style="color: #004085; font-size: 12px; margin-top: 15px; border-top: 1px solid #bee5eb; padding-top: 10px;">
                <b>üí° How to use:</b>
            </p>
            <ul style="font-size: 12px; color: #004085; margin: 5px 0;">
                <li><span style="color: green; font-weight: bold;">SUCCESS:</span> User created successfully in DHIS2</li>
                <li><span style="color: red; font-weight: bold;">ERROR:</span> Failed to create - check api_message column for details</li>
                <li><span style="color: gray; font-weight: bold;">SKIPPED:</span> Validation failed (not uploaded)</li>
            </ul>
        </div>
        """))

else:
    print("‚ö†Ô∏è No results yet. Please upload and process a CSV file in the cell above first.")