# Step 3: Validate Migration from Azure Synapse to Fabric Warehouse

This notebook validates the data migration by comparing source and target databases.

## Validation Checks
1. Table count comparison
2. Row count validation for each table
3. Missing/extra table detection
4. Data integrity verification (based on the row-count comparison; column-level values are not compared)
5. Generate detailed validation report

## Prerequisites
- Completed Step 1: Data extraction to ADLS
- Completed Step 2: Data loading to Fabric Warehouse
- Access to both source and target databases
- Appropriate permissions (see PERMISSIONS_GUIDE.md)

## Configuration

Update the configuration parameters below with your environment details.

In [None]:
# --- Source side: the Azure Synapse server and database being migrated from ---
source_server = "<your-synapse-server>.sql.azuresynapse.net"
source_database = "<your-database-name>"

# --- Target side: the Fabric workspace and warehouse being migrated to ---
target_workspace = "<your-workspace-name>"
target_warehouse = "<your-warehouse-name>"

# --- Validation settings ---
# generate_report toggles the HTML validation report; report_path is where it is written.
generate_report = True
report_path = "/lakehouse/default/Files/validation_report.html"

# --- Authentication ---
# Accepted values: 'token' or 'interactive'.
# 'token' is the choice for Fabric notebooks running with managed identity.
auth_type = 'token'

# Echo the effective settings so they can be checked before any connection is made.
_report_state = 'Enabled' if generate_report else 'Disabled'
print(
    "Configuration loaded successfully ✓",
    f"Source: {source_server}/{source_database}",
    f"Target: {target_workspace}/{target_warehouse}",
    f"Report generation: {_report_state}",
    sep="\n",
)

## Setup and Import Helper Functions

Load the migration helper functions for database connections and utilities.

In [None]:
# Import helper functions
import html
import sys
import time
from datetime import datetime

# The helper module is loaded from this directory, so the search path must be
# extended before the import below.
sys.path.append('/lakehouse/default/Files/notebooks/utils')

from migration_helpers import ConnectionHelper, MigrationUtils, Colors

print("Helper functions imported successfully ✓")

## Connect to Source and Target Databases

Establish connections to both Azure Synapse and Fabric Warehouse for comparison.

In [None]:
# Auth settings handed to each connection helper; filled in below per auth mode.
source_auth_config, target_auth_config = {}, {}

if auth_type != 'token':
    # Any non-token mode is forwarded to both helpers unchanged.
    source_auth_config = dict(auth_type=auth_type)
    target_auth_config = dict(auth_type=auth_type)
else:
    # Token mode: request one token per side, each for its own resource URL.
    source_token = ConnectionHelper.get_spark_token("https://database.windows.net/.default")
    target_token = ConnectionHelper.get_spark_token("https://analysis.windows.net/powerbi/api")

    source_auth_config = dict(auth_type='token', token=source_token)
    target_auth_config = dict(auth_type='token', token=target_token)

# Open the source connection first, then the target connection.
print("Connecting to source database...")
source_conn = ConnectionHelper.connect_azure_sql(
    source_server,
    source_database,
    source_auth_config,
)

print("\nConnecting to target Fabric Warehouse...")
target_conn = ConnectionHelper.connect_fabric_warehouse(
    target_workspace,
    target_warehouse,
    target_auth_config,
)

# Banner confirming that both connect calls returned.
print(
    "\n" + "=" * 70,
    "Connections established successfully!",
    "=" * 70,
    sep="\n",
)

## Discover Tables in Source and Target

Get lists of all tables from both source and target databases.

In [None]:
def _print_table_preview(heading, color, tables, limit=10):
    """Print up to `limit` (schema, table) names under a colored heading.

    Nothing is printed when `tables` is empty. When there are more than
    `limit` entries, a final line reports how many were left out.
    """
    if not tables:
        return
    print(f"\n{color}{heading}{Colors.END}")
    ordered = sorted(tables)
    for schema, table in ordered[:limit]:
        print(f"  - [{schema}].[{table}]")
    if len(ordered) > limit:
        print(f"  ... and {len(ordered) - limit} more")


# Lists user tables in the target as (schema name, table name) rows.
_TARGET_TABLES_SQL = """
    SELECT s.name, t.name
    FROM sys.tables t
    INNER JOIN sys.schemas s ON t.schema_id = s.schema_id
    WHERE s.name NOT IN ('sys', 'INFORMATION_SCHEMA')
    ORDER BY s.name, t.name
"""

print(f"{Colors.BLUE}Discovering tables in source database...{Colors.END}")
source_tables = MigrationUtils.get_tables_list(source_conn)
# Assumes each row starts with (schema, table); any further fields are ignored.
source_table_set = {(row[0], row[1]) for row in source_tables}

print(f"\n{Colors.BLUE}Discovering tables in target warehouse...{Colors.END}")

# Get target tables (standard SQL query). The cursor is closed even when the
# query fails so it does not stay open for the rest of the notebook session.
target_cursor = target_conn.cursor()
try:
    target_cursor.execute(_TARGET_TABLES_SQL)
    target_tables_raw = target_cursor.fetchall()
finally:
    target_cursor.close()
target_table_set = {(schema, table) for schema, table in target_tables_raw}

print(f"{Colors.GREEN}✅ Found {len(source_table_set)} tables in source{Colors.END}")
print(f"{Colors.GREEN}✅ Found {len(target_table_set)} tables in target{Colors.END}")

# Identify missing and extra tables. Names are compared exactly as returned
# by each side (the comparison is case-sensitive).
missing_tables = source_table_set - target_table_set
extra_tables = target_table_set - source_table_set
common_tables = source_table_set & target_table_set

print(f"\n{Colors.BOLD}Table Comparison:{Colors.END}")
print(f"  Common tables:  {len(common_tables)}")
print(f"  Missing tables: {Colors.RED if len(missing_tables) > 0 else Colors.GREEN}{len(missing_tables)}{Colors.END}")
print(f"  Extra tables:   {Colors.YELLOW if len(extra_tables) > 0 else Colors.GREEN}{len(extra_tables)}{Colors.END}")

# Show a short preview of each discrepancy list (first 10 names).
_print_table_preview("Missing tables in target:", Colors.RED, missing_tables)
_print_table_preview("Extra tables in target:", Colors.YELLOW, extra_tables)

## Validate Row Counts

Compare row counts for all common tables between source and target.

In [None]:
print("\n" + "="*70)
print("Validating Row Counts")
print("="*70 + "\n")

# One result dict per common table, plus running totals for the summary.
validation_results = []
matched_tables = 0
mismatched_tables = 0
validation_errors = 0

for idx, (schema, table) in enumerate(sorted(common_tables), 1):
    print(f"[{idx}/{len(common_tables)}] Validating [{schema}].[{table}]...", end=' ')

    result = MigrationUtils.validate_row_count(source_conn, target_conn, schema, table)

    # Tag the result with its table so later cells can sort and display it.
    result['schema'] = schema
    result['table'] = table
    validation_results.append(result)

    # Keys read from the result: status, match, source_count, target_count,
    # difference, error. Anything that is neither a confirmed match nor a
    # reported mismatch is counted as a validation error.
    status = result.get('status')
    if status == 'success' and result.get('match'):
        matched_tables += 1
        print(f"{Colors.GREEN}✓ Match ({result['source_count']:,} rows){Colors.END}")
    elif status == 'mismatch':
        mismatched_tables += 1
        print(f"{Colors.RED}✗ Mismatch (source: {result['source_count']:,}, target: {result['target_count']:,}, diff: {result['difference']:,}){Colors.END}")
    else:
        validation_errors += 1
        print(f"{Colors.YELLOW}⚠️  Error: {result.get('error', 'Unknown error')}{Colors.END}")

# Print summary
print("\n" + "="*70)
print("VALIDATION SUMMARY")
print("="*70)
print(f"Total tables validated:   {len(common_tables)}")
print(f"Matched:                  {Colors.GREEN}{matched_tables}{Colors.END}")
print(f"Mismatched:               {Colors.RED}{mismatched_tables}{Colors.END}")
print(f"Validation errors:        {Colors.YELLOW}{validation_errors}{Colors.END}")
print(f"Missing in target:        {Colors.RED if len(missing_tables) > 0 else Colors.GREEN}{len(missing_tables)}{Colors.END}")
print(f"Extra in target:          {Colors.YELLOW if len(extra_tables) > 0 else Colors.GREEN}{len(extra_tables)}{Colors.END}")
print("="*70)

# Overall status. Collect every failure reason so that a run with both
# mismatches and missing tables reports both, not just the first one.
failure_reasons = []
if mismatched_tables > 0:
    failure_reasons.append(f"{mismatched_tables} tables have row count mismatches")
if len(missing_tables) > 0:
    failure_reasons.append(f"{len(missing_tables)} tables missing in target")

if failure_reasons:
    print(f"\n{Colors.RED}❌ VALIDATION FAILED - {'; '.join(failure_reasons)}{Colors.END}")
elif validation_errors > 0:
    print(f"\n{Colors.YELLOW}⚠️  VALIDATION COMPLETED WITH WARNINGS{Colors.END}")
elif len(common_tables) == 0:
    # Nothing was compared, so a "passed" verdict would be vacuous and misleading.
    print(f"\n{Colors.YELLOW}⚠️  VALIDATION COMPLETED WITH WARNINGS - no tables were validated{Colors.END}")
else:
    print(f"\n{Colors.GREEN}✅ VALIDATION PASSED - All tables migrated successfully!{Colors.END}")

## Generate Validation Report (Optional)

Generate a detailed HTML report with all validation results. To skip this step, set `generate_report = False` in the configuration cell.

In [None]:
def _escape(value):
    """Return `value` as text that is safe to place inside HTML content."""
    return html.escape(str(value))


def _fmt_count(value):
    """Format a count with thousands separators.

    Values that cannot take the ',' format (for example a missing count on a
    table whose validation errored) are shown as 'N/A' when None, otherwise
    as their escaped text, instead of raising.
    """
    try:
        return f"{value:,}"
    except (TypeError, ValueError):
        return "N/A" if value is None else _escape(value)


if generate_report:
    print(f"{Colors.BLUE}Generating validation report...{Colors.END}")

    report_timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    # CSS class for the missing/extra counters: highlighted only when non-zero.
    missing_class = 'error' if len(missing_tables) > 0 else 'success'
    extra_class = 'warning' if len(extra_tables) > 0 else 'success'

    # The document is assembled as a list of fragments and joined once at the end.
    html_parts = [f"""
    <!DOCTYPE html>
    <html>
    <head>
        <meta charset="utf-8">
        <title>Migration Validation Report</title>
        <style>
            body {{ font-family: Arial, sans-serif; margin: 20px; }}
            h1 {{ color: #0078D4; }}
            h2 {{ color: #333; margin-top: 30px; }}
            .summary {{ background-color: #f0f0f0; padding: 15px; border-radius: 5px; margin-bottom: 20px; }}
            .success {{ color: #107C10; }}
            .error {{ color: #D13438; }}
            .warning {{ color: #FF8C00; }}
            table {{ border-collapse: collapse; width: 100%; margin-top: 20px; }}
            th, td {{ border: 1px solid #ddd; padding: 12px; text-align: left; }}
            th {{ background-color: #0078D4; color: white; }}
            tr:nth-child(even) {{ background-color: #f2f2f2; }}
            .status-match {{ color: #107C10; font-weight: bold; }}
            .status-mismatch {{ color: #D13438; font-weight: bold; }}
            .status-error {{ color: #FF8C00; font-weight: bold; }}
        </style>
    </head>
    <body>
        <h1>Migration Validation Report</h1>
        <p><strong>Generated:</strong> {report_timestamp}</p>
        <p><strong>Source:</strong> {_escape(source_server)}/{_escape(source_database)}</p>
        <p><strong>Target:</strong> {_escape(target_workspace)}/{_escape(target_warehouse)}</p>

        <div class="summary">
            <h2>Summary</h2>
            <p><strong>Total Tables Validated:</strong> {len(common_tables)}</p>
            <p><strong>Matched:</strong> <span class="success">{matched_tables}</span></p>
            <p><strong>Mismatched:</strong> <span class="error">{mismatched_tables}</span></p>
            <p><strong>Validation Errors:</strong> <span class="warning">{validation_errors}</span></p>
            <p><strong>Missing in Target:</strong> <span class="{missing_class}">{len(missing_tables)}</span></p>
            <p><strong>Extra in Target:</strong> <span class="{extra_class}">{len(extra_tables)}</span></p>
        </div>

        <h2>Row Count Validation Details</h2>
        <table>
            <thead>
                <tr>
                    <th>Schema</th>
                    <th>Table</th>
                    <th>Source Rows</th>
                    <th>Target Rows</th>
                    <th>Difference</th>
                    <th>Status</th>
                </tr>
            </thead>
            <tbody>
    """]

    # Add validation results, one table row per validated table.
    for result in sorted(validation_results, key=lambda x: (x['schema'], x['table'])):
        # Same classification as the console output: only a confirmed match is
        # shown as a match; anything that is not a match or a mismatch is an error.
        status = result.get('status')
        if status == 'success' and result.get('match'):
            status_class = "status-match"
            status_text = "✓ Match"
        elif status == 'mismatch':
            status_class = "status-mismatch"
            status_text = "✗ Mismatch"
        else:
            status_class = "status-error"
            status_text = f"⚠️  Error: {_escape(result.get('error', 'Unknown'))}"

        html_parts.append(f"""
                <tr>
                    <td>{_escape(result['schema'])}</td>
                    <td>{_escape(result['table'])}</td>
                    <td>{_fmt_count(result.get('source_count'))}</td>
                    <td>{_fmt_count(result.get('target_count'))}</td>
                    <td>{_fmt_count(result.get('difference'))}</td>
                    <td class="{status_class}">{status_text}</td>
                </tr>
        """)

    html_parts.append("""
            </tbody>
        </table>
    """)

    # Add missing tables section
    if len(missing_tables) > 0:
        html_parts.append("""
        <h2>Missing Tables in Target</h2>
        <ul>
        """)
        for schema, table in sorted(missing_tables):
            html_parts.append(f"<li class='error'>[{_escape(schema)}].[{_escape(table)}]</li>")
        html_parts.append("</ul>")

    # Add extra tables section
    if len(extra_tables) > 0:
        html_parts.append("""
        <h2>Extra Tables in Target</h2>
        <ul>
        """)
        for schema, table in sorted(extra_tables):
            html_parts.append(f"<li class='warning'>[{_escape(schema)}].[{_escape(table)}]</li>")
        html_parts.append("</ul>")

    html_parts.append("""
    </body>
    </html>
    """)

    html_content = "".join(html_parts)

    # Write report to file. UTF-8 is explicit because the report contains
    # non-ASCII status symbols and declares charset utf-8 in its <head>.
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write(html_content)

    print(f"{Colors.GREEN}✅ Validation report generated: {report_path}{Colors.END}")
else:
    print("Report generation skipped (generate_report = False)")

## Cleanup and Close Connections

Close all database connections.

In [None]:
def _close_connection(conn, label):
    """Close `conn` if it is set and report the outcome.

    A failure to close is printed as a warning rather than raised, so that a
    problem with one connection does not prevent the other from being closed.
    """
    if not conn:
        return
    try:
        conn.close()
    except Exception as exc:
        print(f"{Colors.YELLOW}⚠️  Could not close {label.lower()} connection: {exc}{Colors.END}")
    else:
        print(f"{Colors.GREEN}✅ {label} connection closed{Colors.END}")


# Close connections (source first, then target).
_close_connection(source_conn, "Source")
_close_connection(target_conn, "Target")

print("\n" + "="*70)
print("Validation process completed!")
print("="*70)

# Remind the reader where the report was written, if one was requested.
if generate_report:
    print(f"\nView the detailed validation report at: {report_path}")