In [0]:

%run ../00_Config

In [0]:
# ============================================================================
# INTERACTIVE RESOLUTION INTERFACE
# ============================================================================
#
# Dashboard cell: loads the quarantine Delta table into `quarantine_df` (reused
# by later cells) and prints record counts by resolution status and by
# violation severity.

# Load quarantine data
from pyspark.sql.functions import col, count, current_timestamp, lit

quarantine_df = spark.read.format("delta").load(QUARANTINE_BRIGHTSPACE)

print("=" * 70)
print("üìã QUARANTINE DASHBOARD")
print("=" * 70)

# One aggregation instead of three separate full-table count() actions.
# The sum over all status groups (including a null group) equals the row count.
status_counts = {
    row["resolution_status"]: row["count"]
    for row in quarantine_df.groupBy("resolution_status").count().collect()
}
total = sum(status_counts.values())
pending = status_counts.get("pending", 0)
resolved = status_counts.get("resolved", 0)

print(f"Total Issues:        {total}")
print(f"  ‚îî‚îÄ Pending:        {pending}")
print(f"  ‚îî‚îÄ Resolved:       {resolved}")
print()

# Severity breakdown
print("By Severity:")
SEVERITY_EMOJI = {"critical": "‚ö†Ô∏è", "high": "üî¥", "medium": "üü°", "low": "üü¢"}
severity_counts = quarantine_df.groupBy("violation_severity").count().collect()
for row in sorted(severity_counts, key=lambda x: x['count'], reverse=True):
    severity_value = row['violation_severity']
    severity_emoji = SEVERITY_EMOJI.get(severity_value, "‚Ä¢")
    # groupBy yields a None key when the column holds nulls; calling
    # .capitalize() on it would raise AttributeError and abort the dashboard.
    severity_label = severity_value.capitalize() if severity_value else "Unknown"
    print(f"  {severity_emoji}  {severity_label}: {row['count']}")

print("=" * 70)

In [0]:
# ============================================================================
# CREATE INTERACTIVE FILTERS
# ============================================================================
#
# Builds the four filter widgets (severity, type, status, max records) from the
# distinct values present in `quarantine_df`.

# Remove any existing widgets
# NOTE(review): the Databricks widgets documentation warns that a widget cannot
# be created in the same cell that removes widgets. Confirm this cell behaves as
# intended on the target runtime; if not, move removeAll() into its own cell.
dbutils.widgets.removeAll()

# Get unique values for dropdowns.
# Nulls are skipped because None is not a usable dropdown choice, and the values
# are sorted so the option order is stable between runs.
severity_options = ["All"] + sorted(
    row.violation_severity
    for row in quarantine_df.select("violation_severity").distinct().collect()
    if row.violation_severity is not None
)

type_options = ["All"] + sorted(
    row.violation_type
    for row in quarantine_df.select("violation_type").distinct().collect()
    if row.violation_type is not None
)

status_options = ["All", "pending", "resolved"]

# Create filter widgets
dbutils.widgets.dropdown("severity_filter", "All", severity_options, "1. Filter by Severity")
dbutils.widgets.dropdown("type_filter", "All", type_options, "2. Filter by Type")
dbutils.widgets.dropdown("status_filter", "pending", status_options, "3. Filter by Status")
dbutils.widgets.text("limit_records", "50", "4. Max Records to Show")

print("‚úì Filter widgets created!")
print("  Use the dropdowns above to filter quarantine data")

In [0]:
# ============================================================================
# APPLY FILTERS AND DISPLAY RECORDS
# ============================================================================
#
# Reads the filter widgets, narrows `quarantine_df` accordingly, and displays
# the top N records ordered by severity (critical first) and then by most
# recent quarantine timestamp.

# Imported here so the cell does not rely on an import made in a later cell.
from pyspark.sql.functions import col, when

DEFAULT_LIMIT = 50  # matches the default of the "limit_records" widget

# Get widget values
severity = dbutils.widgets.get("severity_filter")
vtype = dbutils.widgets.get("type_filter")
status = dbutils.widgets.get("status_filter")

# The limit widget is free text: fall back to the default instead of crashing
# on empty, non-numeric, or negative input.
raw_limit = dbutils.widgets.get("limit_records").strip()
try:
    limit = int(raw_limit)
    if limit < 0:
        raise ValueError(raw_limit)
except ValueError:
    limit = DEFAULT_LIMIT
    print(f"‚ö†Ô∏è  Invalid 'Max Records to Show' value {raw_limit!r}; using {DEFAULT_LIMIT}")

# Apply filters
filtered_df = quarantine_df

if severity != "All":
    filtered_df = filtered_df.filter(col("violation_severity") == severity)

if vtype != "All":
    filtered_df = filtered_df.filter(col("violation_type") == vtype)

if status != "All":
    filtered_df = filtered_df.filter(col("resolution_status") == status)

# Sorting the severity *string* descending yields medium > low > high > critical,
# so rank the known levels explicitly; unknown or null values sort last.
severity_rank = (
    when(col("violation_severity") == "critical", 4)
    .when(col("violation_severity") == "high", 3)
    .when(col("violation_severity") == "medium", 2)
    .when(col("violation_severity") == "low", 1)
    .otherwise(0)
)
sort_keys = [severity_rank.desc()]
if "_quarantine_timestamp" in filtered_df.columns:
    sort_keys.append(col("_quarantine_timestamp").desc())

# Order BEFORE limiting so the limit keeps the most severe / most recent records
# rather than an arbitrary subset that is only sorted afterwards.
filtered_df = filtered_df.orderBy(*sort_keys).limit(limit)

print(f"\nüìã Filtered Results: {filtered_df.count()} records")
print(f"  Severity: {severity}")
print(f"  Type: {vtype}")
print(f"  Status: {status}")

# Display key columns
display_columns = [
    "_quarantine_id",
    "student_id", "first_name", "last_name", "email",
    "violation_type", "violation_severity", "violation_description",
    "resolution_status"
]

# Only project columns that actually exist in the table.
available_columns = [c for c in display_columns if c in filtered_df.columns]

print("\nüìä Quarantined Records:")
display(filtered_df.select(available_columns))

In [0]:
# ============================================================================
# SELECT SPECIFIC RECORD FOR RESOLUTION 
# ============================================================================
#
# Looks up one quarantine record by its `_quarantine_id`, shows it, and exposes
# it to the resolution cell through a temp view.

SELECTED_RECORD_VIEW = "selected_record_for_resolution"

# Create widget for record selection BY QUARANTINE ID
dbutils.widgets.text("quarantine_id_select", "", "5. Enter Quarantine ID to Review")

# Strip stray whitespace that tends to come along when the ID is copy-pasted.
selected_quarantine_id = dbutils.widgets.get("quarantine_id_select").strip()

# Drop any view left over from a previous selection. Otherwise an empty or
# unknown ID would leave the OLD record registered, and the resolution cell
# could act on a record the user is no longer looking at.
spark.catalog.dropTempView(SELECTED_RECORD_VIEW)

if selected_quarantine_id:
    # Find the selected record BY UNIQUE QUARANTINE ID
    selected_records = quarantine_df.filter(col("_quarantine_id") == selected_quarantine_id)

    if selected_records.count() > 0:
        print(f"‚úì Found record with Quarantine ID: {selected_quarantine_id}")
        print("\n" + "=" * 70)
        print("RECORD DETAILS")
        print("=" * 70)

        # Display full record details
        display(selected_records)

        # Store for resolution
        selected_records.createOrReplaceTempView(SELECTED_RECORD_VIEW)

    else:
        print(f"‚úó No record found with Quarantine ID: {selected_quarantine_id}")
        print("  Copy the _quarantine_id from the table above")
else:
    print("‚ÑπÔ∏è  Enter a Quarantine ID in the widget above")
    print("   Tip: Look at the _quarantine_id column in the table")
    print("   Example: Q_8589934592")

In [0]:
# ============================================================================
# CHOOSE RESOLUTION ACTION
# ============================================================================
#
# Registers the action dropdown and the free-text notes widget, then either
# echoes the current choice or prints a short guide to the available actions.

# Dropdown choices; the leading empty entry means "nothing chosen yet".
resolution_options = [
    "",
    "Accept (move to clean)",
    "Reject (delete)",
    "Edit (modify values)",
    "Escalate (flag for review)",
]
dbutils.widgets.dropdown("resolution_action", "", resolution_options, "6. Choose Resolution Action")

# Optional free-text notes
dbutils.widgets.text("resolution_notes", "", "7. Add Resolution Notes (optional)")

selected_action = dbutils.widgets.get("resolution_action")
notes = dbutils.widgets.get("resolution_notes")

if not selected_action:
    # Nothing picked yet: show the prompt followed by the action guide.
    action_help_lines = (
        "‚ÑπÔ∏è  Select a resolution action from the dropdown above",
        "\nüìã Resolution Actions Explained:",
        "  ‚Ä¢ Accept: Record is valid, move to Silver (clean data)",
        "  ‚Ä¢ Reject: Record is invalid, mark for deletion",
        "  ‚Ä¢ Edit: Flag for manual correction (you'll fix it separately)",
        "  ‚Ä¢ Escalate: Send to senior data steward for review",
    )
    for help_line in action_help_lines:
        print(help_line)
else:
    # Echo the selection (and the notes, when provided).
    print(f"‚úì Action selected: {selected_action}")
    if notes:
        print(f"  Notes: {notes}")
    print("\n‚ö†Ô∏è  Run the NEXT CELL to apply this resolution")

In [0]:
# ============================================================================
# EXECUTE RESOLUTION 
# ============================================================================
#
# Applies the action chosen in widget 6 to the quarantine record whose ID is in
# widget 5:
#   * the record is looked up by `_quarantine_id` in a fresh read of the
#     quarantine table (not via the temp view, which may belong to an earlier
#     selection);
#   * for "Accept", the record (minus quarantine/resolution bookkeeping columns)
#     is appended to the Silver table first;
#   * the quarantine row is then stamped with status, action, user, timestamp
#     and notes.
# Errors are printed with a traceback rather than re-raised.

import traceback

from pyspark.sql.functions import col, current_timestamp, lit, when

# Strip stray whitespace that tends to come along when the ID is copy-pasted.
selected_quarantine_id = dbutils.widgets.get("quarantine_id_select").strip()
selected_action = dbutils.widgets.get("resolution_action")
resolution_notes = dbutils.widgets.get("resolution_notes")

if selected_quarantine_id and selected_action:

    try:
        # Load all quarantine data
        all_quarantine = spark.read.format("delta").load(QUARANTINE_BRIGHTSPACE)

        is_selected = col("_quarantine_id") == selected_quarantine_id

        # Resolve exactly the record named by the widget. Using the temp view
        # here could append a previously selected record to Silver while a
        # different ID gets marked resolved.
        record_to_resolve = all_quarantine.filter(is_selected)
        current_state = record_to_resolve.select(
            "resolution_status", "resolution_action"
        ).collect()

        if not current_state:
            # Nothing matches: do not rewrite the table or report success.
            print(f"‚úó No record found with Quarantine ID: {selected_quarantine_id}")
            print("  Copy the _quarantine_id from the table above")

        elif all(
            state["resolution_status"] == "resolved"
            and state["resolution_action"] == selected_action
            for state in current_state
        ):
            # Re-running the cell must not append the same record to Silver twice.
            print(f"‚ÑπÔ∏è  Quarantine ID {selected_quarantine_id} is already resolved "
                  f"with action '{selected_action}' - nothing to do")

        else:
            print("=" * 70)
            print("EXECUTING RESOLUTION")
            print("=" * 70)
            print(f"Quarantine ID: {selected_quarantine_id}")
            print(f"Action: {selected_action}")
            print(f"Notes: {resolution_notes if resolution_notes else 'None'}")
            print()

            if "Accept" in selected_action:
                # Move to Silver (clean data) BEFORE marking the record resolved,
                # so a failed append cannot leave it flagged as accepted while
                # missing from Silver.
                clean_record = record_to_resolve.drop(
                    "_quarantine_id",
                    "violation_type", "violation_severity", "violation_description",
                    "violation_column", "resolution_status", "_quarantine_timestamp",
                    "resolved_by", "resolution_timestamp", "resolution_action", "resolution_notes"
                )

                clean_record.write \
                    .format("delta") \
                    .mode("append") \
                    .save(SILVER_BRIGHTSPACE)

                print("‚úì Record moved to Silver layer!")

            # Update ONLY the selected record by unique _quarantine_id; every
            # other row keeps its current values.
            resolution_updates = {
                "resolution_status": lit("resolved"),
                "resolution_action": lit(selected_action),
                "resolved_by": lit(USER_EMAIL),
                "resolution_timestamp": current_timestamp(),
                "resolution_notes": lit(
                    resolution_notes if resolution_notes else "No notes provided"
                ),
            }
            updated_quarantine = all_quarantine
            for column_name, new_value in resolution_updates.items():
                updated_quarantine = updated_quarantine.withColumn(
                    column_name,
                    when(is_selected, new_value).otherwise(col(column_name)),
                )

            # Write back to quarantine table
            # NOTE(review): this rewrites the whole table to change one row; a
            # Delta UPDATE/MERGE on QUARANTINE_BRIGHTSPACE would touch less data
            # - consider switching if it is available in this workspace.
            updated_quarantine.write \
                .format("delta") \
                .mode("overwrite") \
                .save(QUARANTINE_BRIGHTSPACE)

            print("‚úì Quarantine table updated!")

            # Action-specific confirmation (Accept was reported above)
            if "Reject" in selected_action:
                print("‚úì Record marked for deletion")

            elif "Edit" in selected_action:
                print("‚úì Record flagged for manual editing")

            elif "Escalate" in selected_action:
                print("‚úì Record escalated for senior review")

            print("\nüéâ Resolution completed successfully!")
            print("   ‚úì This resolved ONLY the specific violation you selected")
            print("   ‚úì Other violations for the same student remain in quarantine")
            print("=" * 70)

    except Exception as e:
        print(f"‚úó Error: {str(e)}")
        traceback.print_exc()

else:
    print("‚ÑπÔ∏è  To execute resolution:")
    print("   1. Enter a Quarantine ID in widget 5 and run the record selection cell")
    print("   2. Choose a resolution action in widget 6")
    print("   3. Re-run this cell")

In [0]:
# ============================================================================
# VIEW RESOLUTION HISTORY
# ============================================================================
#
# Shows the 20 most recently resolved quarantine records.

print("üìú Recent Resolutions:")

# Re-read the table instead of reusing `quarantine_df` from the dashboard cell,
# so resolutions written since that cell ran are reflected here regardless of
# how the earlier DataFrame handle resolves table snapshots.
resolved_records = (
    spark.read.format("delta")
    .load(QUARANTINE_BRIGHTSPACE)
    .filter(col("resolution_status") == "resolved")
)

if resolved_records.count() > 0:
    display(resolved_records.select(
        "student_id", "email", "violation_type", "violation_severity",
        "resolution_action", "resolved_by", "resolution_timestamp", "resolution_notes"
    ).orderBy(col("resolution_timestamp").desc()).limit(20))
else:
    print("‚ÑπÔ∏è  No resolutions yet. Start resolving records above!")