In [None]:
from pyspark.sql.types import *
from datetime import datetime

## Step 1: Create `feature_releases` Table (Original)

In [None]:
# Cell purpose: create an empty Delta table at Tables/feature_releases
# with a fixed schema, then echo the schema for confirmation.
print("üîÑ Creating table: feature_releases")
print("=" * 60)

# Column specs as (name, Spark type, nullable), in table column order.
feature_releases_columns = [
    ("feature_id", StringType(), False),
    ("feature_name", StringType(), False),
    ("workload", StringType(), True),
    ("release_date", TimestampType(), False),
    ("status", StringType(), True),
    ("is_preview", BooleanType(), False),
    ("source_url", StringType(), True),
    ("extracted_date", TimestampType(), False),
]
schema_feature_releases = StructType(
    [
        StructField(name, dtype, nullable)
        for name, dtype, nullable in feature_releases_columns
    ]
)

# Location the Delta table is written to.
table_path = "Tables/feature_releases"

# A zero-row DataFrame carries only the schema; writing it materializes
# the table definition. NOTE: "overwrite" replaces whatever already
# exists at table_path.
df_feature_releases = spark.createDataFrame([], schema_feature_releases)
(
    df_feature_releases.write
    .format("delta")
    .mode("overwrite")
    .save(table_path)
)

print("‚úÖ Table created: feature_releases")
print(f"   Location: {table_path}")
print("\n   Schema:")
# One output line per column: "<name>: <spark type>".
print(
    "\n".join(
        f"     - {field.name}: {field.dataType.simpleString()}"
        for field in schema_feature_releases.fields
    )
)

## Step 1b: Create `feature_releases_roadmap` Table (Enhanced)

In [None]:
# Cell purpose: create an empty Delta table at
# Tables/feature_releases_roadmap, then echo its schema and a short
# summary of what the table holds.
print("\nüîÑ Creating table: feature_releases_roadmap (Fabric GPS)")
print("=" * 60)

schema_roadmap = StructType([
    StructField(name="feature_id", dataType=StringType(), nullable=False),
    StructField(name="feature_name", dataType=StringType(), nullable=False),
    StructField(name="feature_description", dataType=StringType(), nullable=True),
    StructField(name="workload", dataType=StringType(), nullable=True),
    StructField(name="product_name", dataType=StringType(), nullable=True),
    # release_date is nullable here: planned features may not have a date.
    StructField(name="release_date", dataType=TimestampType(), nullable=True),
    StructField(name="release_type", dataType=StringType(), nullable=True),
    StructField(name="release_status", dataType=StringType(), nullable=True),
    StructField(name="is_preview", dataType=BooleanType(), nullable=False),
    StructField(name="is_planned", dataType=BooleanType(), nullable=False),
    StructField(name="is_shipped", dataType=BooleanType(), nullable=False),
    StructField(name="last_modified", dataType=TimestampType(), nullable=False),
    StructField(name="source_url", dataType=StringType(), nullable=True),
    StructField(name="source", dataType=StringType(), nullable=True),
    StructField(name="extracted_date", dataType=TimestampType(), nullable=False),
])

# Location the Delta table is written to.
table_path = "Tables/feature_releases_roadmap"

# Zero rows: only the schema is persisted. NOTE: "overwrite" replaces
# whatever already exists at table_path.
df_roadmap = spark.createDataFrame([], schema_roadmap)
(
    df_roadmap.write
    .format("delta")
    .mode("overwrite")
    .save(table_path)
)

print("‚úÖ Table created: feature_releases_roadmap")
print(f"   Location: {table_path}")
print("\n   Schema:")
for field in schema_roadmap.fields:
    print(f"     - {field.name}: {field.dataType.simpleString()}")

# Fixed informational footer, printed line by line.
for summary_line in (
    "\n   üí° This table includes:",
    "      - Complete feature descriptions",
    "      - Planned/future features (roadmap)",
    "      - Historical change tracking (last_modified)",
    "      - Release status (Planned, In Development, Shipped)",
):
    print(summary_line)

## Step 2: Create `preview_features_active` Table

In [None]:
# Cell purpose: create an empty Delta table at
# Tables/preview_features_active and echo its schema.
print("\nüîÑ Creating table: preview_features_active")
print("=" * 60)

# Column specs as (name, Spark type class, nullable), in column order.
# The type class is instantiated once per column when the schema is built.
preview_active_columns = (
    ("setting_name", StringType, False),
    ("feature_id", StringType, False),
    ("feature_name", StringType, False),
    ("workload", StringType, True),
    ("similarity_score", DoubleType, False),
    ("is_enabled", BooleanType, False),
    ("delegate_to_tenant", BooleanType, True),
    ("detected_date", TimestampType, False),
    ("release_date", TimestampType, True),
    ("status", StringType, True),
    ("source_url", StringType, True),
    ("days_since_release", IntegerType, True),
)
schema_preview_active = StructType(
    [
        StructField(name, type_cls(), nullable)
        for name, type_cls, nullable in preview_active_columns
    ]
)

# Location the Delta table is written to.
table_path = "Tables/preview_features_active"

# Zero rows: only the schema is persisted. NOTE: "overwrite" replaces
# whatever already exists at table_path.
df_preview_active = spark.createDataFrame([], schema_preview_active)
(
    df_preview_active.write
    .format("delta")
    .mode("overwrite")
    .save(table_path)
)

print("‚úÖ Table created: preview_features_active")
print(f"   Location: {table_path}")
print("\n   Schema:")
# One output line per column: "<name>: <spark type>".
print(
    "\n".join(
        f"     - {field.name}: {field.dataType.simpleString()}"
        for field in schema_preview_active.fields
    )
)

## Step 3: Create `feature_alerts` Table

In [None]:
# Cell purpose: create an empty Delta table at Tables/feature_alerts and
# echo its schema.
print("\nüîÑ Creating table: feature_alerts")
print("=" * 60)


def _required(name, dtype):
    """Build a non-nullable column definition."""
    return StructField(name, dtype, False)


def _optional(name, dtype):
    """Build a nullable column definition."""
    return StructField(name, dtype, True)


schema_alerts = StructType([
    # Alert identity and the feature it refers to.
    _required("alert_id", StringType()),
    _required("feature_id", StringType()),
    _required("feature_name", StringType()),
    _optional("workload", StringType()),
    # Alert classification and text.
    _required("alert_type", StringType()),
    _required("severity", StringType()),
    _required("message", StringType()),
    # Optional context columns.
    _optional("setting_name", StringType()),
    _optional("similarity_score", DoubleType()),
    _optional("days_since_release", IntegerType()),
    # Timestamps and acknowledgement state.
    _required("alert_date", TimestampType()),
    _required("acknowledged", BooleanType()),
    _optional("acknowledged_date", TimestampType()),
    _optional("acknowledged_by", StringType()),
])

# Location the Delta table is written to.
table_path = "Tables/feature_alerts"

# Zero rows: only the schema is persisted. NOTE: "overwrite" replaces
# whatever already exists at table_path.
df_alerts = spark.createDataFrame([], schema_alerts)
(
    df_alerts.write
    .format("delta")
    .mode("overwrite")
    .save(table_path)
)

print("‚úÖ Table created: feature_alerts")
print(f"   Location: {table_path}")
print("\n   Schema:")
for field in schema_alerts.fields:
    print(f"     - {field.name}: {field.dataType.simpleString()}")

## Step 4: Create Helper Views

In [None]:
# Cell purpose: (re)create four SQL views over the feature-tracking
# tables. Every statement is CREATE OR REPLACE, so re-running the cell
# replaces the existing view definitions.
print("\nüîÑ Creating helper views for SQL Endpoint...")

# Enabled preview features, newest detection first.
active_preview_features_sql = """
    CREATE OR REPLACE VIEW vw_active_preview_features AS
    SELECT 
        feature_name,
        workload,
        setting_name,
        days_since_release,
        similarity_score,
        release_date,
        detected_date
    FROM preview_features_active
    WHERE is_enabled = true
    ORDER BY detected_date DESC
"""

# Unacknowledged alerts whose severity is 'Critical' or 'Warning'.
critical_alerts_sql = """
    CREATE OR REPLACE VIEW vw_critical_alerts AS
    SELECT 
        alert_id,
        feature_name,
        workload,
        alert_type,
        severity,
        message,
        alert_date
    FROM feature_alerts
    WHERE acknowledged = false 
      AND severity IN ('Critical', 'Warning')
    ORDER BY alert_date DESC
"""

# Released features with the number of days elapsed since release_date.
feature_timeline_sql = """
    CREATE OR REPLACE VIEW vw_feature_timeline AS
    SELECT 
        feature_name,
        workload,
        status,
        is_preview,
        release_date,
        DATEDIFF(CURRENT_DATE(), release_date) as days_since_release
    FROM feature_releases
    ORDER BY release_date DESC
"""

# Planned roadmap features that are undated or dated today/later;
# days_until_release is NULL when there is no release_date.
roadmap_upcoming_sql = """
    CREATE OR REPLACE VIEW vw_roadmap_upcoming AS
    SELECT 
        feature_name,
        feature_description,
        product_name,
        workload,
        release_type,
        release_status,
        release_date,
        is_preview,
        is_planned,
        last_modified,
        CASE 
            WHEN release_date IS NULL THEN NULL
            ELSE DATEDIFF(release_date, CURRENT_DATE())
        END as days_until_release
    FROM feature_releases_roadmap
    WHERE is_planned = true
      AND (release_date IS NULL OR release_date >= CURRENT_DATE())
    ORDER BY release_date ASC NULLS LAST, last_modified DESC
"""

# Run the statements in a fixed order; each confirmation is printed only
# after its statement returned without raising.
for create_statement, confirmation in (
    (active_preview_features_sql, "‚úÖ Created view: vw_active_preview_features"),
    (critical_alerts_sql, "‚úÖ Created view: vw_critical_alerts"),
    (feature_timeline_sql, "‚úÖ Created view: vw_feature_timeline"),
    (roadmap_upcoming_sql, "‚úÖ Created view: vw_roadmap_upcoming"),
):
    spark.sql(create_statement)
    print(confirmation)

print("\n‚úÖ All views created successfully")
print("   ‚Üí These views are accessible via SQL Endpoint")

## ✅ Setup Complete!

In [None]:
# Cell purpose: report whether every table and view created by this
# notebook can be read, then print the follow-up steps. Verification is
# best-effort: a failing object is reported and the loop continues.
print("\n" + "=" * 60)
print("üéâ FEATURE TRACKING SETUP COMPLETED!")
print("=" * 60)

# Delta tables expected under Tables/<name>.
tables = [
    "feature_releases",
    "feature_releases_roadmap",
    "preview_features_active",
    "feature_alerts",
]

print("\nüìã Verifying tables...")
for table in tables:
    try:
        count = spark.read.format("delta").load(f"Tables/{table}").count()
    except Exception as exc:
        # Catch Exception rather than using a bare except so Ctrl-C and
        # interpreter shutdown are not swallowed. Only the first line of
        # the message is shown to keep the report compact.
        reason = str(exc).partition("\n")[0]
        print(f"  ‚ùå {table}: ERROR ({type(exc).__name__}: {reason})")
    else:
        print(f"  ‚úÖ {table}: {count} rows")

# Views expected to exist after the helper-view cell has run.
views = [
    "vw_active_preview_features",
    "vw_critical_alerts",
    "vw_feature_timeline",
    "vw_roadmap_upcoming",
]

print("\nüìã Verifying views...")
for view in views:
    try:
        # No rows are fetched; spark.sql() analyzes the statement when it
        # is called, so an unresolvable view should raise here.
        spark.sql(f"SELECT * FROM {view} LIMIT 1")
    except Exception as exc:
        reason = str(exc).partition("\n")[0]
        print(f"  ‚ùå {view}: ERROR ({type(exc).__name__}: {reason})")
    else:
        print(f"  ‚úÖ {view}")

print("\n" + "=" * 60)
print("üìö Next Steps:")
print("=" * 60)
print("\n1. Choose your data source:")
print("   a) Microsoft Learn (original):")
print("      ‚Üí Run '01_Transfer_Feature_Releases_Unit'")
print("   b) Fabric GPS API (enhanced with roadmap):")
# This line was previously leftover notebook-JSON text (an indented,
# quoted string), which made the whole cell fail to parse.
print("      ‚Üí Run '01_Transfer_Feature_Releases_GpsApi_Unit'")
print("\n2. Run '02_Transfer_Preview_Features_Unit' to detect activated previews")
print("   Note: This requires 'tenant_settings' table from FUAM core")
print("\n3. Run '03_Transfer_Feature_Alerts_Unit' to generate alerts")
print("\n4. OR run the full pipeline: 'Load_Feature_Tracking_E2E'")
print("\nüí° Recommended:")
print("   - Use Enhanced version (Fabric GPS) for complete roadmap visibility")
print("   - Run both versions if you want dual data sources")
print("   - Schedule pipeline to run daily")
print("\n" + "=" * 60)