# 0. Set the default lakehouse for the notebook run

In [None]:
%%configure
{ 
    "defaultLakehouse": { 
        "name": {
                  "parameterName": "lakehouseName",
                  "defaultValue": "defaultlakehousename"
        }
    }
}

# 1. Initialize Parameters

In [None]:
import java.time.LocalDateTime
import java.time.format.DateTimeFormatter
import java.time.temporal.ChronoUnit
import java.util.UUID
import java.text.SimpleDateFormat
import java.time.{LocalDate, LocalDateTime, Period}
import java.time.format.DateTimeFormatter
import java.time.temporal.ChronoUnit
import java.util.Calendar

// Run identifier; defaults to the all-zero GUID.
val runId = "00000000-0000-0000-0000-000000000000"

// Names of the staging and final Delta tables for each dataset.
val sitesStagingTableName = "Sites_Staging"
val sitesFinalTableName = "Sites"
val permissionsStagingTableName = "Permissions_Staging"
val permissionsFinalTableName = "Permissions"

// Workspace and lakehouse identity. The ids and the lakehouse name are read from the
// Spark session configuration ("trident.*" keys — presumably set by the notebook
// runtime; confirm), while the workspace name here is a fixed literal.
val workspaceName = "LakeHouseTesting"
val workspaceId = spark.conf.get("trident.workspace.id")
val lakehouseId = spark.conf.get("trident.lakehouse.id")
val lakehouseName = spark.conf.get("trident.lakehouse.name")

// Resolve SQL identifiers case-sensitively for the rest of this session.
spark.conf.set("spark.sql.caseSensitive", true)




# 2. Read Sites Dataset from Staging Table

In [None]:
// Look the lakehouse up by name and take its id from the returned object; the workspace
// name is taken from the notebook runtime context.
val lakehouse = mssparkutils.lakehouse.get(lakehouseName)
val lakehouseId = lakehouse.id
val workspaceName = notebookutils.runtime.context("currentWorkspaceName")

println("Started reading Sites dataset")
// ABFSS path of the Sites staging table under the lakehouse's Tables folder.
val sitesStagingLocation = s"abfss://${workspaceId}@onelake.dfs.fabric.microsoft.com/${lakehouseId}/Tables/${sitesStagingTableName}"
val dfSitesStaging = spark.read
  .format("delta")
  .load(sitesStagingLocation)
println("Completed reading Sites dataset")

# 3. Read Permissions Dataset from Staging Table

In [None]:
println("Started reading Permissions dataset")
// ABFSS path of the Permissions staging table under the lakehouse's Tables folder.
val permissionsStagingLocation = s"abfss://${workspaceId}@onelake.dfs.fabric.microsoft.com/${lakehouseId}/Tables/${permissionsStagingTableName}"
val dfPermissionsStaging = spark.read
  .format("delta")
  .load(permissionsStagingLocation)
println("Completed reading Permissions dataset")

# 4. Check whether the Final Tables exist

In [None]:
import io.delta.tables.DeltaTable
// ABFSS paths of the final (merged) Delta tables under the lakehouse's Tables folder.
val sitesFinalLocation = s"abfss://${workspaceId}@onelake.dfs.fabric.microsoft.com/${lakehouseId}/Tables/${sitesFinalTableName}"
val permissionsFinalLocation = s"abfss://${workspaceId}@onelake.dfs.fabric.microsoft.com/${lakehouseId}/Tables/${permissionsFinalTableName}"

/**
 * Ensures a Delta table exists at `finalLocation`, creating an empty one that carries
 * the schema of `stagingDf` when none is found.
 *
 * @param datasetLabel  dataset name used in the progress messages (e.g. "Sites")
 * @param stagingDf     staging DataFrame whose schema seeds the new table
 * @param finalLocation path of the final Delta table
 * @return `true` when a Delta table was already present at `finalLocation` before the
 *         call, `false` when this call had to create it
 */
def ensureFinalTableExists(
    datasetLabel: String,
    stagingDf: org.apache.spark.sql.DataFrame,
    finalLocation: String): Boolean = {
  val alreadyExists = DeltaTable.isDeltaTable(spark, finalLocation)
  if (alreadyExists) {
    println(s"Final ${datasetLabel} table exists already")
  } else {
    println(s"Final ${datasetLabel} table not exists. Creating final ${datasetLabel} table with schema only")
    // "1=2" is never true, so the write carries the staging schema but no rows.
    stagingDf.filter("1=2").write.format("delta").mode("overwrite").save(finalLocation)
    println(s"Final ${datasetLabel} table created")
  }
  alreadyExists
}

// Both flags record whether the table existed *before* this cell ran.
val sitesFinalTableExists = ensureFinalTableExists("Sites", dfSitesStaging, sitesFinalLocation)

val permissionsFinalTableExists =
  ensureFinalTableExists("Permissions", dfPermissionsStaging, permissionsFinalLocation)


# 5. Merge Sites Data from Staging table to Final table

In [None]:
import io.delta.tables._
import org.apache.spark.sql.functions._
import org.apache.spark.sql.expressions.{Window, WindowSpec}
import org.apache.spark.sql.functions.{coalesce, lit, sum, col, _}
import org.apache.spark.sql.types.{StructField, _}
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import org.apache.spark.storage.StorageLevel

// Handles on the staging (source) and final (target) Sites Delta tables.
val deltaTableSource = DeltaTable.forPath(spark, sitesStagingLocation)
val deltaTableTarget = DeltaTable.forPath(spark, sitesFinalLocation)

import spark.implicits._
val dfSource = deltaTableSource.toDF

// Merge predicates: rows are paired on Id, and the source row's Operation value decides
// whether the paired target row is removed or overwritten.
val sitesJoinCondition = "source.Id = target.Id"
val sitesSourceDeleted = "source.Operation = 'Deleted'"
val sitesSourceNotDeleted = "source.Operation != 'Deleted'"

println("Merging Sites dataset from current staging table")
val sitesMergeBuilder = deltaTableTarget
  .as("target")
  .merge(dfSource.as("source"), sitesJoinCondition)

sitesMergeBuilder
  .whenMatched(sitesSourceDeleted).delete()            // matched + Deleted      -> remove from final
  .whenMatched(sitesSourceNotDeleted).updateAll()      // matched + not Deleted  -> overwrite all columns
  .whenNotMatched(sitesSourceNotDeleted).insertAll()   // unmatched + not Deleted -> add to final
  .execute()
println("Merging of Sites dataset completed")

# 6. Merge Permissions Data from Staging table to Final table

In [None]:
import io.delta.tables._
import org.apache.spark.sql.functions._
import org.apache.spark.sql.expressions.{Window, WindowSpec}
import org.apache.spark.sql.functions.{coalesce, lit, sum, col, _}
import org.apache.spark.sql.types.{StructField, _}
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import org.apache.spark.storage.StorageLevel

// Handles on the staging (source) and final (target) Permissions Delta tables.
val deltaTablePermissionsSource = DeltaTable.forPath(spark, permissionsStagingLocation)
val deltaTablePermissionsTarget = DeltaTable.forPath(spark, permissionsFinalLocation)

import spark.implicits._
val dfPermissionsSource = deltaTablePermissionsSource.toDF

// Key predicate shared by both merges below. LinkId is coalesced to '' so that two NULL
// LinkIds compare as equal.
val permissionsKeyCondition = Seq(
  "source.SiteId = target.SiteId",
  "source.ScopeId = target.ScopeId",
  "coalesce(source.LinkId,'') = coalesce(target.LinkId,'')",
  "source.RoleDefinition = target.RoleDefinition"
).mkString(" and ")

// Extra predicate on the SharedWith_* columns, used only by the second merge.
// Single-quoted '' literals are used throughout: a double-quoted "" is read as an
// identifier, not a string, when spark.sql.ansi.doubleQuotedIdentifiers is enabled.
val permissionsSharedWithCondition = Seq(
  "coalesce(source.SharedWith_Name,'') = coalesce(target.SharedWith_Name,'')",
  "coalesce(source.SharedWith_TypeV2,'') = coalesce(target.SharedWith_TypeV2,'')",
  "coalesce(source.SharedWith_Email,'') = coalesce(target.SharedWith_Email,'')",
  "coalesce(source.SharedWith_AADObjectId,'') = coalesce(target.SharedWith_AADObjectId,'')"
).mkString(" and ")

println("Merging Permissions dataset from current staging table")

// Step 1: remove every final-table row whose key appears in staging, whatever its
// Operation value. NOTE(review): several staging rows can share one key; Delta is
// understood to allow multiple source matches only when the sole matched clause is an
// unconditional delete — confirm before adding other clauses to this merge.
println("Started: Cleaned up older permissions")
deltaTablePermissionsTarget
  .as("target")
  .merge(dfPermissionsSource.as("source"), permissionsKeyCondition)
  .whenMatched()
  .delete()
  .execute()
println("Completed: Cleaned up older permissions")

// Step 2: merge staging rows into the final table, pairing rows on the key plus the
// SharedWith_* columns. Rows whose Operation is 'Deleted' are removed when matched and
// never inserted; other rows are updated when matched and inserted otherwise.
deltaTablePermissionsTarget
  .as("target")
  .merge(
    dfPermissionsSource.as("source"),
    s"${permissionsKeyCondition} and ${permissionsSharedWithCondition}")
  .whenMatched("source.Operation = 'Deleted'")
  .delete()
  .whenMatched("source.Operation != 'Deleted'")
  .updateAll()
  .whenNotMatched("source.Operation != 'Deleted'")
  .insertAll()
  .execute()
println("Merging of Permissions dataset completed")
  

# 7. Read Sites and Permissions dataset - Sample TOP 10 Rows

In [None]:
// Preview queries: ten rows from each final table, newest SnapshotDate first.
val sitesPreviewQuery = s"SELECT * FROM ${lakehouseName}.${sitesFinalTableName} order by SnapshotDate DESC LIMIT 10"
val permissionsPreviewQuery = s"SELECT * FROM ${lakehouseName}.${permissionsFinalTableName}  order by SnapshotDate DESC LIMIT 10"

// sqlQuery always holds the query that was run most recently.
var sqlQuery = sitesPreviewQuery
val dfSitesAll = spark.sql(sqlQuery)
display(dfSitesAll)

sqlQuery = permissionsPreviewQuery
val dfPermissionsAll = spark.sql(sqlQuery)
display(dfPermissionsAll)