# JobInsight Sample Notebook
This notebook demonstrates how to use the JobInsight diagnostic library to analyze completed Spark jobs in Microsoft Fabric.
It includes:
- Analyzing a completed Spark job
- Persisting metrics to Lakehouse tables
- Reloading previously analyzed results
- Copying Spark event logs to an ABFSS (Azure Data Lake Storage Gen2) path

**Note:** This notebook uses Scala.

In [None]:
// Step 0: Configure Spark session for large event logs
import org.apache.spark.sql.SparkSession

// Build (or reuse, via getOrCreate) the session with the event-log JSON limits
// applied. The limits are kept in one list so they are easy to review and extend.
val spark: SparkSession = {
  // Event-log JSON option overrides, applied to the builder in this order.
  val eventLogJsonLimits = Seq(
    "spark.eventLog.jsonOption.maxStringLength" -> "2000000000",
    "spark.eventLog.jsonOption.maxDepth"        -> "2000000000"
  )

  val configuredBuilder =
    eventLogJsonLimits.foldLeft(SparkSession.builder().appName("JobInsightApp")) {
      case (builder, (key, value)) => builder.config(key, value)
    }

  configuredBuilder.getOrCreate()
}

In [None]:
// Step 1: Analyze a completed Spark job
import com.microsoft.jobinsight.diagnostic.SparkDiagnostic

// Replace the placeholders with actual values from your Spark application.
// These identifiers are reused by the event-log copy in Step 4.
val workspaceId    = "<your-workspace-id>"
val artifactId     = "<your-artifact-id>"
val livyId         = "<your-livy-id>"
val jobType        = "sessions"  // "sessions" for notebooks, "batches" for job definitions
val attemptId      = 1           // Optional: default is 1

// Location where the analysis state is written; Step 3 reloads from this same path.
val stateStorePath = "abfss://<container>@<storage_account>.dfs.core.windows.net/<path>/state_store"

// WARNING: destructive. Anything already stored under stateStorePath is removed
// (the `true` argument requests a recursive delete) so the analysis starts from
// an empty state store. Double-check the path before running this cell.
if (mssparkutils.fs.exists(stateStorePath)) {
    mssparkutils.fs.rm(stateStorePath, true)
}

// Pass attemptId explicitly so the analysis targets the same application attempt
// as the event-log copy in Step 4; previously it was declared but never used
// here, so changing it silently had no effect on the analysis.
val jobInsight = SparkDiagnostic.analyze(
    workspaceId,
    artifactId,
    livyId,
    jobType,
    stateStorePath,
    attemptId
)

// Metric datasets exposed by the analysis result, one per granularity level.
val queries    = jobInsight.queries
val jobs       = jobInsight.jobs
val stages     = jobInsight.stages
val tasks      = jobInsight.tasks
val executors  = jobInsight.executors

// Quick preview of the query-level and job-level metrics.
queries.show()
jobs.show()

In [None]:
// Step 2: Save metrics to a Lakehouse table
// Writes the query metrics from Step 1 as a Delta table named "Queries";
// "overwrite" mode replaces the contents of an existing table with that name.
queries.write.format("delta").mode("overwrite").saveAsTable("Queries")

// The other metric sets from Step 1 can be persisted the same way, for example:
// jobs.write.format("delta").mode("overwrite").saveAsTable("Jobs")

In [None]:
// Step 3: Reload a previously analyzed job
// Reads back the analysis stored under stateStorePath (defined in Step 1)
// instead of running the analysis again.
val jobInsightReloaded = SparkDiagnostic.loadJobInsight(stateStorePath)

// Bind every reloaded metric set in one go; the accessors are evaluated
// left to right, in the same order as separate definitions would be.
val (queriesReloaded, jobsReloaded, stagesReloaded, tasksReloaded, executorsReloaded) = (
  jobInsightReloaded.queries,
  jobInsightReloaded.jobs,
  jobInsightReloaded.stages,
  jobInsightReloaded.tasks,
  jobInsightReloaded.executors
)

// Preview the reloaded query-level metrics.
queriesReloaded.show()

In [None]:
// Step 4: Copy Spark event logs to ABFSS
import com.microsoft.jobinsight.diagnostic.LogUtils

// Destination directory for the copied event log; replace the placeholders first.
val targetDirectory = "abfss://<container>@<storage_account>.dfs.core.windows.net/<path>/logs"
// Forwarded as the copy's async flag.
// NOTE(review): confirm whether the returned size is final when running asynchronously.
val asyncMode = true

// Reuses the job identifiers and attemptId declared in Step 1.
val contentLength = LogUtils.copyEventLog(
  workspaceId, artifactId, livyId, jobType,
  targetDirectory, asyncMode, attemptId
)

println(s"Copied event log content size: ${contentLength} bytes")