#### Define DapIO Object

In [None]:
import org.apache.spark.sql.{DataFrame, SparkSession, Column, Row}
import org.apache.spark.sql.functions._
import io.delta.tables.DeltaTable
import scala.jdk.CollectionConverters._
import java.sql.Timestamp
import spark.implicits._
import org.yaml.snakeyaml.Yaml
import java.io.FileWriter
import java.nio.file.{Files, Paths}
import java.nio.charset.StandardCharsets
import java.sql.Timestamp
import java.time.Instant
import org.apache.spark.sql.{DataFrame, SparkSession, Column}
import org.apache.spark.sql.functions._
import io.delta.tables.DeltaTable
import scala.jdk.CollectionConverters._
import spark.implicits._
import org.yaml.snakeyaml.DumperOptions
import java.io.{FileInputStream, InputStream}
import scala.io.Source



// 3. Spark Read / Write Utilities
/**
 * Spark read/write helpers for tables and for CSV, JSON, SQL, YAML and plain-text files.
 *
 * Every `fileName`/`path` argument is appended directly to [[basePath]]; no path separator
 * is inserted. The object relies on a `spark` session being in scope, as it is in a notebook.
 */
object DapIO {

  /** Prefix that every file name handled by this object is appended to. */
  val basePath = SchemaResolver.DAP_VOLUME

  /** Concatenates [[basePath]] and `fileName` exactly as given (no separator is added). */
  private def fullPath(fileName: String): String = s"$basePath$fileName"

  /** Maps the `overwrite` flag to the Spark save-mode name used by the writers. */
  private def saveModeName(overwrite: Boolean): String =
    if (overwrite) "overwrite" else "append"

  /**
   * Reads the table `schema.table` as a DataFrame.
   *
   * @param schema schema (database) name
   * @param table  table name
   */
  def readTable(schema: String, table: String): DataFrame =
    spark.table(s"$schema.$table")

  /**
   * Saves `df` as the table `schema.table`.
   *
   * @param df        data to persist
   * @param schema    schema (database) name
   * @param table     table name
   * @param overwrite when true (default) existing data is replaced, otherwise rows are appended
   */
  def writeTable(
      df: DataFrame,
      schema: String,
      table: String,
      overwrite: Boolean = true
    ): Unit = {
    // The flag must drive the save mode; a hard-coded "overwrite" would silently
    // discard existing rows even when the caller asked for an append.
    df.write
      .mode(saveModeName(overwrite))
      .saveAsTable(s"$schema.$table")
  }

  /**
   * Reads a text file and returns its lines joined with "\n".
   * All lines are collected to the driver, so this is meant for small files.
   *
   * @param fileName file name appended to [[basePath]]
   */
  def readFileAsString(fileName: String): String =
    spark.read
      .text(fullPath(fileName))
      .collect()
      .map(_.getString(0))
      .mkString("\n")

  /**
   * Reads a SQL script and splits it into individual statements.
   *
   * The text is split on every ';' character, each piece is trimmed and empty pieces
   * are dropped. The split is purely textual, so a ';' inside a string literal or a
   * comment also ends a statement.
   *
   * @param fileName file name appended to [[basePath]]
   * @return the non-empty statements in file order
   */
  def readSQL(fileName: String): List[String] =
    readFileAsString(fileName)
      .split(";")
      .map(_.trim)
      .filter(_.nonEmpty)
      .toList

  /**
   * Reads a comma-delimited CSV file whose first row is a header, inferring column types.
   *
   * @param fileName file name appended to [[basePath]]
   */
  def readCSV(fileName: String): DataFrame =
    spark.read
      .option("header", "true")      // first row holds the column names
      .option("inferSchema", "true")
      .option("delimiter", ",")
      .csv(fullPath(fileName))

  /**
   * Writes `df` as comma-delimited CSV with a header row, replacing existing output.
   *
   * @param df       data to write
   * @param fileName output location appended to [[basePath]]
   */
  def writeCSV(df: DataFrame, fileName: String): Unit = {
    df.write
      .mode("overwrite")             // string form of SaveMode.Overwrite; needs no extra import
      .option("header", "true")      // include column names
      .option("delimiter", ",")
      .csv(fullPath(fileName))
  }

  /**
   * Reads a JSON file with the `multiLine` option enabled.
   *
   * @param fileName file name appended to [[basePath]]
   */
  def readJSON(fileName: String): DataFrame =
    spark.read
      .option("multiLine", "true")
      .json(fullPath(fileName))

  /**
   * Writes `df` as JSON.
   *
   * @param df        data to write
   * @param path      output location appended to [[basePath]]
   * @param overwrite when true (default) existing output is replaced, otherwise data is appended
   * @return the same `df` that was passed in, so the declared return type is honoured
   */
  def writeJSON(
      df: DataFrame,
      path: String,
      overwrite: Boolean = true
    ): DataFrame = {
    df.write
      .mode(saveModeName(overwrite))
      .json(fullPath(path))
    // DataFrameWriter.json returns Unit; hand the input back to satisfy the signature.
    df
  }

  /**
   * Loads a YAML file whose root node is a mapping.
   *
   * @param fileName file name appended to [[basePath]]
   * @return the top-level entries; an empty map when the document is empty
   * @throws ClassCastException if the document root is not a mapping
   */
  def readYAML(fileName: String): Map[String, Any] = {
    val yamlContent = spark.read.textFile(fullPath(fileName)).collect().mkString("\n")

    // NOTE(review): depending on the SnakeYAML version, a default-constructed Yaml may
    // instantiate arbitrary classes named by tags in the document. Only load trusted
    // files here, or switch to a safe constructor.
    val yaml = new Yaml()

    // The explicit type argument stops Scala from inferring `Nothing` for the generic
    // `load`; Option(...) covers the null that SnakeYAML returns for an empty document.
    Option(yaml.load[java.util.Map[String, Any]](yamlContent))
      .map(_.asScala.toMap)
      .getOrElse(Map.empty[String, Any])
  }

  /**
   * Dumps `data` with SnakeYAML (block flow style) to a file.
   *
   * NOTE(review): `data` is a String, so it is presumably emitted as a single YAML string
   * scalar rather than as a structured document - confirm this is what callers expect.
   *
   * @param data     value handed to the YAML dumper
   * @param fileName output file appended to [[basePath]]; written via `java.io.FileWriter`
   */
  def writeYAML(
      data: String,
      fileName: String
    ): Unit = {
    val options = new DumperOptions()
    options.setDefaultFlowStyle(DumperOptions.FlowStyle.BLOCK)

    val yamlWriter = new Yaml(options)
    val writer = new FileWriter(fullPath(fileName))

    // Close the writer even if dumping fails, so the file handle is never leaked.
    try yamlWriter.dump(data, writer)
    finally writer.close()
  }
}
  




#### Test

In [None]:

// Smoke test: load "dap_job_list.json" through DapIO.readJSON and pass the result to display.
val df  = DapIO.readJSON("dap_job_list.json")
display(df)

In [None]:
 // Smoke test: load the YAML document through DapIO.readYAML and print each top-level entry.
 val config = DapIO.readYAML("1p-common-analytics-service-doc.yaml")
// foreach, not map: printing is a side effect and no result collection is needed.
config.foreach(println)

In [None]:
 // Smoke test: load "dap_lineage.csv" through DapIO.readCSV and pass the result to display.
 val df = DapIO.readCSV("dap_lineage.csv")
display(df)

In [None]:
// Smoke test: split the SQL script into statements with DapIO.readSQL and print each one.
val sql_stmts = DapIO.readSQL("insert_dap_pipeline_registry.sql")
sql_stmts.foreach(println)
