#### PipelineRegistry

In [0]:

/**
 * One lineage record: a task (downstream table) and the tables it reads from.
 *
 * Field names are snake_case because they become the column names when a
 * `Seq[Lineage]` is converted with `toDF()`.
 *
 * @param pipeline_name     name of the pipeline the task is recorded under
 * @param task_name         task identifier; the parser in this notebook derives it
 *                          from the last dot-separated segment of the downstream table
 * @param downstream_tables the downstream table name. NOTE(review): the field is
 *                          plural but holds a single String - confirm this matches
 *                          the target table schema.
 * @param upstream_tables   names of the tables the downstream table depends on
 */
case class Lineage(
  pipeline_name: String,
  task_name: String,
  downstream_tables: String,
  upstream_tables: Seq[String]
)

/**
 * One pipeline registry record.
 *
 * Field names are snake_case because they become the column names when a
 * `Seq[Pipeline]` is converted with `toDF()`.
 *
 * @param pipeline_name   unique name of the pipeline
 * @param `type`          pipeline type (back-ticked because `type` is a Scala keyword)
 * @param product         product the pipeline belongs to
 * @param upstream_tables names of the tables the pipeline reads from; may be empty
 */
case class Pipeline(
  pipeline_name: String,
  `type`: String,
  product: String,
  upstream_tables: Seq[String]
)



  // ---------------------------------------------------------
  // define the functions for pipeline_registry tables
  // ---------------------------------------------------------

/**
 * Helpers for parsing pipeline / lineage definitions and for reading and
 * updating the pipeline registry tables.
 *
 * The table-facing methods rely on names that must already be in scope in the
 * notebook: `spark` and its implicits (`$"..."`, `toDF`, `as[...]`), the Spark
 * SQL functions (`lit`, `typedLit`, `current_timestamp`), `DeltaTable`,
 * `DapOps`, `PipelineType` and `ProductType`.
 */
object PipelineRegistry {

  // ---------------------------------------------------------
  // Parsing helpers (pure, no Spark access)
  // ---------------------------------------------------------

  /**
   * Parses a whitespace/tab separated two-column lineage sheet into
   * `(downstream, upstream)` pairs.
   *
   * The first non-blank line is treated as the header and dropped. Blank lines
   * are ignored. Rows that only carry a downstream table (blank upstream cell)
   * are skipped, because they contribute no dependency.
   *
   * @throws IllegalArgumentException if a row has more than two columns
   */
  private def parseLineagePairs(csv: String): Seq[(String, String)] =
    csv
      .split("\n")
      .map(_.trim)
      .filter(_.nonEmpty)
      .drop(1) // header row
      .toSeq
      .flatMap { line =>
        line.split("\\s+") match {
          case Array(downstream, upstream) => Some(downstream -> upstream)
          case Array(_)                    => None // downstream with a blank upstream cell
          case _ =>
            throw new IllegalArgumentException(
              s"Malformed lineage row (expected 2 columns): '$line'"
            )
        }
      }

  /**
   * Builds one [[Lineage]] per distinct downstream table.
   *
   * NOTE(review): `task_name` is only the last segment of the downstream table
   * name, so tables that share a name across schemas (e.g.
   * `dap_metrics_pprn_v1_0.article_normalized_metrics` and
   * `dap_metrics_wos_v1_0.article_normalized_metrics`) get the same task name.
   * Confirm this is acceptable wherever `(pipeline_name, task_name)` is a key.
   *
   * @param csv          two-column lineage sheet (downstream, upstream) with a header row
   * @param pipelineName pipeline name stamped on every record
   * @return records ordered by downstream table name, each with its distinct,
   *         sorted upstream tables
   * @throws IllegalArgumentException if a row has more than two columns
   */
  def parseLineages(
      csv: String,
      pipelineName: String = "example_pipeline"
  ): Seq[Lineage] =
    parseLineagePairs(csv)
      .groupBy { case (downstream, _) => downstream }
      .toSeq
      .sortBy { case (downstream, _) => downstream } // groupBy order is unspecified
      .map { case (downstreamTable, rows) =>
        Lineage(
          pipeline_name = pipelineName,
          // task = unqualified table name
          task_name = downstreamTable.split("\\.").lastOption.getOrElse(downstreamTable),
          downstream_tables = downstreamTable,
          upstream_tables = rows.map(_._2).distinct.sorted
        )
      }

  /**
   * Parses a comma separated pipeline sheet with the columns
   * `pipeline_name, type, product, upstream_tables`.
   *
   * The first non-blank line is treated as the header and dropped. Everything
   * after the third comma is read as a comma separated list of upstream tables.
   *
   * @param csv pipeline sheet including its header row
   * @return one [[Pipeline]] per data row, in input order
   * @throws IllegalArgumentException if a row has fewer than three columns
   */
  def parsePipelines(
    csv: String
  ): Seq[Pipeline] = {

    // Trim before filtering so whitespace-only lines are dropped as well.
    val rows = csv.stripMargin
      .split("\n")
      .map(_.trim)
      .filter(_.nonEmpty)
      .drop(1) // header row
      .toSeq

    rows.map { line =>
      // Three fixed columns; the remainder (if any) is the upstream table list.
      val parts = line.split(",", 4).map(_.trim)
      require(
        parts.length >= 3,
        s"Malformed pipeline row (expected at least 3 columns): '$line'"
      )

      val upstreamTables: Seq[String] =
        if (parts.length >= 4) parts(3).split(",").map(_.trim).filter(_.nonEmpty).toSeq
        else Seq.empty

      Pipeline(
        pipeline_name = parts(0),
        `type` = parts(1).capitalize,   // e.g., "data" -> "Data"
        product = parts(2).toUpperCase,  // e.g., "wosri" -> "WOSRI"
        upstream_tables = upstreamTables
      )
    }
  }

  /**
   * Builds a lookup from downstream table to its distinct, sorted upstream tables.
   *
   * @param csv two-column lineage sheet (downstream, upstream) with a header row
   * @throws IllegalArgumentException if a row has more than two columns
   */
  def buildLineageMap(
    csv: String
  ): Map[String, List[String]] =
    parseLineagePairs(csv)
      .groupBy { case (downstream, _) => downstream }
      .map { case (downstream, rows) =>
        downstream -> rows.map(_._2).distinct.sorted.toList
      }

  // ---------------------------------------------------------
  // Registry table readers
  // ---------------------------------------------------------

  /** Registry rows whose type is `PipelineType.Data` and product is `ProductType.WOSRI`. */
  private def wosriDataPipelines(): DataFrame =
    spark.table(DapOps.REGISTRY)
      .filter(
        $"type" === lit(PipelineType.Data) &&
        $"product" === lit(ProductType.WOSRI)
      )

  /** Returns the distinct names of the registered pipelines (Data type, WOSRI product). */
  def getPipelines(): DataFrame =
    wosriDataPipelines()
      .select($"pipeline_name")
      .distinct()

  /**
   * Returns the distinct `(pipeline_name, upstream_tables)` rows of the
   * registered pipelines (Data type, WOSRI product).
   */
  def getPipelinesWithTables(): DataFrame =
    wosriDataPipelines()
      .select($"pipeline_name", $"upstream_tables")
      .distinct()

  /**
   * Collects the upstream tables registered for a pipeline into one flat list.
   *
   * Rows whose `upstream_tables` is null contribute nothing.
   *
   * @param pipelineName registry `pipeline_name` to look up
   */
  def getMasterTables(
    pipelineName: String
  ): Seq[String] =
    spark.table(DapOps.REGISTRY)
      .filter($"pipeline_name" === pipelineName)
      .select($"upstream_tables")         // column is Array[String]
      .as[Seq[String]]
      .collect()
      .toSeq
      // flatten on the driver; Option(...) guards against null arrays
      .flatMap(tables => Option(tables).getOrElse(Seq.empty[String]))

  /**
   * Returns the `upstream_tables` column of the registry rows for a pipeline,
   * without collecting it.
   *
   * @param pipelineName registry `pipeline_name` to look up
   */
  def getMasterTablesDf(
    pipelineName: String
  ): DataFrame =
    spark.table(DapOps.REGISTRY)
      .filter($"pipeline_name" === pipelineName)
      .select($"upstream_tables")

  // ---------------------------------------------------------
  // Registry table writers
  // ---------------------------------------------------------

  /**
   * Replaces `upstream_tables` on the existing registry rows of a pipeline.
   *
   * Only rows that already exist are rewritten; when the pipeline is not in
   * the registry the filtered DataFrame is empty and no row is added.
   *
   * NOTE(review): `pipelineName` is interpolated unescaped into the
   * `replaceWhere` predicate; a name containing a single quote would break it.
   *
   * @param pipelineName   registry `pipeline_name` to update
   * @param upstreamTables new upstream table list
   */
  def updatePipelineMeta(
      pipelineName: String,
      upstreamTables: Seq[String]
  ): Unit = {

    val updatedRows = spark.table(DapOps.REGISTRY)
      .filter($"pipeline_name" === pipelineName)
      .withColumn("upstream_tables", typedLit(upstreamTables))

    updatedRows.write
      .format("delta")
      .mode("overwrite")
      .option("replaceWhere", s"pipeline_name = '$pipelineName'")
      .saveAsTable(DapOps.REGISTRY)
  }

  /**
   * Upserts the upstream tables of one task into `DapOps.PIPELINE_TASK_UPSTREAM`,
   * keyed by `(pipeline_name, task_name)`.
   *
   * @param pipelineName   pipeline the task belongs to
   * @param taskName       task whose upstream tables are recorded
   * @param upstreamTables upstream tables of the task
   * @param updatedBy      value written to `updated_by`.
   *                       NOTE(review): the default `DapOps.PIPELNE` looks like a
   *                       misspelling of PIPELINE - confirm against `DapOps`.
   */
  def updateTaskMeta(
      pipelineName: String,
      taskName: String,
      upstreamTables: Seq[String],
      updatedBy: String = DapOps.PIPELNE
  ): Unit = {

    // A Column cannot be an element of the tuple handed to toDF, so the
    // timestamp is added as a column expression afterwards.
    val newRowDF = Seq((pipelineName, taskName, upstreamTables, updatedBy))
      .toDF("pipeline_name", "task_name", "upstream_tables", "updated_by")
      .withColumn("updated_at", current_timestamp())

    // Merge logic: update if the (pipeline, task) pair exists, insert if not
    DeltaTable.forName(DapOps.PIPELINE_TASK_UPSTREAM).as("t")
      .merge(
        newRowDF.as("s"),
        "t.pipeline_name = s.pipeline_name AND t.task_name = s.task_name"
      )
      .whenMatched()
      .updateExpr(Map(
        "upstream_tables" -> "s.upstream_tables",
        "updated_by" -> "s.updated_by",
        "updated_at" -> "s.updated_at"
      ))
      .whenNotMatched()
      .insertAll()
      .execute()
  }

  /**
   * Creates `tableName` as a Delta table from `df`.
   *
   * Does nothing when the table already exists, so existing data is never
   * overwritten by this method.
   *
   * @param df        rows to write
   * @param tableName name of the table to create
   */
  def saveToTable(
    df: DataFrame,
    tableName: String
  ): Unit = {

    val alreadyExists = spark.catalog.tableExists(tableName)

    if (!alreadyExists) {
      df.write
        .format("delta")
        .mode("overwrite")
        .option("mergeSchema", "true")
        .saveAsTable(tableName)
    }
  }

}


In [0]:
// The %run of the sub notebook is intentionally commented out.
//%run ./sub_notebook

// Marker output followed by an explicit notebook exit with the value "Stopping here".
// NOTE(review): presumably this keeps the Test cells below from executing when the
// notebook is run top to bottom or included from another notebook - confirm.
print("This will not print if sub_notebook exited early")
dbutils.notebook.exit("Stopping here")

#### Test

In [0]:

// Inline pipeline sheet: header row, then one pipeline per line
// (pipeline_name, type, product, upstream_tables).
// The upstream_tables column is left empty on every row.
// NOTE(review): agra-sa-metrics-meta-pipeline is listed twice, so it yields two
// identical rows when parsed - confirm whether the duplicate is intended.
val pipelineCsv = """
pipeline_name, type, product, upstream_tables
agra-sa-category-metrics-pipeline, data, wosri,  
agra-sa-jcr-metrics-pipeline, data, wosri, 
agra-sa-metrics-meta-pipeline, data, wosri, 
agra-sa-normalized-metrics-pipeline, data, wosri, 
agra-sa-researchtopics-pipeline, data, wosri, 
agra-sa-wos-dict-pipeline, data, wosri, 
agra-sa-region-pipeline, data, wosri, 
agra-sa-funding-organization-pipeline, data, wosri, 
agra-sa-grants-pipeline, data, wosri, 
agra-sa-journal-pipeline, data, wosri, 
agra-sa-dap-elt-pipeline, data, wosri, 
agra-sa-authorprofile-pipeline, data, wosri, 
agra-sa-dap-assembler-pipeline, assembler, wosri, 
agra-sa-organization-pipeline, data, wosri, 
agra-sa-doc-grant-master-pipeline, data, wosri, 
agra-sa-doc-patent-pipeline, data, wosri, 
agra-sa-doc-wos-pipeline, data, wosri, 
agra-sa-metrics-meta-pipeline, data, wosri, 
agra-sa-grants-ri-pipeline, data, wosri, 
agra-sa-societal-impact-metrics-pipeline, data, wosri, 
agra-sa-metadata-pipeline, data, wosri, 
agra-sa-dap-ldr-automation, workflow, wosri, 
agra-sa-dap-ppl-automation, workflow, wosri, 
agra-sa-research-intelligence-loader, loader, wosri, 
""".trim


In [0]:

val lineageCsv =
"""
downstream_table	upstream_table
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.alma	ag_content_ims_acs_prod.gold_wos.d_publisher
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.alma	ag_content_ims_acs_prod.gold_wos.journal_acs_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.alma	ag_content_ims_acs_prod.gold_wos.publisher_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.alma	ag_content_ims_acs_prod.gold_entity.d_alma_subscriptions
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.alma	ag_content_ims_acs_prod.gold_wos.d_edition
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.alma	ag_content_ims_acs_prod.gold_wos.d_journal_acs
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.alma	ag_content_ims_acs_prod.gold_entity.d_alma_openaccess
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.alma	ag_content_ims_acs_prod.gold_wos.f_publication
	
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.ap_article	ag_content_ims_acs_prod.gold_entity.d_orgmaster
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.ap_article	ag_content_ims_acs_prod.gold_entity.d_spmaster
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.ap_article	ag_content_ims_acs_prod.gold_wos.d_author
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.ap_article	ag_content_ims_acs_prod.gold_wos.d_researcher_citation_count_woscore
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.ap_article	ag_content_ims_acs_prod.gold_entity.orgmaster_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.ap_article	ag_content_ims_acs_prod.gold_wos.author_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.ap_article	ag_content_ims_acs_prod.gold_entity.spmaster_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.ap_article	ag_content_ims_acs_prod.gold_wos.f_publication
	
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.authorprofile	ag_content_ims_acs_prod.gold_entity.d_spmaster
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.authorprofile	ag_content_ims_acs_prod.gold_wos.d_daisng_ranked_affiliation
	
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.category	ag_content_ims_acs_prod.gold_wos.d_category
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.category_article_metrics	ag_content_ims_acs_prod.gold_entity.f_jcr_journal_jci
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.category_article_metrics	ag_content_ims_acs_prod.gold_wos.category_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.category_article_metrics	ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.wos
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.category_article_metrics	ag_content_ims_acs_prod.gold_wos.f_publication
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.category_article_metrics	ag_content_ims_acs_prod.gold_wos.journal_acs_publication_link
	
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.dept_article	ag_content_ims_acs_prod.gold_entity.d_spmaster
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.dept_article	ag_content_ims_acs_prod.gold_wos.f_publication
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.dept_article	ag_content_ims_acs_prod.gold_entity.orgmaster_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.dept_article	ag_content_ims_acs_prod.gold_entity.orgmaster_hierarchy_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.dept_article	ag_content_ims_acs_prod.gold_wos.author_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.dept_article	ag_content_ims_acs_prod.gold_entity.d_orgmaster
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.dept_article	ag_content_ims_acs_prod.gold_wos.d_organization_citation_count
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.dept_article	ag_content_ims_acs_prod.gold_wos.d_organization_citation_count_woscore
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.dept_article	ag_content_ims_acs_prod.gold_entity.spmaster_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.dept_article	ag_content_ims_acs_prod.gold_wos.d_author
	
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.funding_agency	ag_content_ims_acs_prod.gold_entity.orgmaster_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.funding_agency	ag_content_ims_acs_prod.gold_wos.funding_org_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.funding_agency	ag_ra_search_analytics_data_prod.dap_reference_v1_0.funder_catalog
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.funding_agency	ag_content_ims_acs_prod.gold_wos.f_publication
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.funding_agency	ag_content_ims_acs_prod.gold_entity.d_orgmaster
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.funding_agency	ag_content_ims_acs_prod.gold_wos.d_funding_organization
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.funding_agency	ag_content_ims_acs_prod.gold_wos.d_grant_dataitem
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.funding_agency	ag_content_ims_acs_prod.gold_entity.d_funding_organization
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.funding_agency	ag_content_ims_acs_prod.gold_entity.funding_org_publication_link
	
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.funding_agency_article_metrics	ag_content_ims_acs_prod.gold_wos.d_grant_dataitem
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.funding_agency_article_metrics	ag_content_ims_acs_prod.gold_wos.d_funding_organization
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.funding_agency_article_metrics	ag_ra_search_analytics_data_prod.dap_reference_v1_0.funder_catalog
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.funding_agency_article_metrics	ag_content_ims_acs_prod.gold_wos.funding_org_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.funding_agency_article_metrics	ag_content_ims_acs_prod.gold_entity.d_orgmaster
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.funding_agency_article_metrics	ag_content_ims_acs_prod.gold_wos.f_publication
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.funding_agency_article_metrics	ag_content_ims_acs_prod.gold_entity.orgmaster_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.funding_agency_article_metrics	ag_content_ims_acs_prod.gold_entity.d_funding_organization
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.funding_agency_article_metrics	ag_content_ims_acs_prod.gold_entity.funding_org_publication_link
	
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.jcr_metrics	ag_content_ims_acs_prod.gold_wos.f_publication
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.jcr_metrics	ag_content_ims_acs_prod.gold_entity.f_jcr_journal
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.jcr_metrics	ag_content_ims_acs_prod.gold_wos.journal_acs_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.jcr_metrics	ag_content_ims_acs_prod.gold_entity.f_jcr_journal_category
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.jcr_metrics	ag_content_ims_acs_prod.gold_wos.d_category
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.jcr_metrics	ag_content_ims_acs_prod.gold_wos.category_publication_link
	
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.jcr_metrics_more	ag_content_ims_acs_prod.gold_entity.f_jcr_journal_category
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.jcr_metrics_more	ag_content_ims_acs_prod.gold_wos.d_category
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.jcr_metrics_more	ag_content_ims_acs_prod.gold_wos.journal_acs_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.jcr_metrics_more	ag_content_ims_acs_prod.gold_wos.category_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.jcr_metrics_more	ag_content_ims_acs_prod.gold_wos.f_publication
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.jcr_metrics_more	ag_content_ims_acs_prod.gold_entity.f_jcr_journal_jci
	
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.journal	ag_content_ims_acs_prod.gold_entity.f_jcr_journal
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.journal	ag_content_ims_acs_prod.gold_wos.d_publisher
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.journal	ag_content_ims_acs_prod.gold_wos.journal_acs_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.journal	ag_content_ims_acs_prod.gold_entity.d_jcr_journals
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.journal	ag_ra_search_analytics_data_prod.dap_reference_v1_0.revised_publisher_unification
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.journal	ag_content_ims_acs_prod.gold_entity.journal_publisher_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.journal	ag_content_ims_acs_prod.gold_wos.d_journal_acs
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.journal	ag_content_ims_acs_prod.gold_entity.f_jcr_journal_category
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.journal	ag_content_ims_acs_prod.gold_entity.d_alma_openaccess
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.journal	ag_content_ims_acs_prod.gold_wos.d_category
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.journal	ag_content_ims_acs_prod.gold_wos.journal_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.journal	ag_content_ims_acs_prod.gold_wos.d_article_identifiers
	
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.org_article	ag_content_ims_acs_prod.gold_entity.d_spmaster
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.org_article	ag_content_ims_acs_prod.gold_entity.d_orgmaster
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.org_article	ag_content_ims_acs_prod.gold_wos.d_author
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.org_article	ag_content_ims_acs_prod.gold_wos.d_organization_citation_count_woscore
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.org_article	ag_content_ims_acs_prod.gold_entity.orgmaster_hierarchy_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.org_article	ag_content_ims_acs_prod.gold_wos.d_organization_citation_count
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.org_article	ag_content_ims_acs_prod.gold_entity.spmaster_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.org_article	ag_content_ims_acs_prod.gold_entity.orgmaster_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.org_article	ag_content_ims_acs_prod.gold_wos.f_publication
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.org_article	ag_content_ims_acs_prod.gold_wos.author_publication_link
	
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.organization	ag_content_ims_acs_prod.gold_entity.d_orgmaster
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.organization	ag_content_ims_acs_prod.gold_entity.d_orgmaster_association
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.organization	ag_content_ims_acs_prod.gold_entity.f_ip_institution_reputation
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.organization	ag_content_ims_acs_prod.gold_entity.d_esi_institution_indicator
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.organization	ag_content_ims_acs_prod.gold_entity.d_parent_child
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.organization	ag_content_ims_acs_prod.gold_wos.d_category
	
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.orgc_ip	ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.organization
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.orgc_ip	ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.org_article
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.orgc_ip	ag_content_ims_acs_prod.gold_entity.f_ip_institution_reputation
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.orgc_ip	ag_content_ims_acs_prod.gold_wos.d_category
	
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_content_ims_acs_prod.gold_wos.d_territory
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_content_ims_acs_prod.gold_wos.state_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_content_ims_acs_prod.gold_entity.d_nuts
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_content_ims_acs_prod.gold_wos.d_country
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_content_ims_acs_prod.gold_wos.f_publication
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_content_ims_acs_prod.gold_wos.territory_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_content_ims_acs_prod.gold_wos.nuts_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_ra_search_analytics_data_prod.dap_reference_v1_0.nuts_code_region
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_content_ims_acs_prod.gold_wos.country_territory_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_content_ims_acs_prod.gold_entity.d_nuts_code
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_content_ims_acs_prod.gold_wos.reprint_address_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_content_ims_acs_prod.gold_wos.country_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_content_ims_acs_prod.gold_wos.address_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_content_ims_acs_prod.gold_wos.d_state
	
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region_article_metrics	ag_content_ims_acs_prod.gold_entity.d_nuts_code
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region_article_metrics	ag_content_ims_acs_prod.gold_wos.country_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region_article_metrics	ag_content_ims_acs_prod.gold_wos.address_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region_article_metrics	ag_content_ims_acs_prod.gold_wos.f_publication
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region_article_metrics	ag_content_ims_acs_prod.gold_wos.d_territory
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region_article_metrics	ag_content_ims_acs_prod.gold_wos.d_country
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region_article_metrics	ag_ra_search_analytics_data_prod.dap_reference_v1_0.nuts_code_region
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region_article_metrics	ag_content_ims_acs_prod.gold_wos.territory_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region_article_metrics	ag_content_ims_acs_prod.gold_wos.nuts_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region_article_metrics	ag_content_ims_acs_prod.gold_wos.d_state
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region_article_metrics	ag_content_ims_acs_prod.gold_entity.d_nuts
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region_article_metrics	ag_content_ims_acs_prod.gold_wos.country_territory_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region_article_metrics	ag_content_ims_acs_prod.gold_wos.state_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region_article_metrics	ag_content_ims_acs_prod.gold_wos.reprint_address_publication_link
	
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_content_ims_acs_prod.gold_wos.f_publication
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_content_ims_acs_prod.gold_wos.d_state
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_content_ims_acs_prod.gold_entity.d_nuts_code
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_content_ims_acs_prod.gold_wos.country_territory_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_content_ims_acs_prod.gold_wos.nuts_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_content_ims_acs_prod.gold_entity.d_nuts
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_content_ims_acs_prod.gold_wos.d_country
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_ra_search_analytics_data_prod.dap_reference_v1_0.nuts_code_region
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_content_ims_acs_prod.gold_wos.country_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_content_ims_acs_prod.gold_wos.state_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_content_ims_acs_prod.gold_wos.territory_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_content_ims_acs_prod.gold_wos.address_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_content_ims_acs_prod.gold_wos.reprint_address_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.region	ag_content_ims_acs_prod.gold_wos.d_territory
	
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.wos	ag_content_ims_acs_prod.gold_wos.d_citation
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.wos	ag_content_ims_acs_prod.gold_entity.orgmaster_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.wos	ag_content_ims_acs_prod.gold_wos.d_oa_article
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.wos	ag_content_ims_acs_prod.gold_wos.author_publication_link
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.wos	ag_content_ims_acs_prod.gold_wos.f_publication
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.wos	ag_ra_search_analytics_data_prod.dap_work_v1_0.temp_full_wos
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.wos	ag_content_ims_acs_prod.gold_entity.d_esi_papers
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.wos	ag_content_ims_acs_prod.gold_wos.d_domestic_collab
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.wos	ag_content_ims_acs_prod.gold_wos.d_domestic_international_collabtype
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.wos	ag_content_ims_acs_prod.gold_wos.d_author
ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.wos_original	ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.wos
	
ag_ra_search_analytics_data_prod.dap_metrics_pprn_v1_0.article_normalized_metrics	ag_content_ims_acs_prod.gold_pprn.d_article_total_cites
ag_ra_search_analytics_data_prod.dap_metrics_pprn_v1_0.article_normalized_metrics	ag_content_ims_acs_prod.gold_pprn.f_category
ag_ra_search_analytics_data_prod.dap_metrics_pprn_v1_0.article_normalized_metrics	ag_content_ims_acs_prod.gold_pprn.d_article_metrics
ag_ra_search_analytics_data_prod.dap_metrics_pprn_v1_0.article_normalized_metrics	ag_content_ims_acs_prod.gold_pprn.d_publication_source_citation_count
ag_ra_search_analytics_data_prod.dap_metrics_pprn_v1_0.article_normalized_metrics	ag_content_ims_acs_prod.gold_pprn.d_yearwise_citation_count
ag_ra_search_analytics_data_prod.dap_metrics_pprn_v1_0.article_normalized_metrics	ag_content_ims_acs_prod.gold_pprn.d_domestic_international_collab
ag_ra_search_analytics_data_prod.dap_metrics_wos_v1_0.article_normalized_metrics	ag_content_ims_acs_prod.gold_wos.d_citation_patent
ag_ra_search_analytics_data_prod.dap_metrics_wos_v1_0.article_normalized_metrics	ag_content_ims_acs_prod.gold_wos.d_article_metrics
ag_ra_search_analytics_data_prod.dap_metrics_wos_v1_0.article_normalized_metrics	ag_content_ims_acs_prod.gold_wos.d_article_total_cites
ag_ra_search_analytics_data_prod.dap_metrics_wos_v1_0.article_normalized_metrics	ag_content_ims_acs_prod.gold_wos.d_publication_source_citation_count
ag_ra_search_analytics_data_prod.dap_metrics_wos_v1_0.article_normalized_metrics	ag_content_ims_acs_prod.gold_wos.d_yearwise_citation_count
ag_ra_search_analytics_data_prod.dap_metrics_wos_v1_0.article_normalized_metrics	ag_content_ims_acs_prod.gold_wos.f_category
ag_ra_search_analytics_data_prod.dap_metrics_wos_v1_0.article_normalized_metrics	ag_content_ims_acs_prod.gold_wos.d_domestic_international_collab
	
ag_ra_search_analytics_data_prod.dap_metrics_wos_v1_0.article_normalized_metrics_woscore	ag_content_ims_acs_prod.gold_wos.d_article_total_cites_woscore
ag_ra_search_analytics_data_prod.dap_metrics_wos_v1_0.article_normalized_metrics_woscore	ag_content_ims_acs_prod.gold_wos.d_publication_source_citation_count_woscore
ag_ra_search_analytics_data_prod.dap_metrics_wos_v1_0.article_normalized_metrics_woscore	ag_content_ims_acs_prod.gold_wos.f_category_woscore
ag_ra_search_analytics_data_prod.dap_metrics_wos_v1_0.article_normalized_metrics_woscore	ag_content_ims_acs_prod.gold_wos.d_article_metrics_woscore
ag_ra_search_analytics_data_prod.dap_metrics_wos_v1_0.article_normalized_metrics_woscore	ag_content_ims_acs_prod.gold_wos.d_domestic_international_collab_woscore
ag_ra_search_analytics_data_prod.dap_metrics_wos_v1_0.article_normalized_metrics_woscore	ag_content_ims_acs_prod.gold_wos.d_yearwise_citation_count_woscore
	
ag_ra_search_analytics_data_prod.dap_metrics_wos_v1_0.category_metrics	ag_content_ims_acs_prod.gold_wos.f_category_woscore
ag_ra_search_analytics_data_prod.dap_metrics_wos_v1_0.category_metrics	ag_content_ims_acs_prod.gold_wos.d_domestic_international_collab_woscore
ag_ra_search_analytics_data_prod.dap_metrics_wos_v1_0.category_metrics	ag_content_ims_acs_prod.gold_wos.d_domestic_international_collab
ag_ra_search_analytics_data_prod.dap_metrics_wos_v1_0.category_metrics	ag_content_ims_acs_prod.gold_wos.f_category
	
ag_ra_search_analytics_data_prod.dap_metrics_wos_v1_0.en_research_topics	ag_content_ims_udm_prod.topic_model.item_topic
ag_ra_search_analytics_data_prod.dap_metrics_wos_v1_0.en_societal_facet	ag_content_ims_acs_prod.gold_wos.d_category
ag_ra_search_analytics_data_prod.dap_metrics_wos_v1_0.en_societal_facet	ag_content_ims_acs_prod.gold_wos.category_publication_link
ag_ra_search_analytics_data_prod.dap_metrics_wos_v1_0.en_societal_facet	
ag_ra_search_analytics_data_prod.dap_metrics_wos_v1_0.en_societal_impact	ag_content_ims_acs_prod.gold_wos.d_article_flag
ag_ra_search_analytics_data_prod.dap_metrics_wos_v1_0.en_societal_impact	ag_ra_search_analytics_data_prod.dap_work_v1_0.patent_org
ag_ra_search_analytics_data_prod.dap_metrics_wos_v1_0.en_societal_impact	ag_content_ims_acs_prod.gold_wos.d_citation
ag_ra_search_analytics_data_prod.dap_metrics_wos_v1_0.en_societal_impact	ag_content_ims_acs_prod.gold_wos.d_citation_patent
ag_ra_search_analytics_data_prod.dap_metrics_wos_v1_0.en_societal_impact	
	
ag_ra_search_analytics_data_prod.dap_grant_v1_0.grants_ri_authorprofile	ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.authorprofile
ag_ra_search_analytics_data_prod.dap_grant_v1_0.grants_ri_authorprofile	ag_content_ims_udm_prod.researcher_grants_relationship.profile_grant_relation
ag_ra_search_analytics_data_prod.dap_grant_v1_0.grants_ri_grant_funder	ag_content_ims_acs_prod.gold_entity.d_funding_organization
ag_ra_search_analytics_data_prod.dap_grant_v1_0.grants_ri_grant_funder	ag_content_ims_acs_prod.gold_wos.funding_org_publication_link
ag_ra_search_analytics_data_prod.dap_grant_v1_0.grants_ri_grant_funder	
ag_ra_search_analytics_data_prod.dap_grant_v1_0.grants_ri_grant_org	ag_content_ims_acs_prod.gold_wos.funding_org_publication_link
ag_ra_search_analytics_data_prod.dap_grant_v1_0.grants_ri_grant_org	ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.organization
ag_ra_search_analytics_data_prod.dap_grant_v1_0.grants_ri_grant_org	ag_content_ims_acs_prod.gold_entity.orgmaster_publication_link
ag_ra_search_analytics_data_prod.dap_grant_v1_0.grants_ri_grant_org	
ag_ra_search_analytics_data_prod.dap_grant_v1_0.grants_ri_grant_ut_funder	ag_content_ims_acs_prod.gold_wos.funding_org_publication_link
ag_ra_search_analytics_data_prod.dap_grant_v1_0.grants_ri_grant_ut_funder	ag_content_ims_acs_prod.gold_entity.d_funding_organization
ag_ra_search_analytics_data_prod.dap_grant_v1_0.grants_ri_grant_ut_funder	
ag_ra_search_analytics_data_prod.dap_grant_v1_0.grants_ri_grant_ut_org	ag_ra_search_analytics_data_prod.dap_entity_wos_v1_0.organization
ag_ra_search_analytics_data_prod.dap_grant_v1_0.grants_ri_grant_ut_org	ag_content_ims_acs_prod.gold_wos.funding_org_publication_link
ag_ra_search_analytics_data_prod.dap_grant_v1_0.grants_ri_grant_ut_org	ag_content_ims_acs_prod.gold_entity.orgmaster_publication_link
ag_ra_search_analytics_data_prod.dap_grant_v1_0.grants_ri_grant_ut_org	
ag_ra_search_analytics_data_prod.dap_grant_v1_0.grants_ri_grants_article	ag_content_ims_acs_prod.gold_wos.funding_org_publication_link
ag_ra_search_analytics_data_prod.dap_grant_v1_0.grants_ri_grants_article	ag_ra_search_analytics_data_prod.dap_prod_core_v1_0.incites_wos
ag_ra_search_analytics_data_prod.dap_grant_v1_0.grants_ri_research_topic	ag_content_ims_udm_prod.topic_model.grants_topic
ag_ra_search_analytics_data_prod.dap_grant_v1_0.incites_ri_funding_organizations	ag_content_ims_acs_prod.gold_wos.funding_org_publication_link
ag_ra_search_analytics_data_prod.dap_grant_v1_0.incites_ri_funding_organizations	ag_content_ims_acs_prod.gold_entity.d_grantmaster_grantid
ag_ra_search_analytics_data_prod.dap_grant_v1_0.incites_ri_funding_organizations	ag_content_ims_acs_prod.gold_entity.d_funding_organization
ag_ra_search_analytics_data_prod.dap_grant_v1_0.incites_ri_funding_organizations	ag_ra_search_analytics_data_prod.dap_grant_v1_0.incites_ri_org_grants
ag_ra_search_analytics_data_prod.dap_grant_v1_0.incites_ri_org_grants	ag_content_ims_acs_prod.gold_wos.funding_org_publication_link
ag_ra_search_analytics_data_prod.dap_grant_v1_0.incites_ri_org_grants	ag_ra_search_analytics_data_prod.dap_work_v1_0.grants_ri_grants_items
ag_ra_search_analytics_data_prod.dap_grant_v1_0.incites_ri_org_grants	ag_ra_search_analytics_data_prod.dap_grant_v1_0.grants_ri_grant_funder
ag_ra_search_analytics_data_prod.dap_grant_v1_0.incites_ri_org_grants	ag_content_ims_acs_prod.gold_entity.d_grantmaster_grantid
ag_ra_search_analytics_data_prod.dap_grant_v1_0.incites_ri_organizations	ag_content_ims_acs_prod.gold_entity.orgmaster_publication_link
ag_ra_search_analytics_data_prod.dap_grant_v1_0.incites_ri_organizations	ag_content_ims_acs_prod.gold_entity.d_orgmaster
ag_ra_search_analytics_data_prod.dap_grant_v1_0.incites_ri_organizations	ag_content_ims_acs_prod.gold_wos.funding_org_publication_link
ag_ra_search_analytics_data_prod.dap_grant_v1_0.incites_ri_organizations	ag_content_ims_acs_prod.gold_entity.d_grantmaster_grantid
""".trim

In [0]:

// Usage: build the downstream -> upstream lookup from the inline lineage sheet
val lineageMap = PipelineRegistry.buildLineageMap(lineageCsv)

// Show one (downstream, upstreams) entry per line.
// foreach, not map: printing is a side effect and no result collection is needed.
lineageMap.foreach(println)


In [0]:
// Usage: the pipeline sheet must go through parsePipelines (not parseLineages),
// which produces the Pipeline rows that are saved to the registry table below.
val pipelinesDF = PipelineRegistry.parsePipelines(pipelineCsv).toDF()

// Show
display(pipelinesDF)

// Safety switch: set to false to actually create the registry table.
val dryRun: Boolean = true
if (!dryRun) {
  PipelineRegistry.saveToTable(pipelinesDF, DapOps.REGISTRY)
}


In [0]:
// Usage: the lineage sheet must go through parseLineages (not parsePipelines),
// which produces the Lineage rows that are saved to the task-upstream table below.
val lineageDF = PipelineRegistry.parseLineages(lineageCsv).toDF()

// Show
display(lineageDF)

// Safety switch: set to false to actually create the task-upstream table.
val dryRun: Boolean = true
if (!dryRun) {
  PipelineRegistry.saveToTable(lineageDF, DapOps.PIPELINE_TASK_UPSTREAM)
}
