# Land To Bronze

This notebook sets up the DLT pipeline that loads data from the landing volume into the bronze layer.

In [0]:
import dlt

from pyspark.sql import functions as F

# Base path of the incoming files, read from the Spark configuration key
# "source" (presumably set in the pipeline settings — confirm). It is used
# below as the Auto Loader load path and as the root of the schema location.
source = spark.conf.get("source")


In [0]:
# Catalog and schema of the `expectations` table that get_rules() reads.
catalog = "project"
br_schema = "bronze"

# Return the rules matching the given table name and quality level as a dictionary ready to pass to a DLT expectation decorator.

def get_rules(tablename, quality):
  """
    Load the data quality rules for one table from the expectations table.

    Reads `{catalog}.{br_schema}.expectations` and keeps the rows whose
    `tablename` and `quality` columns equal the given arguments.

    :param tablename: value matched against the `tablename` column
    :param quality: value matched against the `quality` column
    :return: dictionary mapping each matching rule's `name` to its
      `constraint`; empty when no rule matches
  """
  # Compare with column expressions instead of splicing the arguments into a
  # SQL string, so a value containing a quote cannot break or alter the filter.
  matching_rules = (
    spark.table(f"{catalog}.{br_schema}.expectations")
      .where((F.col("tablename") == tablename) & (F.col("quality") == quality))
  )
  # The matching rows are collected to the driver to build the dictionary.
  return {row["name"]: row["constraint"] for row in matching_rules.collect()}

In [0]:
@dlt.view(
  name = "raw_data",
  comment="Raw data from the turbines",
)
def data_raw():
  """
    Stream the CSV files found under `source` using the cloudFiles reader.

    Column types are inferred, with `timestamp` hinted as a timestamp; the
    inferred schema is stored under `{source}/landing/inferred_schema` and
    the rescued-data column is named `_rescued_data`.

    :return: streaming DataFrame with an extra `filename` column taken from
      `_metadata.file_path`
  """
  reader_options = {
    "cloudFiles.format": "csv",
    "cloudFiles.schemaLocation": f"{source}/landing/inferred_schema",
    "rescuedDataColumn": "_rescued_data",
    "header": "true",
    "cloudFiles.inferColumnTypes": "true",
    "cloudFiles.schemaHints": "timestamp timestamp",
  }
  landing_stream = (
    spark.readStream
      .format("cloudFiles")
      .options(**reader_options)
      .load(source +"/")
  )
  # Record which input file each row came from.
  source_file = F.col("_metadata.file_path")
  return landing_stream.withColumn("filename", source_file)
  


@dlt.table(
    name = "turbine_data",
    comment="New readings",
    table_properties = {
      "quality": "bronze"
      }
)
# The rules come from the centralized expectations table and are looked up
# once, when this decorator expression is evaluated.
@dlt.expect_all_or_drop(get_rules('turbine_data','bronze'))
def data_bronze():
  """
    Build the `turbine_data` table from the `raw_data` view.

    :return: the `raw_data` stream with an added `processedtime` column
      holding the current timestamp
  """
  raw_stream = dlt.read_stream("raw_data")
  processed_at = F.current_timestamp()
  return raw_stream.withColumn("processedtime", processed_at)