# Ingesting JSON data using Auto Loader

In [0]:
%run ../../config/project_config

In [0]:
from pyspark.sql.functions import current_timestamp, col

In [0]:
# Make sure the bronze Delta table exists before the stream writes to it.
# Only the two audit columns (load_dt, source_file) are declared here; the
# JSON payload columns are not part of this DDL.
bronze_table_ddl = f"""
    CREATE TABLE IF NOT EXISTS {CATALOG_NAME}.{SCHEMA_BRONZE}.county_metrics_json (
        load_dt TIMESTAMP,
        source_file STRING
    )
    USING DELTA
"""
spark.sql(bronze_table_ddl)

In [0]:
# Streaming state: the checkpoint root, with the Auto Loader schema store
# kept in a "schema" sub-folder beneath it.
checkpoint_base = f"/Volumes/{CATALOG_NAME}/{SCHEMA_BRONZE}/checkpoints/json_ingest"
schema_location = f"{checkpoint_base}/schema"

# Where the files are read from and which table they are written to.
source_path = f"{VOLUME_LANDING}/"
target_table = f"{CATALOG_NAME}.{SCHEMA_BRONZE}.county_metrics_json"

In [0]:
# Streaming reader over the landing folder using the "cloudFiles" source.
# - the files are parsed as multi-line JSON
# - column types are inferred, and the schema is stored at schema_location
# NOTE(review): the glob below restricts ingestion to files whose name ends in
# "chunk_3.json" - confirm this filter is intentional and not a leftover test.
df_stream = (
    spark.readStream
    .format("cloudFiles")
    .option("cloudFiles.format", "json")
    .option("cloudFiles.inferColumnTypes", "true")
    .option("cloudFiles.schemaLocation", schema_location)
    .option("pathGlobFilter", "*chunk_3.json")
    .option("multiLine", "true")
    .load(source_path)
)

In [0]:
# Add the audit columns declared on the bronze table:
#   load_dt     - timestamp taken when the row is processed
#   source_file - path of the input file, read from the _metadata column
df_processed = (
    df_stream
    .withColumn("load_dt", current_timestamp())
    .withColumn("source_file", col("_metadata.file_path"))
)

In [0]:
# Write the enriched stream into the bronze Delta table.
# - trigger(availableNow=True): process everything currently available, then stop
# - checkpointLocation: progress tracking so re-runs only pick up new files
# - mergeSchema: allow columns not yet present on the target table to be added
query = (
    df_processed.writeStream
    .trigger(availableNow=True)
    .option("checkpointLocation", checkpoint_base)
    .option("mergeSchema", "true")
    .toTable(target_table)
)

# toTable() only starts the query and returns immediately. Block here until the
# available-now run has finished so that later cells (or a calling job) see the
# fully loaded table, and so that a stream failure is raised in this cell
# instead of being lost in the background.
query.awaitTermination()