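Upload the local `*_vector_log.json` exports to the volume directory this notebook reads from (run from a terminal with the Databricks CLI):

```
bryan@ichabod mac_bucket % for file in *_vector_log.json; do
  databricks fs cp "$file" dbfs:/Volumes/geodata/petra/vector_log_raw/
done
```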


In [None]:
# Read every JSON file under the raw vector-log volume directory into one
# DataFrame.
df = spark.read.json("/Volumes/geodata/petra/vector_log_raw/")

# Preview the loaded rows.
display(df)

In [None]:
from common.transforms import string_to_iso_date, generate_hash, replace_10e30_with_null
import pyspark.sql.functions as F

# (nested source field, flat output column) pairs pulled out of the raw
# structs. "logdata.digits" is currently not selected.
nested_fields = [
    ("uwi.uwi", "uwi"),
    ("uwi.wsn", "wsn"),
    ("logdata.elev_fid", "elev_fid"),
    ("logdata.elev_zid", "elev_zid"),
    ("logdata.ldsn", "ldsn"),
    ("logdata.lsn", "lsn"),
    ("logdata.maxval", "maxval"),
    ("logdata.mean", "mean"),
    ("logdata.minval", "minval"),
    ("logdata.nullval", "nullval"),
    ("logdata.numpts", "numpts"),
    ("logdata.remarks", "remarks"),
    ("logdata.source", "source"),
    ("logdata.start", "start"),
    ("logdata.stddev", "stddev"),
    ("logdata.step", "step"),
    ("logdata.stop", "stop"),
    ("logdatax.adddate", "app_row_created"),
    ("logdatax.chgdate", "app_row_changed"),
    ("logdatax.lasid", "lasid"),
    ("logdef.desc", "desc"),
    ("logdef.logname", "logname"),
    ("logdef.units", "units"),
    ("loglas.hdrsize", "hdrsize"),
    ("loglas.lashdr", "lashdr"),
]

# Flatten: keep the top-level repo_id as-is and alias each nested field to
# its flat column name, preserving the order listed above.
df_flat = df.select(
    F.col("repo_id"),
    *(F.col(source).alias(target) for source, target in nested_fields),
)

df_vector_log = df_flat

# Enforce timestamps for the audit date columns; the same name is passed as
# both column arguments of the helper.
date_columns = ("app_row_created", "app_row_changed")
for date_col in date_columns:
    df_vector_log = string_to_iso_date(df_vector_log, date_col, date_col)

# Ensure real nulls in the numeric columns (per the helper's name, the 10e30
# sentinel is replaced with null); again the same name is passed twice.
sentinel_columns = (
    "maxval",
    "mean",
    "minval",
    "nullval",
    "start",
    "stddev",
    "step",
    "stop",
)
for sentinel_col in sentinel_columns:
    df_vector_log = replace_10e30_with_null(
        df_vector_log, sentinel_col, sentinel_col
    )

# Add the "id" hash column.
# NOTE(review): the id is derived from repo_id + uwi only. If a single well
# can carry several logs (distinct lsn/ldsn), those rows would share an id --
# confirm this is the intended key before relying on it for upserts.
id_columns = ["repo_id", "uwi"]
df_vector_log = generate_hash(df_vector_log, "id", "vector_log", *id_columns)

# Preview the flattened, cleaned result.
display(df_vector_log)

In [None]:
from common.transforms import upsert_dataframe_to_table

# Upsert the prepared vector-log DataFrame into the bronze table, then show
# whatever the helper returns.
target_table = "geodata.petra.vector_log_bronze"
result = upsert_dataframe_to_table(df_vector_log, target_table)

display(result)