
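# Overview

Builds the silver-layer well header tables (`geodata.silver.well_header_occ`, `geodata.silver.well_header_env`, `geodata.silver.well_header_sp`) from the matching `geodata.bronze` tables. Each vendor's columns are renamed to a shared set of column names, and a point geometry in SRID 4326 (WGS84) is added as `geom`.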


In [0]:
%sql
-- Make sure the target schema exists before any table is written into it.
CREATE SCHEMA IF NOT EXISTS geodata.silver;

-- Remove any existing silver well-header tables (one per vendor).
DROP TABLE IF EXISTS geodata.silver.well_header_env;
DROP TABLE IF EXISTS geodata.silver.well_header_occ;
DROP TABLE IF EXISTS geodata.silver.well_header_sp;

In [0]:
from pyspark.sql.functions import col, lit, concat, expr, coalesce, current_timestamp


# OCC

In [0]:
# Build the OCC silver well-header table: read the bronze OCC table, rename
# its columns to the silver column names, and add geometry/vendor/timestamp.
bronze_df = spark.table("geodata.bronze.well_header_occ")

SRID = 4326 # WGS84

occ_df = (
    bronze_df.select(
        # Cast first, alias last, so the output name is set explicitly on the
        # final expression instead of being carried through the cast.
        col('api').cast('string').alias('uwi_10'),
        # uwi_14 is the api value with '0000' appended.
        # NOTE(review): assumes `api` holds a 10-digit API number - confirm against bronze.
        concat(col('api').cast('string'), lit('0000')).alias('uwi_14'),
        col('sh_lat').alias('surface_latitude'),
        col('sh_lon').alias('surface_longitude'),
        # Longitude is passed before latitude; the point is tagged with SRID.
        expr(f"ST_POINT(sh_lon, sh_lat, {SRID})").alias('geom'),
        # Aliases are kept even where the name looks unchanged so the output
        # column names are exactly these lower-case spellings.
        col('county').alias('county'),
        col('operator').alias('operator'),
        col('well_name').alias('well_name'),
        col('well_num').alias('well_number'),
        col('wellstatus').alias('well_status'),
        # Constant tag identifying which vendor the row came from.
        lit('OCC').alias('vendor'),
        current_timestamp().alias('row_created'),
    )
)

# Replace the contents of the silver table with this run's output.
occ_df.write.format('delta').mode('overwrite').saveAsTable("geodata.silver.well_header_occ")

# Enverus

In [0]:
# Build the Enverus silver well-header table: read the bronze Enverus table,
# rename its columns to the silver column names, and add geometry/vendor/timestamp.
bronze_df = spark.table("geodata.bronze.well_header_env")

SRID = 4326 # WGS84

env_df = (
    bronze_df.select(
        # Cast first, alias last, so the output name is set explicitly on the
        # final expression instead of being carried through the cast.
        col('API_UWI_Unformatted').cast('string').alias('uwi_10'),
        # uwi_14 is the API_UWI_Unformatted value with '0000' appended.
        # NOTE(review): assumes API_UWI_Unformatted is always a 10-digit API - confirm against bronze.
        concat(col('API_UWI_Unformatted').cast('string'), lit('0000')).alias('uwi_14'),
        col('Latitude').alias('surface_latitude'),
        col('Longitude').alias('surface_longitude'),
        # Longitude is passed before latitude; the point is tagged with SRID.
        expr(f"ST_POINT(Longitude, Latitude, {SRID})").alias('geom'),
        col('County').alias('county'),
        col('ENVOperator').alias('operator'),
        col('WellName').alias('well_name'),
        col('WellNumber').alias('well_number'),
        col('ENVWellStatus').alias('well_status'),
        # Constant tag identifying which vendor the row came from.
        lit('ENV').alias('vendor'),
        current_timestamp().alias('row_created'),
    )
)

# Replace the contents of the silver table with this run's output.
env_df.write.format('delta').mode('overwrite').saveAsTable("geodata.silver.well_header_env")

# S&P

In [0]:

# Build the S&P silver well-header table: read the bronze S&P table, rename
# its columns to the silver column names, and add a geometry reprojected
# from ORIG_SRID to TARGET_SRID.
bronze_df = spark.table("geodata.bronze.well_header_sp")

# NAD27 is EPSG:4267 (4276 is a different coordinate reference system).
ORIG_SRID = 4267 # NAD27
TARGET_SRID = 4326 # WGS84

sp_df = (
    bronze_df.select(
        # Prefer 'Regulatory API'; when it is null, fall back to API with its
        # last four characters removed.
        # NOTE(review): unlike the OCC/Enverus cells, uwi_10/uwi_14 are not cast
        # to string here - confirm the bronze column types.
        coalesce(col('Regulatory API'), expr("substring(API, 1, length(API) - 4)")).alias('uwi_10'),
        col('API').alias('uwi_14'),
        # NOTE(review): these two columns are copied as-is (source datum), while
        # `geom` below is reprojected - confirm that is intended.
        col('Latitude').alias('surface_latitude'),
        col('Longitude').alias('surface_longitude'),
        # Longitude is passed before latitude; the point is created in
        # ORIG_SRID and then transformed to TARGET_SRID.
        expr(f"ST_TRANSFORM(ST_POINT(Longitude, Latitude, {ORIG_SRID}), {TARGET_SRID})").alias('geom'),
        col('County Name').alias('county'),
        col('Current Operator').alias('operator'),
        col('Lease Name').alias('well_name'),
        col('Well Num').alias('well_number'),
        col('Final Status').alias('well_status'),
        # Constant tag identifying which vendor the row came from.
        lit('SP').alias('vendor'),
        current_timestamp().alias('row_created'),
    )
)

# Replace the contents of the silver table with this run's output.
sp_df.write.format('delta').mode('overwrite').saveAsTable("geodata.silver.well_header_sp")

In [0]:
%sql
-- List the tables currently present in the silver schema.
SHOW TABLES FROM geodata.silver