Title: Interpolation Methods with ArcPy: Interpolating Temperature

Course: GIS 5572: ArcGIS II

Author(s): Mattie Gisselbeck

Date: 3-25-2023

Abstract

Previously, we built a pipeline that (1) extracts, transforms, and loads data; (2) performs QAQC operations on the imported data; (3) saves the data to a local geodatabase; and (4) then saves it to a PostgreSQL database hosted on Google Cloud. The objective of this lab was to create interpolated temperature maps for the state of Minnesota and evaluate their accuracy using the ETL and QAQC pipeline. The resulting maps and accuracy assessment will be stored in a local geodatabase and a PostgreSQL database. The interpolated maps will be viewable in ArcGIS Online Map Viewer via GeoJSON from a Flask API endpoint.

https://test11-pmz7lxrsca-uc.a.run.app

In [37]:
import arcpy
import requests
import os
import psycopg2
import random
from pathlib import Path

In [2]:
# Switch the working directory to the project folder so that the bare file
# names used in later cells (e.g. "temperature.shp") resolve against it.
os.chdir(r"\\Mac\Home\Documents\git")
# Absolute workspace path; later cells join output file names onto it.
wksp = os.getcwd()

In [None]:
1. Querying Temperature Data from PostgreSQL Database

In [None]:
# Build a query layer of station minimum temperatures from the PostGIS database
# NOTE(review): the database argument is the workspace folder joined with an
# empty file name, so no connection file is actually named here -- presumably
# the .sde file name was removed before sharing; confirm before running.
database_connection = os.path.join(wksp, "")

# SQL executed by the database: one row per station for the selected date value
station_query = "SELECT id, min_tmpf, geom FROM stations WHERE date = '2023-03'"

# Spatial reference and extent strings passed to the tool unchanged
layer_spatial_reference = 'GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]];-400 -400 1000000000;0 1;0 1;8.98315284119521E-09;2;2;IsHighPrecision'
layer_extent = '-98.0690216979786 43.2052294998382 -88.6618510633838 49.6779752981444 GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]'

arcpy.management.MakeQueryLayer(
    input_database=database_connection,
    out_layer_name="rwis_stations",
    query=station_query,
    oid_fields="id",
    shape_type="POINT",
    srid="4326",
    spatial_reference=layer_spatial_reference,
    spatial_properties="DO_NOT_DEFINE_SPATIAL_PROPERTIES",
    m_values="DO_NOT_INCLUDE_M_VALUES",
    z_values="DO_NOT_INCLUDE_Z_VALUES",
    extent=layer_extent
)

In [4]:
# Persist the "rwis_stations" query layer as temperature.shp in the workspace
temperature_copy = os.path.join(wksp, "temperature.shp")
arcpy.management.CopyFeatures(
    "rwis_stations",     # in_features
    temperature_copy,    # out_feature_class
    "",                  # config_keyword
    None,                # spatial_grid_1
    None,                # spatial_grid_2
    None                 # spatial_grid_3
)

In [None]:
2. Sampling Temperature Data

In [5]:
# Set Input Temperature .shp File Path, and Output Training and Validation .shp File Paths
input_shapefile = "temperature.shp"
training_shapefile = os.path.join(wksp, "training_shapefile.shp")
validation_shapefile = os.path.join(wksp, "validation_shapefile.shp")

# Set Training Percentage (the remainder becomes the validation set)
training_percent = 70

# Generate a List of ObjectID(s) for Features in 'input_shapefile'.
# The cursor is opened in a `with` block so it is closed as soon as the IDs are read.
with arcpy.da.SearchCursor(input_shapefile, ["OID@"]) as oid_rows:
    all_ids = [oid for (oid,) in oid_rows]

# Calculate the Number of Features to Use for Training (rounded down)
num_training = len(all_ids) * training_percent // 100

# Randomly Select the ObjectID(s) for the Training Features
training_ids = random.sample(all_ids, num_training)

# Everything not drawn for training is used for validation. Membership is
# tested against a set so the split stays linear in the number of features;
# the validation IDs keep their original order.
training_id_set = set(training_ids)
validation_ids = [oid for oid in all_ids if oid not in training_id_set]

In [6]:
def _fid_where_clause(object_ids):
    """Return an ``FID IN (...)`` SQL clause selecting exactly *object_ids*.

    Formatting a Python tuple directly yields ``(5,)`` for a single ID and
    ``()`` for none, neither of which is valid SQL, so the list is joined
    explicitly instead.

    Raises:
        ValueError: if *object_ids* is empty, because an empty IN list cannot
            be expressed and would otherwise surface as an opaque tool error.
    """
    if not object_ids:
        raise ValueError("Cannot build an FID selection from an empty ID list")
    return "FID IN ({})".format(", ".join(str(oid) for oid in object_ids))


# Create New .shp Files Using Selected ObjectID(s) for Training and Validation
training = arcpy.management.SelectLayerByAttribute(
    input_shapefile, "NEW_SELECTION", _fid_where_clause(training_ids)
)
arcpy.management.CopyFeatures(training, training_shapefile)

validation = arcpy.management.SelectLayerByAttribute(
    input_shapefile, "NEW_SELECTION", _fid_where_clause(validation_ids)
)
arcpy.management.CopyFeatures(validation, validation_shapefile)

In [None]:
3. Interpolating Temperature using ArcPy

In [None]:
* **IDW()** uses the measured values surrounding the prediction location to predict a value for any unsampled location, based on the assumption that things that are close to one another are more alike than those that are farther apart.
* **GlobalPolynomialInterpolation()** fits a smooth surface that is defined by a mathematical function (a polynomial) to the input sample points.
* **EmpiricalBayesianKriging()** is an interpolation method that accounts for the error in estimating the underlying semi-variogram through repeated simulations.

In [10]:
# All three interpolators are trained on the same shapefile. It is referenced
# by its full path so every tool reads the file on disk; previously the EBK
# call used the bare name "training_shapefile" (no ".shp"), which only
# resolves when a map layer of that name happens to exist.
training_points = os.path.join(wksp, "training_shapefile.shp")

# Inverse Distance Weighting -> TMP_IDW.tif
# NOTE(review): cell_size is in the units of the input data, presumably
# decimal degrees given the WGS 1984 query layer -- confirm.
arcpy.ddd.Idw(
    in_point_features=training_points,
    z_field="min_tmpf",
    out_raster=os.path.join(wksp, "TMP_IDW.tif"),
    cell_size=0.1,
    power=2,
    search_radius="VARIABLE 12",
    in_barrier_polyline_features=None
)

# Empirical Bayesian Kriging -> TMP_EBK.tif
arcpy.ga.EmpiricalBayesianKriging(
    in_features=training_points,
    z_field="min_tmpf",
    out_ga_layer=None,
    out_raster=os.path.join(wksp, "TMP_EBK.tif"),
    cell_size=0.1,
    transformation_type="NONE",
    max_local_points=100,
    overlap_factor=1,
    number_semivariograms=100,
    search_neighborhood="NBRTYPE=StandardCircular RADIUS=2.3 ANGLE=0 NBR_MAX=15 NBR_MIN=10 SECTOR_TYPE=ONE_SECTOR",
    output_type="PREDICTION",
    quantile_value=0.5,
    threshold_type="EXCEED",
    probability_threshold=None,
    semivariogram_model_type="POWER"
)

# Global Polynomial Interpolation (first order) -> TMP_GPI.tif
arcpy.ga.GlobalPolynomialInterpolation(
    in_features=training_points,
    z_field="min_tmpf",
    out_ga_layer=None,
    out_raster=os.path.join(wksp, "TMP_GPI.tif"),
    cell_size=0.1,
    power=1,
    weight_field=None
)

In [None]:
4. Creating a Point Accuracy Assessment

In [40]:
def create_accuracy_assessment(raster, validation_data):
    """Score one interpolated raster against the validation points.

    Writes two datasets into the workspace folder ``wksp``, both named after
    the raster's file stem:

    * ``<stem>_PointAccuracy.shp`` -- the validation points with the observed
      value (``GrndTruth``), the raster value at the point (``Classified``)
      and their squared difference (``Sq_error``).
    * ``<stem>_Statistics.dbf`` -- a one-row table holding the number of
      points, the sum of squared errors and the resulting ``RMSE``.

    Args:
        raster: Path or name of the interpolated raster to evaluate.
        validation_data: Point features carrying the observed ``min_tmpf``
            values.
    """
    raster_stem = Path(raster).stem

    # Define Output Path and Name of Ground Truth vs. Classified .shp
    acc_table = os.path.join(wksp, raster_stem + '_PointAccuracy' + '.shp')

    # Define Output Path and Name, Saves RMSE for Each Interpolation
    stat_table = os.path.join(wksp, raster_stem + '_Statistics.dbf')

    # Extract Predicted Values and Save to Validation Data
    arcpy.sa.ExtractValuesToPoints(
        in_point_features=validation_data,
        in_raster=raster,
        out_point_features=acc_table,
        interpolate_values="NONE",
        add_attributes="ALL"
    )

    # Shapefiles cannot store nulls, so points that fall on NoData cells (for
    # example outside the raster's extent) are written with RASTERVALU = -9999.
    # Drop them here; otherwise that sentinel would be squared into the RMSE.
    with arcpy.da.UpdateCursor(acc_table, ["RASTERVALU"]) as extracted_rows:
        for (predicted,) in extracted_rows:
            if predicted == -9999:
                extracted_rows.deleteRow()

    # Rename Default Fields: copy the values into new fields, then drop the
    # originals.
    arcpy.management.CalculateField(
        in_table=acc_table,
        field="GrndTruth",
        expression="!min_tmpf!",
        expression_type="PYTHON3",
        code_block="",
        field_type="FLOAT",
        enforce_domains="NO_ENFORCE_DOMAINS"
    )
    arcpy.management.CalculateField(
        in_table=acc_table,
        field="Classified",
        expression="!RASTERVALU!",
        expression_type="PYTHON3",
        code_block="",
        field_type="FLOAT",
        enforce_domains="NO_ENFORCE_DOMAINS"
    )
    arcpy.management.DeleteField(
        in_table=acc_table,
        drop_field="min_tmpf;RASTERVALU",
        method="DELETE_FIELDS"
    )

    # Calculate Squared Error
    arcpy.management.CalculateField(
        in_table=acc_table,
        field="Sq_error",
        expression="math.pow(!GrndTruth! - !Classified!, 2)",
        expression_type="PYTHON3",
        code_block="",
        field_type="FLOAT",
        enforce_domains="NO_ENFORCE_DOMAINS"
    )

    # Create Statistics Table and Calculate Squared Error Sum
    arcpy.analysis.Statistics(
        in_table=acc_table,
        out_table=stat_table,
        statistics_fields="Sq_error SUM",
        case_field=None,
        concatenation_separator=""
    )
    # RMSE = sqrt(sum of squared errors / number of points). The summary field
    # is referenced as SUM_Sq_err, the name it carries in the .dbf output.
    arcpy.management.CalculateField(
        in_table=stat_table,
        field="RMSE",
        expression="math.sqrt(!SUM_Sq_err! / !FREQUENCY!)",
        expression_type="PYTHON3",
        code_block="",
        field_type="FLOAT",
        enforce_domains="NO_ENFORCE_DOMAINS"
    )

In [12]:
# Create Lists with File Names With and Without .tif Extension
interpolations = ['TMP_IDW.tif', 'TMP_EBK.tif', 'TMP_GPI.tif']
interpolators  = ['TMP_IDW', 'TMP_EBK', 'TMP_GPI']

# Run the accuracy assessment for each interpolation.
# The function defined in this notebook is create_accuracy_assessment; the
# former call to accuracy_assessment referred to a name that does not exist.
for interpolation in interpolations:
    create_accuracy_assessment(interpolation, "validation_shapefile.shp")

In [13]:
# Create lists with the raster names of the interpolations with and without extension
interpolations = ['TMP_IDW.tif', 'TMP_EBK.tif', 'TMP_GPI.tif']
interpolators  = ['TMP_IDW', 'TMP_EBK', 'TMP_GPI']

# Run the accuracy assessment for each interpolation (create_accuracy_assessment
# is the function defined in this notebook; accuracy_assessment does not exist)
for interpolation in interpolations:
    create_accuracy_assessment(interpolation, "validation_shapefile.shp")

# Merge the per-interpolator statistics tables, in the same order as
# `interpolators`. No explicit field mapping is passed, so every field of the
# inputs (FREQUENCY, SUM_Sq_err, RMSE) is carried over; the previous mapping
# string pointed at source tables that are not among the inputs.
merged_table = "TMP_AccuracyAssessment.dbf"
arcpy.management.Merge(
    inputs=[name + "_Statistics.dbf" for name in interpolators],
    output=merged_table,
    add_source="NO_SOURCE_INFO"
)

# Add the text field that labels each row with its interpolator
# (length kept well below the 254-character limit of .dbf text fields)
arcpy.management.AddField(
    in_table=merged_table,
    field_name="Interpolat",
    field_type="TEXT",
    field_length=50
)

# Add Name of Each Interpolator to Merged Table. Rows are labelled by
# position; zip stops at whichever of the two sequences runs out first.
# The `with` block releases the cursor's locks on exit.
with arcpy.da.UpdateCursor(merged_table, ['Interpolat']) as merged_rows:
    for name, row in zip(interpolators, merged_rows):
        row[0] = name
        merged_rows.updateRow(row)

In [35]:
# Pick the interpolator whose accuracy-table row has the smallest RMSE
fields = ["Interpolat", "RMSE"]
with arcpy.da.SearchCursor('TMP_AccuracyAssessment.dbf', fields) as cursor:
    # Map interpolator name -> RMSE; a repeated name keeps its last value
    methods = {name: rmse for name, rmse in cursor}

best_interpolator = min(methods, key=lambda name: methods[name])

In [15]:
# Clip the best interpolation to the Minnesota state boundary
best_raster = best_interpolator + '.tif'
output_clip = os.path.join(wksp, best_interpolator + '_Minnesota.tif')
# Analysis extent string passed to the tool unchanged
minnesota_extent = '-97.239102895829 43.499445217943 -89.6516983029999 49.0583312990001 GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]'

out_raster = arcpy.sa.ExtractByMask(
    in_raster=best_raster,
    in_mask_data="Minnesota_StateBoundary.shp",
    extraction_area="INSIDE",
    analysis_extent=minnesota_extent
)
# The clipped raster is written to disk only once save() is called
out_raster.save(output_clip)

# Turn every cell of the clipped raster into a point feature
output_point_shp = os.path.join(wksp, best_interpolator + '.shp')
arcpy.conversion.RasterToPoint(
    in_raster=output_clip,
    out_point_features=output_point_shp,
    raster_field="Value"
)

In [None]:
6. Saving Layer(s) and Table(s) to PostgreSQL Database Using psycopg2

In [36]:
# Establish Connection to PostgreSQL Database
# Connection settings are read from the standard libpq environment variables
# (PGHOST, PGPORT, PGDATABASE, PGUSER, PGPASSWORD) when they are set, and fall
# back to the values previously hard-coded here otherwise.
# NOTE(review): the fallback host and password are still committed in this
# file; rotate the password and remove the defaults once the environment
# variables are configured.
connection = psycopg2.connect(
    host=os.environ.get('PGHOST', '34.133.121.12'),
    port=os.environ.get('PGPORT', '5432'),
    database=os.environ.get('PGDATABASE', 'lab3'),
    user=os.environ.get('PGUSER', 'postgres'),
    password=os.environ.get('PGPASSWORD', 'student'),
)

In [None]:
6.1. Accuracy Assessment Table: Temperature

In [19]:
# Define .dbf Path (merged RMSE table, resolved against the working directory)
data = 'TMP_AccuracyAssessment.dbf'

# Define Fields
fields = ["OID", "Interpolat", "FREQUENCY", "SUM_Sq_err", "RMSE"]

# Read the table rows first so the arcpy cursor is closed before any database
# work starts.
with arcpy.da.SearchCursor(data, fields) as da_cursor:
    assessment_rows = [tuple(row) for row in da_cursor]

# Create and Populate Table.
# `with connection` commits when the block succeeds and rolls back if any
# statement fails, so a failed upload does not leave the shared connection in
# an aborted transaction; `with connection.cursor()` closes the cursor.
with connection:
    with connection.cursor() as cursor:
        cursor.execute("DROP TABLE IF EXISTS TMP_PointAccuracyAssessmentTable")
        cursor.execute("""
            CREATE TABLE TMP_PointAccuracyAssessmentTable (
                OID INT,
                Interpolat VARCHAR,
                FREQUENCY INT,
                SUM_Sq_err DOUBLE PRECISION,
                RMSE DOUBLE PRECISION)
        """)
        cursor.executemany(
            "INSERT INTO TMP_PointAccuracyAssessmentTable (OID, Interpolat, FREQUENCY, SUM_Sq_err, RMSE) VALUES (%s, %s, %s, %s, %s)",
            assessment_rows
        )

In [None]:
6.2. Interpolated Points: Temperature

In [20]:
# Define Table Name (Best Interpolator). Lower case keeps the name identical
# in the unquoted CREATE TABLE statement and in the AddGeometryColumn call.
# The name comes from the notebook's own interpolator list, not from user
# input, so it is interpolated into the SQL text directly.
point_table = best_interpolator.lower()

# Define Fields
fields = ["pointid", "grid_code", "Shape@WKT"]

# Read the points first so the arcpy cursor is closed before any database
# work starts. Each row is (pointid, grid_code, WKT geometry).
with arcpy.da.SearchCursor(output_point_shp, fields) as da_cursor:
    point_rows = [tuple(row) for row in da_cursor]

# Create and Populate Table.
# `with connection` commits on success and rolls back on failure;
# `with connection.cursor()` closes the cursor afterwards.
with connection:
    with connection.cursor() as cursor:
        cursor.execute(f"DROP TABLE IF EXISTS {point_table}")
        cursor.execute(f"""
            CREATE TABLE {point_table} (
                pointid INT,
                grid_code DOUBLE PRECISION)
        """)

        # Add a 2D point geometry column in SRID 4326
        cursor.execute(f"""
            SELECT AddGeometryColumn('{point_table}', 'geom', 4326, 'POINT', 2)
        """)

        cursor.executemany(
            f"INSERT INTO {point_table} (pointid, grid_code, geom) VALUES (%s, %s, ST_GeomFromText(%s, 4326))",
            point_rows
        )

In [None]:
6.3. Point Accuracy Assessment Layer: Temperature

In [None]:
# Define .shp Path. create_accuracy_assessment writes its point output as
# <raster stem>_PointAccuracy.shp in the workspace; the former
# 'Acc_<name>.shp' path is not produced anywhere in this notebook.
data = os.path.join(wksp, best_interpolator + '_PointAccuracy.shp')

# Define Table Name (e.g., tmp_ebk_pointaccuracy). The whole name is lower
# case: PostgreSQL folds the unquoted name in CREATE TABLE to lower case,
# while AddGeometryColumn looks the table up case-sensitively, so a
# mixed-case name would not be found.
table_name = (best_interpolator + '_PointAccuracy').lower()

# Define Fields
fields = ["GrndTruth", "Classified", "Sq_error", "Shape@WKT"]

# Read the points first so the arcpy cursor is closed before any database
# work starts. Each row is (GrndTruth, Classified, Sq_error, WKT geometry).
with arcpy.da.SearchCursor(data, fields) as da_cursor:
    accuracy_rows = [tuple(row) for row in da_cursor]

# Create and Populate Table.
# `with connection` commits on success and rolls back on failure;
# `with connection.cursor()` closes the cursor afterwards.
with connection:
    with connection.cursor() as cursor:
        cursor.execute(f"DROP TABLE IF EXISTS {table_name}")
        cursor.execute(f"""
            CREATE TABLE {table_name} (
                GrndTruth DOUBLE PRECISION,
                Classified DOUBLE PRECISION,
                Sq_error DOUBLE PRECISION)
        """)

        # Add a 2D point geometry column in SRID 4326
        cursor.execute(f"""
            SELECT AddGeometryColumn('{table_name}', 'geom', 4326, 'POINT', 2)
        """)

        cursor.executemany(
            f"INSERT INTO {table_name} (GrndTruth, Classified, Sq_error, geom) VALUES (%s, %s, %s, ST_GeomFromText(%s, 4326))",
            accuracy_rows
        )