In [8]:
import time, math, os, importlib,sys
import sklearn.metrics  
import arcgisscripting
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio as rio
import rasterio.mask
import rasterio.plot as rio_pl
import matplotlib.image as mpimg
from datetime import datetime

from rasterio.plot import show
from rasterio.transform import Affine
from rasterio.mask import mask
from rasterio import MemoryFile
from rasterio.profiles import DefaultGTiffProfile
from scipy.spatial import Voronoi, voronoi_plot_2d
from scipy.stats import sem
from sklearn.metrics import mean_squared_error
from shapely.geometry import box, Polygon, Point
from shapely import wkt
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
import contextily as cx

import arcpy
from arcpy.sa import *
import misc.preprocess
import misc.interpolation
arcpy.env.overwriteOutput = True

# 1. Load and Preprocess Data <a class="anchor" id="load_data"></a>

In [2]:
# Project root directory (absolute, machine-specific); every file path in this
# notebook is built from it.
path = "E:/Projects/SEACAR_WQ_Pilot/"
# Folder under the project root that holds the GIS inputs and outputs.
gis_path = "{}GIS_data/".format(path)

# Alternative project root, kept for reference (switch both settings together):
# path = "D:/Water_Quailty/"
# gis_path = path+'GIS_data/'

In [3]:
# Load the combined sample table and discard the columns that are not used
# further on (two unnamed columns, RowID and ValueQualifier).
dfAll = (
    pd.read_csv(path + "all_0214.csv")
    .drop(columns=['Unnamed: 0','Unnamed: 0.1','RowID','ValueQualifier'])
)
# Convert SampleDate from str to date (datetime.date objects, time of day dropped)
dfAll['SampleDate'] = pd.to_datetime(dfAll['SampleDate']).dt.date

  exec(code_obj, self.user_global_ns, self.user_ns)


In [4]:
# Column names of the sample table.
col_ls = [
    'RowID', 'ParameterName', 'ParameterUnits', 'ProgramLocationID',
    'ActivityType', 'ManagedAreaName', 'SampleDate', 'Year', 'Month',
    'ResultValue', 'ValueQualifier', 'Latitude_DD', 'Longitude_DD',
]

# Parameter name -> abbreviation used in file names.
dictPara = {
    "Salinity": 'S',
    'Total Nitrogen': 'TN',
    'Dissolved Oxygen': 'DO',
    'Turbidity': 'T',
    'Secchi Depth': 'SD',
}
# Parameter name -> measurement unit.
dictUnits = {
    "Salinity": "ppt",
    "Total Nitrogen": "mg/L",
    "Dissolved Oxygen": "mg/L",
    "Turbidity": "NTU",
    "Secchi Depth": "m",
}
# Parameter names and their abbreviations as parallel lists (same order as dictPara).
listPara = list(dictPara)
para_ls = list(dictPara)
para_ls_ab = list(dictPara.values())

# Convert full MA names to short names
dictArea = {
    'Gasparilla Sound-Charlotte Harbor Aquatic Preserve': 'Charlotte Harbor',
    'Big Bend Seagrasses Aquatic Preserve': 'Big Bend',
    'Guana Tolomato Matanzas National Estuarine Research Reserve': 'GTM Reserve',
    'Estero Bay Aquatic Preserve': 'Estero Bay',
    'Biscayne Bay Aquatic Preserve': 'Biscayne Bay',
    'Matlacha Pass Aquatic Preserve': 'Matlacha Pass AP',
    'Lemon Bay Aquatic Preserve': 'Lemon Bay',
    'Cape Haze Aquatic Preserve': 'Cape Haze',
    'Pine Island Sound Aquatic Preserve': 'Pine Island',
}

# Convert full MA names to MA name in ORCP_Managed_Areas_Oct2021
dictArea2 = {
    'Gasparilla Sound-Charlotte Harbor Aquatic Preserve': 'Gasparilla Sound-Charlotte Harbor',
    'Big Bend Seagrasses Aquatic Preserve': 'Big Bend Seagrasses',
    'Guana Tolomato Matanzas National Estuarine Research Reserve': 'Guana Tolomato Matanzas NERR',
    'Estero Bay Aquatic Preserve': 'Estero Bay',
    'Biscayne Bay Aquatic Preserve': 'Biscayne Bay',
    'Matlacha Pass Aquatic Preserve': 'Matlacha Pass',
    'Lemon Bay Aquatic Preserve': 'Lemon Bay',
    'Cape Haze Aquatic Preserve': 'Cape Haze',
    'Pine Island Sound Aquatic Preserve': 'Pine Island Sound',
}

# Convert full MA names to the file stem of the mask shapefile (<gis_path>/<stem>.shp).
# NOTE(review): the last four values repeat the short names from dictArea instead of
# following the abbreviated pattern of the first five -- confirm the intended stems.
dictArea3 = {
    'Gasparilla Sound-Charlotte Harbor Aquatic Preserve': 'ch',
    'Big Bend Seagrasses Aquatic Preserve': 'bb',
    'Guana Tolomato Matanzas National Estuarine Research Reserve': 'gtm',
    'Estero Bay Aquatic Preserve': 'eb',
    'Biscayne Bay Aquatic Preserve': 'bbay',
    'Matlacha Pass Aquatic Preserve': 'Matlacha Pass AP',
    'Lemon Bay Aquatic Preserve': 'Lemon Bay',
    'Cape Haze Aquatic Preserve': 'Cape Haze',
    'Pine Island Sound Aquatic Preserve': 'Pine Island',
}

# Managed areas that actually occur in the sample table.
listArea = dfAll["ManagedAreaName"].unique()
# EPSG code (as a string) of the projected CRS used for all shapefiles written here.
SpatialRef = '3086'

# 2. Combine Discrete and Continuous Data <a class="anchor" id="combine"></a>

Combine the discrete and continuous dataframes.

In [75]:
def interpolation_auto(method,dataframe,managed_area,Year,Season,start_date,end_date,parameter,covariates,out_raster,out_ga_layer,predict_std_err):
    """Prepare the GIS inputs for one managed area / season / parameter and run the interpolation.

    The samples are selected with ``misc.preprocess.select_aggr_area_season``,
    reprojected to EPSG:<SpatialRef> and written to a point shapefile; the
    managed-area boundary is extracted from ORCP_Managed_Areas_Oct2021.shp and
    written to its own shapefile; the covariate raster path(s) are built; and
    everything is handed to ``misc.interpolation.interpolation``.

    Parameters
    ----------
    method
        Passed through unchanged to ``misc.interpolation.interpolation``.
    dataframe
        Sample table forwarded to ``select_aggr_area_season``.
    managed_area : str
        Full managed-area name; must be a key of ``dictArea``, ``dictArea2``
        and ``dictArea3``.
    Year, Season
        Used to name the input point shapefile (only the first three
        characters of ``Season``) and in the printed messages.
    start_date, end_date
        Forwarded to ``select_aggr_area_season``.
    parameter : str
        Parameter name; must be a key of ``dictPara``.
    covariates : str or list/tuple of str
        Covariate folder name(s) under ``<gis_path>/covariates/``. A string
        yields a single raster path, a list/tuple yields a list of paths.
    out_raster, out_ga_layer, predict_std_err : sequence of 4 items
        Values substituted into the ``"{}/{}{}_{}"`` file-name templates of
        the output raster, the GA layer and the standard-error raster.

    Returns
    -------
    tuple
        ``(raster path, GA layer path, standard-error raster path)`` on
        success, ``(nan, nan, nan)`` when no samples were selected or when the
        interpolation raised (the error message is printed).

    Raises
    ------
    TypeError
        If ``covariates`` is neither a string nor a list/tuple.
    """
    area_short = dictArea[managed_area]
    failed = (np.nan, np.nan, np.nan)

    # Validate the covariates up front; previously an unsupported type only
    # surfaced later as an UnboundLocalError, after files had been written.
    if isinstance(covariates, str):
        in_explanatory_rasters = gis_path + "covariates/{}/{}.tif".format(covariates, area_short)
    elif isinstance(covariates, (list, tuple)):
        in_explanatory_rasters = [
            gis_path + "covariates/{}/{}.tif".format(covariate, area_short)
            for covariate in covariates
        ]
    else:
        raise TypeError(
            "covariates must be a str or a list of str, got {}".format(type(covariates).__name__)
        )

    # All file locations used below.
    input_pt = gis_path + "input_point/{}/{}{}_{}.shp".format(
        area_short, Year, Season[0:3], dictPara[parameter])
    boundary_shp = gis_path + 'managed_area_boundary/{}.shp'.format(area_short[0:3])
    raster_path = gis_path + "output_raster/{}/{}{}_{}.tif".format(*out_raster)
    ga_layer_path = gis_path + "ga_layer/{}/{}{}_{}.lyrx".format(*out_ga_layer)
    std_err_path = gis_path + 'standard_error_prediction/{}/{}{}_{}.tif'.format(*predict_std_err)
    # Named mask_shp so the imported rasterio ``mask`` function is not shadowed.
    mask_shp = gis_path + '{}.shp'.format(dictArea3[managed_area])
    # Shapefile field names are limited to 10 characters, hence the truncated name.
    value_field = "ResultValu"

    _, gdf = misc.preprocess.select_aggr_area_season(
        dataframe, start_date, end_date, managed_area, parameter)
    if len(gdf) == 0:
        # Nothing to interpolate: report it here instead of writing an empty
        # shapefile and letting the geoprocessing tool reject it.
        print(parameter + " in " + str(Year) + " " + Season + " has no samples between "
              + str(start_date) + " and " + str(end_date) + "; skipped.")
        return failed

    # Make sure every folder that is written to exists.
    for target in (input_pt, boundary_shp, raster_path, ga_layer_path, std_err_path):
        os.makedirs(os.path.dirname(target), exist_ok=True)

    gdf = gdf.to_crs(int(SpatialRef))
    gdf.to_file(input_pt, driver='ESRI Shapefile', crs="EPSG:" + SpatialRef)

    MA = gpd.read_file(gis_path + r"managed_area_boundary/ORCP_Managed_Areas_Oct2021.shp")
    boundary = MA[MA['MA_Name'] == dictArea2[managed_area]].to_crs(int(SpatialRef))
    boundary.to_file(boundary_shp, driver='ESRI Shapefile', crs="EPSG:" + SpatialRef)
    # "xmin ymin xmax ymax", built from the individual values rather than from
    # the array's display string (which numpy may round or reformat).
    extent = " ".join(str(float(bound)) for bound in boundary.geometry.total_bounds)

    try:
        misc.interpolation.interpolation(
            method = method, input_point = input_pt, out_raster = raster_path,
            z_field = value_field, out_ga_layer = ga_layer_path, extent = extent,
            mask = mask_shp, ga_to_raster = std_err_path,
            in_explanatory_rasters = in_explanatory_rasters)
    except Exception as e:
        # Best effort: report the failure and let the caller continue with the next run.
        print(parameter + " in " + str(Year) + " " + Season + " caused an error:")
        print(e.args[0] if e.args else repr(e))
        return failed
    return raster_path, ga_layer_path, std_err_path

In [7]:
# interpolation_auto(method = "rk",
#                    dataframe = dfAll,
#                    managed_area = "Big Bend Seagrasses Aquatic Preserve",
#                    Year = "2015",
#                    Season = "Summer",
#                    start_date = "2015-06-14",
#                    end_date = "2015-08-30",
#                    parameter = "Dissolved Oxygen",
#                    covariates = ["bathymetry","LDI"],
#                    out_raster = "D:/Water_Quailty/GIS_data/output_raster/Big Bend/Test.tif",
#                    out_ga_layer = "D:/Water_Quailty/GIS_data/ga_layer/Big Bend/Test.lyrx",
#                    predict_std_err = "D:/Water_Quailty/GIS_data/standard_error_prediction/Big Bend/Test.tif"
#                   )

  gdf.to_file(input_pt,driver='ESRI Shapefile',crs="EPSG:"+SpatialRef)


Start the interpolation with the RK method


NameError: name 'ExecuteError' is not defined

In [92]:
# Read the seasonality table (one row per managed area, year and season).
dfSeason = pd.read_csv("{}OEATUSF_Geospatial_TempSeasons_update.csv".format(path))
# Preview only the rows that have both a season start and a season end;
# dfSeason itself is left unchanged.
dfSeason[dfSeason[["s_start", "s_end"]].notna().all(axis=1)]

Unnamed: 0.1,Unnamed: 0,param,ma,st_Year,season,med_seamo_ma,s_start,s_end,covariates
0,145,temp,Estero Bay Aquatic Preserve,2022,Winter,25-Dec,12/25/2022,2/26/2022,bathymetry+LDI+popden
3,147,temp,Estero Bay Aquatic Preserve,2022,Summer,7-Jun,6/7/2022,8/26/2022,bathymetry+LDI+popden
4,71,temp,Biscayne Bay Aquatic Preserve,2022,Summer,1-Jun,6/1/2022,8/26/2022,bathymetry+LDI+popden
5,70,temp,Biscayne Bay Aquatic Preserve,2022,Spring,2-Mar,3/2/2022,5/31/2022,bathymetry+LDI+popden
6,146,temp,Estero Bay Aquatic Preserve,2022,Spring,27-Feb,2/27/2022,6/6/2022,bathymetry+LDI+popden
...,...,...,...,...,...,...,...,...,...
295,153,temp,Guana Tolomato Matanzas National Estuarine Res...,2002,Winter,11-Feb,2/11/2002,2/22/2002,LDI
296,152,temp,Guana Tolomato Matanzas National Estuarine Res...,2001,Fall,21-Aug,8/21/2001,2/10/2002,LDI
297,151,temp,Guana Tolomato Matanzas National Estuarine Res...,2001,Summer,24-Jun,6/24/2001,8/20/2001,LDI
298,150,temp,Guana Tolomato Matanzas National Estuarine Res...,2001,Spring,23-Feb,2/23/2001,6/23/2001,LDI


In [93]:
# Print all managed areas
display(dfSeason['ma'].unique())

# Select the first 3 managed areas (in order of appearance) and add one empty
# column per parameter to receive the path of its interpolated raster.
# `data` is built from dfSeason here so the cell also runs on a fresh kernel;
# the previous `data = data.assign(...)` needed a `data` left over from an earlier run.
# NOTE(review): the 3-area selection follows the comment above and the dropna mirrors
# the preview of the seasonality table -- confirm both match the intended run.
data = (
    dfSeason
    .dropna(subset=["s_start", "s_end"])
    .loc[lambda seasons: seasons['ma'].isin(dfSeason['ma'].unique()[:3])]
    # Renumber 0..n-1 so positional and label-based row access agree.
    .reset_index(drop=True)
    .assign(**{'Total Nitrogen': np.nan, 'Dissolved Oxygen': np.nan, 'Salinity': np.nan, 'Secchi Depth': np.nan, 'Turbidity': np.nan})
)

array(['Estero Bay Aquatic Preserve', 'Biscayne Bay Aquatic Preserve',
       'Gasparilla Sound-Charlotte Harbor Aquatic Preserve',
       'Guana Tolomato Matanzas National Estuarine Research Reserve',
       'Big Bend Seagrasses Aquatic Preserve'], dtype=object)

Run through the seasonality table

In [None]:
# Run the "rk" interpolation for the first three parameters of every row of the
# seasonality table and store the resulting raster path in that row.
for i in data.index:
    # `i` is an index label, so the row is read with .loc (results are written
    # back with .loc as well); .iloc would address a different row whenever the
    # index is not exactly 0..n-1.
    row = data.loc[i]
    for para in listPara[0:3]:
        # Pieces of the output file names: area folder, year, season, parameter code.
        name = [dictArea[row["ma"]],row["st_Year"],row["season"],dictPara[para]]
        out_raster,out_ga_layer,ga_to_raster = interpolation_auto(method = "rk",
                           dataframe = dfAll,
                           managed_area = row["ma"],
                           Year = row["st_Year"],
                           Season = row["season"],
                           start_date = row["s_start"],
                           end_date = row["s_end"],
                           parameter = para,
                           covariates = row["covariates"].split("+"),
                           out_raster = name,
                           out_ga_layer = name,
                           predict_std_err = name)
        # NaN is stored when the interpolation failed.
        data.loc[i, para] = out_raster
        # Checkpoint after every run so finished results survive an interruption.
        data.to_csv(path + 'output.csv')
        display('Interpolated row:{}, {}'.format(i,name))

  gdf.to_file(input_pt,driver='ESRI Shapefile',crs="EPSG:"+SpatialRef)


Start the interpolation with the RK method
Salinity in 2022 Winter caused an error:
Failed to execute. Parameters are not valid.
ERROR 000366: Invalid geometry type
Failed to execute (EBKRegressionPrediction).



"Interpolated row:0, ['Estero Bay', 2022, 'Winter', 'S']"

Start the interpolation with the RK method
Total Nitrogen in 2022 Winter caused an error:
Failed to execute. Parameters are not valid.
ERROR 000366: Invalid geometry type
Failed to execute (EBKRegressionPrediction).



"Interpolated row:0, ['Estero Bay', 2022, 'Winter', 'TN']"

Start the interpolation with the RK method
Dissolved Oxygen in 2022 Winter caused an error:
Failed to execute. Parameters are not valid.
ERROR 000366: Invalid geometry type
Failed to execute (EBKRegressionPrediction).

