# RK Interpolation

This notebook contains Python code that performs Regression Kriging (RK) interpolation for each waterbody — Guana Tolomato Matanzas (GTM), Estero Bay (EB), Charlotte Harbor (CH), Biscayne Bay (BB), and Big Bend Seagrasses (BBS) —
and for each water-quality parameter: Dissolved oxygen (DO_mgl), Salinity (Sal_ppt), Turbidity (Turb_ntu), Temperature (T_c), Secchi (Secc_m), and Total Nitrogen (TN_mgl), in the arcpy environment.

The analysis is conducted separately for each managed parameter — Total Nitrogen (TN_mgl), Dissolved oxygen (DO_mgl), Salinity (Sal_ppt), Turbidity (Turb_ntu), Temperature (T_c), and Secchi (Secc_m) — in the arcpy environment.

* [1. Data Preprocess](#preprocessing)
* [2. Generate Shapefiles](#create_shp)
* [3. Regression Kriging for All Stations](#rk_all)
* [4. Regression Kriging for Continuous Stations](#rk_con)

In [1]:
import pandas as pd
import numpy  as np
import arcpy
# NOTE(review): wildcard import — every public arcpy.sa name lands in the notebook namespace
from arcpy.sa import *
import os, time, math, importlib, sys
# Put the helper-module folder first on sys.path so the RK / IDW imports below resolve
# NOTE(review): presumably RK.py and IDW.py live in ../git/misc — confirm
path = r'../git/misc'
sys.path.insert(0, path)
import RK
import IDW
# pyproj can be installed with: conda install conda-forge::pyproj
import pyproj,csv

# Reload the helper modules so edits made to them are picked up without restarting the kernel
importlib.reload(RK)
importlib.reload(IDW)

# NOTE(review): this silences every warning for the rest of the notebook, including
# pandas warnings about chained assignment — consider narrowing the filter
import warnings
warnings.filterwarnings('ignore')

# 1. Preprocessing <a class="anchor" id="preprocessing"></a>

## If this is your first time running this code, run the following cell.
## If you have already generated the combined CSV, you can skip this cell and run the next one.

In [2]:
# Root folder holding every input and output of this notebook (note the trailing slash:
# file names are appended to it by plain string concatenation)
gis_path = r'E:/Projects/SEACAR_WQ_2024/GIS_Data/'

# Load the discrete and the continuous exports with identical read options
dfDis, dfCon = (
    pd.read_csv(gis_path + csv_name, low_memory=False)
    for csv_name in ('OEAT_Discrete_WQ-2024-Jan-16.csv', 'OEAT_Continuous_WQ-2024-Jan-16.csv')
)

# Stack both sources into one table with a fresh 0..n-1 index
dfAll = pd.concat([dfDis, dfCon], ignore_index=True)

### Keep only the samples taken between 09:00 and 17:00

In [3]:
# Parse the SampleDate strings into datetimes using the explicit 12-hour format below
dfAll['SampleDate'] = pd.to_datetime(dfAll['SampleDate'], format='%b %d %Y %I:%M%p')

# Daytime window; both bounds are inclusive
start_time = '09:00'
end_time = '17:00'
window_open = pd.to_datetime(start_time).time()
window_close = pd.to_datetime(end_time).time()

# Boolean mask on the time-of-day part of each sample, then keep the matching rows
in_window = dfAll['SampleDate'].dt.time.between(window_open, window_close)
dfAllTime = dfAll[in_window]
dfAllTime.head()

Unnamed: 0,RowID,ProgramID,ParameterName,ParameterUnits,ProgramLocationID,ActivityType,SampleDate,Year,Month,RelativeDepth,ResultValue,Latitude_DD,Longitude_DD,ManagedAreaName,AreaID,SEACAR_QAQCFlagCode,WaterBody,WbodyAcronym,Season
0,5062527,5014,Salinity,ppt,GTMMKNUT,Field,2017-08-03 11:24:00,2017,8,Surface,0.34,30.160736,-81.360278,Guana Tolomato Matanzas National Estuarine Res...,20,6Q,Guana Tolomato Matanzas,GTM,Summer
1,5062528,5014,Salinity,ppt,GTMMKNUT,Field,2017-09-20 09:06:00,2017,9,Surface,0.32,30.160736,-81.360278,Guana Tolomato Matanzas National Estuarine Res...,20,6Q,Guana Tolomato Matanzas,GTM,Fall
2,5062529,5014,Secchi Depth,m,GTMMKNUT,Field,2017-11-02 13:11:00,2017,11,Surface,1.2,30.160736,-81.360278,Guana Tolomato Matanzas National Estuarine Res...,20,6Q,Guana Tolomato Matanzas,GTM,Fall
3,5062606,5014,Salinity,ppt,GTMMKNUT,Field,2017-10-18 12:52:00,2017,10,Surface,0.34,30.160736,-81.360278,Guana Tolomato Matanzas National Estuarine Res...,20,6Q,Guana Tolomato Matanzas,GTM,Fall
4,5062607,5014,Salinity,ppt,GTMMKNUT,Field,2018-04-24 10:56:00,2018,4,Surface,0.33,30.160736,-81.360278,Guana Tolomato Matanzas National Estuarine Res...,20,6Q,Guana Tolomato Matanzas,GTM,Spring


In [4]:
# Full waterbody name -> acronym
area_shortnames = dict(zip(
    ['Guana Tolomato Matanzas', 'Estero Bay', 'Charlotte Harbor', 'Biscayne Bay', 'Big Bend Seagrasses'],
    ['GTM', 'EB', 'CH', 'BB', 'BBS'],
))

# Full parameter name -> short code (parameter abbreviation plus unit)
param_shortnames = dict(zip(
    ['Salinity', 'Total Nitrogen', 'Dissolved Oxygen', 'Turbidity', 'Secchi Depth', 'Water Temperature'],
    ['Sal_ppt', 'TN_mgl', 'DO_mgl', 'Turb_ntu', 'Secc_m', 'T_c'],
))

# Set input parameters
# Waterbodies in the order they were declared in area_shortnames
waterbody_names = list(area_shortnames)

# Covariate formula per waterbody acronym
covariates_dict = {
    "GTM": "LDI",
    "EB": "bathymetry+LDI+popden",
    "CH": "bathymetry+LDI+popden+water_flow_wet",
    "BB": "bathymetry+LDI+popden",
    "BBS": "bathymetry+LDI",
}

# Parameter names in alphabetical order
parameter_names = sorted(param_shortnames)
# years = unique_years
seasons = ['Fall', 'Spring', 'Summer', 'Winter']
# Folder for the per-waterbody/parameter/year/season point shapefiles
# (gis_path already ends with a slash, so plain concatenation yields a valid path)
shp_folder = gis_path + r"shapefiles"

# 2. Generate Shapefiles<a class="anchor" id="create_shp"></a>
## First, aggregate the repeated observations at each location into a single mean value per year and season.

In [5]:
# One mean ResultValue per waterbody, parameter, unit, year, season and sampling location
group_keys = ['WaterBody','ParameterName','ParameterUnits', 'Year','Season','Latitude_DD','Longitude_DD','WbodyAcronym']
dfAll_Mean = dfAllTime.groupby(group_keys)["ResultValue"].mean().reset_index()

# From here on dfAll is the aggregated table; it is the same object as dfAll_Mean, not a copy
dfAll = dfAll_Mean

### Convert coordinate system to EPSG: 3086

In [6]:
# Define the EPSG codes for source (EPSG:4326) and target (EPSG:3086) coordinate systems
source_epsg = 'EPSG:4326'
target_epsg = 'EPSG:3086'

# Create a PyProj Transformer for the conversion.
# always_xy=True fixes the axis order to (longitude, latitude) in and (x, y) out.
transformer = pyproj.Transformer.from_crs(source_epsg, target_epsg, always_xy=True)

def transform_coordinates(row):
    """Project a single row's Longitude_DD / Latitude_DD with the module-level transformer.

    Args:
        row: mapping-like object (e.g. a DataFrame row) with 'Longitude_DD' and
            'Latitude_DD' entries.

    Returns:
        pd.Series with entries 'x' and 'y' holding the projected coordinates.
    """
    x, y = transformer.transform(row['Longitude_DD'], row['Latitude_DD'])
    return pd.Series({'x': x, 'y': y})

# Project every row in one vectorized call instead of DataFrame.apply(axis=1):
# Transformer.transform accepts whole arrays, which avoids one Python-level call and one
# pd.Series construction per row.
dfAll['x'], dfAll['y'] = transformer.transform(
    dfAll['Longitude_DD'].to_numpy(),
    dfAll['Latitude_DD'].to_numpy(),
)

In [7]:
# Write the aggregated table, including the projected x / y columns, to disk (no index column)
dfAll.to_csv(gis_path + 'OEAT_All_WQ-2024-Jan-16.csv', index=False)

## Generate the shapefiles of input points for each waterbody, parameter, year, and season

#### Fill missing RowID values with unique IDs (the IDW functions require a unique ID for every row)

In [8]:
# Called for its effect on dfAll; any return value is discarded.
# NOTE(review): presumably creates/fills the 'RowID' column in place — confirm in IDW.fill_nan_rowids
IDW.fill_nan_rowids(dfAll, 'RowID')

# Keep RowID as integer
dfAll['RowID'] = dfAll['RowID'].astype(int)
# Display the table (pandas truncates the middle rows of long frames)
dfAll

Unnamed: 0,WaterBody,ParameterName,ParameterUnits,Year,Season,Latitude_DD,Longitude_DD,WbodyAcronym,ResultValue,x,y,RowID
0,Big Bend Seagrasses,Dissolved Oxygen,mg/L,2015,Fall,29.287817,-83.166083,BBS,5.849359,480883.595102,587084.408007,1
1,Big Bend Seagrasses,Dissolved Oxygen,mg/L,2015,Fall,29.813933,-83.628917,BBS,6.660736,435816.660743,645279.231505,2
2,Big Bend Seagrasses,Dissolved Oxygen,mg/L,2015,Spring,29.101817,-83.076467,BBS,7.408284,489730.191291,566492.917816,3
3,Big Bend Seagrasses,Dissolved Oxygen,mg/L,2015,Spring,29.287817,-83.166083,BBS,6.454961,480883.595102,587084.408007,4
4,Big Bend Seagrasses,Dissolved Oxygen,mg/L,2015,Spring,29.813933,-83.628917,BBS,7.590892,435816.660743,645279.231505,5
...,...,...,...,...,...,...,...,...,...,...,...,...
24379,Guana Tolomato Matanzas,Water Temperature,Degrees C,2023,Summer,29.906941,-81.298931,GTM,28.100000,660458.612741,658403.260621,24380
24380,Guana Tolomato Matanzas,Water Temperature,Degrees C,2023,Summer,30.025360,-81.370918,GTM,29.150000,653237.855737,671395.778870,24381
24381,Guana Tolomato Matanzas,Water Temperature,Degrees C,2023,Summer,30.026440,-81.369403,GTM,29.500000,653381.222140,671518.795530,24382
24382,Guana Tolomato Matanzas,Water Temperature,Degrees C,2023,Summer,30.033611,-81.353027,GTM,29.766667,654941.244725,672348.384187,24383


In [9]:
# Read the season table
seasons_all = pd.read_csv(gis_path + 'Seasons_all.csv', low_memory=False)

# Merge interested with latitude and longitude columns
seasons_all_coord = IDW.merge_with_lat_long(seasons_all, dfAll)
seasons_all_coord

Unnamed: 0,WaterBody,Year,Season,Parameter,Filename,NumDataPoints,RMSE,ME,x,y,RowID,ResultValue
0,Guana Tolomato Matanzas,2015,Fall,Total Nitrogen,,0,,,,,,
1,Guana Tolomato Matanzas,2015,Winter,Total Nitrogen,,0,,,,,,
2,Guana Tolomato Matanzas,2016,Spring,Total Nitrogen,,0,,,,,,
3,Guana Tolomato Matanzas,2016,Summer,Total Nitrogen,,0,,,,,,
4,Guana Tolomato Matanzas,2016,Fall,Total Nitrogen,,0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
4682,Big Bend Seagrasses,2022,Spring,Water Temperature,E:/Projects/SEACAR_WQ_2024/GIS_Data/raster_out...,27,-1.797693e+308,-1.797693e+308,374832.258094,689623.109934,4393,20.533333
4683,Big Bend Seagrasses,2022,Spring,Water Temperature,E:/Projects/SEACAR_WQ_2024/GIS_Data/raster_out...,27,-1.797693e+308,-1.797693e+308,371015.252225,692080.963200,4394,20.740000
4684,Big Bend Seagrasses,2022,Spring,Water Temperature,E:/Projects/SEACAR_WQ_2024/GIS_Data/raster_out...,27,-1.797693e+308,-1.797693e+308,401894.709931,699334.789795,4395,19.500000
4685,Big Bend Seagrasses,2022,Spring,Water Temperature,E:/Projects/SEACAR_WQ_2024/GIS_Data/raster_out...,27,-1.797693e+308,-1.797693e+308,401457.471573,702258.561096,4396,20.500000


### Create shapefiles

In [10]:
# Clean the shapefile folder if necessary
# RK.delete_all_files(shp_folder)
# Print number of data points in each shapefile
IDW.create_shp_season(seasons_all_coord, shp_folder)

Number of data rows for BBS, DO_mgl, 2020, Fall: 26
Shapefile for BBS: DO_mgl for year 2020 and season Fall has been saved as SHP_BBS_DO_mgl_2020_Fall.shp
Number of data rows for BBS, Sal_ppt, 2020, Fall: 21
Shapefile for BBS: Sal_ppt for year 2020 and season Fall has been saved as SHP_BBS_Sal_ppt_2020_Fall.shp
Number of data rows for BBS, Secc_m, 2020, Fall: 27
Shapefile for BBS: Secc_m for year 2020 and season Fall has been saved as SHP_BBS_Secc_m_2020_Fall.shp
Number of data rows for BBS, TN_mgl, 2020, Fall: 23
Shapefile for BBS: TN_mgl for year 2020 and season Fall has been saved as SHP_BBS_TN_mgl_2020_Fall.shp
Number of data rows for BBS, Turb_ntu, 2020, Fall: 26
Shapefile for BBS: Turb_ntu for year 2020 and season Fall has been saved as SHP_BBS_Turb_ntu_2020_Fall.shp
Number of data rows for BBS, T_c, 2020, Fall: 26
Shapefile for BBS: T_c for year 2020 and season Fall has been saved as SHP_BBS_T_c_2020_Fall.shp
Number of data rows for BBS, DO_mgl, 2020, Summer: 26
Shapefile for BB

Shapefile for BB: Turb_ntu for year 2021 and season Fall has been saved as SHP_BB_Turb_ntu_2021_Fall.shp
Number of data rows for BB, T_c, 2021, Fall: 83
Shapefile for BB: T_c for year 2021 and season Fall has been saved as SHP_BB_T_c_2021_Fall.shp
Number of data rows for BB, DO_mgl, 2021, Summer: 83
Shapefile for BB: DO_mgl for year 2021 and season Summer has been saved as SHP_BB_DO_mgl_2021_Summer.shp
Number of data rows for BB, Sal_ppt, 2021, Summer: 61
Shapefile for BB: Sal_ppt for year 2021 and season Summer has been saved as SHP_BB_Sal_ppt_2021_Summer.shp
Number of data rows for BB, Secc_m, 2021, Summer: 1
Shapefile for BB: Secc_m for year 2021 and season Summer has been saved as SHP_BB_Secc_m_2021_Summer.shp
Number of data rows for BB, TN_mgl, 2021, Summer: 76
Shapefile for BB: TN_mgl for year 2021 and season Summer has been saved as SHP_BB_TN_mgl_2021_Summer.shp
Number of data rows for BB, Turb_ntu, 2021, Summer: 61
Shapefile for BB: Turb_ntu for year 2021 and season Summer has 

Shapefile for CH: Sal_ppt for year 2016 and season Winter has been saved as SHP_CH_Sal_ppt_2016_Winter.shp
Number of data rows for CH, Secc_m, 2016, Winter: 8
Shapefile for CH: Secc_m for year 2016 and season Winter has been saved as SHP_CH_Secc_m_2016_Winter.shp
Number of data rows for CH, TN_mgl, 2016, Winter: 8
Shapefile for CH: TN_mgl for year 2016 and season Winter has been saved as SHP_CH_TN_mgl_2016_Winter.shp
Number of data rows for CH, Turb_ntu, 2016, Winter: 11
Shapefile for CH: Turb_ntu for year 2016 and season Winter has been saved as SHP_CH_Turb_ntu_2016_Winter.shp
Number of data rows for CH, T_c, 2016, Winter: 11
Shapefile for CH: T_c for year 2016 and season Winter has been saved as SHP_CH_T_c_2016_Winter.shp
Number of data rows for CH, DO_mgl, 2017, Fall: 3
Shapefile for CH: DO_mgl for year 2017 and season Fall has been saved as SHP_CH_DO_mgl_2017_Fall.shp
Number of data rows for CH, Sal_ppt, 2017, Fall: 3
Shapefile for CH: Sal_ppt for year 2017 and season Fall has been

Shapefile for EB: Turb_ntu for year 2017 and season Spring has been saved as SHP_EB_Turb_ntu_2017_Spring.shp
Number of data rows for EB, T_c, 2017, Spring: 3
Shapefile for EB: T_c for year 2017 and season Spring has been saved as SHP_EB_T_c_2017_Spring.shp
Number of data rows for EB, DO_mgl, 2017, Summer: 3
Shapefile for EB: DO_mgl for year 2017 and season Summer has been saved as SHP_EB_DO_mgl_2017_Summer.shp
Number of data rows for EB, Sal_ppt, 2017, Summer: 3
Shapefile for EB: Sal_ppt for year 2017 and season Summer has been saved as SHP_EB_Sal_ppt_2017_Summer.shp
No valid data found for area: EB, parameter: Secc_m, year: 2017, and season: Summer
No valid data found for area: EB, parameter: TN_mgl, year: 2017, and season: Summer
Number of data rows for EB, Turb_ntu, 2017, Summer: 3
Shapefile for EB: Turb_ntu for year 2017 and season Summer has been saved as SHP_EB_Turb_ntu_2017_Summer.shp
Number of data rows for EB, T_c, 2017, Summer: 3
Shapefile for EB: T_c for year 2017 and season

Shapefile for GTM: T_c for year 2017 and season Spring has been saved as SHP_GTM_T_c_2017_Spring.shp
Number of data rows for GTM, DO_mgl, 2017, Summer: 15
Shapefile for GTM: DO_mgl for year 2017 and season Summer has been saved as SHP_GTM_DO_mgl_2017_Summer.shp
Number of data rows for GTM, Sal_ppt, 2017, Summer: 16
Shapefile for GTM: Sal_ppt for year 2017 and season Summer has been saved as SHP_GTM_Sal_ppt_2017_Summer.shp
Number of data rows for GTM, Secc_m, 2017, Summer: 9
Shapefile for GTM: Secc_m for year 2017 and season Summer has been saved as SHP_GTM_Secc_m_2017_Summer.shp
Number of data rows for GTM, TN_mgl, 2017, Summer: 13
Shapefile for GTM: TN_mgl for year 2017 and season Summer has been saved as SHP_GTM_TN_mgl_2017_Summer.shp
Number of data rows for GTM, Turb_ntu, 2017, Summer: 4
Shapefile for GTM: Turb_ntu for year 2017 and season Summer has been saved as SHP_GTM_Turb_ntu_2017_Summer.shp
Number of data rows for GTM, T_c, 2017, Summer: 17
Shapefile for GTM: T_c for year 2017


# 3. Regression Kriging for both continuous and discrete data<a class="anchor" id="rk_all"></a>

## Loop for all parameters


### Clean the output folder

In [11]:
# Output locations for the RK run.
# NOTE: "out_raster_floder" is misspelt, but the RK loop cell refers to it by this exact name.
out_raster_floder = gis_path + "raster_output_rk/"
out_ga_folder     = gis_path + "ga_output_rk/"
diagnostic_folder = gis_path + "diagnostic_rk/"
# WARNING: destructive — removes the files of any previous run from the three folders above
RK.delete_all_files(out_raster_floder)
RK.delete_all_files(out_ga_folder)
RK.delete_all_files(diagnostic_folder)

In [16]:
# Ad-hoc inspection of the second row of the season table; nothing below depends on this cell
seasons_all.iloc[1]

WaterBody        Guana Tolomato Matanzas
Year                                2015
Season                            Winter
Parameter                 Total Nitrogen
Filename                             NaN
NumDataPoints                          0
RMSE                                 NaN
ME                                   NaN
Name: 1, dtype: object

In [12]:
# Run RK for every row of the season table and stream the results to a CSV file
with open(gis_path+"result_RK_all.csv", 'w', newline='') as csvfile:
    csv_writer = csv.writer(csvfile)
    # Header: the season-table columns plus the covariates used for the waterbody
    cols = list(seasons_all.columns)
    cols.append('covariates')
    csv_writer.writerow(cols)

    # Iterate by position so that .iloc stays correct even when seasons_all does not carry
    # the default 0..n-1 index. To re-run only part of the table use range(a, b) instead.
    for i in range(len(seasons_all)):
        row = seasons_all.iloc[i]
        waterbody = area_shortnames[row["WaterBody"]]
        parameter = param_shortnames[row["Parameter"]]
        covariates = covariates_dict[waterbody]

        s_time = time.time()
        process,rmse,me,count,file_loc = RK.rk_interpolation(method = "rk",
                                           folder_path = gis_path,
                                           waterbody = waterbody,
                                           parameter = parameter,
                                           year      = row["Year"],
                                           season    = row['Season'],
                                           covariates= covariates,
                                           out_raster_folder = out_raster_floder,
                                           out_ga_folder     = out_ga_folder,
                                           diagnostic_folder = diagnostic_folder)
        e_time = time.time()

        # NOTE(review): some runs report RMSE / ME of -1.79769313486e+308, which looks like a
        # no-data sentinel rather than a real score — consider mapping it to NaN downstream
        print(f"{int(e_time-s_time)} seconds elapsed for processing {count} points in {i}th row: RMSE: {rmse}, ME: {me}, file exported to {file_loc}")
        # The waterbody is written with its full name, the parameter with its short code
        csv_writer.writerow([row["WaterBody"],
                             row["Year"],
                             row['Season'],
                             parameter,
                             file_loc, count, rmse, me,
                             covariates])
        # Flush every 10 rows so partial results are on disk if this long run is interrupted
        if i%10 == 0: csvfile.flush()

No data for RK interpolation in SHP_GTM_TN_mgl_2015_Fall.shp, skipping
0 seconds elapsed for processing 0 points in 0th row: RMSE: nan, ME: nan, file exported to nan
No data for RK interpolation in SHP_GTM_TN_mgl_2015_Winter.shp, skipping
0 seconds elapsed for processing 0 points in 1th row: RMSE: nan, ME: nan, file exported to nan
No data for RK interpolation in SHP_GTM_TN_mgl_2016_Spring.shp, skipping
0 seconds elapsed for processing 0 points in 2th row: RMSE: nan, ME: nan, file exported to nan
No data for RK interpolation in SHP_GTM_TN_mgl_2016_Summer.shp, skipping
0 seconds elapsed for processing 0 points in 3th row: RMSE: nan, ME: nan, file exported to nan
No data for RK interpolation in SHP_GTM_TN_mgl_2016_Fall.shp, skipping
0 seconds elapsed for processing 0 points in 4th row: RMSE: nan, ME: nan, file exported to nan
No data for RK interpolation in SHP_GTM_TN_mgl_2016_Winter.shp, skipping
0 seconds elapsed for processing 0 points in 5th row: RMSE: nan, ME: nan, file exported to 

Processing file: SHP_BBS_TN_mgl_2021_Fall.shp
--- Time lapse: 294.26614809036255 seconds ---
294 seconds elapsed for processing 33 points in 37th row: RMSE: -1.79769313486e+308, ME: -1.79769313486e+308, file exported to E:/Projects/SEACAR_WQ_2024/GIS_Data/raster_output_rk/BBS_TN_mgl_2021_Fall_RK.tif
Processing file: SHP_BBS_TN_mgl_2021_Winter.shp
--- Time lapse: 323.93779587745667 seconds ---
324 seconds elapsed for processing 31 points in 38th row: RMSE: -1.79769313486e+308, ME: -1.79769313486e+308, file exported to E:/Projects/SEACAR_WQ_2024/GIS_Data/raster_output_rk/BBS_TN_mgl_2021_Winter_RK.tif
Processing file: SHP_BBS_TN_mgl_2022_Spring.shp
--- Time lapse: 156.80019235610962 seconds ---
156 seconds elapsed for processing 25 points in 39th row: RMSE: -1.79769313486e+308, ME: -1.79769313486e+308, file exported to E:/Projects/SEACAR_WQ_2024/GIS_Data/raster_output_rk/BBS_TN_mgl_2022_Spring_RK.tif
Sal_ppt in 2015 Fall caused an error:
ERROR 040039: Not enough data to compute method.
Fa

Processing file: SHP_BB_Sal_ppt_2022_Winter.shp
--- Time lapse: 134.27708220481873 seconds ---
134 seconds elapsed for processing 50 points in 70th row: RMSE: 3.72300308158, ME: 0.224682961631, file exported to E:/Projects/SEACAR_WQ_2024/GIS_Data/raster_output_rk/BB_Sal_ppt_2022_Winter_RK.tif
Processing file: SHP_BB_Sal_ppt_2023_Spring.shp
--- Time lapse: 79.7324607372284 seconds ---
79 seconds elapsed for processing 48 points in 71th row: RMSE: 3.34391301569, ME: 0.248323914762, file exported to E:/Projects/SEACAR_WQ_2024/GIS_Data/raster_output_rk/BB_Sal_ppt_2023_Spring_RK.tif
Processing file: SHP_BBS_Sal_ppt_2020_Summer.shp
--- Time lapse: 158.536390542984 seconds ---
158 seconds elapsed for processing 20 points in 72th row: RMSE: -1.79769313486e+308, ME: -1.79769313486e+308, file exported to E:/Projects/SEACAR_WQ_2024/GIS_Data/raster_output_rk/BBS_Sal_ppt_2020_Summer_RK.tif
Processing file: SHP_BBS_Sal_ppt_2020_Fall.shp
--- Time lapse: 158.99813532829285 seconds ---
159 seconds elap

DO_mgl in 2017 Winter caused an error:
ERROR 040039: Not enough data to compute method.
Failed to execute (EBKRegressionPrediction).

4 seconds elapsed for processing 3 points in 102th row: RMSE: nan, ME: nan, file exported to nan
Processing file: SHP_CH_DO_mgl_2018_Spring.shp
--- Time lapse: 338.3436236381531 seconds ---
338 seconds elapsed for processing 36 points in 103th row: RMSE: 0.698904322061, ME: -0.0640475843972, file exported to E:/Projects/SEACAR_WQ_2024/GIS_Data/raster_output_rk/CH_DO_mgl_2018_Spring_RK.tif
Processing file: SHP_BB_DO_mgl_2021_Summer.shp
--- Time lapse: 209.3016073703766 seconds ---
209 seconds elapsed for processing 83 points in 104th row: RMSE: 1.08321746126, ME: 0.000895091565736, file exported to E:/Projects/SEACAR_WQ_2024/GIS_Data/raster_output_rk/BB_DO_mgl_2021_Summer_RK.tif
Processing file: SHP_BB_DO_mgl_2021_Fall.shp
--- Time lapse: 223.28926277160645 seconds ---
223 seconds elapsed for processing 83 points in 105th row: RMSE: 0.815418489492, ME: -0

Turb_ntu in 2017 Fall caused an error:
ERROR 040039: Not enough data to compute method.
Failed to execute (EBKRegressionPrediction).

0 seconds elapsed for processing 3 points in 133th row: RMSE: nan, ME: nan, file exported to nan
Turb_ntu in 2017 Winter caused an error:
ERROR 040039: Not enough data to compute method.
Failed to execute (EBKRegressionPrediction).

0 seconds elapsed for processing 3 points in 134th row: RMSE: nan, ME: nan, file exported to nan
Processing file: SHP_EB_Turb_ntu_2018_Spring.shp
--- Time lapse: 39.64551115036011 seconds ---
39 seconds elapsed for processing 30 points in 135th row: RMSE: 1.49053900674, ME: 0.0388944796704, file exported to E:/Projects/SEACAR_WQ_2024/GIS_Data/raster_output_rk/EB_Turb_ntu_2018_Spring_RK.tif
Turb_ntu in 2016 Summer caused an error:
ERROR 040039: Not enough data to compute method.
Failed to execute (EBKRegressionPrediction).

4 seconds elapsed for processing 3 points in 136th row: RMSE: nan, ME: nan, file exported to nan
Turb_nt

Secc_m in 2017 Summer caused an error:
ERROR 040039: Not enough data to compute method.
Failed to execute (EBKRegressionPrediction).

1 seconds elapsed for processing 9 points in 167th row: RMSE: nan, ME: nan, file exported to nan
No data for RK interpolation in SHP_EB_Secc_m_2016_Summer.shp, skipping
0 seconds elapsed for processing 0 points in 168th row: RMSE: nan, ME: nan, file exported to nan
No data for RK interpolation in SHP_EB_Secc_m_2016_Fall.shp, skipping
0 seconds elapsed for processing 0 points in 169th row: RMSE: nan, ME: nan, file exported to nan
No data for RK interpolation in SHP_EB_Secc_m_2016_Winter.shp, skipping
0 seconds elapsed for processing 0 points in 170th row: RMSE: nan, ME: nan, file exported to nan
No data for RK interpolation in SHP_EB_Secc_m_2017_Spring.shp, skipping
0 seconds elapsed for processing 0 points in 171th row: RMSE: nan, ME: nan, file exported to nan
No data for RK interpolation in SHP_EB_Secc_m_2017_Summer.shp, skipping
0 seconds elapsed for p

T_c in 2017 Spring caused an error:
ERROR 040039: Not enough data to compute method.
Failed to execute (EBKRegressionPrediction).

1 seconds elapsed for processing 8 points in 206th row: RMSE: nan, ME: nan, file exported to nan
Processing file: SHP_GTM_T_c_2017_Summer.shp
--- Time lapse: 36.9335355758667 seconds ---
37 seconds elapsed for processing 17 points in 207th row: RMSE: 0.384421323149, ME: -0.0641628659705, file exported to E:/Projects/SEACAR_WQ_2024/GIS_Data/raster_output_rk/GTM_T_c_2017_Summer_RK.tif
T_c in 2016 Summer caused an error:
ERROR 040039: Not enough data to compute method.
Failed to execute (EBKRegressionPrediction).

0 seconds elapsed for processing 3 points in 208th row: RMSE: nan, ME: nan, file exported to nan
T_c in 2016 Fall caused an error:
ERROR 040039: Not enough data to compute method.
Failed to execute (EBKRegressionPrediction).

0 seconds elapsed for processing 3 points in 209th row: RMSE: nan, ME: nan, file exported to nan
T_c in 2016 Winter caused an 

Processing file: SHP_BBS_T_c_2021_Winter.shp
--- Time lapse: 300.27372097969055 seconds ---
300 seconds elapsed for processing 39 points in 238th row: RMSE: -1.79769313486e+308, ME: -1.79769313486e+308, file exported to E:/Projects/SEACAR_WQ_2024/GIS_Data/raster_output_rk/BBS_T_c_2021_Winter_RK.tif
Processing file: SHP_BBS_T_c_2022_Spring.shp
--- Time lapse: 192.1791341304779 seconds ---
192 seconds elapsed for processing 27 points in 239th row: RMSE: -1.79769313486e+308, ME: -1.79769313486e+308, file exported to E:/Projects/SEACAR_WQ_2024/GIS_Data/raster_output_rk/BBS_T_c_2022_Spring_RK.tif


# 4. Regression Kriging for Continuous Stations<a class="anchor" id="rk_con"></a>


In [None]:
# Read the continuous-station export for the continuous-only analysis
dfCon_orig = pd.read_csv(gis_path + "OEAT_Continuous_WQ-2024-Jan-16.csv", low_memory=False)

In [None]:
# One mean ResultValue per waterbody, parameter, year, season and station location
con_group_keys = ['WaterBody','ParameterName','Year','Season','Latitude_DD','Longitude_DD']
dfMean_con = dfCon_orig.groupby(con_group_keys)["ResultValue"].mean().reset_index()

In [None]:
# Folder for the continuous-station shapefiles
con_shp_folder = gis_path + r"shapefiles_con"

In [None]:
# WARNING: destructive — removes the files currently in the continuous shapefile folder
RK.delete_all_files(con_shp_folder)

In [None]:
# `years` is not defined anywhere earlier in the notebook (its only assignment, in the
# configuration cell, is commented out), so derive it from the data being exported.
# NOTE(review): assumes RK.create_shp_season accepts a list of integer years — confirm
years = sorted(dfMean_con['Year'].unique().tolist())
RK.create_shp_season(dfMean_con, waterbody_names, parameter_names, years, seasons, con_shp_folder)

In [None]:
# Season table for the continuous stations; the RK loop below fills in its result columns
# and writes it back to this same file
season_con = pd.read_csv(gis_path+"Seasons_con.csv")

In [None]:
# Display the continuous-station season table
season_con

In [None]:
# Run RK for the continuous stations and store the results in the season table.
# To process only some rows, iterate over range(a, b);
# to process every row, iterate over range(len(season_con)).

# 'Filename' receives string paths; cast it up front so the assignments below never have
# to change the dtype of an all-NaN (float) column.
season_con['Filename'] = season_con['Filename'].astype(object)

# for i in range(len(season_con)):
# Capped at the table length so a table with fewer than 8 rows does not raise IndexError
for i in range(min(8, len(season_con))):
    row = season_con.iloc[i]
    waterbody = area_shortnames[row["WaterBody"]]
    process,rmse,me,count,file_loc = RK.rk_interpolation(method = "rk",
                                       folder_path = gis_path,
                                       waterbody = waterbody,
                                       parameter = param_shortnames[row["Parameter"]],
                                       year      = row["Year"],
                                       season    = row['Season'],
                                       covariates= covariates_dict[waterbody],
                                       out_raster_folder = gis_path + "raster_output_con/",
                                       out_ga_folder     = gis_path + "ga_output_con/",
                                       diagnostic_folder = gis_path + "diagnostic_output_con/")
    print(process,rmse,me,count,file_loc)
    # Single-step .loc assignment: the chained form season_con[col][i:i+1] = value is not
    # guaranteed to write through to season_con (and never does under copy-on-write).
    label = season_con.index[i]
    for col, value in (('RMSE', rmse), ('ME', me), ('NumDataPoints', count), ('Filename', file_loc)):
        season_con.loc[label, col] = value
# index=False: this file is read back with pd.read_csv, and a written index would come
# back as an extra "Unnamed: 0" column on every run.
season_con.to_csv(gis_path+"Seasons_con.csv", index=False)