# Failure Analysis
The below error analysis is performed using the absolute error in groundwater depth (measured in feet) 2021 predictions.

The test set is for YEAR = 2020
The last year in the train set is 2019 and it contains CURRENT_DEPTH

In [1]:
import sys
sys.path.append('..')

In [2]:
import numpy as np
import pandas as pd
import altair as alt

from lib.viz import  view_trs_side_by_side
from lib.township_range import TownshipRanges
from lib.supervised_tuning import get_model_errors, read_target_shifted_data, final_comparison_sorted
from lib.read_data import read_and_join_output_file
from lib.viz import draw_model_error_distribution, chart_model_error_by_township, chart_model_error_by_depth, chart_model_error_by_depth, chart_model_depth_diff_error

## Load the data
The data are loaded form the `..assets/predictions/` folder which contains
* the 2021 predictions of the best machine learning models (based on 2020 data)
* the 2021 predictions of the deeplearning LSTM model (based on 2014-2020 data)
* the models error measures (MAE, MSE, RMSE, etc.

Then we compute the absolute error for all the models predictions

In [3]:
# Loading models' predictions
models_predictions_df = pd.read_csv("../assets/predictions/ml_predictions.csv")
lstm_predictions_df = pd.read_csv("../assets/predictions/lstm_predictions.csv")
lstm_predictions_df.drop(columns=["2021_GSE_GWE"], inplace=True)
models_predictions_df = models_predictions_df.merge(lstm_predictions_df, how="inner", left_on=["TOWNSHIP_RANGE"], right_on=["TOWNSHIP_RANGE"])
models_predictions_df.set_index(keys=["TOWNSHIP_RANGE"], inplace=True)

# Loading models' errors
models_error_metrics_df = pd.read_csv("../assets/predictions/ml_models_errors.csv")
lstm_model_error_metrics_df = pd.read_csv("../assets/predictions/lstm_model_errors.csv")
# The LSTM model doesn't have an R^2 error so we add an empty colum
lstm_model_error_metrics_df["R^2"] = ""
models_error_metrics_df = pd.concat([models_error_metrics_df, lstm_model_error_metrics_df], axis=0, ignore_index=True)
models_error_metrics_df.set_index(keys=["MODEL"], inplace=True)

# Computing the error
models_errors_df = models_predictions_df.copy()
model_names = list(models_errors_df.columns)
model_names.remove("2021_GSE_GWE")
models_errors_df = models_errors_df[["2021_GSE_GWE"]].merge(models_errors_df[model_names].sub(models_errors_df["2021_GSE_GWE"], axis=0).abs().add_suffix("_ERROR"), how="inner", left_index=True, right_index=True)
models_errors_df.reset_index(drop=False, inplace=True)

## Results Overview
### Comparing the 2021 Predictions

In [4]:
models_predictions_df.head(15)

Unnamed: 0_level_0,2021_GSE_GWE,XGBRegressor,SVR,KNeighborsRegressor,GradientBoostingRegressor,CatBoostRegressor,RandomForestRegressor,LSTM
TOWNSHIP_RANGE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
T01N R02E,53.193636,57.088593,54.559845,67.719193,55.314644,58.568106,95.508892,56.482765
T01N R03E,32.676189,32.906597,38.231296,49.818264,46.235237,30.953399,29.534033,14.86892
T01N R04E,16.672857,20.337458,28.362341,60.111441,23.919901,21.090174,26.918781,27.609758
T01N R05E,19.476364,27.651838,29.102047,60.111441,31.077371,23.708366,26.081469,19.818386
T01N R06E,33.198,42.595863,38.353994,76.510942,43.19615,37.940042,85.370448,49.131123
T01N R07E,45.614286,54.8451,48.542082,101.400331,56.040468,52.171719,96.936805,72.915474
T01N R08E,128.276923,116.16146,98.536638,126.502788,124.621597,119.768021,118.235722,125.07508
T01N R09E,137.337692,138.8854,123.027194,133.59432,141.119616,142.654175,133.989761,158.0531
T01N R10E,179.52,192.51915,167.04846,139.09219,222.631861,192.215773,179.637085,142.08888
T01N R11E,107.955,119.69755,94.875475,88.290657,109.166181,115.206957,114.726992,114.76643


### Comparing the Models' Error Metrics

In [5]:
models_error_metrics_df

Unnamed: 0_level_0,MAE,MSE,RMSE,R^2
MODEL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
SVR,28.9124,2040.4239,45.1711,0.8783
GradientBoostingRegressor,32.6355,2436.3433,49.3593,0.8547
RandomForestRegressor,31.4379,2612.4553,51.1122,0.8442
XGBRegressor,32.0972,2892.8849,53.7855,0.8275
CatBoostRegressor,32.5387,2920.2615,54.0394,0.8258
KNeighborsRegressor,54.0581,5808.7215,76.215,0.6536
LSTM,23.793732,1208.83,34.76823,


### Comparing the Models' Errors

In [6]:
models_errors_df

Unnamed: 0,TOWNSHIP_RANGE,2021_GSE_GWE,XGBRegressor_ERROR,SVR_ERROR,KNeighborsRegressor_ERROR,GradientBoostingRegressor_ERROR,CatBoostRegressor_ERROR,RandomForestRegressor_ERROR,LSTM_ERROR
0,T01N R02E,53.193636,3.894957,1.366209,14.525556,2.121008,5.374470,42.315256,3.289129
1,T01N R03E,32.676189,0.230408,5.555107,17.142075,13.559048,1.722790,3.142156,17.807269
2,T01N R04E,16.672857,3.664601,11.689483,43.438583,7.247044,4.417317,10.245924,10.936901
3,T01N R05E,19.476364,8.175474,9.625683,40.635077,11.601008,4.232003,6.605106,0.342022
4,T01N R06E,33.198000,9.397863,5.155994,43.312942,9.998150,4.742042,52.172448,15.933123
...,...,...,...,...,...,...,...,...,...
473,T32S R26E,220.866667,23.171867,27.055718,14.193859,19.111954,1.589253,20.351219,49.835997
474,T32S R27E,151.778571,21.336971,49.752804,8.000134,24.642245,38.673318,25.597731,36.858466
475,T32S R28E,174.023077,24.782663,20.824099,18.048013,26.714577,30.508537,22.599171,16.613353
476,T32S R29E,326.627273,10.003767,1.577982,100.158535,2.813672,22.812833,10.554386,41.765547


In [7]:
col_names = list(set(models_errors_df.columns) - set(["TOWNSHIP_RANGE", "2021_GSE_GWE"]))
pd.melt(models_errors_df, id_vars=["TOWNSHIP_RANGE", "2021_GSE_GWE"],  value_vars=col_names, var_name="MODEL", value_name="ABS_ERROR")

Unnamed: 0,TOWNSHIP_RANGE,2021_GSE_GWE,MODEL,ABS_ERROR
0,T01N R02E,53.193636,CatBoostRegressor_ERROR,5.374470
1,T01N R03E,32.676189,CatBoostRegressor_ERROR,1.722790
2,T01N R04E,16.672857,CatBoostRegressor_ERROR,4.417317
3,T01N R05E,19.476364,CatBoostRegressor_ERROR,4.232003
4,T01N R06E,33.198000,CatBoostRegressor_ERROR,4.742042
...,...,...,...,...
3341,T32S R26E,220.866667,KNeighborsRegressor_ERROR,14.193859
3342,T32S R27E,151.778571,KNeighborsRegressor_ERROR,8.000134
3343,T32S R28E,174.023077,KNeighborsRegressor_ERROR,18.048013
3344,T32S R29E,326.627273,KNeighborsRegressor_ERROR,100.158535


## Prediction Error Patterns Analysis

In [8]:
draw_model_error_distribution(models_errors_df)

Observations:
* Most models have a lot of *small* prediction errors between 0~40 feet of groundwater depth.
* The K-Neighbors regressor model shows a more flat distribution of the number of prediction errors. The models has less low-error predictions, and many more
* The K-Neighbors regressor model and XGBoost Regressor models are the two models with the highest prediction errors above 340 feet of groundwater depth
* The Random Forest Regressor, Gradient Boosting Regressor and LSTM models are the models with the lowest maximum prediction error at around 140 feet.

All models have a lot of high prediction errors, but the less such errors the bettor. For that reason it seems that the best models trained on the test data and evaluated on the 2021 groundwater depth values are:
* Random Forest Regressor
* Gradient Boosting Regressor
* LSTM