# Model Testing


In [1]:
from scripts.preprocessing.data_loader import get_train_test_splits, get_combined_dataset
from scripts.preprocessing.preprocessor import apply_minmax_scaling
from scripts.models.model_list import load_models
from scripts.models.model_testing import fit_and_evaluate_model
from IPython.display import display

# Split the data into train/test partitions; the recorded output of this cell
# lists train years 2015-2083 and test years 2084-2100.
X_train, X_test, y_train, y_test = get_train_test_splits(test_size=0.2)

# Min-max scale both partitions.
# NOTE(review): the test partition goes through its own apply_minmax_scaling
# call instead of reusing X_train_scaler / y_train_scaler. If the helper fits
# fresh scalers on whatever it is given, train and test end up on different
# scales -- confirm against scripts.preprocessing.preprocessor.
X_train, y_train, X_train_scaler, y_train_scaler = apply_minmax_scaling(X_train, y_train)
X_test, y_test, X_test_scaler, y_test_scaler = apply_minmax_scaling(X_test, y_test)

# Single combined frame, displayed as a visual sanity check of the scaled data.
df = get_combined_dataset(X_train, y_train, X_test, y_test)
display(df)

# Model registries used by the testing sections further down the notebook.
(
    TEST_SET,
    ALL_SINGLE_TARGET_MODELS,
    REFINED_SINGLE_TARGET_MODELS,
    ALL_MULTI_TARGET_MODELS,
    REFINED_MULTI_TARGET_MODELS,
) = load_models()
print(f"Number of single-target models: {len(ALL_SINGLE_TARGET_MODELS)}")
print(f"Number of multi-target models: {len(ALL_MULTI_TARGET_MODELS)}")

Train years: 2015 to 2083
Test years: 2084 to 2100


Unnamed: 0,x,y,bedrock_elevation,precipitation,air_temperature,ocean_temperature,year,distance_to_pole,bedrock_below_sea_level,set,ice_thickness,ice_velocity,ice_mask
0,0.0,1.0,0.118762,0.180874,0.795808,0.268660,2015,0.0,1.0,train,0.0,0.0,1.0
1,0.0,1.0,0.163009,0.175715,0.793057,0.258708,2015,0.0,1.0,train,0.0,0.0,1.0
2,0.0,1.0,0.042803,0.173047,0.791427,0.251029,2015,0.0,1.0,train,0.0,0.0,1.0
3,0.0,1.0,0.135461,0.178002,0.790789,0.245280,2015,0.0,1.0,train,0.0,0.0,1.0
4,0.0,1.0,0.087251,0.190951,0.790841,0.240754,2015,0.0,1.0,train,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
194097,0.0,0.0,0.347665,0.592466,0.831212,0.272332,2100,0.0,1.0,test,0.0,0.0,1.0
194098,0.0,0.0,0.326669,0.557774,0.838175,0.285703,2100,0.0,1.0,test,0.0,0.0,1.0
194099,0.0,0.0,0.365826,0.535849,0.843655,0.296153,2100,0.0,1.0,test,0.0,0.0,1.0
194100,0.0,0.0,0.416587,0.533697,0.851775,0.315210,2100,0.0,1.0,test,0.0,0.0,1.0


Number of single-target models: 25
Number of multi-target models: 10


## Single-Target Models


### Broad Testing


In [5]:
# Fit and score every entry of ALL_SINGLE_TARGET_MODELS; the evaluation output
# is stored under the model's name.
single_target_results = {}
for name, model in ALL_SINGLE_TARGET_MODELS:
    print(f"Evaluating {name} for single-target regression:")
    single_target_results[name] = fit_and_evaluate_model(
        model,
        X_train,
        y_train,
        X_test,
        y_test,
        multi=False,
        cv=5,
    )

# Render the collected results in a second pass, once every model has been
# evaluated, so the progress messages above are not interleaved with tables.
for name, results in single_target_results.items():
    print(f"\n{name}:")
    display(results)

Evaluating Linear Regression for single-target regression:
Evaluating Ridge for single-target regression:
Evaluating Lasso for single-target regression:
Evaluating ElasticNet for single-target regression:
Evaluating Lars for single-target regression:
Evaluating LassoLars for single-target regression:
Evaluating OrthogonalMatchingPursuit for single-target regression:
Evaluating BayesianRidge for single-target regression:
Evaluating ARDRegression for single-target regression:
Evaluating SGDRegressor for single-target regression:
Evaluating PassiveAggressiveRegressor for single-target regression:
Evaluating HuberRegressor for single-target regression:
Evaluating RANSACRegressor for single-target regression:




Evaluating TheilSenRegressor for single-target regression:
Evaluating LinearSVR for single-target regression:




Evaluating KNeighborsRegressor for single-target regression:
Evaluating DecisionTreeRegressor for single-target regression:
Evaluating RandomForestRegressor for single-target regression:
Evaluating ExtraTreesRegressor for single-target regression:
Evaluating GradientBoostingRegressor for single-target regression:
Evaluating AdaBoostRegressor for single-target regression:
Evaluating MLPRegressor for single-target regression:
Evaluating PLSRegression for single-target regression:
Evaluating XGBoost for single-target regression:
Evaluating LightGBM for single-target regression:
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000573 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1083
[LightGBM] [Info] Number of data points in the train set: 124586, number of used features: 8
[LightGBM] [Info] Start training from score 0.168956
[LightGBM] [

Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.007701,0.087756,0.060153,0.880783
1,ice_velocity,0.000157,0.012547,0.0033,0.028451
2,ice_mask,0.042598,0.206393,0.150208,0.814034



Ridge:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.007701,0.087755,0.060152,0.880783
1,ice_velocity,0.000157,0.012547,0.0033,0.028451
2,ice_mask,0.042598,0.206393,0.150207,0.814034



Lasso:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.064599,0.254163,0.217919,-3.4e-05
1,ice_velocity,0.000162,0.01273,0.00332,-0.00019
2,ice_mask,0.191213,0.437279,0.421482,0.165243



ElasticNet:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.053446,0.231185,0.197646,0.172615
1,ice_velocity,0.000162,0.01273,0.00332,-0.00019
2,ice_mask,0.111886,0.334493,0.311609,0.511552



Lars:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.007701,0.087756,0.060153,0.880783
1,ice_velocity,0.000158,0.012552,0.003316,0.027709
2,ice_mask,0.045447,0.213182,0.154638,0.801598



LassoLars:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.064599,0.254163,0.217919,-3.4e-05
1,ice_velocity,0.000162,0.01273,0.00332,-0.00019
2,ice_mask,0.191213,0.437279,0.421483,0.16524



OrthogonalMatchingPursuit:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.01145,0.107004,0.078276,0.822749
1,ice_velocity,0.000162,0.012733,0.00324,-0.00057
2,ice_mask,0.136478,0.369429,0.282073,0.404192



BayesianRidge:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.007701,0.087756,0.060153,0.880783
1,ice_velocity,0.000157,0.012547,0.003298,0.028455
2,ice_mask,0.042598,0.206393,0.150207,0.814034



ARDRegression:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.008293,0.091065,0.063821,0.871622
1,ice_velocity,0.000162,0.01273,0.00332,-0.00019
2,ice_mask,0.043074,0.207544,0.151993,0.811955



SGDRegressor:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,1.953786e+29,442016500000000.0,399602900000000.0,-3.024589e+30
1,ice_velocity,1.513591e+29,389049000000000.0,351902600000000.0,-9.341201e+32
2,ice_mask,2.5112559999999998e+29,501124400000000.0,496332500000000.0,-1.096313e+30



PassiveAggressiveRegressor:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.065693,0.256306,0.190243,-0.016971
1,ice_velocity,0.009479,0.097361,0.096952,-57.501461
2,ice_mask,0.161796,0.402239,0.319807,0.293664



HuberRegressor:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.008525,0.09233,0.061983,0.86803
1,ice_velocity,0.000165,0.012863,0.001947,-0.021068
2,ice_mask,0.051162,0.22619,0.152042,0.776648



RANSACRegressor:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.093425,0.305655,0.16979,-0.446284
1,ice_velocity,0.000166,0.012881,0.001971,-0.02398
2,ice_mask,0.380674,0.616988,0.389372,-0.661869



TheilSenRegressor:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.009081,0.095296,0.063195,0.859414
1,ice_velocity,0.000164,0.012826,0.001971,-0.015187
2,ice_mask,0.050466,0.224646,0.152986,0.779686



LinearSVR:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.02185,0.147817,0.112234,0.66175
1,ice_velocity,0.000163,0.012763,0.00288,-0.005251
2,ice_mask,0.094719,0.307765,0.236112,0.586495



KNeighborsRegressor:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.010773,0.103795,0.045649,0.833221
1,ice_velocity,0.000164,0.012787,0.00251,-0.009148
2,ice_mask,0.020023,0.141503,0.042338,0.912587



DecisionTreeRegressor:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.00193,0.04393,0.009419,0.970125
1,ice_velocity,0.000264,0.016241,0.0012,-0.627897
2,ice_mask,0.009162,0.095716,0.013327,0.960005



RandomForestRegressor:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.000772,0.02779,0.009127,0.988045
1,ice_velocity,0.0001,0.009984,0.001382,0.384788
2,ice_mask,0.004584,0.067703,0.013942,0.979989



ExtraTreesRegressor:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.000792,0.028142,0.010056,0.987739
1,ice_velocity,8.3e-05,0.009127,0.001184,0.485918
2,ice_mask,0.003977,0.063061,0.015391,0.98264



GradientBoostingRegressor:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.013929,0.11802,0.096844,0.784374
1,ice_velocity,0.000145,0.012038,0.0028,0.1056
2,ice_mask,0.045505,0.21332,0.195905,0.801342



AdaBoostRegressor:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.006729,0.082032,0.043229,0.895826
1,ice_velocity,0.000157,0.012543,0.003245,0.029123
2,ice_mask,0.017709,0.133075,0.050483,0.92269



MLPRegressor:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.134074,0.366162,0.307961,-1.075557
1,ice_velocity,0.21244,0.460913,0.293794,-1310.085758
2,ice_mask,0.271285,0.52085,0.44919,-0.184319



PLSRegression:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.013589,0.116574,0.080526,0.789626
1,ice_velocity,0.000158,0.01258,0.00326,0.023283
2,ice_mask,0.050089,0.223805,0.161985,0.781333



XGBoost:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.011933,0.109238,0.091619,0.815272
1,ice_velocity,0.000169,0.012999,0.002879,-0.042789
2,ice_mask,0.041059,0.20263,0.188063,0.820753



LightGBM:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.012103,0.110014,0.092334,0.812638
1,ice_velocity,0.000119,0.01089,0.002545,0.268137
2,ice_mask,0.040738,0.201835,0.187511,0.822156


### Refined Testing


In [2]:
# Reload the model registries with N_ESTIMATORS=1000 and MIN_SAMPLES_SPLIT=5.
# NOTE(review): this rebinds all five names, including the ALL_* lists, so any
# cell executed after this one that reads ALL_SINGLE_TARGET_MODELS or
# ALL_MULTI_TARGET_MODELS sees the reloaded lists -- confirm this is intended.
(
    TEST_SET,
    ALL_SINGLE_TARGET_MODELS,
    REFINED_SINGLE_TARGET_MODELS,
    ALL_MULTI_TARGET_MODELS,
    REFINED_MULTI_TARGET_MODELS,
) = load_models(N_ESTIMATORS=1000, MIN_SAMPLES_SPLIT=5)

# Fit and score only the refined single-target models, keyed by model name.
refined_single_target_results = {}
for name, model in REFINED_SINGLE_TARGET_MODELS:
    print(f"Evaluating {name} for refined single-target regression:")
    refined_single_target_results[name] = fit_and_evaluate_model(
        model,
        X_train,
        y_train,
        X_test,
        y_test,
        multi=False,
        cv=5,
    )

# Render the collected results once every refined model has been evaluated.
for name, results in refined_single_target_results.items():
    print(f"\n{name}:")
    display(results)

Evaluating RandomForestRegressor for refined single-target regression:


### Model Interpretation


## Multi-Target Models


In [4]:
# Fit and score every entry of ALL_MULTI_TARGET_MODELS with multi=True; the
# evaluation output is stored under the model's name.
# The results live in their own dictionary: reusing `single_target_results`
# here would overwrite the results collected by the single-target cell and
# make the outcome depend on the order in which the cells are executed.
multi_target_results = {}
for name, model in ALL_MULTI_TARGET_MODELS:
    print(f"Evaluating {name} for multi-target regression:")
    results = fit_and_evaluate_model(
        model, X_train, y_train, X_test, y_test, multi=True, cv=5
    )
    multi_target_results[name] = results

# Render the collected results once every multi-target model has been evaluated.
for name, results in multi_target_results.items():
    print(f"\n{name}:")
    display(results)

Evaluating Multi-target Linear Regression for multi-target regression:
Evaluating Multi-target Ridge for multi-target regression:
Evaluating Multi-target Lasso for multi-target regression:
Evaluating Multi-target ElasticNet for multi-target regression:
Evaluating Multi-target RandomForest for multi-target regression:
Evaluating Multi-target ExtraTrees for multi-target regression:
Evaluating Multi-target GradientBoosting for multi-target regression:
Evaluating Multi-target MLPRegressor for multi-target regression:
Evaluating Multi-target XGBoost for multi-target regression:
Evaluating Multi-target LightGBM for multi-target regression:
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000502 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1083
[LightGBM] [Info] Number of data points in the train set: 124586, number of used features: 8
[Ligh

Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.007701,0.087756,0.060153,0.880783
1,ice_velocity,0.000157,0.012547,0.0033,0.028451
2,ice_mask,0.042598,0.206393,0.150208,0.814034



Multi-target Ridge:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.007701,0.087755,0.060152,0.880783
1,ice_velocity,0.000157,0.012547,0.0033,0.028451
2,ice_mask,0.042598,0.206393,0.150207,0.814034



Multi-target Lasso:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.064599,0.254163,0.217919,-3.4e-05
1,ice_velocity,0.000162,0.01273,0.00332,-0.00019
2,ice_mask,0.191213,0.437279,0.421482,0.165243



Multi-target ElasticNet:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.053446,0.231185,0.197646,0.172615
1,ice_velocity,0.000162,0.01273,0.00332,-0.00019
2,ice_mask,0.111886,0.334493,0.311609,0.511552



Multi-target RandomForest:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.000756,0.02749,0.008547,0.988302
1,ice_velocity,9.1e-05,0.00952,0.001172,0.440695
2,ice_mask,0.003935,0.062727,0.013074,0.982823



Multi-target ExtraTrees:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.000642,0.025343,0.008879,0.990057
1,ice_velocity,7.6e-05,0.008741,0.001116,0.528462
2,ice_mask,0.003535,0.059453,0.014034,0.984569



Multi-target GradientBoosting:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.013929,0.11802,0.096844,0.784374
1,ice_velocity,0.000145,0.012038,0.0028,0.1056
2,ice_mask,0.045505,0.21332,0.195905,0.801342



Multi-target MLPRegressor:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.289873,0.538398,0.410515,-3.487417
1,ice_velocity,0.123782,0.351827,0.308238,-762.928056
2,ice_mask,0.110393,0.332254,0.271859,0.51807



Multi-target XGBoost:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.011933,0.109238,0.091619,0.815272
1,ice_velocity,0.000169,0.012999,0.002879,-0.042789
2,ice_mask,0.041059,0.20263,0.188063,0.820753



Multi-target LightGBM:


Unnamed: 0,Target,MSE,RMSE,MAE,R2
0,ice_thickness,0.012103,0.110014,0.092334,0.812638
1,ice_velocity,0.000119,0.01089,0.002545,0.268137
2,ice_mask,0.040738,0.201835,0.187511,0.822156


## Sequential Models
