# Model Testing


## Setup


In [1]:
from scripts.data_loader import load_data, split_features_targets
from scripts.preprocessing import preprocess_data, derive_features
from scripts.model_testing import test_independant_models, display_importances, find_top_models

from sklearn.ensemble import (
    RandomForestRegressor,
    GradientBoostingRegressor,
    ExtraTreesRegressor,
)
from sklearn.ensemble import BaggingRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

In [2]:
# Build the working frame in one pass: load_data -> preprocess_data -> derive_features.
df = derive_features(preprocess_data(load_data("data")))

# Split the frame into the model inputs and the collection of targets
# (indexed by target name in the results cells below).
features, targets = split_features_targets(df)

## Target-Independent Testing


### Models


In [3]:
# Seed shared by every estimator below so that their internal randomness
# (bootstrap draws, feature subsampling, tie-breaking) is repeatable between
# kernel restarts instead of changing the reported metrics on each run.
RANDOM_STATE = 42

# Baseline candidates: shallow trees and small ensembles for a quick comparison.
models = [
    RandomForestRegressor(
        n_estimators=100, max_depth=10, n_jobs=-1, random_state=RANDOM_STATE
    ),
    GradientBoostingRegressor(
        n_estimators=100, max_depth=5, learning_rate=0.1, random_state=RANDOM_STATE
    ),
    ExtraTreesRegressor(
        n_estimators=100, max_depth=10, n_jobs=-1, random_state=RANDOM_STATE
    ),
    BaggingRegressor(
        n_estimators=10, max_samples=0.5, n_jobs=-1, random_state=RANDOM_STATE
    ),
    XGBRegressor(
        n_estimators=100,
        max_depth=5,
        learning_rate=0.1,
        n_jobs=-1,
        random_state=RANDOM_STATE,
    ),
    LGBMRegressor(
        n_estimators=100,
        max_depth=5,
        learning_rate=0.1,
        n_jobs=-1,
        verbose=-1,
        random_state=RANDOM_STATE,
    ),
]

### Results


In [4]:
# Ice Thickness
# The call below is the last expression in the cell, so its return value is
# what the notebook displays as the cell output.
find_top_models(test_independant_models(models, features, targets["ice_thickness"]))

# TODO(review): the model-interpretation snippet that followed is disabled.
# It raised NameError on a fresh kernel: `split_data_by_year`, `target`,
# `split_year`, `model`, `X` and `feature_names` are not defined or imported in
# the visible notebook cells. To restore it, define those names, move the
# `interpretability` import into the import cell at the top, and run it in its
# own cell.
#
# X_train, X_test, y_train, y_test = split_data_by_year(features, target, split_year)
# model.fit(X_train, y_train)
# y_pred = model.predict(X_test)
#
# from interpretability import interpret_model, plot_interpretation_results, plot_regression_diagnostics
# interpretation_result = interpret_model(model, X, y_pred)
# plot_interpretation_results(interpretation_result, feature_names=feature_names, model_name="Random Forest")
# plot_regression_diagnostics(model, X, y)

Unnamed: 0,MSE,MAE,R2
BaggingRegressor,0.001836,0.01662,0.971574
XGBRegressor,0.001946,0.020765,0.969874
LGBMRegressor,0.001977,0.020918,0.969388
GradientBoostingRegressor,0.002046,0.021354,0.968328
RandomForestRegressor,0.00219,0.020733,0.966094
ExtraTreesRegressor,0.003001,0.027351,0.953542


In [9]:
# Ice Velocity
# Evaluate the candidate models against the "ice_velocity" target, then hand
# the evaluation output to find_top_models; that return value is the cell output.
velocity_results = test_independant_models(models, features, targets["ice_velocity"])
find_top_models(velocity_results)

Unnamed: 0,MSE,MAE,R2
ExtraTreesRegressor,2276.958217,8.324317,0.239062
RandomForestRegressor,2356.408526,8.140071,0.21251
XGBRegressor,2390.040798,9.056582,0.201271
GradientBoostingRegressor,2412.617012,9.127517,0.193726
LGBMRegressor,2419.568506,8.981739,0.191403
BaggingRegressor,2444.006845,8.141892,0.183236


In [8]:
# Ice Mask
# Evaluate the candidate models against the "ice_mask" target, then hand the
# evaluation output to find_top_models; that return value is the cell output.
mask_results = test_independant_models(models, features, targets["ice_mask"])
find_top_models(mask_results)

Unnamed: 0,MSE,MAE,R2
BaggingRegressor,0.027571,0.030664,0.969934
XGBRegressor,0.02837,0.047084,0.969063
GradientBoostingRegressor,0.029258,0.046212,0.968095
LGBMRegressor,0.029998,0.047852,0.967287
RandomForestRegressor,0.030711,0.032653,0.96651
ExtraTreesRegressor,0.038775,0.054277,0.957717


## Further Testing


### Models


In [None]:
# Shared hyperparameters for the second, larger round of candidates.
MAX_DEPTH = 60
N_ESTIMATORS = 160
N_JOBS = -1
# Seed passed to every estimator so that their internal randomness (bootstrap
# draws, feature subsampling, tie-breaking) is repeatable between runs.
RANDOM_STATE = 42

# Rebinds `models`: the baseline list defined earlier is replaced from here on.
models = [
    RandomForestRegressor(
        n_estimators=N_ESTIMATORS,
        max_depth=MAX_DEPTH,
        n_jobs=N_JOBS,
        random_state=RANDOM_STATE,
    ),
    GradientBoostingRegressor(
        n_estimators=N_ESTIMATORS,
        max_depth=MAX_DEPTH,
        learning_rate=0.1,
        random_state=RANDOM_STATE,
    ),
    ExtraTreesRegressor(
        n_estimators=N_ESTIMATORS,
        max_depth=MAX_DEPTH,
        n_jobs=N_JOBS,
        random_state=RANDOM_STATE,
    ),
    # BaggingRegressor is not given max_depth; only the ensemble size is shared.
    BaggingRegressor(
        n_estimators=N_ESTIMATORS,
        max_samples=0.5,
        n_jobs=N_JOBS,
        random_state=RANDOM_STATE,
    ),
    XGBRegressor(
        n_estimators=N_ESTIMATORS,
        max_depth=MAX_DEPTH,
        learning_rate=0.1,
        n_jobs=N_JOBS,
        random_state=RANDOM_STATE,
    ),
    LGBMRegressor(
        n_estimators=N_ESTIMATORS,
        max_depth=MAX_DEPTH,
        learning_rate=0.1,
        n_jobs=N_JOBS,
        verbose=-1,
        random_state=RANDOM_STATE,
    ),
]

### Results


In [None]:
# Re-run the evaluation on the "ice_velocity" target with the larger models,
# passing an explicit split_year instead of relying on the function's default.
results = test_independant_models(
    models,
    features,
    targets["ice_velocity"],
    split_year=2085,
)
# NOTE(review): .head(6) assumes the return value is DataFrame-like; the earlier
# results cells pass the same return value through find_top_models instead.
# Confirm which form is intended.
results.head(6)