# Model Testing


## Setup


In [1]:
from scripts.data_loader import load_data, split_features_targets
from scripts.preprocessing import preprocess_data, derive_features
from scripts.model_testing import test_independant_models, find_top_models

from sklearn.linear_model import (
    LinearRegression,
    Ridge,
    Lasso,
    ElasticNet,
    TheilSenRegressor,
    HuberRegressor,
)
from sklearn.ensemble import (
    RandomForestRegressor,
    GradientBoostingRegressor,
    AdaBoostRegressor,
    ExtraTreesRegressor,
)
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR, LinearSVR, NuSVR
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.cross_decomposition import PLSRegression
from sklearn.kernel_ridge import KernelRidge
from sklearn.ensemble import BaggingRegressor, VotingRegressor, StackingRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

In [2]:
# Run the load -> preprocess -> derive-features pipeline in a single expression,
# so `df` is only ever bound to the fully prepared frame.
df = derive_features(preprocess_data(load_data("data")))

# Separate the model inputs from the prediction targets.
features, targets = split_features_targets(df)

## Initial Testing


### Models


In [3]:
# Fixed seed shared by every estimator below. All of these ensembles are
# stochastic (bootstrapping / feature sub-sampling), so without a seed the
# score tables in the Results section change from run to run.
RANDOM_STATE = 42

# Baseline candidates: depth-limited tree ensembles with 100 estimators each
# (BaggingRegressor uses 10 estimators, each fit on half of the samples).
models = [
    RandomForestRegressor(
        n_estimators=100, max_depth=10, n_jobs=-1, random_state=RANDOM_STATE
    ),
    GradientBoostingRegressor(
        n_estimators=100, max_depth=5, learning_rate=0.1, random_state=RANDOM_STATE
    ),
    ExtraTreesRegressor(
        n_estimators=100, max_depth=10, n_jobs=-1, random_state=RANDOM_STATE
    ),
    BaggingRegressor(
        n_estimators=10, max_samples=0.5, n_jobs=-1, random_state=RANDOM_STATE
    ),
    XGBRegressor(
        n_estimators=100,
        max_depth=5,
        learning_rate=0.1,
        n_jobs=-1,
        random_state=RANDOM_STATE,
    ),
    LGBMRegressor(
        n_estimators=100,
        max_depth=5,
        learning_rate=0.1,
        n_jobs=-1,
        verbose=-1,  # silence LightGBM's per-fit log output
        random_state=RANDOM_STATE,
    ),
]

### Results


In [4]:
# Ice Thickness
# Score every candidate on the ice-thickness target, then show the first rows
# of the table returned by find_top_models.
ice_thickness_scores = test_independant_models(
    models, features, targets["ice_thickness"]
)
find_top_models(ice_thickness_scores).head()

Unnamed: 0,MSE,MAE,R2
BaggingRegressor,0.001078,0.01196,0.983114
XGBRegressor,0.002065,0.022792,0.967655
GradientBoostingRegressor,0.00215,0.023234,0.966334
LGBMRegressor,0.002153,0.023245,0.966278
RandomForestRegressor,0.002307,0.019409,0.963876


In [5]:
# Ice Velocity
# Score every candidate on the ice-velocity target, then show the first rows
# of the table returned by find_top_models.
ice_velocity_scores = test_independant_models(
    models, features, targets["ice_velocity"]
)
find_top_models(ice_velocity_scores).head()

Unnamed: 0,MSE,MAE,R2
ExtraTreesRegressor,561.071905,5.456475,0.435968
LGBMRegressor,575.475945,5.697298,0.421488
RandomForestRegressor,583.469261,4.863364,0.413452
XGBRegressor,588.535506,5.595283,0.408359
GradientBoostingRegressor,595.054467,5.93304,0.401806


In [6]:
# Ice Mask
# Score every candidate on the ice-mask target, then show the first rows
# of the table returned by find_top_models.
ice_mask_scores = test_independant_models(
    models, features, targets["ice_mask"]
)
find_top_models(ice_mask_scores).head()

Unnamed: 0,MSE,MAE,R2
BaggingRegressor,0.012887,0.017,0.986327
RandomForestRegressor,0.019005,0.026249,0.979835
XGBRegressor,0.019997,0.032339,0.978783
GradientBoostingRegressor,0.020073,0.031622,0.978702
LGBMRegressor,0.020165,0.032197,0.978604


## Further Testing


### Models


In [9]:
# Shared hyperparameters for the second, larger round of candidates.
MAX_DEPTH = 50
N_ESTIMATORS = 150
N_JOBS = -1
# Fixed seed: every estimator below is stochastic, so without it the reported
# scores are not repeatable between runs.
RANDOM_STATE = 42

# Same model families as the initial round, with deeper trees and more
# estimators. BaggingRegressor takes no max_depth, so only its estimator count
# changes; it still fits each estimator on half of the samples.
models = [
    RandomForestRegressor(
        n_estimators=N_ESTIMATORS,
        max_depth=MAX_DEPTH,
        n_jobs=N_JOBS,
        random_state=RANDOM_STATE,
    ),
    GradientBoostingRegressor(
        n_estimators=N_ESTIMATORS,
        max_depth=MAX_DEPTH,
        learning_rate=0.1,
        random_state=RANDOM_STATE,
    ),
    ExtraTreesRegressor(
        n_estimators=N_ESTIMATORS,
        max_depth=MAX_DEPTH,
        n_jobs=N_JOBS,
        random_state=RANDOM_STATE,
    ),
    BaggingRegressor(
        n_estimators=N_ESTIMATORS,
        max_samples=0.5,
        n_jobs=N_JOBS,
        random_state=RANDOM_STATE,
    ),
    XGBRegressor(
        n_estimators=N_ESTIMATORS,
        max_depth=MAX_DEPTH,
        learning_rate=0.1,
        n_jobs=N_JOBS,
        random_state=RANDOM_STATE,
    ),
    # NOTE(review): LightGBM also limits tree size through num_leaves (default 31
    # according to its docs), so raising max_depth alone may barely change this
    # model compared with the initial round -- confirm whether num_leaves should
    # be raised as well.
    LGBMRegressor(
        n_estimators=N_ESTIMATORS,
        max_depth=MAX_DEPTH,
        learning_rate=0.1,
        n_jobs=N_JOBS,
        verbose=-1,  # silence LightGBM's per-fit log output
        random_state=RANDOM_STATE,
    ),
]

### Results


In [12]:
# Ice Velocity, evaluated with an explicit split year passed to the test helper.
velocity_target = targets["ice_velocity"]
results = test_independant_models(
    models,
    features,
    velocity_target,
    split_year=2085,
)

# One row per candidate model (six models are defined above).
results.head(6)