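# Model Testing

Benchmarks a broad set of regression models on the `ice_thickness`, `ice_velocity`, and `ice_mask` targets, then re-tests the best-performing tree ensembles with larger settings on `ice_velocity`.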


## Setup


In [2]:
from scripts.data_loader import load_data, split_features_targets
from scripts.preprocessing import preprocess_data, derive_features
from scripts.model_testing import test_independant_models, find_top_models

from sklearn.linear_model import (
    LinearRegression,
    Ridge,
    Lasso,
    ElasticNet,
    TheilSenRegressor,
    HuberRegressor,
)
from sklearn.ensemble import (
    RandomForestRegressor,
    GradientBoostingRegressor,
    AdaBoostRegressor,
    ExtraTreesRegressor,
)
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR, LinearSVR, NuSVR
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.cross_decomposition import PLSRegression
from sklearn.kernel_ridge import KernelRidge
from sklearn.ensemble import BaggingRegressor, VotingRegressor, StackingRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

In [6]:
# Build the modelling frame in one pass: load_data -> preprocess_data ->
# derive_features. Starting from the raw load keeps this cell safe to re-run.
df = derive_features(preprocess_data(load_data("data")))

# `targets` is indexed by target name in the results cells
# (e.g. targets["ice_thickness"]).
features, targets = split_features_targets(df)

## Initial Testing


### Models


In [7]:
# Fixed seed passed to every estimator below whose fit involves randomness,
# so the result tables can be reproduced after Restart Kernel -> Run All.
RANDOM_STATE = 42

# First-pass candidate list: one instance per regressor family.
# Estimators that have no `random_state` parameter, or that are deterministic
# with the settings used here, are left unseeded.
models = [
    LinearRegression(),
    Ridge(),
    Lasso(),
    ElasticNet(),
    RandomForestRegressor(
        n_estimators=100, max_depth=10, n_jobs=-1, random_state=RANDOM_STATE
    ),
    GradientBoostingRegressor(
        n_estimators=100, max_depth=5, learning_rate=0.1, random_state=RANDOM_STATE
    ),
    AdaBoostRegressor(n_estimators=50, learning_rate=0.1, random_state=RANDOM_STATE),
    ExtraTreesRegressor(
        n_estimators=100, max_depth=10, n_jobs=-1, random_state=RANDOM_STATE
    ),
    DecisionTreeRegressor(max_depth=10, random_state=RANDOM_STATE),
    KNeighborsRegressor(n_neighbors=5, n_jobs=-1),
    MLPRegressor(
        hidden_layer_sizes=(100, 50), max_iter=500, random_state=RANDOM_STATE
    ),
    SVR(kernel="linear", C=1.0, epsilon=0.1),
    LinearSVR(
        C=1.0, epsilon=0.1, max_iter=1000, tol=1e-3, random_state=RANDOM_STATE
    ),
    NuSVR(kernel="linear", C=1.0, nu=0.5),
    GaussianProcessRegressor(n_restarts_optimizer=0, normalize_y=True),
    PLSRegression(n_components=6),
    KernelRidge(alpha=1.0, kernel="linear"),
    TheilSenRegressor(
        max_subpopulation=1e4, n_subsamples=None, random_state=RANDOM_STATE
    ),
    HuberRegressor(epsilon=1.35, max_iter=1000),
    BaggingRegressor(
        n_estimators=10, max_samples=0.5, n_jobs=-1, random_state=RANDOM_STATE
    ),
    # VotingRegressor has no seed of its own; its base estimators are seeded.
    VotingRegressor(
        estimators=[
            ("lr", LinearRegression()),
            (
                "rf",
                RandomForestRegressor(
                    n_estimators=50, max_depth=5, n_jobs=-1, random_state=RANDOM_STATE
                ),
            ),
            (
                "gbr",
                GradientBoostingRegressor(
                    n_estimators=50,
                    max_depth=3,
                    learning_rate=0.1,
                    random_state=RANDOM_STATE,
                ),
            ),
        ]
    ),
    # Same base estimators as the voting ensemble, combined by stacking.
    StackingRegressor(
        estimators=[
            ("lr", LinearRegression()),
            (
                "rf",
                RandomForestRegressor(
                    n_estimators=50, max_depth=5, n_jobs=-1, random_state=RANDOM_STATE
                ),
            ),
            (
                "gbr",
                GradientBoostingRegressor(
                    n_estimators=50,
                    max_depth=3,
                    learning_rate=0.1,
                    random_state=RANDOM_STATE,
                ),
            ),
        ]
    ),
    XGBRegressor(
        n_estimators=100,
        max_depth=5,
        learning_rate=0.1,
        n_jobs=-1,
        random_state=RANDOM_STATE,
    ),
    LGBMRegressor(
        n_estimators=100,
        max_depth=5,
        learning_rate=0.1,
        n_jobs=-1,
        verbose=-1,
        random_state=RANDOM_STATE,
    ),
]

### Results


In [8]:
# Ice Thickness: evaluate every candidate model against this target, then
# display the first five rows of the find_top_models table.
thickness_scores = test_independant_models(
    models, features, targets["ice_thickness"]
)
find_top_models(thickness_scores).head()



Unnamed: 0,MSE,MAE,R2
RandomForestRegressor,0.001555,0.018336,0.975696
BaggingRegressor,0.001679,0.018907,0.97376
XGBRegressor,0.001831,0.021306,0.971394
GradientBoostingRegressor,0.001977,0.022163,0.969099
LGBMRegressor,0.002043,0.022519,0.968076


In [9]:
# Ice Velocity: evaluate every candidate model against this target, then
# display the first five rows of the find_top_models table.
velocity_scores = test_independant_models(
    models, features, targets["ice_velocity"]
)
find_top_models(velocity_scores).head()



Unnamed: 0,MSE,MAE,R2
ExtraTreesRegressor,582.162817,4.937941,0.531329
GradientBoostingRegressor,683.360187,5.368325,0.44986
XGBRegressor,688.134789,5.169892,0.446016
RandomForestRegressor,721.380434,5.117349,0.419252
LGBMRegressor,732.271686,6.156113,0.410484


In [10]:
# Ice Mask: evaluate every candidate model against this target, then
# display the first five rows of the find_top_models table.
# NOTE(review): the name suggests a categorical mask, yet it is scored with
# regressors and MSE/MAE/R2 -- confirm this is intended.
mask_scores = test_independant_models(models, features, targets["ice_mask"])
find_top_models(mask_scores).head()



Unnamed: 0,MSE,MAE,R2
ExtraTreesRegressor,0.010069,0.024479,0.989347
XGBRegressor,0.012172,0.028409,0.987122
RandomForestRegressor,0.012231,0.023246,0.98706
GradientBoostingRegressor,0.013348,0.030836,0.985878
LGBMRegressor,0.0135,0.034609,0.985717


## Further Testing


### Models


In [11]:
# Shared settings for the second-round shortlist.
MAX_DEPTH = 50
N_ESTIMATORS = 150
N_JOBS = -1
# Fixed seed so the second-round results can be reproduced after
# Restart Kernel -> Run All.
RANDOM_STATE = 42

# Shortlist of the tree-based ensembles that appeared in the top rows of the
# initial results. This rebinds `models`, replacing the initial candidate list.
models = [
    RandomForestRegressor(
        n_estimators=N_ESTIMATORS,
        max_depth=MAX_DEPTH,
        n_jobs=N_JOBS,
        random_state=RANDOM_STATE,
    ),
    GradientBoostingRegressor(
        n_estimators=N_ESTIMATORS,
        max_depth=MAX_DEPTH,
        learning_rate=0.1,
        random_state=RANDOM_STATE,
    ),
    ExtraTreesRegressor(
        n_estimators=N_ESTIMATORS,
        max_depth=MAX_DEPTH,
        n_jobs=N_JOBS,
        random_state=RANDOM_STATE,
    ),
    # BaggingRegressor takes no max_depth; only the ensemble size is raised.
    BaggingRegressor(
        n_estimators=N_ESTIMATORS,
        max_samples=0.5,
        n_jobs=N_JOBS,
        random_state=RANDOM_STATE,
    ),
    XGBRegressor(
        n_estimators=N_ESTIMATORS,
        max_depth=MAX_DEPTH,
        learning_rate=0.1,
        n_jobs=N_JOBS,
        random_state=RANDOM_STATE,
    ),
    LGBMRegressor(
        n_estimators=N_ESTIMATORS,
        max_depth=MAX_DEPTH,
        learning_rate=0.1,
        n_jobs=N_JOBS,
        verbose=-1,
        random_state=RANDOM_STATE,
    ),
]

### Results


In [12]:
# Ice Velocity only: re-evaluate the current `models` list with an explicit
# test_split of 0.2 and subset=False.
velocity_target = targets["ice_velocity"]
results = test_independant_models(
    models,
    features,
    velocity_target,
    test_split=0.2,
    subset=False,
)
# Show up to six rows, one per model in the second-round list.
results.head(6)

Unnamed: 0,MSE,MAE,R2
XGBRegressor,249.364635,1.127016,0.799249
ExtraTreesRegressor,260.21868,1.199475,0.790511
RandomForestRegressor,281.443833,1.31828,0.773423
BaggingRegressor,295.442356,1.524335,0.762154
LGBMRegressor,312.035757,2.860903,0.748795
GradientBoostingRegressor,348.397428,1.262824,0.719522
