# Model Testing


## Setup


In [1]:
from scripts.data_loader import load_data, split_features_targets
from scripts.preprocessing import preprocess_data, create_features
from scripts.model_testing import test_independant_models, find_top_models
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

from sklearn.linear_model import (
    LinearRegression,
    Ridge,
    Lasso,
    ElasticNet,
    RANSACRegressor,
    TheilSenRegressor,
    HuberRegressor,
)
from sklearn.ensemble import (
    RandomForestRegressor,
    GradientBoostingRegressor,
    AdaBoostRegressor,
    ExtraTreesRegressor,
)
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR, LinearSVR, NuSVR
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.cross_decomposition import PLSRegression
from sklearn.kernel_ridge import KernelRidge
from sklearn.isotonic import IsotonicRegression
from sklearn.ensemble import BaggingRegressor, VotingRegressor, StackingRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

In [2]:
# Load the dataset via load_data("data") and preprocess it in a single step,
# so `df` is only ever bound to the preprocessed frame.
df = preprocess_data(load_data("data"))
# create_features is imported but currently not applied:
# df = create_features(df)

# Split into model inputs and targets; `targets` is indexed by target name
# (e.g. targets["ice_thickness"]) in the results cells further down.
features, targets = split_features_targets(df)

## Initial Testing


### Models


In [3]:
# Candidate regressors for the initial sweep.
#
# Every estimator that accepts a seed is pinned to RANDOM_STATE so that the
# models themselves behave the same way across kernel restarts. Estimators
# without a seed parameter (LinearRegression, KNN, SVR/NuSVR, PLS, KernelRidge,
# Huber) and the coordinate-descent / ridge solvers at their defaults are left
# unseeded.
# NOTE(review): any sampling or train/test splitting done inside
# test_independant_models is not visible from this notebook and may need its
# own seed for fully reproducible scores — confirm in scripts.model_testing.
RANDOM_STATE = 42

models = [
    # --- Linear models ---
    LinearRegression(),
    Ridge(),
    Lasso(),
    ElasticNet(),
    # --- Tree ensembles and single tree ---
    RandomForestRegressor(
        n_estimators=100, max_depth=10, n_jobs=-1, random_state=RANDOM_STATE
    ),
    GradientBoostingRegressor(
        n_estimators=100, max_depth=5, learning_rate=0.1, random_state=RANDOM_STATE
    ),
    AdaBoostRegressor(n_estimators=50, learning_rate=0.1, random_state=RANDOM_STATE),
    ExtraTreesRegressor(
        n_estimators=100, max_depth=10, n_jobs=-1, random_state=RANDOM_STATE
    ),
    DecisionTreeRegressor(max_depth=10, random_state=RANDOM_STATE),
    # --- Instance-based / neural ---
    KNeighborsRegressor(n_neighbors=5, n_jobs=-1),
    MLPRegressor(
        hidden_layer_sizes=(100, 50), max_iter=500, random_state=RANDOM_STATE
    ),
    # --- Support vector machines (linear kernels only) ---
    SVR(kernel="linear", C=1.0, epsilon=0.1),
    LinearSVR(
        C=1.0, epsilon=0.1, max_iter=1000, tol=1e-3, random_state=RANDOM_STATE
    ),
    NuSVR(kernel="linear", C=1.0, nu=0.5),
    # --- Kernel / latent-variable methods ---
    GaussianProcessRegressor(
        n_restarts_optimizer=0, normalize_y=True, random_state=RANDOM_STATE
    ),
    PLSRegression(n_components=6),
    KernelRidge(alpha=1.0, kernel="linear"),
    # --- Robust linear regressors ---
    RANSACRegressor(min_samples=0.8, max_trials=1000, random_state=RANDOM_STATE),
    TheilSenRegressor(
        max_subpopulation=1e4, n_subsamples=None, random_state=RANDOM_STATE
    ),
    HuberRegressor(epsilon=1.35, max_iter=1000),
    # --- Meta-estimators ---
    BaggingRegressor(
        n_estimators=10, max_samples=0.5, n_jobs=-1, random_state=RANDOM_STATE
    ),
    # VotingRegressor / StackingRegressor take no seed themselves, so the seed
    # is set on their stochastic base estimators instead.
    VotingRegressor(
        estimators=[
            ("lr", LinearRegression()),
            (
                "rf",
                RandomForestRegressor(
                    n_estimators=50,
                    max_depth=5,
                    n_jobs=-1,
                    random_state=RANDOM_STATE,
                ),
            ),
            (
                "gbr",
                GradientBoostingRegressor(
                    n_estimators=50,
                    max_depth=3,
                    learning_rate=0.1,
                    random_state=RANDOM_STATE,
                ),
            ),
        ]
    ),
    StackingRegressor(
        estimators=[
            ("lr", LinearRegression()),
            (
                "rf",
                RandomForestRegressor(
                    n_estimators=50,
                    max_depth=5,
                    n_jobs=-1,
                    random_state=RANDOM_STATE,
                ),
            ),
            (
                "gbr",
                GradientBoostingRegressor(
                    n_estimators=50,
                    max_depth=3,
                    learning_rate=0.1,
                    random_state=RANDOM_STATE,
                ),
            ),
        ]
    ),
    # --- Gradient-boosting libraries ---
    XGBRegressor(
        n_estimators=100,
        max_depth=5,
        learning_rate=0.1,
        n_jobs=-1,
        random_state=RANDOM_STATE,
    ),
    LGBMRegressor(
        n_estimators=100,
        max_depth=5,
        learning_rate=0.1,
        n_jobs=-1,
        verbose=-1,
        random_state=RANDOM_STATE,
    ),
    CatBoostRegressor(
        iterations=100,
        depth=5,
        learning_rate=0.1,
        thread_count=-1,
        verbose=0,
        random_seed=RANDOM_STATE,
    ),
]

### Results


In [5]:
# Ice Thickness
# Score every candidate in `models` against the ice_thickness target, then
# display the first rows of the ranking produced by find_top_models.
# NOTE(review): 10000 and 0.2 are passed positionally; presumably a sample
# size and a test fraction — confirm against scripts.model_testing.
ice_thickness_scores = test_independant_models(
    models, features, targets["ice_thickness"], 10000, 0.2
)
find_top_models(ice_thickness_scores).head()

Unnamed: 0,MSE,MAE,R2
RandomForestRegressor,0.001754,0.019456,0.972592
BaggingRegressor,0.001999,0.020649,0.968768
XGBRegressor,0.002279,0.023679,0.964381
LGBMRegressor,0.00229,0.023875,0.964212
GradientBoostingRegressor,0.002441,0.024601,0.961848


In [6]:
# Ice Velocity
# Score every candidate in `models` against the ice_velocity target, then
# display the first rows of the ranking produced by find_top_models.
# NOTE(review): 10000 and 0.2 are passed positionally; presumably a sample
# size and a test fraction — confirm against scripts.model_testing.
ice_velocity_scores = test_independant_models(
    models, features, targets["ice_velocity"], 10000, 0.2
)
find_top_models(ice_velocity_scores).head()

Unnamed: 0,MSE,MAE,R2
XGBRegressor,708.51207,6.082729,0.429612
BaggingRegressor,723.472375,5.66044,0.417568
GradientBoostingRegressor,730.535931,6.324826,0.411881
ExtraTreesRegressor,738.111871,6.395386,0.405782
RandomForestRegressor,755.24971,6.187289,0.391986


In [7]:
# Ice Mask
# Score every candidate in `models` against the ice_mask target, then display
# the first rows of the ranking produced by find_top_models.
# NOTE(review): 10000 and 0.2 are passed positionally; presumably a sample
# size and a test fraction — confirm against scripts.model_testing.
# NOTE(review): if ice_mask is a binary/categorical mask, regression metrics
# may not be the most informative choice — confirm the target's encoding.
ice_mask_scores = test_independant_models(
    models, features, targets["ice_mask"], 10000, 0.2
)
find_top_models(ice_mask_scores).head()

Unnamed: 0,MSE,MAE,R2
XGBRegressor,0.043564,0.067745,0.953909
RandomForestRegressor,0.04418,0.056897,0.953258
GradientBoostingRegressor,0.046069,0.070259,0.951259
LGBMRegressor,0.048738,0.076469,0.948435
BaggingRegressor,0.049766,0.060418,0.947347


## Further Testing


### Models


In [None]:
# Reduced model list for the "Further Testing" section: a subset of the
# initial sweep with the same hyperparameters.
#
# This cell REBINDS `models`, so it must be executed before the results cell
# that follows; in the saved notebook it carries no execution count.
#
# Every estimator that accepts a seed is pinned to RANDOM_STATE so that the
# models themselves behave the same way across kernel restarts. SVR and NuSVR
# expose no seed parameter and are left as-is.
# NOTE(review): any sampling or train/test splitting done inside
# test_independant_models is not visible from this notebook and may need its
# own seed for fully reproducible scores — confirm in scripts.model_testing.
RANDOM_STATE = 42

models = [
    # --- Tree ensembles ---
    RandomForestRegressor(
        n_estimators=100, max_depth=10, n_jobs=-1, random_state=RANDOM_STATE
    ),
    GradientBoostingRegressor(
        n_estimators=100, max_depth=5, learning_rate=0.1, random_state=RANDOM_STATE
    ),
    ExtraTreesRegressor(
        n_estimators=100, max_depth=10, n_jobs=-1, random_state=RANDOM_STATE
    ),
    # --- Support vector machines (linear kernels only) ---
    SVR(kernel="linear", C=1.0, epsilon=0.1),
    LinearSVR(
        C=1.0, epsilon=0.1, max_iter=1000, tol=1e-3, random_state=RANDOM_STATE
    ),
    NuSVR(kernel="linear", C=1.0, nu=0.5),
    # --- Gaussian process ---
    GaussianProcessRegressor(
        n_restarts_optimizer=0, normalize_y=True, random_state=RANDOM_STATE
    ),
    # --- Bagging and gradient-boosting libraries ---
    BaggingRegressor(
        n_estimators=10, max_samples=0.5, n_jobs=-1, random_state=RANDOM_STATE
    ),
    XGBRegressor(
        n_estimators=100,
        max_depth=5,
        learning_rate=0.1,
        n_jobs=-1,
        random_state=RANDOM_STATE,
    ),
    LGBMRegressor(
        n_estimators=100,
        max_depth=5,
        learning_rate=0.1,
        n_jobs=-1,
        verbose=-1,
        random_state=RANDOM_STATE,
    ),
]

### Results


In [8]:
# Re-score the current `models` list against the ice_velocity target and show
# up to ten rows of the resulting table.
# NOTE(review): the output saved with this cell lists CatBoostRegressor,
# StackingRegressor, DecisionTreeRegressor and VotingRegressor, none of which
# are in the Further Testing model list, and that list's cell shows no
# execution count. The saved table therefore appears to come from the earlier,
# larger `models` list — run the Further Testing models cell first, then
# re-run this one.
# NOTE(review): 10000 and 0.2 are passed positionally; presumably a sample
# size and a test fraction — confirm against scripts.model_testing.
velocity_target = targets["ice_velocity"]
results = test_independant_models(models, features, velocity_target, 10000, 0.2)
results.head(10)

Unnamed: 0,MSE,MAE,R2
XGBRegressor,708.51207,6.082729,0.429612
GradientBoostingRegressor,716.036765,6.29942,0.423554
ExtraTreesRegressor,725.236574,6.310362,0.416148
BaggingRegressor,734.186169,5.603887,0.408943
RandomForestRegressor,746.544406,6.162605,0.398994
LGBMRegressor,837.682476,7.165042,0.325623
CatBoostRegressor,849.630493,7.290467,0.316004
StackingRegressor,849.755385,8.362439,0.315904
DecisionTreeRegressor,924.594361,6.41252,0.255654
VotingRegressor,1006.732218,8.139762,0.189529
