# Model Testing


## Setup


In [1]:
import time

from lightgbm import LGBMRegressor
from sklearn.ensemble import (
    RandomForestRegressor,
    GradientBoostingRegressor,
    ExtraTreesRegressor,
)
from sklearn.ensemble import BaggingRegressor
from xgboost import XGBRegressor

from scripts.data_loader import load_data, split_features_targets
from scripts.preprocessing import preprocess_data, derive_features
from scripts.model_testing import test_independant_models, find_top_models, train_and_predict, test_dependent_models, split_data_by_year

In [2]:
# Read everything under "data", then pass it through the preprocessing and
# feature-derivation steps in that order.
df = derive_features(preprocess_data(load_data("data")))

# Separate the model inputs from the prediction targets.
features, targets = split_features_targets(df)

# Target-Independent Testing


### Models


In [None]:
# Fixed seed shared by every estimator below. All of these models involve
# randomness (bootstrapping / feature sub-sampling), so without a seed the
# scores change on every kernel restart and cannot be compared between runs.
RANDOM_STATE = 42

# Baseline candidates: modest tree counts and depths so the first comparison
# across targets stays quick.
models = [
    RandomForestRegressor(n_estimators=100, max_depth=10, n_jobs=-1, random_state=RANDOM_STATE),
    GradientBoostingRegressor(n_estimators=100, max_depth=5, learning_rate=0.1, random_state=RANDOM_STATE),
    ExtraTreesRegressor(n_estimators=100, max_depth=10, n_jobs=-1, random_state=RANDOM_STATE),
    BaggingRegressor(n_estimators=10, max_samples=0.5, n_jobs=-1, random_state=RANDOM_STATE),
    XGBRegressor(n_estimators=100, max_depth=5, learning_rate=0.1, n_jobs=-1, random_state=RANDOM_STATE),
    LGBMRegressor(
        n_estimators=100,
        max_depth=5,
        learning_rate=0.1,
        n_jobs=-1,
        verbose=-1,  # silence LightGBM's per-iteration logging
        random_state=RANDOM_STATE,
    ),
]

### Results


In [None]:
# Ice Thickness
# Score every candidate model on the thickness target, then show the first
# rows of the ranking produced by find_top_models.
thickness_scores = test_independant_models(models, features, targets["ice_thickness"])
find_top_models(thickness_scores).head()

In [None]:
# Ice Velocity
# Score every candidate model on the velocity target, then show the first
# rows of the ranking produced by find_top_models.
velocity_scores = test_independant_models(models, features, targets["ice_velocity"])
find_top_models(velocity_scores).head()

In [None]:
# Ice Mask
# Score every candidate model on the mask target, then show the first rows
# of the ranking produced by find_top_models.
mask_scores = test_independant_models(models, features, targets["ice_mask"])
find_top_models(mask_scores).head()

## Further Testing


### Models


In [None]:
# Shared hyperparameters for the larger follow-up configurations.
MAX_DEPTH = 60
N_ESTIMATORS = 160
N_JOBS = -1
# Fixed seed: every estimator below is stochastic, so without it the scores
# differ on each run and the comparison is not reproducible.
RANDOM_STATE = 42

# Same model families as the baseline list, rebuilt with the shared settings.
# GradientBoostingRegressor is constructed without n_jobs, and
# BaggingRegressor without max_depth, exactly as in the baseline.
models = [
    RandomForestRegressor(
        n_estimators=N_ESTIMATORS, max_depth=MAX_DEPTH, n_jobs=N_JOBS, random_state=RANDOM_STATE
    ),
    GradientBoostingRegressor(
        n_estimators=N_ESTIMATORS, max_depth=MAX_DEPTH, learning_rate=0.1, random_state=RANDOM_STATE
    ),
    ExtraTreesRegressor(
        n_estimators=N_ESTIMATORS, max_depth=MAX_DEPTH, n_jobs=N_JOBS, random_state=RANDOM_STATE
    ),
    BaggingRegressor(
        n_estimators=N_ESTIMATORS, max_samples=0.5, n_jobs=N_JOBS, random_state=RANDOM_STATE
    ),
    XGBRegressor(
        n_estimators=N_ESTIMATORS, max_depth=MAX_DEPTH, learning_rate=0.1, n_jobs=N_JOBS,
        random_state=RANDOM_STATE,
    ),
    LGBMRegressor(
        n_estimators=N_ESTIMATORS, max_depth=MAX_DEPTH, learning_rate=0.1, n_jobs=N_JOBS,
        verbose=-1,  # silence LightGBM's per-iteration logging
        random_state=RANDOM_STATE,
    ),
]

### Results


In [None]:
# Evaluate the current model list on the velocity target only, passing an
# explicit split_year of 2085 (presumably the train/test boundary year --
# confirm against scripts.model_testing).
velocity_target = targets["ice_velocity"]
results = test_independant_models(
    models,
    features,
    velocity_target,
    split_year=2085,
)
results.head(6)

# Target-Dependent Testing


## Initial Testing


In [3]:
# Hyperparameters for the per-target estimators. They are set in this cell so
# it does not rely on values assigned by an earlier cell.
MAX_DEPTH = 60
N_ESTIMATORS = 160
N_JOBS = -1
# Fixed seed so the chained predictions (and therefore the reported scores)
# are reproducible across kernel restarts.
RANDOM_STATE = 42

# One estimator per target.
thickness_model = BaggingRegressor(
    n_estimators=N_ESTIMATORS, max_samples=0.5, n_jobs=N_JOBS, random_state=RANDOM_STATE
)
mask_model = BaggingRegressor(
    n_estimators=N_ESTIMATORS, max_samples=0.5, n_jobs=N_JOBS, random_state=RANDOM_STATE
)
velocity_model = ExtraTreesRegressor(
    n_estimators=N_ESTIMATORS, max_depth=MAX_DEPTH, n_jobs=N_JOBS, random_state=RANDOM_STATE
)

# Listed in the same order as target_order below.
models = [thickness_model, mask_model, velocity_model]

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
results = test_dependent_models(
    models,
    features,
    targets,
    target_order=['ice_thickness', 'ice_mask', 'ice_velocity'],
    split_year=2085,
)
end_time = time.perf_counter()

print(f"\nTotal execution time: {end_time - start_time:.2f} seconds")
print(results)

Starting test_dependent_models with split_year: 2085
Features shape: (194102, 11)
Targets shape: (194102, 3)
Train set shape: (157990, 11)
Test set shape: (36112, 11)

Training model for ice_thickness
Features used: x, y, precipitation, air_temperature, ocean_temperature, year, distance_to_pole, precipitation_rolling_std, air_temperature_rolling_std, log_air_temperature, coastline
Finished training ice_thickness in 36.65 seconds

Training model for ice_mask
Features used: x, y, precipitation, air_temperature, ocean_temperature, year, distance_to_pole, precipitation_rolling_std, air_temperature_rolling_std, log_air_temperature, coastline, predicted_ice_thickness
Finished training ice_mask in 13.79 seconds

Training model for ice_velocity
Features used: x, y, precipitation, air_temperature, ocean_temperature, year, distance_to_pole, precipitation_rolling_std, air_temperature_rolling_std, log_air_temperature, coastline, predicted_ice_thickness, predicted_ice_mask
Finished training ice_vel

## Order Testing


In [None]:
order1 = ['ice_thickness', 'ice_mask', 'ice_velocity']
order2 = ['ice_mask', 'ice_thickness', 'ice_velocity']
order3 = ['ice_velocity', 'ice_thickness', 'ice_mask']

# Estimator configured for each target. Previously the model list stayed
# [thickness, mask, velocity] for every ordering, so a reordered target list
# would fit e.g. the velocity estimator on ice_mask and blur the effect of the
# ordering with the effect of swapping models.
# NOTE(review): this assumes test_dependent_models pairs models[i] with
# target_order[i] -- confirm against scripts.model_testing.
model_for_target = {
    'ice_thickness': thickness_model,
    'ice_mask': mask_model,
    'ice_velocity': velocity_model,
}


def _models_in_order(order):
    """Return the per-target estimators arranged to match ``order``."""
    return [model_for_target[target] for target in order]


# Test the dependent models with different orderings
results1 = test_dependent_models(_models_in_order(order1), features, targets, order1, split_year=2050)
print("Order 1 results:")
print(results1)

results2 = test_dependent_models(_models_in_order(order2), features, targets, order2, split_year=2050)
print("\nOrder 2 results:")
print(results2)

results3 = test_dependent_models(_models_in_order(order3), features, targets, order3, split_year=2050)
print("\nOrder 3 results:")
print(results3)

## Further Testing
