# Model Testing


## Setup


In [1]:
from scripts.preprocessing.data_loader import get_train_test_splits, get_combined_dataset
from scripts.preprocessing.preprocessor import apply_minmax_scaling
from scripts.models.model_list import load_models
from scripts.models.model_testing import test_models
from IPython.display import display
from scripts.models.gp import apply_gp

# Hold out 20% of the data as the test split.
X_train, X_test, y_train, y_test = get_train_test_splits(test_size=0.2)

# Min-max scale features and targets of each split, keeping the returned scalers.
# NOTE(review): the test split is scaled by its own call instead of with the
# scalers returned for the training split; the displayed frame shows `year`
# reaching 1.0 in the test rows, so train and test may not share one scale.
# Confirm this is intended before trusting test metrics.
X_train, y_train, X_train_scaler, y_train_scaler = apply_minmax_scaling(X_train, y_train)
X_test, y_test, X_test_scaler, y_test_scaler = apply_minmax_scaling(X_test, y_test)

# Combined train/test frame, shown for a quick visual check.
df = get_combined_dataset(X_train, y_train, X_test, y_test)
display(df)

# Model collections used by the testing sections below.
(
    TEST_SET,
    ALL_SINGLE_TARGET_MODELS,
    REFINED_SINGLE_TARGET_MODELS,
    ALL_MULTI_TARGET_MODELS,
    REFINED_MULTI_TARGET_MODELS,
) = load_models()
print(f"Number of single-target models: {len(ALL_SINGLE_TARGET_MODELS)}")
print(f"Number of multi-target models: {len(ALL_MULTI_TARGET_MODELS)}")

Train years: 2015 to 2083
Test years: 2084 to 2100


Unnamed: 0,x,y,bedrock_elevation,precipitation,air_temperature,ocean_temperature,year,distance_to_pole,bedrock_below_sea_level,set,ice_thickness,ice_velocity,ice_mask
0,0.0,1.0,0.118762,0.180874,0.795808,0.268660,0.0,0.0,1.0,train,0.0,0.0,1.0
1,0.0,1.0,0.163009,0.175715,0.793057,0.258708,0.0,0.0,1.0,train,0.0,0.0,1.0
2,0.0,1.0,0.042803,0.173047,0.791427,0.251029,0.0,0.0,1.0,train,0.0,0.0,1.0
3,0.0,1.0,0.135461,0.178002,0.790789,0.245280,0.0,0.0,1.0,train,0.0,0.0,1.0
4,0.0,1.0,0.087251,0.190951,0.790841,0.240754,0.0,0.0,1.0,train,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
194097,0.0,0.0,0.347665,0.592466,0.831212,0.272332,1.0,0.0,1.0,test,0.0,0.0,1.0
194098,0.0,0.0,0.326669,0.557774,0.838175,0.285703,1.0,0.0,1.0,test,0.0,0.0,1.0
194099,0.0,0.0,0.365826,0.535849,0.843655,0.296153,1.0,0.0,1.0,test,0.0,0.0,1.0
194100,0.0,0.0,0.416587,0.533697,0.851775,0.315210,1.0,0.0,1.0,test,0.0,0.0,1.0


Number of single-target models: 23
Number of multi-target models: 8


## Broad Testing


### Single-Target


In [None]:
# Broad sweep over every single-target model.
# cv=5 presumably requests 5-fold cross-validation — confirm in scripts.models.model_testing.
broad_single_target_results = test_models(
    ALL_SINGLE_TARGET_MODELS,
    multi=False,
    cv=5,
    refined=False,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)

### Multi-Target


In [None]:
# Broad sweep over every multi-target model.
# cv=5 presumably requests 5-fold cross-validation — confirm in scripts.models.model_testing.
broad_multi_target_results = test_models(
    ALL_MULTI_TARGET_MODELS,
    multi=True,
    cv=5,
    refined=False,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)

## Refined Testing


### Single-Target


In [None]:
# Refined pass over the shortlisted single-target models (refined=True).
# cv=5 presumably requests 5-fold cross-validation — confirm in scripts.models.model_testing.
refined_single_target_results = test_models(
    REFINED_SINGLE_TARGET_MODELS,
    multi=False,
    cv=5,
    refined=True,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)

### Multi-Target


In [None]:
# Refined pass over the shortlisted multi-target models (refined=True).
# cv=5 presumably requests 5-fold cross-validation — confirm in scripts.models.model_testing.
refined_multi_target_results = test_models(
    REFINED_MULTI_TARGET_MODELS,
    multi=True,
    cv=5,
    refined=True,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)

## Sequential Models

_TODO: this section is currently empty — no sequential-model experiments have been added yet._


## Genetic Programming


In [None]:
# Run the genetic-programming routine on the scaled splits and print the
# three values it returns (best model, train metrics, test metrics).
best_model, train_metrics, test_metrics = apply_gp(X_train, X_test, y_train, y_test)
for label, value in (
    ("Best Model:", best_model),
    ("Train Metrics:", train_metrics),
    ("Test Metrics:", test_metrics),
):
    print(label, value)

## KAN


In [2]:
# Environment check: report the installed PyTorch version and whether CUDA is usable.
import torch

for probe in (torch.__version__, torch.cuda.is_available()):
    print(probe)

OSError: [WinError 126] The specified module could not be found. Error loading "c:\Users\samma\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\lib\fbgemm.dll" or one of its dependencies.

In [None]:
# Fit a KAN (pykan) on the scaled training data, convert it to symbolic
# formulas, and score the resulting model on the test split.
import torch
from kan import KAN  # explicit import instead of `from kan import *`
from kan.utils import create_dataset, ex_round
from sklearn.metrics import mean_squared_error, r2_score

# Every tensor below is float64. Set torch's default dtype to match so the
# model parameters are created as float64 too (pykan's own examples do this
# before constructing a KAN).
torch.set_default_dtype(torch.float64)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Convert the pandas data to PyTorch tensors on the chosen device
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float64).to(device)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float64).to(device)

# Dataset dictionary with the keys passed to model.fit below
dataset = {
    'train_input': X_train_tensor,
    'train_label': y_train_tensor,
    'test_input': torch.tensor(X_test.values, dtype=torch.float64).to(device),
    'test_label': torch.tensor(y_test.values, dtype=torch.float64).to(device)
}

# Network shape: one input per feature column, one output per target column.
input_dim = X_train.shape[1]
output_dim = y_train.shape[1]
hidden_dim = 10  # tunable width of the single hidden layer

model = KAN(width=[input_dim, hidden_dim, output_dim], grid=3, k=3, seed=42, device=device)

# First fit with the lamb=0.001 penalty, then prune, refine the grid and refit.
model.fit(dataset, opt="LBFGS", steps=100, lamb=0.001)

model = model.prune()
model = model.refine(10)
model.fit(dataset, opt="LBFGS", steps=50)

# Snap the learned activations to symbolic functions drawn from this library.
lib = ['x','x^2','x^3','x^4','exp','log','sqrt','tanh','sin','abs']
model.auto_symbolic(lib=lib)

formula = model.symbolic_formula()
for i, f in enumerate(formula[0]):
    print(f"Target {i+1}: {ex_round(f, 4)}")

# Inference only: no autograd graph is needed for scoring.
with torch.no_grad():
    predictions = model(dataset['test_input'])

# Convert once and reuse for both metrics.
predictions_np = predictions.cpu().detach().numpy()

mse = mean_squared_error(y_test, predictions_np)
r2 = r2_score(y_test, predictions_np)

print(f"MSE: {mse}")
print(f"R2 Score: {r2}")