# Model Testing


## Setup


In [1]:
from scripts.preprocessing.data_loader import get_train_test_splits, get_combined_dataset
from scripts.preprocessing.preprocessor import apply_minmax_scaling
from scripts.models.model_list import load_models
from scripts.models.model_testing import test_models
from IPython.display import display
from scripts.visualisations.plotting import plot_averaged_heatmap
from scripts.visualisations.helpers import rescale_features

# Load the train/test splits (test_size=0.2) and min-max scale each split.
# X_train / y_train / X_test / y_test are rebound to the scaled outputs here, so
# every later cell works with scaled data.
X_train, X_test, y_train, y_test = get_train_test_splits(test_size=0.2)
X_train, y_train, train_scales = apply_minmax_scaling(X_train, y_train)
# NOTE(review): the test split is scaled by its own call instead of reusing
# train_scales. If apply_minmax_scaling derives min/max from its inputs, train
# and test end up on different scales — confirm this is intended.
X_test, y_test, test_scales = apply_minmax_scaling(X_test, y_test)
# Combine the scaled splits into a single frame; later code filters it by its
# 'set' column.
df = get_combined_dataset(X_train, y_train, X_test, y_test)

# Show the combined frame and its summary statistics.
display(df)
display(df.describe())
# DataFrame.info() prints its report itself and returns None, so wrapping it in
# display() only appended a stray "None" to the cell output.
df.info()

# load_models() returns five values; they are unpacked in this order.
(
    TEST_SET,
    ALL_SINGLE_TARGET_MODELS,
    REFINED_SINGLE_TARGET_MODELS,
    ALL_MULTI_TARGET_MODELS,
    REFINED_MULTI_TARGET_MODELS,
) = load_models()

# Report how many models the two broad testing passes will cover.
print(f"Number of single-target models: {len(ALL_SINGLE_TARGET_MODELS)}")
print(f"Number of multi-target models: {len(ALL_MULTI_TARGET_MODELS)}")

# Take the rows tagged 'test' (copied, so the combined frame is left untouched)
# and pass them through rescale_features together with test_scales.
df_test = df[df['set'] == 'test'].copy()
df_test = rescale_features(df_test, test_scales)
# Plot the rescaled test subset. The previous call passed the full, still-scaled
# `df`, which left df_test unused and ran the heatmap over every row of the
# combined dataset.
# NOTE(review): intent inferred from the unused df_test — confirm.
test = plot_averaged_heatmap(df_test, "cold_proximity")

MemoryError: Unable to allocate 154. GiB for an array with shape (194102, 106756) and data type float64

## Broad Testing


### Single-Target


In [None]:
# Broad pass, single-target: run test_models over ALL_SINGLE_TARGET_MODELS with
# multi=False and refined=False, keeping whatever it returns.
broad_single_target_results = test_models(
    ALL_SINGLE_TARGET_MODELS,
    multi=False,
    cv=5,
    refined=False,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)

### Multi-Target


In [None]:
# Broad pass, multi-target: run test_models over ALL_MULTI_TARGET_MODELS with
# multi=True and refined=False, keeping whatever it returns.
broad_multi_target_results = test_models(
    ALL_MULTI_TARGET_MODELS,
    multi=True,
    cv=5,
    refined=False,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)

## Refined Testing


### Single-Target


In [None]:
# Refined pass, single-target: run test_models over REFINED_SINGLE_TARGET_MODELS
# with multi=False and refined=True, keeping whatever it returns.
refined_single_target_results = test_models(
    REFINED_SINGLE_TARGET_MODELS,
    multi=False,
    cv=5,
    refined=True,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)

### Multi-Target


In [None]:
# Refined pass, multi-target: run test_models over REFINED_MULTI_TARGET_MODELS
# with multi=True and refined=True, keeping whatever it returns.
refined_multi_target_results = test_models(
    REFINED_MULTI_TARGET_MODELS,
    multi=True,
    cv=5,
    refined=True,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)

## Sequential Models


## Genetic Programming


In [None]:
# Run apply_gp on the splits and print the three values it returns.
# Arguments are positional, in the order X_train, X_test, y_train, y_test.
# NOTE(review): apply_gp is not imported in any cell visible in this notebook;
# on a fresh kernel this line would raise NameError unless it is imported
# elsewhere — add the import to the setup cell.
best_model, train_metrics, test_metrics = apply_gp(X_train, X_test, y_train, y_test)
print("Best Model:", best_model)
print("Train Metrics:", train_metrics)
print("Test Metrics:", test_metrics)

## KAN


In [None]:
import torch

# Environment check: the installed PyTorch version on the first line, then
# whether CUDA is available on the second.
print(torch.__version__, torch.cuda.is_available(), sep="\n")

In [None]:
from kan import *
import torch
from kan.utils import create_dataset, ex_round

from sklearn.metrics import mean_squared_error, r2_score

# Every tensor below is float64, so make float64 the default dtype before the
# model is constructed; otherwise parameters created with torch's default
# (float32) would not match the inputs. pykan's own tutorial does the same.
# NOTE(review): confirm against the installed pykan version.
torch.set_default_dtype(torch.float64)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def _to_tensor(data):
    """Return ``data.values`` as a float64 tensor moved to ``device``."""
    return torch.tensor(data.values, dtype=torch.float64).to(device)


# The training tensors keep their own names, as in the original cell.
X_train_tensor = _to_tensor(X_train)
y_train_tensor = _to_tensor(y_train)

# Dataset dictionary handed to model.fit(): train/test inputs and labels.
dataset = {
    'train_input': X_train_tensor,
    'train_label': y_train_tensor,
    'test_input': _to_tensor(X_test),
    'test_label': _to_tensor(y_test),
}

# Layer widths come from the column counts of X_train and y_train.
# NOTE(review): y_train.shape[1] requires y_train to be 2-D — confirm.
input_dim = X_train.shape[1]
output_dim = y_train.shape[1]
hidden_dim = 10  # You can adjust this

model = KAN(width=[input_dim, hidden_dim, output_dim], grid=3, k=3, seed=42, device=device)

# Initial fit (100 LBFGS steps, lamb=0.001), then prune, refine(10) and a
# shorter second fit (50 steps, no lamb argument).
model.fit(dataset, opt="LBFGS", steps=100, lamb=0.001)

model = model.prune()
model = model.refine(10)
model.fit(dataset, opt="LBFGS", steps=50)

# Candidate function names passed to auto_symbolic.
lib = ['x','x^2','x^3','x^4','exp','log','sqrt','tanh','sin','abs']
model.auto_symbolic(lib=lib)

# Print each entry of formula[0], rounded via ex_round(..., 4).
formula = model.symbolic_formula()
for i, f in enumerate(formula[0]):
    print(f"Target {i+1}: {ex_round(f, 4)}")

predictions = model(dataset['test_input'])
# Convert to NumPy once and reuse it for both metrics.
predictions_np = predictions.cpu().detach().numpy()

mse = mean_squared_error(y_test, predictions_np)
r2 = r2_score(y_test, predictions_np)

print(f"MSE: {mse}")
print(f"R2 Score: {r2}")