In [1]:
import sys
import os

# Root directory of the project: the parent of the current working directory.
root_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))

# Make project-local packages importable. Guarded so that re-running this
# cell does not keep appending the same entry to sys.path.
if root_dir not in sys.path:
    sys.path.append(root_dir)

In [2]:
from tqdm import tqdm
from torch.utils.data import DataLoader, TensorDataset
from torch.optim.lr_scheduler import OneCycleLR
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from TINTOlib.tinto import TINTO
from kan import *
from tqdm import tqdm
from sklearn.metrics import confusion_matrix


import traceback
import time
import gc
import copy
import traceback
import cv2
import math
import random
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import torch.nn as nn
import torch.optim as optim
import csv


# Prefer the GPU when one is visible to PyTorch, otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(device)



cuda


In [3]:
SEED = 381
# Seed every random number generator used in this notebook (PyTorch, the
# stdlib `random` module, NumPy) so that runs are reproducible.
for _seed_rng in (torch.manual_seed, random.seed, np.random.seed):
    _seed_rng(SEED)

# BEST 3.23

In [4]:
# Directory holding the dataset arrays, relative to the working directory.
folder = "data/puma8NH"
# Names given to the feature columns, in the order they appear in the arrays.
x_col = [
    "theta1", "theta2", "theta3",
    "thetad1", "thetad2", "thetad3",
    "tau1", "tau2",
]
# Name given to the single label column.
target_col = ["target"]

# Functions

### Load Dataset and Images

In [5]:
def load_and_clean(npy_filename, y_filename, x_col, target_col):
    """
    Read a feature array and a label array from the global `folder`, discard
    every row whose features contain a NaN, and return both as DataFrames.

    Args:
        npy_filename: File name of the feature array inside `folder`.
        y_filename: File name of the label array inside `folder`.
        x_col: Column names assigned to the feature DataFrame.
        target_col: Column names assigned to the label DataFrame.

    Returns:
        (df_X, df_y): the filtered features and the identically filtered labels.

    Raises:
        ValueError: if the two arrays do not have the same number of rows.
    """
    features = np.load(os.path.join(folder, npy_filename))
    labels = np.load(os.path.join(folder, y_filename))

    # Features and labels must be row-aligned before a shared mask is applied.
    if features.shape[0] != labels.shape[0]:
        raise ValueError("The number of rows in {} and {} do not match.".format(npy_filename, y_filename))

    # True for rows whose features are all non-NaN.
    keep = ~np.isnan(features).any(axis=1)

    df_X = pd.DataFrame(features[keep])
    df_y = pd.DataFrame(labels[keep])
    df_X.columns = x_col
    df_y.columns = target_col
    return df_X, df_y

In [6]:
def load_and_preprocess_data(X_train, y_train, X_test, y_test, X_val, y_val, image_model, problem_type, batch_size=32):
    """
    Build train/val/test DataLoaders that pair scaled tabular features with
    the synthetic images generated from them.

    For each split the images are generated with `image_model` under the
    global `images_folder` unless the split's sub-folder already exists. The
    model is fitted on the training split only and its hyperparameters are
    saved to `<images_folder>/model.pkl`. Image paths are read from
    `<images_folder>/<split>/<problem_type>.csv`.

    Args:
        X_train, X_val, X_test: Feature DataFrames.
        y_train, y_val, y_test: Label containers exposing `.values`.
        image_model: Object providing `fit_transform`, `transform` and
            `saveHyperparameters`.
        problem_type: Base name of the CSV index written next to the images.
        batch_size: Batch size used by all three DataLoaders.

    Returns:
        (train_loader, val_loader, test_loader, attributes, imgs_shape) where
        `attributes` is the number of feature columns and `imgs_shape` is
        (channels, height, width) of the first training image.
    """
    split_names = ("train", "val", "test")
    raw = {
        "train": (X_train, y_train),
        "val": (X_val, y_val),
        "test": (X_test, y_test),
    }

    # The image generator receives the features with the target appended.
    with_target = {}
    for name in split_names:
        features, labels = raw[name]
        frame = features.copy()
        frame["target"] = labels.values
        with_target[name] = frame

    # Generate (if needed) and index the images, one split at a time.
    image_index = {}
    for name in split_names:
        split_dir = f'{images_folder}/{name}'
        if os.path.exists(split_dir):
            print("The images are already generated")
        elif name == "train":
            image_model.fit_transform(with_target[name], split_dir)
            image_model.saveHyperparameters(f'{images_folder}/model.pkl')
        else:
            image_model.transform(with_target[name], split_dir)

        listing = pd.read_csv(os.path.join(split_dir, f'{problem_type}.csv'))
        # The CSV stores paths relative to the split folder; make them usable from here.
        listing["images"] = images_folder + f"/{name}/" + listing["images"]
        image_index[name] = listing

    # Read every image into memory (cv2 yields height x width x channels arrays).
    X_train_img = np.array([cv2.imread(path) for path in image_index["train"]["images"]])
    X_val_img = np.array([cv2.imread(path) for path in image_index["val"]["images"]])
    X_test_img = np.array([cv2.imread(path) for path in image_index["test"]["images"]])

    # Min-max scale the tabular features; the scaler is fitted on train only.
    scaler = MinMaxScaler()
    X_train_num = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
    X_val_num = pd.DataFrame(scaler.transform(X_val), columns=X_val.columns)
    X_test_num = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

    attributes = len(X_train_num.columns)
    height, width, channels = X_train_img[0].shape
    imgs_shape = (channels, height, width)

    print("Images shape: ", imgs_shape)
    print("Attributes: ", attributes)

    def _tabular_tensor(frame):
        return torch.as_tensor(frame.values, dtype=torch.float32)

    def _image_tensor(images):
        # Channels-last -> channels-first, pixel values rescaled by 1/255.
        return torch.as_tensor(images, dtype=torch.float32).permute(0, 3, 1, 2) / 255.0

    def _label_tensor(labels):
        return torch.as_tensor(labels.values, dtype=torch.float32).reshape(-1, 1)

    train_dataset = TensorDataset(_tabular_tensor(X_train_num), _image_tensor(X_train_img), _label_tensor(y_train))
    val_dataset = TensorDataset(_tabular_tensor(X_val_num), _image_tensor(X_val_img), _label_tensor(y_val))
    test_dataset = TensorDataset(_tabular_tensor(X_test_num), _image_tensor(X_test_img), _label_tensor(y_test))

    # Only the training loader shuffles.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)

    return train_loader, val_loader, test_loader, attributes, imgs_shape

In [7]:
def complete_coordinate_and_xcol(coordinate, x_col):
    """
    Pad a feature-to-pixel mapping so that every cell of its bounding grid
    has an entry.

    Args:
        coordinate: Tuple (row_array, col_array) giving one grid cell per feature.
        x_col: Feature names, aligned with the coordinate arrays.

    Returns:
        (completed_coordinate, new_x_col): the coordinate arrays and name list
        with one extra entry per unoccupied cell. Unoccupied cells are visited
        in sorted (row, col) order and named 'Ex1', 'Ex2', ...
    """
    rows, cols = coordinate
    occupied = set(zip(rows, cols))

    # Row-major scan of the bounding grid, keeping only the empty cells.
    gaps = sorted(
        (r, c)
        for r in range(rows.max() + 1)
        for c in range(cols.max() + 1)
        if (r, c) not in occupied
    )

    filled_rows = list(rows) + [r for r, _ in gaps]
    filled_cols = list(cols) + [c for _, c in gaps]
    filled_names = list(x_col) + [f"Ex{n}" for n in range(1, len(gaps) + 1)]

    return (np.array(filled_rows), np.array(filled_cols)), filled_names

In [8]:
def plot_feature_mapping(x_col, coordinate, scale=(4,4)):
    """
    Draw a grid showing which feature name is assigned to which cell.

    Args:
        x_col: Feature names, aligned with the coordinate arrays.
        coordinate: Tuple (row_array, col_array) of grid positions.
        scale: (rows, cols) of the grid to draw.

    Positions without a matching name in `x_col` are labelled "?".
    """
    n_rows, n_cols = scale[0], scale[1]
    labels = np.full(scale, "", dtype=object)
    rows, cols = coordinate
    n_named = len(x_col)
    for position, (r, c) in enumerate(zip(rows, cols)):
        labels[r, c] = x_col[position] if position < n_named else "?"

    plt.figure(figsize=(n_cols * 2, n_rows * 2))
    box_style = dict(facecolor='white', edgecolor='gray')
    for i, j in np.ndindex(n_rows, n_cols):
        plt.text(j, i, labels[i, j], ha='center', va='center', fontsize=10,
                 bbox=box_style)

    plt.xticks(np.arange(n_cols))
    plt.yticks(np.arange(n_rows))
    plt.grid(True)
    plt.title("Feature → Pixel Mapping")
    plt.gca().invert_yaxis()  # So row 0 is at the top
    plt.show()

In [9]:
def combine_loader(loader):
    """
    Concatenate every batch yielded by `loader` along dimension 0.

    Each batch must be a 3-tuple (mlp_tensor, img_tensor, target_tensor).

    Returns:
        Tuple (mlp, img, target) of the concatenated tensors.
    """
    order = ("mlp", "img", "target")
    collected = {key: [] for key in order}
    for mlp, img, target in loader:
        collected["mlp"].append(mlp)
        collected["img"].append(img)
        collected["target"].append(target)
    return tuple(torch.cat(collected[key], dim=0) for key in order)

### Functions for KAN

In [10]:
def average_rmse(output, val_target):
    """
    Root mean squared error between two tensors.

    Args:
        output (torch.Tensor): Predicted values.
        val_target (torch.Tensor): Ground-truth values.

    Returns:
        float: sqrt(mean((output - val_target) ** 2)).
    """
    residual = output - val_target
    return torch.sqrt(torch.mean(residual ** 2)).item()

In [11]:
def plot_sorted_feature_importance(columns, importances, plot=False):
    """
    Rank features by importance (highest first), print the ranking and
    optionally draw it as a horizontal bar chart.

    Args:
        columns: Feature names.
        importances: One score per feature; a torch.Tensor is converted to a
            NumPy array on the CPU first.
        plot: When True, also show the bar chart.

    Returns:
        List of (name, importance) pairs sorted by descending importance.
    """
    if isinstance(importances, torch.Tensor):
        importances = importances.detach().cpu().numpy()

    ranked = sorted(zip(columns, importances), key=lambda pair: pair[1], reverse=True)
    print(ranked)
    names, values = zip(*ranked)

    if not plot:
        return ranked

    plt.figure(figsize=(6, 3))
    plt.barh(names, values, color='royalblue')
    plt.xlabel('Importance')
    plt.title('KAN Feature Importances')
    plt.gca().invert_yaxis()
    plt.tight_layout()
    plt.show()
    return ranked

### Grad CAM Functions

In [12]:
def heatmap_to_feature_relevance(heatmap, coordinate, x_col, zoom=1):
    """
    Read one heatmap value per feature.

    Each feature's grid cell (row, col) is scaled by `zoom` and sampled at the
    offset `zoom // 2` inside the resulting block. Features without a
    coordinate entry, or whose sample point falls beyond the heatmap's
    lower/right edge, are left out.

    Returns:
        dict mapping feature name -> heatmap value (via `.item()`).
    """
    half = zoom // 2
    n_mapped = len(coordinate[0])
    relevance = {}

    for idx, name in enumerate(x_col):
        if idx >= n_mapped:
            continue
        y = coordinate[0][idx] * zoom + half
        x = coordinate[1][idx] * zoom + half
        if y >= heatmap.shape[0] or x >= heatmap.shape[1]:
            continue
        relevance[name] = heatmap[y, x].item()

    return relevance

def plot_feature_relevance_bar(feature_scores, plot=False):
    """
    Rank a {feature: score} mapping by descending score, print the ranking
    and optionally show it as a horizontal bar chart.

    Returns:
        List of (feature, score) pairs, highest score first.
    """
    ranked = sorted(feature_scores.items(), key=lambda entry: entry[1], reverse=True)
    print(ranked)
    names, values = zip(*ranked)

    if not plot:
        return ranked

    plt.figure(figsize=(6, 3))
    plt.barh(names, values, color='royalblue')
    plt.xlabel("Grad-CAM Relevance")
    plt.title("Feature Relevance for Test")
    plt.gca().invert_yaxis()
    plt.tight_layout()
    plt.show()
    return ranked

In [13]:
def compute_avg_feature_relevance_from_val(model, model_state, val_inputs, val_imgs, coordinate, x_col, zoom=1):
    """
    Average per-feature Grad-CAM relevance over a set of instances.

    For every (numerical input, image) pair, `grad_cam_side_by_side` is called
    with `show=False`; its result is treated as a heatmap and converted to
    per-feature scores with `heatmap_to_feature_relevance`. Progress is shown
    with a tqdm bar.

    Args:
        model: Trained model.
        model_state: Trained weights, forwarded to `grad_cam_side_by_side`.
        val_inputs: Sequence or tensor of numerical inputs.
        val_imgs: Sequence or tensor of image inputs.
        coordinate: Tuple (row array, col array) mapping features to pixels.
        x_col: Feature names (including any filler entries).
        zoom: Zoom level used when the images were generated.

    Returns:
        dict mapping each name in `x_col` to its mean score; features that
        never received a score map to 0.0.
    """
    per_feature = {name: [] for name in x_col}

    pairs = tqdm(zip(val_inputs, val_imgs), total=len(val_inputs), desc="Computing Grad-CAM")
    for sample_num, sample_img in pairs:
        cam = grad_cam_side_by_side(
            model=model,
            model_state=model_state,
            num_input=sample_num,
            img_input=sample_img,
            coordinate=coordinate,
            x_col=x_col,
            zoom=zoom,
            show=False
        )
        sample_scores = heatmap_to_feature_relevance(cam, coordinate, x_col, zoom)
        for name, score in sample_scores.items():
            per_feature[name].append(score)

    averages = {}
    for name, collected in per_feature.items():
        averages[name] = float(np.mean(collected)) if collected else 0.0

    return averages


def plot_avg_feature_relevance_from_val(model, val_inputs, val_imgs, coordinate, x_col, zoom=1, model_state=None):
    """
    Compute the average Grad-CAM feature relevance and print/rank it.

    Args:
        model: Trained model.
        val_inputs: Sequence or tensor of numerical inputs.
        val_imgs: Sequence or tensor of image inputs.
        coordinate: Tuple (row array, col array) mapping features to pixels.
        x_col: Feature names (including any filler entries).
        zoom: Zoom level used when the images were generated.
        model_state: Weights forwarded to
            `compute_avg_feature_relevance_from_val`. When omitted, the
            model's current `state_dict()` is used.
            NOTE(review): assumes the Grad-CAM helper only loads this state
            into `model` -- confirm.

    Returns:
        dict of average relevance per feature.
    """
    if model_state is None:
        model_state = model.state_dict()

    # Keyword arguments are used on purpose: the callee takes `model_state` as
    # its second positional parameter, so the previous positional call shifted
    # every argument by one slot.
    avg_scores = compute_avg_feature_relevance_from_val(
        model=model,
        model_state=model_state,
        val_inputs=val_inputs,
        val_imgs=val_imgs,
        coordinate=coordinate,
        x_col=x_col,
        zoom=zoom,
    )
    plot_feature_relevance_bar(avg_scores)
    return avg_scores

### CNN Functions

### Hybrid Functions

In [14]:
def print_mkan_vs_cnn_relevance(feature_scores, mkan_len):
    """
    Split a score vector into its first `mkan_len` entries (KAN branch) and
    the remainder (CNN branch), and report each part's share of the total.

    Args:
        feature_scores: 1-D scores; a torch.Tensor is converted to NumPy first.
        mkan_len: Number of leading entries attributed to the KAN branch.

    Returns:
        (kan_share, cnn_share) as Python floats.
    """
    if isinstance(feature_scores, torch.Tensor):
        feature_scores = feature_scores.detach().cpu().numpy()

    kan_total = feature_scores[:mkan_len].sum()
    cnn_total = feature_scores[mkan_len:].sum()

    m_kan_relevance_perct = float(kan_total / (kan_total + cnn_total))
    cnn_relevance_perct = float(cnn_total / (kan_total + cnn_total))

    print(f"M_KAN Relevance: {m_kan_relevance_perct}")
    print(f"CNN Relevance: {cnn_relevance_perct}")
    return m_kan_relevance_perct, cnn_relevance_perct

In [15]:
def fit_hybrid_dataloaders(model,
                           dataset,
                           opt="AdamW",
                           steps=100,
                           log=1,
                           lamb=0.,
                           lamb_l1=1.,
                           lamb_entropy=2.,
                           lamb_coef=0.,
                           lamb_coefdiff=0.,
                           update_grid=True,
                           grid_update_num=10,
                           loss_fn=None,
                           lr=1.,
                           start_grid_update_step=-1,
                           stop_grid_update_step=50,
                           batch=-1,
                           metrics=None,
                           save_fig=False,
                           in_vars=None,
                           out_vars=None,
                           beta=3,
                           save_fig_freq=1,
                           img_folder='./video',
                           singularity_avoiding=False,
                           y_th=1000.,
                           reg_metric='edge_forward_spline_n',
                           display_metrics=None,
                           sum_f_reg=True):
    """
    Train a hybrid model (KAN branch `model.m_kan`, fusion layer
    `model.final_kan`) with a steps-based loop adapted from KAN.fit(),
    including grid updates and regularization.

    Args:
        model: Hybrid model exposing `m_kan`, `final_kan`,
            `get_concat_output(mlp, img)` and `forward(mlp, img)`.
        dataset: Dict of pre-combined tensors with the keys
            'train_input', 'train_img', 'train_label',
            'val_input', 'val_img', 'val_label'.
        opt: "AdamW" or "LBFGS".
        steps: Number of optimisation steps.
        lamb: Weight of the regularization term. The term is only computed
            when `model.m_kan.save_act` is truthy.
        lamb_l1, lamb_entropy, lamb_coef, lamb_coefdiff: Forwarded to `get_reg`.
        update_grid, grid_update_num, start_grid_update_step,
        stop_grid_update_step: Control when the KAN grids are refreshed.
        loss_fn: Callable (pred, target) -> scalar tensor; defaults to MSE.
        lr: Learning rate.
        batch: Mini-batch size; -1 (or anything larger than the training set)
            means full batch.
        metrics: Optional callables; each is called with no arguments every
            step and its `.item()` is logged under its `__name__`.
        save_fig, in_vars, out_vars, beta, save_fig_freq, img_folder:
            Optional per-step figure snapshots.
        reg_metric: Regularization metric name forwarded to `get_reg`.
        display_metrics: Optional names (strings or callables) of entries in
            the results dict to show in the progress bar.
        sum_f_reg: When True, add `final_kan`'s regularizer to `m_kan`'s.
        log, singularity_avoiding, y_th: Accepted for signature compatibility
            but not used by this function.

    Returns:
        (best_model_state, results, best_epoch):
            best_model_state: deep copy of `model.state_dict()` at the step
                with the lowest validation loss, or None if no step qualified.
            results: dict with 'train_loss' and 'eval_loss' (square roots of
                the losses), 'reg', plus one list per metric.
            best_epoch: index of that step, or -1.

    Raises:
        ValueError: if `opt` is not "AdamW" or "LBFGS".
    """
    # Warn if regularization is requested but the activations needed for it are not saved.
    if lamb > 0. and not getattr(model.m_kan, "save_act", False):
        print("setting lamb=0. If you want to set lamb > 0, set model.m_kan.save_act=True")

    # Disable symbolic processing for training if applicable (KAN internal logic).
    if hasattr(model.m_kan, "disable_symbolic_in_fit"):
        old_save_act, old_symbolic_enabled = model.m_kan.disable_symbolic_in_fit(lamb)
        f_old_save_act, f_old_symbolic_enabled = model.final_kan.disable_symbolic_in_fit(lamb)
    else:
        old_save_act, old_symbolic_enabled = None, None
        f_old_save_act, f_old_symbolic_enabled = None, None

    pbar = tqdm(range(steps), desc='Training', ncols=100)

    # Default loss function (mean squared error) if not provided.
    if loss_fn is None:
        loss_fn = lambda x, y: torch.mean((x - y) ** 2)

    # Grid update frequency, clamped to at least 1: a value of 0 (when
    # stop_grid_update_step < grid_update_num) would make `step % freq` raise.
    if grid_update_num > 0:
        grid_update_freq = max(1, int(stop_grid_update_step / grid_update_num))
    else:
        grid_update_freq = 1

    n_train = dataset["train_input"].shape[0]
    n_eval = dataset["val_input"].shape[0]  # validation split is used for evaluation during training
    batch_size = n_train if batch == -1 or batch > n_train else batch

    if opt == "AdamW":
        optimizer = optim.AdamW(model.parameters(), lr=lr)
    elif opt == "LBFGS":
        optimizer = LBFGS(model.parameters(), lr=lr, history_size=10,
                          line_search_fn="strong_wolfe",
                          tolerance_grad=1e-32,
                          tolerance_change=1e-32,
                          tolerance_ys=1e-32)
    else:
        raise ValueError("Optimizer not recognized. Use 'AdamW' or 'LBFGS'.")

    results = {'train_loss': [], 'eval_loss': [], 'reg': []}
    if metrics is not None:
        for metric in metrics:
            results[metric.__name__] = []

    best_model_state = None
    best_epoch = -1
    best_loss = float('inf')

    def _loss_and_reg(indices):
        """Forward one training batch; return (total loss, regularization term)."""
        mlp_batch = dataset["train_input"][indices]
        img_batch = dataset["train_img"][indices]
        target_batch = dataset["train_label"][indices]
        outputs = model(mlp_batch, img_batch)
        train_loss = loss_fn(outputs, target_batch)
        if getattr(model.m_kan, "save_act", False):
            if reg_metric == 'edge_backward':
                model.m_kan.attribute()
                model.final_kan.attribute()
            if reg_metric == 'node_backward':
                model.m_kan.node_attribute()
                model.final_kan.node_attribute()
            reg = model.m_kan.get_reg(reg_metric, lamb_l1, lamb_entropy, lamb_coef, lamb_coefdiff)
            if sum_f_reg:
                reg = reg + model.final_kan.get_reg(reg_metric, lamb_l1, lamb_entropy, lamb_coef, lamb_coefdiff)
        else:
            # Zero regularizer placed on the same device as the loss.
            reg = torch.zeros((), device=train_loss.device)
        return train_loss + lamb * reg, reg

    for step in pbar:
        # Randomly sample indices for a mini-batch from the training set.
        train_indices = np.random.choice(n_train, batch_size, replace=False)
        # The full validation set is used for evaluation.
        eval_indices = np.arange(n_eval)
        cached_loss = {}

        # Closure for LBFGS, which may evaluate the loss several times per step;
        # the values from the last evaluation are kept for logging.
        def closure():
            optimizer.zero_grad()
            loss_val_inner, reg_val_inner = _loss_and_reg(train_indices)
            loss_val_inner.backward()
            cached_loss['loss'] = loss_val_inner.detach()
            cached_loss['reg'] = reg_val_inner.detach()
            return loss_val_inner

        # Perform grid update if applicable.
        if (step % grid_update_freq == 0 and step < stop_grid_update_step
                and update_grid and step >= start_grid_update_step):
            mlp_batch = dataset['train_input'][train_indices]
            cnn_batch = dataset['train_img'][train_indices]

            model.m_kan.update_grid(mlp_batch)
            # final_kan's grid is refreshed on the concatenated branch outputs it actually sees.
            concatenated = model.get_concat_output(mlp_batch, cnn_batch)
            model.final_kan.update_grid(concatenated)

        # Perform an optimizer step.
        if opt == "LBFGS":
            optimizer.step(closure)
            loss_val = cached_loss['loss']
            reg_val = cached_loss['reg']
        else:  # AdamW branch
            optimizer.zero_grad()
            loss_val, reg_val = _loss_and_reg(train_indices)
            loss_val.backward()
            optimizer.step()

        # NOTE(review): the model is never switched to eval mode here, so layers
        # such as BatchNorm behave as in training during this validation pass --
        # confirm this is intended.
        with torch.no_grad():
            mlp_eval = dataset["val_input"][eval_indices]
            img_eval = dataset["val_img"][eval_indices]
            target_eval = dataset["val_label"][eval_indices]
            eval_loss = loss_fn(model(mlp_eval, img_eval), target_eval)

        eval_loss_raw = eval_loss.detach().item()
        eval_loss_item = torch.sqrt(eval_loss.detach()).item()
        train_loss_item = torch.sqrt(loss_val.detach()).item()
        reg_item = reg_val.detach().item()

        # Record results (using square-root of loss similar to KAN.fit).
        results['train_loss'].append(train_loss_item)
        results['eval_loss'].append(eval_loss_item)
        results['reg'].append(reg_item)

        if metrics is not None:
            for metric in metrics:
                # Each metric is assumed to return a tensor.
                results[metric.__name__].append(metric().item())

        # Keep a snapshot of the weights at the lowest validation loss seen so far.
        # A NaN loss never compares as lower, so it is never selected.
        if eval_loss_raw < best_loss:
            best_epoch = step
            best_loss = eval_loss_raw
            best_model_state = copy.deepcopy(model.state_dict())

        # Update progress bar.
        if display_metrics is None:
            pbar.set_description("| train_loss: %.2e | eval_loss: %.2e | reg: %.2e |" %
                                 (train_loss_item, eval_loss_item, reg_item))
        else:
            desc = ""
            data = []
            for metric in display_metrics:
                # Accept both plain result keys (strings) and callables.
                metric_name = metric if isinstance(metric, str) else metric.__name__
                desc += f" {metric_name}: %.2e |"
                data.append(results[metric_name][-1])
            pbar.set_description(desc % tuple(data))

        # Optionally save a figure snapshot.
        if save_fig and step % save_fig_freq == 0:
            save_act_backup = getattr(model.m_kan, "save_act", False)
            model.m_kan.save_act = True
            model.plot(folder=img_folder, in_vars=in_vars, out_vars=out_vars, title=f"Step {step}", beta=beta)
            plt.savefig(os.path.join(img_folder, f"{step}.jpg"), bbox_inches='tight', dpi=200)
            plt.close()
            model.m_kan.save_act = save_act_backup

        # Stop early once the validation loss becomes NaN.
        if math.isnan(eval_loss_item):
            break

    # Restore the symbolic setting on both KANs (save_act is left as
    # disable_symbolic_in_fit set it, as before).
    if old_symbolic_enabled is not None:
        model.m_kan.symbolic_enabled = old_symbolic_enabled
    if f_old_symbolic_enabled is not None:
        model.final_kan.symbolic_enabled = f_old_symbolic_enabled
    if hasattr(model.m_kan, "log_history"):
        model.m_kan.log_history('fit')
    print(f"Best epoch {best_epoch}")
    return best_model_state, results, best_epoch

In [16]:
import traceback

def try_create_model(model_class, attributes, imgs_shape, kan_neurons, kan_grid, cnn_bottleneck_dim, alpha, hidden_dim, embed_dim, num_heads):
    """
    Instantiate `model_class` and smoke-test it with one random batch.

    The model is built with the given hyperparameters and run once on a batch
    of 4 random numerical inputs and 4 random images.

    Returns:
        The constructed model, or None if construction or the test forward
        pass raised (the traceback is printed in that case).
    """
    hyperparams = dict(
        cnn_bottleneck_dim=cnn_bottleneck_dim,
        alpha=alpha,
        hidden_dim=hidden_dim,
        embed_dim=embed_dim,
        num_heads=num_heads,
    )
    try:
        model = model_class(attributes, imgs_shape, kan_neurons, kan_grid, **hyperparams)

        # Smoke test: one forward pass on random data of the expected shapes.
        sample_num = torch.randn(4, attributes)
        sample_img = torch.randn(4, *imgs_shape)
        model(sample_num, sample_img)
    except Exception:
        print(f"Error creating or testing {model_class.__name__}:")
        traceback.print_exc()
        return None

    print(f"Successfully created and tested {model_class.__name__}")
    return model

In [17]:
def cnn_branch_relevance(model, best_model_state):
    """
    Average Grad-CAM feature relevance over the test split and rank it.

    Reads the notebook-level `dataset`, `completed_coordinate` and
    `completed_x_col`, and uses a fixed zoom of 2.

    Returns:
        The sorted (feature, score) list from `plot_feature_relevance_bar`.
    """
    relevance_args = dict(
        model=model,
        model_state=best_model_state,
        val_inputs=dataset["test_input"],
        val_imgs=dataset["test_img"],
        coordinate=completed_coordinate,
        x_col=completed_x_col,
        zoom=2,
    )
    mean_relevance = compute_avg_feature_relevance_from_val(**relevance_args)
    return plot_feature_relevance_bar(mean_relevance)

In [18]:
def train_and_plot_relevance(model_class, kan_neurons, kan_grid, lamb, steps, cnn_bottleneck_dim=-1, alpha=-1, hidden_dim=-1, embed_dim=-1, num_heads=-1, n_kan_len=None, filename=None, opt_col_val=None):
    """
    Build, train and evaluate one hybrid model, then append its metrics to a CSV.

    Uses the notebook-level `attributes`, `imgs_shape` and `dataset`. The
    model is trained with LBFGS, the best weights (lowest validation loss)
    are loaded, the test RMSE is printed, and the relevance split between the
    KAN and CNN inputs of `final_kan` is computed.

    Args:
        model_class: Hybrid model class to instantiate.
        kan_neurons, kan_grid: KAN width / grid size passed to the model.
        lamb: Regularization weight passed to training.
        steps: Number of training steps.
        cnn_bottleneck_dim, alpha, hidden_dim, embed_dim, num_heads:
            Model-specific hyperparameters (unused ones stay at -1).
        n_kan_len: Number of leading `final_kan` inputs that come from the KAN
            branch; defaults to `kan_neurons` when falsy.
        filename: Existing CSV file the result row is appended to.
        opt_col_val: Value written to the CSV's free-form column.

    Raises:
        RuntimeError: if the model could not be built, or training recorded
            no best model state (e.g. zero steps or only NaN losses).
    """
    torch.cuda.empty_cache()
    gc.collect()
    model = try_create_model(model_class, attributes, imgs_shape, kan_neurons=kan_neurons, kan_grid=kan_grid,
                             cnn_bottleneck_dim=cnn_bottleneck_dim, alpha=alpha, hidden_dim=hidden_dim, embed_dim=embed_dim, num_heads=num_heads)
    # try_create_model returns None on failure; stop here with a clear message
    # instead of failing later with an attribute error on None.
    if model is None:
        raise RuntimeError(f"Could not create {model_class.__name__}; see the traceback printed above.")

    best_model_state, _history, best_epoch = fit_hybrid_dataloaders(model, dataset, opt="LBFGS", lamb=lamb, steps=steps)
    if best_model_state is None:
        raise RuntimeError("Training did not record a best model state (no step had a comparable validation loss).")
    model.load_state_dict(best_model_state)

    # Test-set evaluation needs no autograd graph.
    # NOTE(review): the model is not switched to eval mode, so layers such as
    # BatchNorm behave as in training here -- confirm this is intended.
    with torch.no_grad():
        test_predictions = model(dataset['test_input'], dataset['test_img'])
    rmse = average_rmse(test_predictions, dataset['test_label'])
    print(rmse)

    if not n_kan_len:
        n_kan_len = kan_neurons
    k_rel, cnn_rel = print_mkan_vs_cnn_relevance(model.final_kan.feature_score, mkan_len=n_kan_len)

    # Per-branch "most relevant features" are currently disabled; empty strings
    # are written to the corresponding CSV columns.
    kan_mrf = ""  # plot_sorted_feature_importance(x_col, model.m_kan.feature_score)
    cnn_mrf = ""  # cnn_branch_relevance(model, best_model_state)

    append_row_to_csv(filename, kan_neurons, kan_grid, lamb, opt_col_val, rmse, best_epoch, k_rel, cnn_rel, kan_mrf, cnn_mrf)

### Write metrics

In [19]:
def create_csv_with_header(filename, columns_opt):
    """
    Create (or overwrite) a CSV file containing only the results header.

    Args:
        filename: Path of the CSV file to write.
        columns_opt: Name of the free-form fourth column.
    """
    header = ['kan_neurons', 'kan_grid', 'lamb', columns_opt, 'RMSE', 'Best_Epoch',
              'KAN_Relevance', 'CNN_Relevance', 'KAN M.R.F.', 'CNN M.R.F.']
    with open(filename, mode='w', newline='') as file:
        csv.writer(file).writerow(header)

In [20]:
def format_top_3(pairs):
    """
    Render the first three (name, value) pairs as "name: value" lines, with
    the value shown to two decimals. An empty input gives an empty string.
    """
    lines = []
    for name, value in pairs[:3]:
        lines.append(f"{name}: {value:.2f}")
    return '\n'.join(lines)

In [21]:
def append_row_to_csv(filename, kan_neurons, kan_grid, lamb, opt_col_val, acc, best_epoch, k_rel, cnn_rel, kan_mrf, cnn_mrf):
    """
    Append one result row to an existing CSV file.

    `kan_mrf` and `cnn_mrf` are rendered with `format_top_3`; every other
    value is written as given.

    Raises:
        FileNotFoundError: if `filename` does not exist yet (the file must be
            created with its header first).
    """
    row = [kan_neurons, kan_grid, lamb, opt_col_val, acc, best_epoch, k_rel, cnn_rel, format_top_3(kan_mrf), format_top_3(cnn_mrf)]
    if not os.path.isfile(filename):
        raise FileNotFoundError(f"{filename} does not exist. Please create the file first with a header.")
    with open(filename, mode='a', newline='') as file:
        csv.writer(file).writerow(row)

### Hybrid Model Classes

In [22]:
class Model3_1(nn.Module):
    """
    Hybrid regressor: a CNN branch (compressed by a linear bottleneck) and a
    KAN branch whose outputs are concatenated and fed to a final KAN layer
    with a single output.

    Args:
        attributes: Number of numerical input features.
        imgs_shape: Image shape as (channels, height, width).
        kan_neurons: Output width of the KAN branch.
        kan_grid: Grid size used by both KANs.
        cnn_bottleneck_dim: Output width of the CNN bottleneck. Required for
            this model; the -1 default is a placeholder and is rejected.
        alpha, hidden_dim, embed_dim, num_heads: Accepted so all hybrid models
            share one constructor signature; not used by this model.
        device: Device on which the sub-modules are created.

    Raises:
        ValueError: if `cnn_bottleneck_dim` is negative.
    """

    def __init__(self, attributes, imgs_shape, kan_neurons, kan_grid, cnn_bottleneck_dim=-1, alpha=-1, hidden_dim=-1, embed_dim=-1, num_heads=-1, device=device):
        super(Model3_1, self).__init__()

        if cnn_bottleneck_dim < 0:
            raise ValueError("Model3_1 requires a non-negative cnn_bottleneck_dim.")

        self.device = device

        # Spatial size seen by LayerNorm, derived from the input size instead
        # of being hard-coded. Each 3x3 conv with padding=2 adds 2 pixels per
        # dimension and the 2x2 max-pool halves it (floor). A 20x20 input
        # therefore gives 22 -> 11 -> 13, i.e. the previous fixed [32, 13, 13].
        _, in_height, in_width = imgs_shape
        norm_shape = [32, (in_height + 2) // 2 + 2, (in_width + 2) // 2 + 2]

        # CNN branch
        self.cnn_branch = nn.Sequential(
            nn.Conv2d(imgs_shape[0], 16, kernel_size=3, padding=2),     # out: 16 x (H+2) x (W+2)
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2),                                            # out: 16 x (H+2)//2 x (W+2)//2

            nn.Conv2d(16, 32, kernel_size=3, padding=2),                # out: 32 x ((H+2)//2+2) x ((W+2)//2+2)
            nn.LayerNorm(norm_shape),
            nn.Sigmoid(),
            nn.Flatten()
        ).to(device)

        # Dummy pass to get flattened size
        self.flat_size = self._get_flat_size(imgs_shape)

        # Bottleneck layer
        self.cnn_bottleneck = nn.Linear(self.flat_size, cnn_bottleneck_dim).to(device)

        # KAN branch
        self.m_kan = KAN(
            width=[attributes, kan_neurons],
            grid=kan_grid,
            k=3,
            seed=SEED,
            device=device
        )

        # Final KAN layer over the concatenated [KAN output, CNN bottleneck output]
        self.final_kan = KAN(
            width=[cnn_bottleneck_dim + kan_neurons, 1],
            grid=kan_grid,
            k=3,
            seed=SEED,
            device=device
        )

    def _get_flat_size(self, imgs_shape):
        """Return the flattened CNN feature size, measured with one all-zero image."""
        dummy_input = torch.zeros(1, *imgs_shape, device=self.device)
        x = self.cnn_branch(dummy_input)
        return x.shape[1]

    def get_concat_output(self, mlp_input, cnn_input):
        """Return the KAN-branch output concatenated with the bottlenecked CNN output (KAN first)."""
        kan_input = mlp_input.to(self.device)
        cnn_input = cnn_input.to(self.device)

        conv_out = self.cnn_branch(cnn_input)
        cnn_output = self.cnn_bottleneck(conv_out)

        kan_output = self.m_kan(kan_input)

        return torch.cat((kan_output, cnn_output), dim=1)

    def forward(self, mlp_input, cnn_input):
        """Predict from numerical features and their image representation."""
        concat_output = self.get_concat_output(mlp_input, cnn_input)
        return self.final_kan(concat_output)

In [23]:
class Model3_2(nn.Module):
    """
    Hybrid regressor: the flattened CNN features are scaled by `alpha`,
    concatenated with the KAN-branch output and fed to a final KAN layer with
    a single output.

    Args:
        attributes: Number of numerical input features.
        imgs_shape: Image shape as (channels, height, width).
        kan_neurons: Output width of the KAN branch.
        kan_grid: Grid size used by both KANs.
        alpha: Factor applied to the CNN features before concatenation.
        cnn_bottleneck_dim, hidden_dim, embed_dim, num_heads: Accepted so all
            hybrid models share one constructor signature; not used by this model.
        device: Device on which the sub-modules are created.
    """

    def __init__(self, attributes, imgs_shape, kan_neurons, kan_grid, cnn_bottleneck_dim=-1, alpha=-1, hidden_dim=-1, embed_dim=-1, num_heads=-1, device=device):
        super(Model3_2, self).__init__()

        # Stored first so helper methods use the instance's device rather than
        # the notebook-level global.
        self.device = device
        self.alpha = alpha

        # Spatial size seen by LayerNorm, derived from the input size instead
        # of being hard-coded. Each 3x3 conv with padding=2 adds 2 pixels per
        # dimension and the 2x2 max-pool halves it (floor). A 20x20 input
        # therefore gives 22 -> 11 -> 13, i.e. the previous fixed [32, 13, 13].
        _, in_height, in_width = imgs_shape
        norm_shape = [32, (in_height + 2) // 2 + 2, (in_width + 2) // 2 + 2]

        # CNN branch
        self.cnn_branch = nn.Sequential(
            nn.Conv2d(imgs_shape[0], 16, kernel_size=3, padding=2),     # out: 16 x (H+2) x (W+2)
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2),                                            # out: 16 x (H+2)//2 x (W+2)//2

            nn.Conv2d(16, 32, kernel_size=3, padding=2),                # out: 32 x ((H+2)//2+2) x ((W+2)//2+2)
            nn.LayerNorm(norm_shape),
            nn.Sigmoid(),
            nn.Flatten()
        ).to(device)

        # KAN branch over the numerical features
        self.m_kan = KAN(
            width=[attributes, kan_neurons],
            grid=kan_grid,
            k=3,
            seed=SEED,
            device=device
        )

        # Calculate the size of the flattened output
        self.flat_size = self._get_flat_size(imgs_shape)

        # Final KAN layer over the concatenated [KAN output, CNN features]
        self.final_kan = KAN(
            width=[self.flat_size + kan_neurons, 1],
            grid=kan_grid,
            k=3,
            seed=SEED,
            device=device
        )

    def _get_flat_size(self, imgs_shape):
        """Return the flattened CNN feature size, measured with a batch of 4 all-zero images."""
        dummy_input = torch.zeros(4, *imgs_shape, device=self.device)
        x = self.cnn_branch(dummy_input)
        return x.size(1)

    def get_concat_output(self, mlp_input, cnn_input):
        """Return the KAN-branch output concatenated with the alpha-scaled CNN features (KAN first)."""
        # Ensure inputs are moved to the correct device
        kan_input = mlp_input.to(self.device)
        cnn_input = cnn_input.to(self.device)

        cnn_output = self.cnn_branch(cnn_input)  # Process image input
        cnn_output = cnn_output * self.alpha
        kan_output = self.m_kan(kan_input)  # Process numerical input

        return torch.cat((kan_output, cnn_output), dim=1)

    def forward(self, mlp_input, cnn_input):
        """Predict from numerical features and their image representation."""
        concat_output = self.get_concat_output(mlp_input, cnn_input)
        return self.final_kan(concat_output)

In [24]:
class Model3_3(nn.Module):
    """Hybrid regressor that fuses a KAN branch and a CNN branch through a learned gate.

    The tabular input goes through ``m_kan`` and the image input through
    ``cnn_branch``. ``gate_net`` maps the concatenation of both outputs to one
    value ``alpha`` in [0, 1] per sample (sigmoid). The KAN features are scaled
    by ``1 - alpha`` and the CNN features by ``alpha`` before being concatenated
    and passed to ``final_kan``, which produces a single output per sample.

    Args:
        attributes: Number of tabular input features (input width of ``m_kan``).
        imgs_shape: Image shape as ``(channels, height, width)``.
        kan_neurons: Output width of ``m_kan``.
        kan_grid: ``grid`` argument forwarded to both KAN layers.
        hidden_dim: Hidden width of the gating MLP; must be positive.
        device: Device all sub-modules are created on / moved to.
        cnn_bottleneck_dim, alpha, embed_dim, num_heads: Not used by this model.
            Presumably kept so the Model3_x variants share one constructor
            signature — confirm against the training helper.

    Raises:
        ValueError: If ``hidden_dim`` is not positive (e.g. left at the ``-1``
            default).
    """

    def __init__(self, attributes, imgs_shape, kan_neurons, kan_grid, cnn_bottleneck_dim=-1, alpha=-1, hidden_dim=-1, embed_dim=-1, num_heads=-1, device=device):
        super(Model3_3, self).__init__()
        # Fail early with a clear message instead of a tensor-size error raised
        # from inside nn.Linear after the other branches were already built.
        if hidden_dim <= 0:
            raise ValueError(f"hidden_dim must be a positive integer, got {hidden_dim}")
        self.device = device

        # Spatial size reaching the LayerNorm: conv(k=3, p=2) adds 2, the 2x2
        # max-pool floor-halves, and the second conv(k=3, p=2) adds 2 again.
        # For 20x20 images this is 13x13, the value that used to be hard-coded.
        feat_h = (imgs_shape[1] + 2) // 2 + 2
        feat_w = (imgs_shape[2] + 2) // 2 + 2

        # CNN branch
        self.cnn_branch = nn.Sequential(
            nn.Conv2d(imgs_shape[0], 16, kernel_size=3, padding=2),     # out: 16 x (H+2) x (W+2)
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2),                                            # out: 16 x (H+2)//2 x (W+2)//2

            nn.Conv2d(16, 32, kernel_size=3, padding=2),                # out: 32 x feat_h x feat_w
            nn.LayerNorm([32, feat_h, feat_w]),
            nn.Sigmoid(),
            nn.Flatten()
        ).to(device)

        self.flat_size = self._get_flat_size(imgs_shape)

        # KAN branch
        self.m_kan = KAN(
            width=[attributes, kan_neurons],
            grid=kan_grid,
            k=3,
            seed=SEED,
            device=device
        )

        # Gating MLP: inputs are concatenated CNN + KAN representations
        self.gate_net = nn.Sequential(
            nn.Linear(self.flat_size + kan_neurons, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid()  # Output ∈ [0,1]
        ).to(device)

        # Final regressor (KAN layer)
        self.final_kan = KAN(
            width=[kan_neurons + self.flat_size, 1],
            grid=kan_grid,
            k=3,
            seed=SEED,
            device=device
        )

    def _get_flat_size(self, imgs_shape):
        """Return the flattened CNN feature count for images of ``imgs_shape``.

        A zero-filled batch of 4 images is passed through ``cnn_branch``. The
        probe runs in eval mode without autograd so it does not update the
        BatchNorm running statistics; the previous mode is restored afterwards.
        """
        dummy_input = torch.zeros(4, *imgs_shape, device=self.device)
        was_training = self.cnn_branch.training
        self.cnn_branch.eval()
        try:
            with torch.no_grad():
                flat = self.cnn_branch(dummy_input)
        finally:
            self.cnn_branch.train(was_training)
        return flat.size(1)

    def get_concat_output(self, mlp_input, cnn_input):
        """Return the gated, concatenated features fed to ``final_kan``.

        Returns a tensor of shape ``(B, kan_neurons + flat_size)`` whose first
        ``kan_neurons`` columns are the KAN output times ``1 - alpha`` and whose
        remaining columns are the CNN output times ``alpha``.
        """
        mlp_input = mlp_input.to(self.device)
        cnn_input = cnn_input.to(self.device)

        kan_out = self.m_kan(mlp_input)                  # shape: (B, kan_neurons)
        cnn_out = self.cnn_branch(cnn_input)             # shape: (B, flat_size)

        # The gate sees both representations and emits one weight per sample.
        concat = torch.cat((kan_out, cnn_out), dim=1)
        alpha = self.gate_net(concat)                    # shape: (B, 1)

        gated_kan = (1 - alpha) * kan_out                # shape: (B, kan_neurons)
        gated_cnn = alpha * cnn_out                      # shape: (B, flat_size)

        return torch.cat((gated_kan, gated_cnn), dim=1)  # shape: (B, kan_neurons + flat_size)

    def forward(self, mlp_input, cnn_input):
        """Run both branches, gate and fuse them, and apply the final KAN layer."""
        fused = self.get_concat_output(mlp_input, cnn_input)
        return self.final_kan(fused)


In [25]:
class Model3_4(nn.Module):
    """Hybrid regressor that combines a KAN branch and a CNN branch via attention.

    The KAN output is projected to a query and the flattened CNN output to a
    key and a value, each of width ``embed_dim``. ``nn.MultiheadAttention`` is
    applied to these and its output is passed to ``final_kan``, which produces
    a single value per sample.

    NOTE(review): query, key and value each contain exactly one token per
    sample. With a single key the softmax over keys is always 1, so the
    attention output is a linear function of the CNN-derived value only and
    does not depend on the query (the KAN branch) or on ``num_heads``. Confirm
    whether the CNN features should instead be split into several tokens.

    Args:
        attributes: Number of tabular input features (input width of ``m_kan``).
        imgs_shape: Image shape as ``(channels, height, width)``.
        kan_neurons: Output width of ``m_kan``.
        kan_grid: ``grid`` argument forwarded to both KAN layers.
        embed_dim: Width of the query/key/value projections; must be positive
            and divisible by ``num_heads``.
        num_heads: Number of attention heads; must be positive.
        device: Device all sub-modules are created on / moved to.
        cnn_bottleneck_dim, alpha, hidden_dim: Not used by this model.
            Presumably kept so the Model3_x variants share one constructor
            signature — confirm against the training helper.

    Raises:
        ValueError: If ``embed_dim`` or ``num_heads`` is not positive (e.g. left
            at the ``-1`` default) or ``embed_dim`` is not divisible by
            ``num_heads``.
    """

    def __init__(self, attributes, imgs_shape, kan_neurons, kan_grid, cnn_bottleneck_dim=-1, alpha=-1, hidden_dim=-1, embed_dim=-1, num_heads=-1, device=device):
        super(Model3_4, self).__init__()
        # Validate before building anything so a bad setting fails immediately
        # and with a message that names the offending argument.
        if embed_dim <= 0 or num_heads <= 0:
            raise ValueError(
                f"embed_dim and num_heads must be positive integers, got embed_dim={embed_dim}, num_heads={num_heads}"
            )
        if embed_dim % num_heads != 0:
            raise ValueError(
                f"embed_dim ({embed_dim}) must be divisible by num_heads ({num_heads})"
            )
        self.device = device

        # Spatial size reaching the LayerNorm: conv(k=3, p=2) adds 2, the 2x2
        # max-pool floor-halves, and the second conv(k=3, p=2) adds 2 again.
        # For 20x20 images this is 13x13, the value that used to be hard-coded.
        feat_h = (imgs_shape[1] + 2) // 2 + 2
        feat_w = (imgs_shape[2] + 2) // 2 + 2

        # CNN branch
        self.cnn_branch = nn.Sequential(
            nn.Conv2d(imgs_shape[0], 16, kernel_size=3, padding=2),     # out: 16 x (H+2) x (W+2)
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2),                                            # out: 16 x (H+2)//2 x (W+2)//2

            nn.Conv2d(16, 32, kernel_size=3, padding=2),                # out: 32 x feat_h x feat_w
            nn.LayerNorm([32, feat_h, feat_w]),
            nn.Sigmoid(),
            nn.Flatten()
        ).to(device)

        self.flat_size = self._get_flat_size(imgs_shape)

        # KAN Branch
        self.m_kan = KAN(
            width=[attributes, kan_neurons],
            grid=kan_grid,
            k=3,
            seed=SEED,
            device=device
        )

        # Linear projections for Q, K, V
        self.query_proj = nn.Linear(kan_neurons, embed_dim).to(device)
        self.key_proj = nn.Linear(self.flat_size, embed_dim).to(device)
        self.value_proj = nn.Linear(self.flat_size, embed_dim).to(device)

        # Attention module
        self.attn = nn.MultiheadAttention(embed_dim=embed_dim, num_heads=num_heads, batch_first=True).to(device)

        # Final regression layer (KAN again)
        self.final_kan = KAN(
            width=[embed_dim, 1],
            grid=kan_grid,
            k=3,
            seed=SEED,
            device=device
        )

    def _get_flat_size(self, imgs_shape):
        """Return the flattened CNN feature count for images of ``imgs_shape``.

        A single zero-filled image is passed through ``cnn_branch``. The probe
        runs in eval mode without autograd so it does not update the BatchNorm
        running statistics; the previous mode is restored afterwards.
        """
        dummy_input = torch.zeros(1, *imgs_shape, device=self.device)
        was_training = self.cnn_branch.training
        self.cnn_branch.eval()
        try:
            with torch.no_grad():
                flat = self.cnn_branch(dummy_input)
        finally:
            self.cnn_branch.train(was_training)
        return flat.shape[1]

    def get_concat_output(self, mlp_input, cnn_input):
        """Return the attention output ``[B, embed_dim]`` fed to ``final_kan``."""
        # Get KAN and CNN outputs
        kan_out = self.m_kan(mlp_input.to(self.device))       # [B, kan_neurons]
        cnn_out = self.cnn_branch(cnn_input.to(self.device))  # [B, flat_size]

        # Project into Q, K, V space; unsqueeze adds a sequence axis of length 1
        Q = self.query_proj(kan_out).unsqueeze(1)  # [B, 1, E]
        K = self.key_proj(cnn_out).unsqueeze(1)    # [B, 1, E]
        V = self.value_proj(cnn_out).unsqueeze(1)  # [B, 1, E]

        # Cross-attention: KAN-derived query over the CNN-derived key/value.
        # NOTE(review): with one key per sample the attention weight is always
        # 1, so Q cannot influence the result (see the class docstring).
        attn_out, _ = self.attn(Q, K, V)  # [B, 1, E]
        attn_out = attn_out.squeeze(1)    # [B, E]

        return attn_out

    def forward(self, mlp_input, cnn_input):
        """Compute the attention-fused features and apply the final KAN layer."""
        attn_out = self.get_concat_output(mlp_input, cnn_input)

        return self.final_kan(attn_out)

# Load Dataset and Images

In [26]:
# Load the train, test and validation splits (in that order); each call returns
# the feature array and the label array for one split.
(X_train, y_train), (X_test, y_test), (X_val, y_val) = (
    load_and_clean(f'N_{split}.npy', f'y_{split}.npy', x_col, target_col)
    for split in ('train', 'test', 'val')
)

In [27]:
# Number of columns in the training feature array.
num_columns = X_train.shape[1]

# One fewer than the column count.
# NOTE(review): the labels are loaded separately (y_train), so this "- 1" may be
# a leftover from a layout where the target was a column of X — confirm.
columns_minus_one = num_columns - 1

# Side length of the smallest square grid with at least `columns_minus_one` cells.
# NOTE(review): the TINTO image size below is set through `pixel`, not this
# value — check whether `image_size` is still used anywhere.
image_size = math.ceil(math.sqrt(columns_minus_one))
print(image_size)

3


In [28]:
# Identifiers used to build the image folder path.
dataset_name = 'puma8NH'
name = "TINTO"

# Image model and its parameters.
problem_type = "regression"
pixel = 20
image_model = TINTO(
    problem=problem_type,
    blur=False,
    pixels=pixel,
    random_seed=SEED,
)

# Folder where the images will be saved, grouped by dataset and image method.
images_folder = f"HyNNImages/Regression/{dataset_name}/images_{dataset_name}_{name}"

In [29]:
# Build the train/val/test loaders (batch size 16) from the cleaned splits. The
# call also returns the tabular attribute count and the image shape, printed as
# 8 and (3, 20, 20) in this run.
# NOTE(review): load_and_preprocess_data is defined earlier in the notebook; it
# presumably generates the TINTO images on first use and reuses them afterwards
# (this run printed "The images are already generated") — confirm.
train_loader, val_loader, test_loader, attributes, imgs_shape = load_and_preprocess_data(
    X_train, y_train, X_test, y_test, X_val, y_val,
    image_model=image_model,
    problem_type=problem_type,
    batch_size=16
)

The images are already generated
The images are already generated
The images are already generated
Images shape:  (3, 20, 20)
Attributes:  8


In [30]:
# Combine each dataloader into (tabular, image, target) tensors: train, val, test.
(
    (train_mlp, train_img, train_target),
    (val_mlp, val_img, val_target),
    (test_mlp, test_img, test_target),
) = map(combine_loader, (train_loader, val_loader, test_loader))

# One dict holding every tensor on `device`, keyed "<split>_<input|img|label>".
dataset = {
    f"{split}_{field}": tensor.to(device)
    for split, tensors in (
        ("train", (train_mlp, train_img, train_target)),
        ("val", (val_mlp, val_img, val_target)),
        ("test", (test_mlp, test_img, test_target)),
    )
    for field, tensor in zip(("input", "img", "label"), tensors)
}

In [31]:
# Print the shapes of the tensors
print("Train data shape:", dataset['train_input'].shape)
print("Train target shape:", dataset['train_label'].shape)
print("Test data shape:", dataset['test_input'].shape)
print("Test target shape:", dataset['test_label'].shape)
print("Validation data shape:", dataset['val_input'].shape)
print("Validation target shape:", dataset['val_label'].shape)

Train data shape: torch.Size([5242, 8])
Train target shape: torch.Size([5242, 1])
Test data shape: torch.Size([1639, 8])
Test target shape: torch.Size([1639, 1])
Validation data shape: torch.Size([1311, 8])
Validation target shape: torch.Size([1311, 1])


# Set File Names

In [32]:
# One results CSV per fusion option (Op1..Op4), named after the dataset.
filename_1, filename_2, filename_3, filename_4 = (
    f'{dataset_name}_Tinto_Concat_Op{option}.csv' for option in range(1, 5)
)

In [33]:
# Name(s) of the option-specific hyperparameter column(s) for each results CSV.
columns_opt1, columns_opt2, columns_opt3, columns_opt4 = (
    'cnn_bottleneck_dim',
    'alpha',
    'hidden_dim',
    'embed_dim, num_heads',
)

# Option 1: Concatenate KAN output with CNN output reduced by a dense layer

In [34]:
# create_csv_with_header(filename_1, columns_opt1)

In [35]:
# print("------------------------------ kan_neurons=2, kan_grid=4, lamb=1e-05 ------------------------------")
# for cnn_bottleneck_dim in [1, 2, 3, 4, 5, 6]:
#     print(f"cnn_bottleneck_dim: {cnn_bottleneck_dim}")
#     train_and_plot_relevance(Model3_1, kan_neurons=2, kan_grid=4, lamb=1e-05, steps=50, 
#                              cnn_bottleneck_dim=cnn_bottleneck_dim, filename=filename_1, opt_col_val=cnn_bottleneck_dim)

# print("------------------------------ kan_neurons=3, kan_grid=5, lamb=0.0001 ------------------------------")
# for cnn_bottleneck_dim in [1,2, 3, 4, 5, 6, 7, 9]:
#     print(f"cnn_bottleneck_dim: {cnn_bottleneck_dim}")
#     train_and_plot_relevance(Model3_1, kan_neurons=3, kan_grid=5, lamb=0.0001, steps=50, 
#                              cnn_bottleneck_dim=cnn_bottleneck_dim, filename=filename_1, opt_col_val=cnn_bottleneck_dim)
    
# print("------------------------------ kan_neurons=3, kan_grid=4, lamb=1e-05 ------------------------------")
# for cnn_bottleneck_dim in [1,2, 3, 4, 5, 6, 7, 9]:
#     print(f"cnn_bottleneck_dim: {cnn_bottleneck_dim}")
#     train_and_plot_relevance(Model3_1, kan_neurons=3, kan_grid=4, lamb=1e-05, steps=50, 
#                              cnn_bottleneck_dim=cnn_bottleneck_dim, filename=filename_1, opt_col_val=cnn_bottleneck_dim)


# print("------------------------------ kan_neurons=4, kan_grid=4, lamb=0.01 ------------------------------")
# for cnn_bottleneck_dim in [1, 2, 3, 4, 6, 7, 8, 10, 12]:
#     print(f"cnn_bottleneck_dim: {cnn_bottleneck_dim}")
#     train_and_plot_relevance(Model3_1, kan_neurons=4, kan_grid=4, lamb=0.01, steps=50, 
#                              cnn_bottleneck_dim=cnn_bottleneck_dim, filename=filename_1, opt_col_val=cnn_bottleneck_dim)

# Option 2: Multiply CNN output by factor

In [36]:
# create_csv_with_header(filename_2, columns_opt2)

In [37]:
# print("------------------------------ kan_neurons=2, kan_grid=4, lamb=1e-05 ------------------------------")
# for alpha in [0.9, .8, .75, .7, .6, .5, .4, .3, .2, .1, .05, .01]:
#     print(f"alpha: {alpha}")
#     train_and_plot_relevance(Model3_2, kan_neurons=2, kan_grid=4, lamb=1e-05, steps=60, 
#                              alpha=alpha, filename=filename_2, opt_col_val=alpha)

# print("------------------------------ kan_neurons=3, kan_grid=5, lamb=0.0001 ------------------------------")
# for alpha in [0.9, .8, .75, .7, .6, .5, .4, .3, .2, .1, .05, .01]:
#     print(f"alpha: {alpha}")
#     train_and_plot_relevance(Model3_2, kan_neurons=3, kan_grid=5, lamb=0.0001, steps=60, 
#                              alpha=alpha, filename=filename_2, opt_col_val=alpha)

# print("------------------------------ kan_neurons=3, kan_grid=4, lamb=1e-05 ------------------------------")
# for alpha in [0.9, .8, .75, .7, .6, .5, .4, .3, .2, .1, .05, .01]:
#     print(f"alpha: {alpha}")
#     train_and_plot_relevance(Model3_2, kan_neurons=3, kan_grid=4, lamb=1e-05, steps=60, 
#                              alpha=alpha, filename=filename_2, opt_col_val=alpha)

# print("------------------------------ kan_neurons=4, kan_grid=4, lamb=0.01 ------------------------------")
# for alpha in [0.9, .8, .75, .7, .6, .5, .4, .3, .2, .1, .05, .01]:
#     print(f"alpha: {alpha}")
#     train_and_plot_relevance(Model3_2, kan_neurons=4, kan_grid=4, lamb=0.01, steps=60, 
#                              alpha=alpha, filename=filename_2, opt_col_val=alpha)

# Option 3: Gated fusion of KAN and CNN outputs

In [38]:
# create_csv_with_header(filename_3, columns_opt3)

In [39]:
# print("------------------------------ kan_neurons=2, kan_grid=4, lamb=1e-05 ------------------------------")
# for hidden_dim in [128, 64, 32, 16, 8]:
#     print(f"hidden_dim: {hidden_dim}")
#     train_and_plot_relevance(Model3_3, kan_neurons=2, kan_grid=4, lamb=1e-05, steps=80, 
#                              hidden_dim=hidden_dim, filename=filename_3, opt_col_val=hidden_dim)

# print("------------------------------ kan_neurons=3, kan_grid=5, lamb=0.0001 ------------------------------")
# for hidden_dim in [128, 64, 32, 16, 8]:
#     print(f"hidden_dim: {hidden_dim}")
#     train_and_plot_relevance(Model3_3, kan_neurons=3, kan_grid=5, lamb=0.0001, steps=80, 
#                              hidden_dim=hidden_dim, filename=filename_3, opt_col_val=hidden_dim)

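# NOTE(review): the banner below says kan_neurons=3, kan_grid=4, lamb=1e-05, but the call in this loop passes kan_neurons=2, kan_grid=3, lamb=0.01 — confirm which was intended.
# print("------------------------------ kan_neurons=3, kan_grid=4, lamb=1e-05 ------------------------------")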
# for hidden_dim in [128, 64, 32, 16, 8]:
#     print(f"hidden_dim: {hidden_dim}")
#     train_and_plot_relevance(Model3_3, kan_neurons=2, kan_grid=3, lamb=0.01, steps=80, 
#                              hidden_dim=hidden_dim, filename=filename_3, opt_col_val=hidden_dim)

# print("------------------------------ kan_neurons=4, kan_grid=4, lamb=0.01 ------------------------------")
# for hidden_dim in [128, 64, 32, 16, 8]:
#     print(f"hidden_dim: {hidden_dim}")
#     train_and_plot_relevance(Model3_3, kan_neurons=4, kan_grid=4, lamb=0.01, steps=80, 
#                              hidden_dim=hidden_dim, filename=filename_3, opt_col_val=hidden_dim)

# Option 4: Multi-head attention

In [40]:
# create_csv_with_header(filename_4, columns_opt4)

In [41]:
# Option 4 sweep: train Model3_4 once per KAN configuration and per
# (embed_dim, num_heads) pair, passing `filename_4` and the pair (as
# opt_col_val) to train_and_plot_relevance. The configurations live in two
# lists so a setting is changed in one place instead of in eight copied loops.
# NOTE(review): the logged output repeats identical metrics across num_head
# values for a given embed_dim in most settings. Model3_4 attends over a single
# key token, so num_heads likely has no effect — confirm before spending
# compute on this axis.

# (kan_neurons, kan_grid, lamb), in the order they are run.
kan_configs_opt4 = [
    (2, 4, 1e-05),
    (3, 5, 0.0001),
    (3, 4, 1e-05),
    (4, 4, 0.01),
]

# (embed_dim, num_heads) pairs: the power-of-two grid first, then the
# embed_dim values that are multiples of 6 with num_heads=6.
attention_grid_opt4 = (
    [(dim, heads) for dim in [64, 32, 16] for heads in [2, 4, 8]]
    + [(dim, 6) for dim in [48, 24, 12]]
)

for kan_neurons, kan_grid, lamb in kan_configs_opt4:
    print(f"------------------------------ kan_neurons={kan_neurons}, kan_grid={kan_grid}, lamb={lamb} ------------------------------")
    for embed_dim, num_head in attention_grid_opt4:
        print(f"embed_dim: {embed_dim}, num_head:{num_head}")
        train_and_plot_relevance(Model3_4, kan_neurons=kan_neurons, kan_grid=kan_grid, lamb=lamb, steps=130,
                                 embed_dim=embed_dim, num_heads=num_head, filename=filename_4, opt_col_val=f'{embed_dim}, {num_head}')


------------------------------ kan_neurons=2, kan_grid=4, lamb=1e-05 ------------------------------
embed_dim: 64, num_head:2
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.62e+00 | eval_loss: 3.68e+00 | reg: 2.73e+01 |: 100%|█| 130/130 [01:21<00:00,  1.59i


saving model version 0.1
Best epoch 100
3.775926113128662
M_KAN Relevance: 0.014159921556711197
CNN Relevance: 0.9858400821685791
embed_dim: 64, num_head:4
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.44e+00 | eval_loss: 3.73e+00 | reg: 2.24e+01 |: 100%|█| 130/130 [01:56<00:00,  1.12i


saving model version 0.1
Best epoch 32
3.732860803604126
M_KAN Relevance: 4.021665063191904e-06
CNN Relevance: 0.9999960064888
embed_dim: 64, num_head:8
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.44e+00 | eval_loss: 3.73e+00 | reg: 2.24e+01 |: 100%|█| 130/130 [01:56<00:00,  1.11i


saving model version 0.1
Best epoch 32
3.732860803604126
M_KAN Relevance: 4.021665063191904e-06
CNN Relevance: 0.9999960064888
embed_dim: 32, num_head:2
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.45e+00 | eval_loss: 3.73e+00 | reg: 1.95e+01 |: 100%|█| 130/130 [01:51<00:00,  1.16i


saving model version 0.1
Best epoch 26
3.7470808029174805
M_KAN Relevance: 0.1178840696811676
CNN Relevance: 0.88211590051651
embed_dim: 32, num_head:4
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.45e+00 | eval_loss: 3.73e+00 | reg: 1.95e+01 |: 100%|█| 130/130 [01:51<00:00,  1.16i


saving model version 0.1
Best epoch 26
3.7470808029174805
M_KAN Relevance: 0.1178840696811676
CNN Relevance: 0.88211590051651
embed_dim: 32, num_head:8
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.45e+00 | eval_loss: 3.73e+00 | reg: 1.95e+01 |: 100%|█| 130/130 [01:51<00:00,  1.16i


saving model version 0.1
Best epoch 26
3.7470808029174805
M_KAN Relevance: 0.1178840696811676
CNN Relevance: 0.88211590051651
embed_dim: 16, num_head:2
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.49e+00 | eval_loss: 3.67e+00 | reg: 1.74e+01 |: 100%|█| 130/130 [01:54<00:00,  1.14i


saving model version 0.1
Best epoch 46
3.7330265045166016
M_KAN Relevance: 0.5126510858535767
CNN Relevance: 0.4873489439487457
embed_dim: 16, num_head:4
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.54e+00 | eval_loss: 3.63e+00 | reg: 1.59e+01 |: 100%|█| 130/130 [01:19<00:00,  1.64i


saving model version 0.1
Best epoch 35
3.7410175800323486
M_KAN Relevance: 0.6010497212409973
CNN Relevance: 0.3989502489566803
embed_dim: 16, num_head:8
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.54e+00 | eval_loss: 3.63e+00 | reg: 1.59e+01 |: 100%|█| 130/130 [01:19<00:00,  1.63i


saving model version 0.1
Best epoch 35
3.7410175800323486
M_KAN Relevance: 0.6010497212409973
CNN Relevance: 0.3989502489566803
embed_dim: 48, num_head:6
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.44e+00 | eval_loss: 3.74e+00 | reg: 2.17e+01 |: 100%|█| 130/130 [01:52<00:00,  1.15i


saving model version 0.1
Best epoch 25
3.753851890563965
M_KAN Relevance: 0.06087382510304451
CNN Relevance: 0.939126193523407
embed_dim: 24, num_head:6
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.58e+00 | eval_loss: 3.69e+00 | reg: 1.59e+01 |: 100%|█| 130/130 [01:26<00:00,  1.50i


saving model version 0.1
Best epoch 103
3.7727386951446533
M_KAN Relevance: 0.16896185278892517
CNN Relevance: 0.8310381770133972
embed_dim: 12, num_head:6
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.45e+00 | eval_loss: 3.71e+00 | reg: 1.42e+01 |: 100%|█| 130/130 [01:54<00:00,  1.13i


saving model version 0.1
Best epoch 17
3.7261929512023926
M_KAN Relevance: 0.00018197229655925184
CNN Relevance: 0.9998180270195007
------------------------------ kan_neurons=3, kan_grid=5, lamb=0.0001 ------------------------------
embed_dim: 64, num_head:2
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.77e+00 | eval_loss: 3.85e+00 | reg: 2.06e+01 |: 100%|█| 130/130 [01:43<00:00,  1.25i


saving model version 0.1
Best epoch 129
3.9172306060791016
M_KAN Relevance: 0.24410440027713776
CNN Relevance: 0.7558956146240234
embed_dim: 64, num_head:4
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.77e+00 | eval_loss: 3.85e+00 | reg: 2.06e+01 |: 100%|█| 130/130 [01:43<00:00,  1.25i


saving model version 0.1
Best epoch 129
3.9172306060791016
M_KAN Relevance: 0.24410440027713776
CNN Relevance: 0.7558956146240234
embed_dim: 64, num_head:8
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.77e+00 | eval_loss: 3.85e+00 | reg: 2.06e+01 |: 100%|█| 130/130 [01:43<00:00,  1.25i


saving model version 0.1
Best epoch 129
3.9172306060791016
M_KAN Relevance: 0.24410440027713776
CNN Relevance: 0.7558956146240234
embed_dim: 32, num_head:2
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.49e+00 | eval_loss: 3.69e+00 | reg: 1.78e+01 |: 100%|█| 130/130 [01:52<00:00,  1.15i


saving model version 0.1
Best epoch 52
3.738222122192383
M_KAN Relevance: 0.2284696102142334
CNN Relevance: 0.7715303897857666
embed_dim: 32, num_head:4
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.49e+00 | eval_loss: 3.69e+00 | reg: 1.78e+01 |: 100%|█| 130/130 [01:53<00:00,  1.14i


saving model version 0.1
Best epoch 52
3.738222122192383
M_KAN Relevance: 0.2284696102142334
CNN Relevance: 0.7715303897857666
embed_dim: 32, num_head:8
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.49e+00 | eval_loss: 3.69e+00 | reg: 1.78e+01 |: 100%|█| 130/130 [01:53<00:00,  1.15i


saving model version 0.1
Best epoch 52
3.738222122192383
M_KAN Relevance: 0.2284696102142334
CNN Relevance: 0.7715303897857666
embed_dim: 16, num_head:2
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.54e+00 | eval_loss: 3.66e+00 | reg: 1.65e+01 |: 100%|█| 130/130 [01:47<00:00,  1.21i


saving model version 0.1
Best epoch 120
3.7498912811279297
M_KAN Relevance: 0.4190428853034973
CNN Relevance: 0.5809570550918579
embed_dim: 16, num_head:4
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.54e+00 | eval_loss: 3.66e+00 | reg: 1.65e+01 |: 100%|█| 130/130 [01:48<00:00,  1.20i


saving model version 0.1
Best epoch 120
3.7498912811279297
M_KAN Relevance: 0.4190428853034973
CNN Relevance: 0.5809570550918579
embed_dim: 16, num_head:8
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.53e+00 | eval_loss: 3.65e+00 | reg: 1.58e+01 |: 100%|█| 130/130 [01:53<00:00,  1.15i


saving model version 0.1
Best epoch 90
3.757819175720215
M_KAN Relevance: 0.40652117133140564
CNN Relevance: 0.5934788584709167
embed_dim: 48, num_head:6
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.52e+00 | eval_loss: 3.65e+00 | reg: 2.04e+01 |: 100%|█| 130/130 [01:51<00:00,  1.16i


saving model version 0.1
Best epoch 111
3.7439422607421875
M_KAN Relevance: 0.14022360742092133
CNN Relevance: 0.8597764372825623
embed_dim: 24, num_head:6
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.91e+00 | eval_loss: 3.99e+00 | reg: 2.80e+01 |: 100%|█| 130/130 [01:11<00:00,  1.83i


saving model version 0.1
Best epoch 104
4.021620750427246
M_KAN Relevance: 0.09087295085191727
CNN Relevance: 0.9091270565986633
embed_dim: 12, num_head:6
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.43e+00 | eval_loss: 3.73e+00 | reg: 1.63e+01 |: 100%|█| 130/130 [01:52<00:00,  1.16i


saving model version 0.1
Best epoch 56
3.7586207389831543
M_KAN Relevance: 0.06090916320681572
CNN Relevance: 0.9390907883644104
------------------------------ kan_neurons=3, kan_grid=4, lamb=1e-05 ------------------------------
embed_dim: 64, num_head:2
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.55e+00 | eval_loss: 3.63e+00 | reg: 3.04e+01 |: 100%|█| 130/130 [01:22<00:00,  1.58i


saving model version 0.1
Best epoch 100
3.725766658782959
M_KAN Relevance: 0.028219910338521004
CNN Relevance: 0.9717801213264465
embed_dim: 64, num_head:4
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.55e+00 | eval_loss: 3.63e+00 | reg: 3.04e+01 |: 100%|█| 130/130 [01:22<00:00,  1.58i


saving model version 0.1
Best epoch 100
3.725766658782959
M_KAN Relevance: 0.028219910338521004
CNN Relevance: 0.9717801213264465
embed_dim: 64, num_head:8
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.55e+00 | eval_loss: 3.63e+00 | reg: 3.04e+01 |: 100%|█| 130/130 [01:22<00:00,  1.57i


saving model version 0.1
Best epoch 100
3.725766658782959
M_KAN Relevance: 0.028219910338521004
CNN Relevance: 0.9717801213264465
embed_dim: 32, num_head:2
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.45e+00 | eval_loss: 3.71e+00 | reg: 2.02e+01 |: 100%|█| 130/130 [01:52<00:00,  1.15i


saving model version 0.1
Best epoch 28
3.7482035160064697
M_KAN Relevance: 0.30607855319976807
CNN Relevance: 0.6939213871955872
embed_dim: 32, num_head:4
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.45e+00 | eval_loss: 3.71e+00 | reg: 2.02e+01 |: 100%|█| 130/130 [01:52<00:00,  1.15i


saving model version 0.1
Best epoch 28
3.7482035160064697
M_KAN Relevance: 0.30607855319976807
CNN Relevance: 0.6939213871955872
embed_dim: 32, num_head:8
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.45e+00 | eval_loss: 3.71e+00 | reg: 2.02e+01 |: 100%|█| 130/130 [01:52<00:00,  1.15i


saving model version 0.1
Best epoch 28
3.7482035160064697
M_KAN Relevance: 0.30607855319976807
CNN Relevance: 0.6939213871955872
embed_dim: 16, num_head:2
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.52e+00 | eval_loss: 3.64e+00 | reg: 1.93e+01 |: 100%|█| 130/130 [01:44<00:00,  1.25i


saving model version 0.1
Best epoch 86
3.7143940925598145
M_KAN Relevance: 0.13543155789375305
CNN Relevance: 0.8645684719085693
embed_dim: 16, num_head:4
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.52e+00 | eval_loss: 3.64e+00 | reg: 1.93e+01 |: 100%|█| 130/130 [01:44<00:00,  1.24i


saving model version 0.1
Best epoch 86
3.7143940925598145
M_KAN Relevance: 0.13543155789375305
CNN Relevance: 0.8645684719085693
embed_dim: 16, num_head:8
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.52e+00 | eval_loss: 3.64e+00 | reg: 1.93e+01 |: 100%|█| 130/130 [01:45<00:00,  1.23i


saving model version 0.1
Best epoch 86
3.7143940925598145
M_KAN Relevance: 0.13543155789375305
CNN Relevance: 0.8645684719085693
embed_dim: 48, num_head:6
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.54e+00 | eval_loss: 3.64e+00 | reg: 2.57e+01 |: 100%|█| 130/130 [01:32<00:00,  1.41i


saving model version 0.1
Best epoch 113
3.7316043376922607
M_KAN Relevance: 0.006798543967306614
CNN Relevance: 0.9932014346122742
embed_dim: 24, num_head:6
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.57e+00 | eval_loss: 3.64e+00 | reg: 2.59e+01 |: 100%|█| 130/130 [01:39<00:00,  1.30i


saving model version 0.1
Best epoch 93
3.7372970581054688
M_KAN Relevance: 0.3834154009819031
CNN Relevance: 0.6165845990180969
embed_dim: 12, num_head:6
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.39e+00 | eval_loss: 3.82e+00 | reg: 1.51e+01 |: 100%|█| 130/130 [01:51<00:00,  1.17i


saving model version 0.1
Best epoch 19
3.7562928199768066
M_KAN Relevance: 0.3765682578086853
CNN Relevance: 0.6234317421913147
------------------------------ kan_neurons=4, kan_grid=4, lamb=0.01 ------------------------------
embed_dim: 64, num_head:2
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.50e+00 | eval_loss: 3.66e+00 | reg: 1.12e+01 |: 100%|█| 130/130 [01:59<00:00,  1.09i


saving model version 0.1
Best epoch 56
3.7181315422058105
M_KAN Relevance: 1.3386869795795064e-05
CNN Relevance: 0.9999866485595703
embed_dim: 64, num_head:4
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.50e+00 | eval_loss: 3.66e+00 | reg: 1.12e+01 |: 100%|█| 130/130 [01:58<00:00,  1.09i


saving model version 0.1
Best epoch 56
3.7181315422058105
M_KAN Relevance: 1.3386869795795064e-05
CNN Relevance: 0.9999866485595703
embed_dim: 64, num_head:8
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.50e+00 | eval_loss: 3.66e+00 | reg: 1.12e+01 |: 100%|█| 130/130 [01:59<00:00,  1.09i


saving model version 0.1
Best epoch 56
3.7181315422058105
M_KAN Relevance: 1.3386869795795064e-05
CNN Relevance: 0.9999866485595703
embed_dim: 32, num_head:2
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.47e+00 | eval_loss: 3.71e+00 | reg: 8.48e+00 |: 100%|█| 130/130 [01:53<00:00,  1.14i


saving model version 0.1
Best epoch 35
3.739335298538208
M_KAN Relevance: 0.02773342654109001
CNN Relevance: 0.9722665548324585
embed_dim: 32, num_head:4
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.47e+00 | eval_loss: 3.71e+00 | reg: 8.48e+00 |: 100%|█| 130/130 [01:53<00:00,  1.14i


saving model version 0.1
Best epoch 35
3.739335298538208
M_KAN Relevance: 0.02773342654109001
CNN Relevance: 0.9722665548324585
embed_dim: 32, num_head:8
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.39e+00 | eval_loss: 3.79e+00 | reg: 9.22e+00 |: 100%|█| 130/130 [01:50<00:00,  1.17i


saving model version 0.1
Best epoch 43
3.7501659393310547
M_KAN Relevance: 0.0026824232190847397
CNN Relevance: 0.9973176121711731
embed_dim: 16, num_head:2
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.42e+00 | eval_loss: 3.74e+00 | reg: 8.34e+00 |: 100%|█| 130/130 [01:54<00:00,  1.14i


saving model version 0.1
Best epoch 16
3.7220332622528076
M_KAN Relevance: 0.2069840133190155
CNN Relevance: 0.7930159568786621
embed_dim: 16, num_head:4
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.42e+00 | eval_loss: 3.74e+00 | reg: 8.34e+00 |: 100%|█| 130/130 [01:54<00:00,  1.14i


saving model version 0.1
Best epoch 16
3.7220332622528076
M_KAN Relevance: 0.2069840133190155
CNN Relevance: 0.7930159568786621
embed_dim: 16, num_head:8
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.42e+00 | eval_loss: 3.74e+00 | reg: 8.34e+00 |: 100%|█| 130/130 [01:54<00:00,  1.13i


saving model version 0.1
Best epoch 16
3.7220332622528076
M_KAN Relevance: 0.2069840133190155
CNN Relevance: 0.7930159568786621
embed_dim: 48, num_head:6
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.58e+00 | eval_loss: 3.63e+00 | reg: 2.09e+01 |: 100%|█| 130/130 [01:28<00:00,  1.47i


saving model version 0.1
Best epoch 129
3.7480971813201904
M_KAN Relevance: 0.014722379855811596
CNN Relevance: 0.9852776527404785
embed_dim: 24, num_head:6
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.39e+00 | eval_loss: 3.79e+00 | reg: 1.03e+01 |: 100%|█| 130/130 [01:50<00:00,  1.18i


saving model version 0.1
Best epoch 27
3.7361900806427
M_KAN Relevance: 0.10823393613100052
CNN Relevance: 0.8917660713195801
embed_dim: 12, num_head:6
checkpoint directory created: ./model
saving model version 0.0
checkpoint directory created: ./model
saving model version 0.0
Successfully created and tested Model3_4


| train_loss: 3.45e+00 | eval_loss: 3.72e+00 | reg: 7.03e+00 |: 100%|█| 130/130 [01:52<00:00,  1.16i

saving model version 0.1
Best epoch 87
3.763381004333496
M_KAN Relevance: 0.9218504428863525
CNN Relevance: 0.07814953476190567



