# Function Fitting

In [None]:
import jax
import jax.numpy as jnp

from src.functions import *
from src.utils import *
from src.std_kan import StdKAN

from jaxkan.KAN import KAN

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

import optax
from flax import nnx

import os

# Folder that collects the per-experiment result logs; it is created on first
# use and reused as-is when it is already present
results_dir = "ff_results"
os.makedirs(name=results_dir, exist_ok=True)

## Grid Search Parameters

In [None]:
# Target functions to fit, keyed by the label used in printouts and result rows
func_dict = dict(f1=f1, f2=f2, f3=f3, f4=f4, f5=f5)

# N and seed are forwarded to generate_func_data; seed is also used for the
# train/test split and, offset by the run index, for every model
N, seed = 5000, 42

# Number of training steps performed for each model
num_epochs = 2000

# Adam with a fixed learning rate; every optimizer in the notebook is built from it
opt_type = optax.adam(learning_rate=1e-3)

# Hyper-parameter grid
G_values = [5, 10, 20, 40]              # values for the 'G' entry of the layer parameters
widths = [2 ** p for p in range(1, 7)]  # nodes per hidden layer: 2, 4, ..., 64
depths = list(range(1, 5))              # number of hidden layers: 1, ..., 4

## Baseline Results

In [None]:
# Rows of this experiment are logged to <results_dir>/<experiment_name>.txt
experiment_name = "baseline"
results_file = os.path.join(results_dir, experiment_name + ".txt")

# Column names written as the first line of a new log
header = "function, G, width, depth, run, loss, mse, l2"

# A log left over from an earlier session is kept untouched so that new rows
# are appended to it; only a missing log is created, holding just the header
log_missing = not os.path.exists(results_file)
if log_missing:
    with open(results_file, "w") as out:
        out.write(f"{header}\n")

In [None]:
# Baseline sweep: for every target function and every (G, depth, width)
# combination, train five differently seeded spline KANs with the 'default'
# init scheme and append one result row per run to `results_file`.
for func_name, function in func_dict.items():
    print(f"Running Experiments for {func_name} function.")

    # Sample the target function and hold out 20% of the samples, so the test
    # MSE can serve as a final metric as well
    x, y = generate_func_data(function, 2, N, seed)
    X_train, X_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, random_state=seed
    )

    # Network input/output sizes are read off the second axis of the data
    n_in = X_train.shape[1]
    n_out = y_train.shape[1]

    for G in G_values:
        print(f"\tUsing G = {G}.")

        for depth in depths:
            for width in widths:

                # `depth` hidden layers with `width` nodes each
                hidden = depth * [width]
                layer_dims = [n_in] + hidden + [n_out]

                req_params = {
                    'k': 3,
                    'G': G,
                    'grid_range': (-1.0, 1.0),
                    'grid_e': 1.0,
                    'residual': nnx.silu,
                    'external_weights': True,
                    'add_bias': True,
                    'init_scheme': {'type': 'default'},
                }

                print(f"\t\tTraining model with dimensions {layer_dims}.")

                for run in range(1, 6):

                    # Fresh model and optimizer per run; the model seed is offset by the run index
                    model = KAN(
                        layer_dims=layer_dims,
                        layer_type='spline',
                        required_parameters=req_params,
                        seed=seed + run,
                    )
                    optimizer = nnx.Optimizer(model, opt_type)

                    # Every step receives the whole training set; only the loss
                    # returned by the last step is kept for the log
                    for step in range(num_epochs):
                        train_loss = func_fit_step(model, optimizer, X_train, y_train)

                    # Held-out error plus the metric returned by func_fit_eval
                    y_pred = model(X_test)
                    test_mse = mean_squared_error(y_test, y_pred)
                    l2error = func_fit_eval(model, function, 2, resolution=200, make_plot=False)

                    # Persist the row right away so finished runs survive an interrupted sweep
                    row = f"{func_name}, {G}, {width}, {depth}, {run}, {train_loss}, {test_mse}, {l2error}"
                    with open(results_file, "a") as log_file:
                        log_file.write(row + "\n")

                    print(f"\t\t\t{run}. Final loss: {train_loss:.2e} \tTest MSE: {test_mse:.2e} \tRel. L2 Error: {l2error:.2e}")

## LeCun Results

In [None]:
# Rows of this experiment are logged to <results_dir>/<experiment_name>.txt
experiment_name = "lecun"
results_file = os.path.join(results_dir, experiment_name + ".txt")

# Column names written as the first line of a new log
header = "function, G, width, depth, run, loss, mse, l2"

# A log left over from an earlier session is kept untouched so that new rows
# are appended to it; only a missing log is created, holding just the header
log_missing = not os.path.exists(results_file)
if log_missing:
    with open(results_file, "w") as out:
        out.write(f"{header}\n")

In [None]:
# LeCun sweep: for every target function and every (G, depth, width)
# combination, train five differently seeded spline KANs with the 'lecun'
# init scheme and append one result row per run to `results_file`.
for func_name, function in func_dict.items():
    print(f"Running Experiments for {func_name} function.")

    # Sample the target function and hold out 20% of the samples, so the test
    # MSE can serve as a final metric as well
    x, y = generate_func_data(function, 2, N, seed)
    X_train, X_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, random_state=seed
    )

    # Network input/output sizes are read off the second axis of the data
    n_in = X_train.shape[1]
    n_out = y_train.shape[1]

    for G in G_values:
        print(f"\tUsing G = {G}.")

        for depth in depths:
            for width in widths:

                # `depth` hidden layers with `width` nodes each
                hidden = depth * [width]
                layer_dims = [n_in] + hidden + [n_out]

                req_params = {
                    'k': 3,
                    'G': G,
                    'grid_range': (-1.0, 1.0),
                    'grid_e': 1.0,
                    'residual': nnx.silu,
                    'external_weights': True,
                    'add_bias': True,
                    'init_scheme': {'type': 'lecun', 'gain': None, 'distribution': 'uniform'},
                }

                print(f"\t\tTraining model with dimensions {layer_dims}.")

                for run in range(1, 6):

                    # Fresh model and optimizer per run; the model seed is offset by the run index
                    model = KAN(
                        layer_dims=layer_dims,
                        layer_type='spline',
                        required_parameters=req_params,
                        seed=seed + run,
                    )
                    optimizer = nnx.Optimizer(model, opt_type)

                    # Every step receives the whole training set; only the loss
                    # returned by the last step is kept for the log
                    for step in range(num_epochs):
                        train_loss = func_fit_step(model, optimizer, X_train, y_train)

                    # Held-out error plus the metric returned by func_fit_eval
                    y_pred = model(X_test)
                    test_mse = mean_squared_error(y_test, y_pred)
                    l2error = func_fit_eval(model, function, 2, resolution=200, make_plot=False)

                    # Persist the row right away so finished runs survive an interrupted sweep
                    row = f"{func_name}, {G}, {width}, {depth}, {run}, {train_loss}, {test_mse}, {l2error}"
                    with open(results_file, "a") as log_file:
                        log_file.write(row + "\n")

                    print(f"\t\t\t{run}. Final loss: {train_loss:.2e} \tTest MSE: {test_mse:.2e} \tRel. L2 Error: {l2error:.2e}")

## Glorot Results

In [None]:
# Rows of this experiment are logged to <results_dir>/<experiment_name>.txt
experiment_name = "glorot"
results_file = os.path.join(results_dir, experiment_name + ".txt")

# Column names written as the first line of a new log
header = "function, G, width, depth, run, loss, mse, l2"

# A log left over from an earlier session is kept untouched so that new rows
# are appended to it; only a missing log is created, holding just the header
log_missing = not os.path.exists(results_file)
if log_missing:
    with open(results_file, "w") as out:
        out.write(f"{header}\n")

In [None]:
# Glorot sweep: for every target function and every (G, depth, width)
# combination, train five differently seeded spline KANs with the 'glorot'
# init scheme and append one result row per run to `results_file`.
for func_name, function in func_dict.items():
    print(f"Running Experiments for {func_name} function.")

    # Sample the target function and hold out 20% of the samples, so the test
    # MSE can serve as a final metric as well
    x, y = generate_func_data(function, 2, N, seed)
    X_train, X_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, random_state=seed
    )

    # Network input/output sizes are read off the second axis of the data
    n_in = X_train.shape[1]
    n_out = y_train.shape[1]

    for G in G_values:
        print(f"\tUsing G = {G}.")

        for depth in depths:
            for width in widths:

                # `depth` hidden layers with `width` nodes each
                hidden = depth * [width]
                layer_dims = [n_in] + hidden + [n_out]

                req_params = {
                    'k': 3,
                    'G': G,
                    'grid_range': (-1.0, 1.0),
                    'grid_e': 1.0,
                    'residual': nnx.silu,
                    'external_weights': True,
                    'add_bias': True,
                    'init_scheme': {
                        'type': 'glorot',
                        'gain': None,
                        'distribution': 'uniform',
                        'sample_size': 10000,
                    },
                }

                print(f"\t\tTraining model with dimensions {layer_dims}.")

                for run in range(1, 6):

                    # Fresh model and optimizer per run; the model seed is offset by the run index
                    model = KAN(
                        layer_dims=layer_dims,
                        layer_type='spline',
                        required_parameters=req_params,
                        seed=seed + run,
                    )
                    optimizer = nnx.Optimizer(model, opt_type)

                    # Every step receives the whole training set; only the loss
                    # returned by the last step is kept for the log
                    for step in range(num_epochs):
                        train_loss = func_fit_step(model, optimizer, X_train, y_train)

                    # Held-out error plus the metric returned by func_fit_eval
                    y_pred = model(X_test)
                    test_mse = mean_squared_error(y_test, y_pred)
                    l2error = func_fit_eval(model, function, 2, resolution=200, make_plot=False)

                    # Persist the row right away so finished runs survive an interrupted sweep
                    row = f"{func_name}, {G}, {width}, {depth}, {run}, {train_loss}, {test_mse}, {l2error}"
                    with open(results_file, "a") as log_file:
                        log_file.write(row + "\n")

                    print(f"\t\t\t{run}. Final loss: {train_loss:.2e} \tTest MSE: {test_mse:.2e} \tRel. L2 Error: {l2error:.2e}")

## Custom Standardization Results

In [None]:
# Rows of this experiment are logged to <results_dir>/<experiment_name>.txt
experiment_name = "std"
results_file = os.path.join(results_dir, experiment_name + ".txt")

# Column names written as the first line of a new log
header = "function, G, width, depth, run, loss, mse, l2"

# A log left over from an earlier session is kept untouched so that new rows
# are appended to it; only a missing log is created, holding just the header
log_missing = not os.path.exists(results_file)
if log_missing:
    with open(results_file, "w") as out:
        out.write(f"{header}\n")

In [None]:
# Standardization sweep: for every target function and every (G, depth, width)
# combination, train five differently seeded StdKAN models and append one
# result row per run to `results_file`.
for func_name, function in func_dict.items():
    print(f"Running Experiments for {func_name} function.")

    # Sample the target function and hold out 20% of the samples, so the test
    # MSE can serve as a final metric as well
    x, y = generate_func_data(function, 2, N, seed)
    X_train, X_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, random_state=seed
    )

    # Network input/output sizes are read off the second axis of the data
    n_in = X_train.shape[1]
    n_out = y_train.shape[1]

    for G in G_values:
        print(f"\tUsing G = {G}.")

        for depth in depths:
            for width in widths:

                # `depth` hidden layers with `width` nodes each
                hidden = depth * [width]
                layer_dims = [n_in] + hidden + [n_out]

                # Unlike the other sweeps, the init scheme carries no 'type' entry here
                req_params = {
                    'k': 3,
                    'G': G,
                    'grid_range': (-1.0, 1.0),
                    'grid_e': 1.0,
                    'residual': nnx.silu,
                    'external_weights': True,
                    'add_bias': True,
                    'init_scheme': {'gain': None, 'distribution': 'uniform'},
                }

                print(f"\t\tTraining model with dimensions {layer_dims}.")

                for run in range(1, 6):

                    # Fresh model and optimizer per run; the model seed is offset by the run index
                    model = StdKAN(
                        layer_dims=layer_dims,
                        required_parameters=req_params,
                        seed=seed + run,
                    )
                    optimizer = nnx.Optimizer(model, opt_type)

                    # Every step receives the whole training set; only the loss
                    # returned by the last step is kept for the log
                    for step in range(num_epochs):
                        train_loss = func_fit_step(model, optimizer, X_train, y_train)

                    # Held-out error plus the metric returned by func_fit_eval
                    y_pred = model(X_test)
                    test_mse = mean_squared_error(y_test, y_pred)
                    l2error = func_fit_eval(model, function, 2, resolution=200, make_plot=False)

                    # Persist the row right away so finished runs survive an interrupted sweep
                    row = f"{func_name}, {G}, {width}, {depth}, {run}, {train_loss}, {test_mse}, {l2error}"
                    with open(results_file, "a") as log_file:
                        log_file.write(row + "\n")

                    print(f"\t\t\t{run}. Final loss: {train_loss:.2e} \tTest MSE: {test_mse:.2e} \tRel. L2 Error: {l2error:.2e}")

## Empirical Power Law Results

In [None]:
# Exponent grids for the 'power' init scheme: 0.0 to 2.0 in steps of 0.25.
# Multiples of 0.25 are exact in binary floating point, so the generated values
# equal the literals 0.0, 0.25, ..., 2.0.
pows_basis = [0.25 * step for step in range(9)]
# Same values, kept as an independent list
pows_res = list(pows_basis)

In [None]:
# Rows of this experiment are logged to <results_dir>/<experiment_name>.txt
experiment_name = "power"
results_file = os.path.join(results_dir, experiment_name + ".txt")

# Column names written as the first line of a new log
header = "function, G, width, depth, pow_basis, pow_res, run, loss, mse, l2"

# A log left over from an earlier session is kept untouched so that new rows
# are appended to it; only a missing log is created, holding just the header
log_missing = not os.path.exists(results_file)
if log_missing:
    with open(results_file, "w") as out:
        out.write(f"{header}\n")

In [None]:
# Power-law sweep: for every target function, every (G, depth, width)
# combination and every (pow_basis, pow_res) pair, train three differently
# seeded spline KANs with the 'power' init scheme and append one result row
# per run to `results_file`.
for func_name, function in func_dict.items():
    print(f"Running Experiments for {func_name} function.")

    # Generate data
    x, y = generate_func_data(function, 2, N, seed)

    # Split data in case we also want to use test mse as final metric
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=seed)

    # Model input/output sizes are read off the second axis of the data
    n_in, n_out = X_train.shape[1], y_train.shape[1]

    # Grid search
    for G in G_values:
        print(f"\tUsing G = {G}.")

        for depth in depths:
            for width in widths:

                # `depth` hidden layers with `width` nodes each
                hidden = [width] * depth
                layer_dims = [n_in, *hidden, n_out]

                print(f"\t\tTraining model with dimensions {layer_dims}.")

                for pow_basis in pows_basis:
                    for pow_res in pows_res:

                        # The same exponent is used for both basis powers (pow_b1, pow_b2)
                        # and for both residual powers (pow_r1, pow_r2); constants stay at 1.0
                        req_params = {
                            'k': 3,
                            'G': G,
                            'grid_range': (-1.0, 1.0),
                            'grid_e': 1.0,
                            'residual': nnx.silu,
                            'external_weights': True,
                            'add_bias': True,
                            'init_scheme': {
                                'type': 'power',
                                "const_b": 1.0,
                                "const_r": 1.0,
                                "pow_b1": pow_basis,
                                "pow_b2": pow_basis,
                                "pow_r1": pow_res,
                                "pow_r2": pow_res,
                            },
                        }

                        print(f"\t\t\tWorking with pow_basis = {pow_basis} and pow_res = {pow_res}.")

                        for run in [1, 2, 3]:

                            # Fresh model and optimizer per run; the model seed is offset by the run index
                            model = KAN(layer_dims=layer_dims, layer_type='spline', required_parameters=req_params, seed=seed + run)
                            optimizer = nnx.Optimizer(model, opt_type)

                            # Train; only the loss returned by the last step is kept for the log
                            for epoch in range(num_epochs):
                                train_loss = func_fit_step(model, optimizer, X_train, y_train)

                            # Evaluate
                            y_pred = model(X_test)
                            test_mse = mean_squared_error(y_test, y_pred)

                            l2error = func_fit_eval(model, function, 2, resolution=200, make_plot=False)

                            # Log results. The exponents are written as pow_basis, pow_res to
                            # match the column order of the file header
                            # ("..., depth, pow_basis, pow_res, run, ..."); they used to be
                            # written the other way round, so rows already present in an
                            # existing log from earlier runs carry the two values swapped.
                            new_row = f"{func_name}, {G}, {width}, {depth}, {pow_basis}, {pow_res}, {run}, {train_loss}, {test_mse}, {l2error}"

                            # Append the row right away so finished runs survive an interrupted sweep
                            with open(results_file, "a") as rfile:
                                rfile.write(new_row + "\n")

                            print(f"\t\t\t\t{run}. Final loss: {train_loss:.2e} \tTest MSE: {test_mse:.2e} \tRel. L2 Error: {l2error:.2e}")