In [None]:
!pip install pandas numpy matplotlib tensorflow tkan==0.3.0 sigkan==0.1.5 tkat==0.1.1 scikit-learn pyarrow keras-efficient-kan keras-sig

In [None]:
import time
import numpy as np
import pandas as pd
from IPython.display import display
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, LSTM, Dense, Flatten, Input

from tkan import TKAN
from tkat import TKAT
from sigkan import SigKAN
from sigtkan import SigTKAN

from sklearn.metrics import r2_score

# Fix the random seed and request deterministic TensorFlow ops so that
# repeated runs of the notebook are comparable with each other.
tf.keras.utils.set_random_seed(1)
tf.config.experimental.enable_op_determinism()

In [None]:
class MinMaxScaler:
    """Min-max scaler for 1D, 2D and 3D arrays.

    Statistics are computed per feature: over the whole array for 1D data,
    per column for 2D data, and along ``feature_axis`` for 3D data.
    """

    def __init__(self, feature_axis=None, minmax_range=(0, 1)):
        """
        Initialize the MinMaxScaler.
        Args:
        feature_axis (int, optional): The axis that represents the feature dimension.
                                      Only used for 3D data; may be negative.
                                      Default is None, in which case the last axis is
                                      used for 3D data.
        minmax_range (tuple): Target ``(min, max)`` range of the scaled data.
        """
        self.feature_axis = feature_axis
        self.min_ = None
        self.max_ = None
        self.scale_ = None
        self.minmax_range = minmax_range  # Default range for scaling (min, max)
        # Feature axis actually used by the last 3D fit (None for 1D/2D fits).
        self._fitted_axis = None

    def _resolved_feature_axis(self, ndim):
        """Return the non-negative feature axis to use for ``ndim``-dimensional data."""
        axis = -1 if self.feature_axis is None else self.feature_axis
        if not -ndim <= axis < ndim:
            raise ValueError(
                f"feature_axis={self.feature_axis} is out of bounds for {ndim}D data."
            )
        return axis % ndim

    def _align(self, stat, X):
        """Reshape a per-feature statistic so it broadcasts along the fitted feature axis.

        Only applies when the scaler was fitted on 3D data and ``X`` is 3D;
        otherwise the statistic is returned untouched and NumPy's default
        broadcasting (against the last axis) is used.
        """
        axis = getattr(self, "_fitted_axis", None)
        if axis is None or np.ndim(X) != 3 or np.ndim(stat) != 1:
            return stat
        shape = [1, 1, 1]
        shape[axis] = -1
        return np.reshape(stat, shape)

    def fit(self, X):
        """
        Fit the scaler to the data based on its dimensionality.
        Args:
        X (np.array): The data to fit the scaler on (1D, 2D or 3D).
        Returns:
        MinMaxScaler: The fitted scaler (self).
        Raises:
        ValueError: If the data is not 1D, 2D or 3D, or feature_axis is out of bounds.
        """
        self._fitted_axis = None
        if X.ndim == 3:  # 3D data
            feature_axis = self._resolved_feature_axis(X.ndim)
            reduce_axes = tuple(i for i in range(X.ndim) if i != feature_axis)
            self.min_ = np.min(X, axis=reduce_axes)
            self.max_ = np.max(X, axis=reduce_axes)
            self._fitted_axis = feature_axis
        elif X.ndim == 2:  # 2D data
            self.min_ = np.min(X, axis=0)
            self.max_ = np.max(X, axis=0)
        elif X.ndim == 1:  # 1D data
            self.min_ = np.min(X)
            self.max_ = np.max(X)
        else:
            raise ValueError("Data must be 1D, 2D, or 3D.")

        self.scale_ = self.max_ - self.min_
        return self

    def transform(self, X):
        """
        Transform the data using the fitted scaler.
        Args:
        X (np.array): The data to transform.
        Returns:
        np.array: The scaled data. Constant features (zero range) map to the
                  lower bound of ``minmax_range`` instead of NaN/inf.
        """
        low, high = self.minmax_range[0], self.minmax_range[1]
        offset = self._align(self.min_, X)
        scale = self._align(self.scale_, X)
        # Avoid division by zero for features whose min equals their max.
        safe_scale = np.where(scale == 0, 1, scale)
        X_scaled = (X - offset) / safe_scale
        return X_scaled * (high - low) + low

    def fit_transform(self, X):
        """
        Fit to data, then transform it.
        Args:
        X (np.array): The data to fit and transform.
        Returns:
        np.array: The scaled data.
        """
        return self.fit(X).transform(X)

    def inverse_transform(self, X_scaled):
        """
        Inverse transform the scaled data to original data.
        Args:
        X_scaled (np.array): The scaled data to inverse transform.
        Returns:
        np.array: The original data scale.
        """
        low, high = self.minmax_range[0], self.minmax_range[1]
        X = (X_scaled - low) / (high - low)
        return X * self._align(self.scale_, X_scaled) + self._align(self.min_, X_scaled)

In [None]:
# Load the raw frame and keep only observations in [2020-01-01, 2023-01-01).
df = pd.read_parquet('/workspace/data.parquet')
in_study_window = (df.index >= pd.Timestamp('2020-01-01')) & (df.index < pd.Timestamp('2023-01-01'))
df = df[in_study_window]

# Keep the "quote asset volume" column of every asset of interest and strip
# the suffix so that columns are named after the matching part of the name.
assets = ['BTC', 'ETH', 'ADA', 'XMR', 'EOS', 'MATIC', 'TRX', 'FTM', 'BNB', 'XLM', 'ENJ', 'CHZ', 'BUSD', 'ATOM', 'LINK', 'ETC', 'XRP', 'BCH', 'LTC']
volume_columns = [
    col for col in df.columns
    if 'quote asset volume' in col and any(asset in col for asset in assets)
]
df = df[volume_columns]
df.columns = [col.replace(' quote asset volume', '') for col in df.columns]

# Calendar features known in advance, taken from the 'group' index level.
timestamps = df.reset_index()['group']
hour_of_day = timestamps.apply(lambda ts: (ts.hour)).values
day_of_week = timestamps.apply(lambda ts: (ts.dayofweek)).values
known_input_df = pd.DataFrame(
    index=df.index,
    data=np.array([hour_of_day, day_of_week]).T,
    columns=['hour', 'dayofweek'],
)
display(df)
display(known_input_df)

In [None]:
# Training budget shared by every model trained in this notebook.
N_MAX_EPOCHS = 100
BATCH_SIZE = 128


def early_stopping_callback():
    """Return a new EarlyStopping callback that monitors the validation loss."""
    return tf.keras.callbacks.EarlyStopping(
        monitor="val_loss",
        min_delta=0.00001,
        patience=6,
        mode="min",
        restore_best_weights=True,
        start_from_epoch=6,
    )


def lr_callback():
    """Return a new ReduceLROnPlateau callback that monitors the validation loss."""
    return tf.keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss",
        factor=0.25,
        patience=3,
        mode="min",
        min_delta=0.00001,
        min_lr=0.000025,
        verbose=0,
    )


def callbacks():
    """Return a fresh list of callbacks; call once per ``fit`` so no callback object is shared between runs."""
    return [early_stopping_callback(), lr_callback(), tf.keras.callbacks.TerminateOnNaN()]

In [None]:
def generate_data(df, sequence_length, n_ahead, median_window=24 * 14, train_fraction=0.8):
    """Build scaled train/test sequences without known future inputs.

    Every column of ``df`` is divided by a rolling median of its own past
    (shifted by ``n_ahead`` rows, window of ``median_window`` rows). Sliding
    windows of ``sequence_length`` rows form the inputs, and the following
    ``n_ahead`` rows of the first column form the targets.

    Args:
        df (pd.DataFrame): Raw series, one column per feature; the first column is the target.
        sequence_length (int): Number of past rows in each input window.
        n_ahead (int): Number of future rows to predict.
        median_window (int): Rolling window length used for the median normalisation.
        train_fraction (float): Fraction of the samples (in order) used for training.

    Returns:
        tuple: ``(X_scaler, X_train, X_test, X_train_unscaled, X_test_unscaled,
        y_scaler, y_train, y_test, y_train_unscaled, y_test_unscaled,
        y_scaler_train, y_scaler_test)``. ``X_*`` have shape
        (samples, sequence_length, features); ``y_train``/``y_test`` are
        flattened to (samples, n_ahead); ``y_scaler_train``/``y_scaler_test``
        hold the rolling-median factors matching the unscaled targets.

    Raises:
        ValueError: If there are not enough rows to build a single sequence.
    """
    # Case without known inputs
    scaler_df = df.shift(n_ahead).rolling(median_window).median()
    # A zero median gives +/-inf, which fillna would not catch and which would
    # dominate the min/max statistics; treat it like a missing value.
    ratio_df = (df / scaler_df).replace([np.inf, -np.inf], np.nan)
    warmup = median_window + n_ahead  # rows without a complete rolling window
    ratio_df = ratio_df.iloc[warmup:].fillna(0.)
    scaler_df = scaler_df.iloc[warmup:].fillna(0.)

    # Slice plain NumPy arrays instead of calling DataFrame.iloc in the loop.
    values = ratio_df.to_numpy()
    scale_values = scaler_df.to_numpy()
    window_ends = range(sequence_length, len(values) - n_ahead + 1)
    if not window_ends:
        raise ValueError(
            "Not enough rows to build a sequence: need at least "
            f"{warmup + sequence_length + n_ahead}, got {len(df)}."
        )

    # Past observations, future values of the first column, and their scale factors.
    X = np.stack([values[i - sequence_length:i] for i in window_ends])
    y = np.stack([values[i:i + n_ahead, 0:1] for i in window_ends])
    y_scale_factors = np.stack([scale_values[i:i + n_ahead, 0:1] for i in window_ends])

    # Chronological split into training and testing sets
    train_test_separation = int(len(X) * train_fraction)
    X_train_unscaled, X_test_unscaled = X[:train_test_separation], X[train_test_separation:]
    y_train_unscaled, y_test_unscaled = y[:train_test_separation], y[train_test_separation:]
    y_scaler_train = y_scale_factors[:train_test_separation]
    y_scaler_test = y_scale_factors[train_test_separation:]

    # Scalers are fitted on the training part only
    X_scaler = MinMaxScaler(feature_axis=2)
    X_train = X_scaler.fit_transform(X_train_unscaled)
    X_test = X_scaler.transform(X_test_unscaled)

    y_scaler = MinMaxScaler(feature_axis=2)
    y_train = y_scaler.fit_transform(y_train_unscaled)
    y_test = y_scaler.transform(y_test_unscaled)

    y_train = y_train.reshape(y_train.shape[0], -1)
    y_test = y_test.reshape(y_test.shape[0], -1)
    return X_scaler, X_train, X_test, X_train_unscaled, X_test_unscaled, y_scaler, y_train, y_test, y_train_unscaled, y_test_unscaled, y_scaler_train, y_scaler_test

def generate_data_w_known_inputs(df, known_input_df, sequence_length, n_ahead, median_window=24 * 14, train_fraction=0.8):
    """Build scaled train/test sequences that include known future inputs.

    Same normalisation and windowing as the variant without known inputs, but
    each input window spans ``sequence_length + n_ahead`` rows: the unknown
    features are zero-filled over the future rows, while the columns of
    ``known_input_df`` are provided over the whole span and appended as the
    last features.

    Args:
        df (pd.DataFrame): Raw series, one column per unknown feature; the first column is the target.
        known_input_df (pd.DataFrame): Features known in advance, row-aligned with ``df``.
        sequence_length (int): Number of past rows in each input window.
        n_ahead (int): Number of future rows to predict.
        median_window (int): Rolling window length used for the median normalisation.
        train_fraction (float): Fraction of the samples (in order) used for training.

    Returns:
        tuple: ``(X_scaler, X_train, X_test, X_train_unscaled, X_test_unscaled,
        y_scaler, y_train, y_test, y_train_unscaled, y_test_unscaled,
        y_scaler_train, y_scaler_test)``. ``X_*`` have shape
        (samples, sequence_length + n_ahead, unknown + known features);
        ``y_train``/``y_test`` are flattened to (samples, n_ahead).

    Raises:
        ValueError: If there are not enough rows to build a single sequence.
    """
    # Case with known inputs - fill with 0 the unknown features future values in X
    scaler_df = df.shift(n_ahead).rolling(median_window).median()
    # A zero median gives +/-inf, which fillna would not catch and which would
    # dominate the min/max statistics; treat it like a missing value.
    ratio_df = (df / scaler_df).replace([np.inf, -np.inf], np.nan)
    warmup = median_window + n_ahead  # rows without a complete rolling window
    ratio_df = ratio_df.iloc[warmup:].fillna(0.)
    scaler_df = scaler_df.iloc[warmup:].fillna(0.)

    # Slice plain NumPy arrays instead of calling DataFrame.iloc in the loop.
    values = ratio_df.to_numpy()
    scale_values = scaler_df.to_numpy()
    known_values = known_input_df.iloc[warmup:].to_numpy()
    window_ends = range(sequence_length, len(values) - n_ahead + 1)
    if not window_ends:
        raise ValueError(
            "Not enough rows to build a sequence: need at least "
            f"{warmup + sequence_length + n_ahead}, got {len(df)}."
        )

    # Unknown features: observed history followed by zeros over the horizon.
    future_padding = np.zeros((n_ahead, values.shape[1]))
    Xu = np.stack([
        np.concatenate((values[i - sequence_length:i], future_padding), axis=0)
        for i in window_ends
    ])
    # Known features: available over both the history and the horizon.
    Xk = np.stack([known_values[i - sequence_length:i + n_ahead] for i in window_ends])
    # Future values of the first column and their scale factors.
    y = np.stack([values[i:i + n_ahead, 0:1] for i in window_ends])
    y_scale_factors = np.stack([scale_values[i:i + n_ahead, 0:1] for i in window_ends])

    X = np.concatenate((Xu, Xk), axis=-1)

    # Chronological split into training and testing sets
    train_test_separation = int(len(X) * train_fraction)
    X_train_unscaled, X_test_unscaled = X[:train_test_separation], X[train_test_separation:]
    y_train_unscaled, y_test_unscaled = y[:train_test_separation], y[train_test_separation:]
    y_scaler_train = y_scale_factors[:train_test_separation]
    y_scaler_test = y_scale_factors[train_test_separation:]

    # Scalers are fitted on the training part only
    X_scaler = MinMaxScaler(feature_axis=2)
    X_train = X_scaler.fit_transform(X_train_unscaled)
    X_test = X_scaler.transform(X_test_unscaled)

    y_scaler = MinMaxScaler(feature_axis=2)
    y_train = y_scaler.fit_transform(y_train_unscaled)
    y_test = y_scaler.transform(y_test_unscaled)

    y_train = y_train.reshape(y_train.shape[0], -1)
    y_test = y_test.reshape(y_test.shape[0], -1)
    return X_scaler, X_train, X_test, X_train_unscaled, X_test_unscaled, y_scaler, y_train, y_test, y_train_unscaled, y_test_unscaled, y_scaler_train, y_scaler_test

In [None]:
# Forecast horizon (rows ahead) and look-back window (five horizons of history).
n_ahead = 30
sequence_length = n_ahead * 5

# Build the dataset without known inputs and unpack every returned piece.
(
    X_scaler, X_train, X_test, X_train_unscaled, X_test_unscaled,
    y_scaler, y_train, y_test, y_train_unscaled, y_test_unscaled,
    y_scaler_train, y_scaler_test,
) = generate_data(df, sequence_length, n_ahead)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

# SigTKAN usage

In [None]:
num_unknow_features = len(assets)
num_know_features = X_train.shape[2] - num_unknow_features

# Define SigTKAN model with signature level and KAN configurations
model = Sequential([
    Input(shape=X_train.shape[1:]),
    SigTKAN(
        units=100,
        sig_level=2,  # Signature truncation level
        sub_kan_configs=[{'grid_size': 3} for _ in range(4)],  # KAN layer configurations
        sub_kan_output_dim=20,
        sub_kan_input_dim=X_train.shape[2],
        dropout=0.1,
        return_sequences=False
    ),
    Dense(100, 'relu'),
    Dense(units=n_ahead, activation='linear')
])

model.compile(optimizer='adam', loss='mean_squared_error', jit_compile=False)

model.summary()

# The last 20% of the training samples are held out for validation; fresh
# callbacks are created so no state is shared with other training runs.
history = model.fit(
    X_train, y_train,
    batch_size=BATCH_SIZE,
    epochs=N_MAX_EPOCHS,
    validation_split=0.2,
    callbacks=callbacks(),
    shuffle=True,
    verbose=0
)

# Run inference once and reuse the predictions for both the summary metrics
# and the per-horizon error table.
preds = model.predict(X_test)
errors = preds - y_test
rmse = np.sqrt(np.mean(np.square(errors)))
r2 = r2_score(y_true=y_test.flatten(), y_pred=preds.flatten())
mae = np.mean(np.abs(errors))

metrics_summary = f"""
Model Type: SigTKAN
------------------------------------
Root Mean Squared Error (RMSE): {rmse:.4f}
R-squared (R²) Score: {r2:.4f}
Mean Absolute Error (MAE): {mae:.4f}
"""
print(metrics_summary)

# Prediction errors of every model, keyed by model name; each entry has the
# same shape as y_test.
all_errors = {'SigTKAN': errors}

## Comparison with other models

In [None]:
models = ['SigKAN', 'TKAT', 'TKAN', 'MLP', 'GRU', 'LSTM']

# Which dataset variant the X_*/y_* variables currently hold. Starts as None
# so the first iteration always builds its data; afterwards the data is only
# rebuilt when the required variant changes.
loaded_dataset = None

for model_type in models:

    # TKAT consumes known future inputs; every other model uses the plain dataset.
    required_dataset = 'known_inputs' if model_type == "TKAT" else 'default'
    if required_dataset != loaded_dataset:
        if required_dataset == 'known_inputs':
            dataset = generate_data_w_known_inputs(df, known_input_df, sequence_length, n_ahead)
        else:
            dataset = generate_data(df, sequence_length, n_ahead)
        (
            X_scaler, X_train, X_test, X_train_unscaled, X_test_unscaled,
            y_scaler, y_train, y_test, y_train_unscaled, y_test_unscaled,
            y_scaler_train, y_scaler_test,
        ) = dataset
        loaded_dataset = required_dataset

    # NOTE: the branches are order-sensitive because of the substring checks
    # ('TKAN' is also a substring of other model names).
    if model_type == "TKAT":
        num_unknow_features = len(assets)
        num_know_features = X_train.shape[2] - num_unknow_features
        model = TKAT(sequence_length, num_unknow_features, num_know_features, 1, 100, 4, n_ahead, use_tkan=True)
    elif model_type == 'SigKAN':
        model = Sequential([
            Input(shape=X_train.shape[1:]),
            SigKAN(100, 2, dropout=0.),
            Flatten(),
            Dense(100, 'relu'),
            Dense(units=n_ahead, activation='linear')
        ])
    elif 'TKAN' in model_type:
        model = Sequential([
            Input(shape=X_train.shape[1:]),
            TKAN(100, tkan_activations=[{'grid_size': 3} for _ in range(5)], sub_kan_output_dim=20, sub_kan_input_dim=1, return_sequences=True),
            TKAN(100, tkan_activations=[{'grid_size': 3} for _ in range(5)], sub_kan_output_dim=20, sub_kan_input_dim=1, return_sequences=False),
            Dense(units=n_ahead, activation='linear')
        ])
    elif 'GRU' in model_type:
        model = Sequential([
            Input(shape=X_train.shape[1:]),
            GRU(100, return_sequences=True),
            GRU(100, return_sequences=False),
            Dense(units=n_ahead, activation='linear')
        ])
    elif 'LSTM' in model_type:
        model = Sequential([
            Input(shape=X_train.shape[1:]),
            LSTM(100, return_sequences=True),
            LSTM(100, return_sequences=False),
            Dense(units=n_ahead, activation='linear')
        ])
    elif 'MLP' in model_type:
        model = Sequential([
            Input(shape=X_train.shape[1:]),
            Flatten(),
            Dense(100, activation='relu'),
            Dense(100, activation='relu'),
            Dense(units=n_ahead, activation='linear')
        ])
    else:
        # Without this guard the previous iteration's model would silently be
        # retrained and reported under the wrong name.
        raise ValueError(f"Unknown model type: {model_type!r}")

    optimizer = tf.keras.optimizers.Adam(0.001)
    model.compile(optimizer=optimizer, loss='mean_squared_error')

    model.summary()

    history = model.fit(X_train, y_train, batch_size=BATCH_SIZE, epochs=N_MAX_EPOCHS, validation_split=0.2, callbacks=callbacks(), shuffle=True, verbose=0)
    preds = model.predict(X_test)
    errors = preds - y_test
    all_errors[model_type] = errors

In [None]:
# Plot, for every trained model, the mean squared error at each forecast step.
model_types = ['SigTKAN', 'SigKAN', 'TKAT', 'TKAN', 'MLP', 'GRU', 'LSTM']
grey_shades = ['#FF0000', '#252525', '#404040', '#525252', '#737373', '#969696', '#bdbdbd']  # Red for SigTKAN, then darker to lighter

for model_type, color in zip(model_types, grey_shades):
    if model_type not in all_errors:
        continue  # model was not trained in this session
    model_errors = all_errors[model_type]
    # Predictions are recovered from the stored errors: error = prediction - target.
    y_pred = model_errors + y_test
    r2 = r2_score(y_true=y_test.flatten(), y_pred=y_pred.flatten())
    mse_per_step = np.mean(model_errors**2, axis=0)
    line_width = 2 if model_type == 'SigTKAN' else 1
    plt.plot(mse_per_step, label=f'{model_type}: R2={round(r2,4)}', color=color, linewidth=line_width)

plt.legend()
plt.title('Model comparison with SigTKAN - Errors based on number of steps forward')
plt.xlabel('Number of steps forward')
plt.ylabel('Mean Squared Error')
plt.grid(True, alpha=0.3)
plt.savefig('model_and_errors_sigtkan.png')
plt.show()

## SigTKAN Architecture Analysis

In [None]:
# Summarise the SigTKAN-specific design features
print("SigTKAN Architecture Benefits:")
print("1. Signature Transform: Captures path-dependent features from sequential data")
print("2. Temporal KAN: Adaptive activation functions that learn optimal transformations")
print("3. Recurrent Structure: Maintains temporal dependencies while processing signatures")
print("4. Multi-scale Processing: Multiple sub-KAN layers for different feature aspects")

# Compare predictive accuracy. The values printed are MSE and R² on the test
# set, not a measure of model size or runtime, so the header says "accuracy".
print("\nModel Accuracy Comparison:")
for model_type in ['SigTKAN', 'SigKAN', 'TKAN', 'GRU', 'LSTM']:
    if model_type in all_errors:
        model_errors = all_errors[model_type]
        mse = np.mean(model_errors**2)
        # Predictions are recovered from the stored errors: error = prediction - target.
        r2_val = r2_score(y_true=y_test.flatten(), y_pred=(model_errors + y_test).flatten())
        print(f"{model_type:>8}: MSE={mse:.6f}, R²={r2_val:.4f}")

In [None]:
# Sweep the signature truncation level of SigTKAN, all other settings fixed
print("Testing SigTKAN with different signature levels...")

sigtkan_configs = [
    {'sig_level': level, 'label': f'SigTKAN (sig_level={level})'}
    for level in (1, 2, 3)
]

# label -> {'errors': prediction errors, 'r2': R² score, 'mse': overall MSE}
sigtkan_results = {}

for config in sigtkan_configs:
    label = config['label']
    print(f"\nTraining {label}...")

    model = Sequential([
        Input(shape=X_train.shape[1:]),
        SigTKAN(
            units=100,
            sig_level=config['sig_level'],
            sub_kan_configs=[{'grid_size': 3} for _ in range(4)],
            sub_kan_output_dim=20,
            sub_kan_input_dim=X_train.shape[2],
            dropout=0.1,
            return_sequences=False
        ),
        Dense(100, 'relu'),
        Dense(units=n_ahead, activation='linear')
    ])

    model.compile(optimizer='adam', loss='mean_squared_error', jit_compile=False)

    fit_options = dict(
        batch_size=BATCH_SIZE,
        epochs=N_MAX_EPOCHS,
        validation_split=0.2,
        callbacks=callbacks(),
        shuffle=True,
        verbose=False,
    )
    history = model.fit(X_train, y_train, **fit_options)

    # Evaluate on the held-out test set
    preds = model.predict(X_test)
    errors = preds - y_test
    r2_val = r2_score(y_true=y_test.flatten(), y_pred=preds.flatten())
    mse = np.mean(errors**2)

    sigtkan_results[label] = dict(errors=errors, r2=r2_val, mse=mse)

    print(f"R² Score: {r2_val:.4f}, MSE: {mse:.6f}")

# Plot comparison of different SigTKAN configurations
plt.figure(figsize=(12, 6))
colors = ['#FF0000', '#FF6666', '#FF9999']

for color, (label, results) in zip(colors, sigtkan_results.items()):
    mse_per_step = np.mean(results['errors']**2, axis=0)
    plt.plot(
        mse_per_step,
        label=f"{label}: R²={results['r2']:.4f}",
        color=color,
        linewidth=2
    )

plt.legend()
plt.title('SigTKAN Performance with Different Signature Levels')
plt.xlabel('Number of steps forward')
plt.ylabel('Mean Squared Error')
plt.grid(True, alpha=0.3)
plt.savefig('sigtkan_signature_levels_comparison.png')
plt.show()

In [None]:
# Final performance summary: rank every trained model by R² on the test set
print("=" * 60)
print("FINAL PERFORMANCE SUMMARY")
print("=" * 60)

print("\nBest performing models:")
# Rows of (model name, R², MSE)
performance_data = []

# Add main comparison models
for model_type in ['SigTKAN', 'SigKAN', 'TKAT', 'TKAN', 'MLP', 'GRU', 'LSTM']:
    if model_type in all_errors:
        # Predictions are recovered from the stored errors: error = prediction - target.
        y_pred = all_errors[model_type] + y_test
        r2_val = r2_score(y_true=y_test.flatten(), y_pred=y_pred.flatten())
        mse = np.mean(all_errors[model_type]**2)
        performance_data.append((model_type, r2_val, mse))

# Add SigTKAN variants
for label, results in sigtkan_results.items():
    performance_data.append((label, results['r2'], results['mse']))

# Sort by R² score (descending)
performance_data.sort(key=lambda row: row[1], reverse=True)

# The loop variable is deliberately not called `model`, so the trained Keras
# model held in that name is not replaced by a string.
for rank, (model_name, r2_val, mse) in enumerate(performance_data, 1):
    print(f"{rank:2d}. {model_name:<25} | R²: {r2_val:7.4f} | MSE: {mse:.6f}")

print("\n" + "=" * 60)
if performance_data:
    best_model_name = performance_data[0][0]
    print(f"SigTKAN demonstrates {'superior' if best_model_name.startswith('SigTKAN') else 'competitive'} performance")
    print("combining signature transforms with temporal KAN processing.")
else:
    # Nothing was trained in this session, so there is no ranking to comment on.
    print("No model results available - run the training cells first.")
print("=" * 60)