In [1]:
import time
import tqdm

import numpy as np
import pandas as pd

import torch
from torch import nn, optim

from IPython.display import display
import matplotlib.pyplot as plt

from torchkan.data import timeseries
from torchkan.models import TKAT, TKAN, ReLUKAN

In [2]:
num_hidden = 64
num_heads = 4
num_embedding = 1
n_ahead = 30
sequence_length = 5 * n_ahead
sequence_length

150

In [3]:
def get_crypto_dataloaders(
    path = "torchkan/data/data.parquet", 
    n_ahead = 30,
    batch_size=16
):
    # Load California housing dataset
    df = timeseries.load_crypto(path)

    known_input_df = pd.DataFrame(
        index=df.index, 
        data=np.array([
            df.reset_index()['group'].apply(lambda x: (x.hour)).values, 
            df.reset_index()['group'].apply(lambda x: (x.dayofweek)).values
        ]).T, 
        columns = ['hour', 'dayofweek'])
    
    X_scaler, X_train, X_test, \
        X_train_unscaled, X_test_unscaled, \
            y_scaler, y_train, y_test, \
                y_train_unscaled, y_test_unscaled, \
                    y_scaler_train, y_scaler_test = \
        timeseries.generate_data_w_known_inputs(df, 
                                                known_input_df,
                                                sequence_length, 
                                                n_ahead)

    # Create data loaders (optional, if you want to batch and shuffle the data)
    train_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(X_train, y_train), 
        batch_size=batch_size, 
        shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(X_test, y_test), 
        batch_size=batch_size, 
        shuffle=False)

    return train_loader, test_loader

In [4]:
train_loader, test_loader = get_crypto_dataloaders(n_ahead=n_ahead)
[t.shape for t in train_loader.dataset.tensors], [t.shape for t in test_loader.dataset.tensors]

  return torch.tensor(data, dtype=dtype).to(device)


([torch.Size([20607, 180, 21]), torch.Size([20607, 30])],
 [torch.Size([5152, 180, 21]), torch.Size([5152, 30])])

In [5]:
shape = train_loader.dataset.tensors[0].shape
torch.split(train_loader.dataset.tensors[0], (1,) * shape[-1], -1)[0].shape

torch.Size([20607, 180, 1])

In [6]:
train_loader.dataset.tensors[0][:, :, 0:1].shape

torch.Size([20607, 180, 1])

In [7]:
model = TKAN(input_size=sequence_length+n_ahead, hidden_size=num_hidden)
model

TKAN(
  (tkan_cells): ModuleList(
    (0): TKANCell(
      (tkan_sub_layers): ModuleList(
        (0): KANLinear(
          (linear): Linear(in_features=180, out_features=180, bias=True)
          (layer_norm): LayerNorm((180,), eps=1e-05, elementwise_affine=True)
        )
      )
    )
  )
)

In [8]:
model = TKAT(
            sequence_length=sequence_length, 
            num_unknown_features=19, 
            num_known_features=2, 
            num_embedding=num_embedding, 
            num_hidden=num_hidden, 
            num_heads=num_heads, 
            n_ahead=n_ahead, 
            use_tkan=True
        )
model

[GRN] in_size: 21, hidden_size: 64, out_size: 21
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_

TKAT(
  (embedding_layer): EmbeddingLayer(
    (dense_layers): ModuleList(
      (0-20): 21 x Linear(in_features=1, out_features=1, bias=True)
    )
  )
  (vsn_past_features): VariableSelectionNetwork(
    (mlp_dense): GRN(
      (skip_layer): Linear(in_features=21, out_features=21, bias=True)
      (hidden_layer_1): Sequential(
        (0): Linear(in_features=21, out_features=64, bias=True)
        (1): ELU(alpha=1.0)
      )
      (hidden_layer_2): Linear(in_features=64, out_features=64, bias=True)
      (gate_layer): Gate(
        (dense_layer): Linear(in_features=64, out_features=21, bias=True)
        (gated_layer): Linear(in_features=64, out_features=21, bias=True)
      )
      (add_and_norm_layer): AddAndNorm(
        (norm_layer): LayerNorm((21,), eps=1e-05, elementwise_affine=True)
      )
    )
    (grn_layers): ModuleList(
      (0-20): 21 x GRN(
        (skip_layer): Linear(in_features=1, out_features=64, bias=True)
        (hidden_layer_1): Sequential(
          (0): Line

In [12]:
model = ReLUKAN([180,1], 5, 3)
model

ReLUKAN(
  (rk_layers): ModuleList(
    (0): ReLUKANLayer(
      (conv): Conv2d(1, 1, kernel_size=(8, 180), stride=(1, 1))
    )
  )
)

In [19]:
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total trainable parameters: {total_params}")

#criterion = nn.CrossEntropyLoss()
criterion = nn.MSELoss()

# LBFGS is really slow
# optimizer = optim.LBFGS(model.parameters(), lr=0.01)
# Adam works with very low lr
optimizer = optim.Adam(model.parameters(), lr=0.0002)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Total trainable parameters: 4321


In [30]:

def train(model, train_loader, criterion, optimizer, device):
    model.train()
    total_loss = 0
    correct = 0
    bar = tqdm.tqdm(enumerate(train_loader))
    for idx, (data, target) in bar:
        data, target = data[...,0:1].to(device), target[...,0:1].to(device)
        #print(f"data: {data.shape}")

        if isinstance(optimizer, optim.LBFGS):
            def closure():
                optimizer.zero_grad()
                output = model(data)
                loss = criterion(output, target)
                loss.backward()
                return loss
            loss = optimizer.step(closure)
        else:
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            loss = loss.item()
            
        total_loss += loss
        bar.set_postfix({"loss": loss, "avg_loss": total_loss / (idx+1)})
        
    return total_loss / len(train_loader)

def validate(model, test_loader, criterion, device):
    model.eval()
    total_loss = 0
    correct = 0
    with torch.no_grad():
        bar = tqdm.tqdm(test_loader)
        for data, target in bar:
            data, target = data[...,0:1].to(device), target[...,0:1].to(device)

            output = model(data)
            loss = criterion(output, target)
            total_loss += loss.item()
            #pred = output.argmax(dim=1, keepdim=True)
            #correct += pred.eq(target.view_as(pred)).sum().item()
            error = torch.abs(output - target).mean()
            bar.set_postfix({"loss": loss, "error": error})

    return total_loss / len(test_loader), correct / len(test_loader.dataset)    

In [31]:
train_losses = []
test_losses = []

epochs = 30
for epoch in range(epochs):
    train_loss = train(model, train_loader, criterion, optimizer, device)
    test_loss, test_accuracy = validate(model, test_loader, criterion, device)

    train_losses.append(train_loss)
    test_losses.append(test_loss)
    
    print(f'Epoch {epoch + 1}, Train Loss: {train_loss:.4f}, '
        f'Test Loss: {test_loss:.4f}, Test Acc: {test_accuracy:.2f}')

1288it [00:06, 194.24it/s, loss=0.00783, avg_loss=0.00399] 
100%|█| 322/322 [00:00<00:00, 370.30it/s, loss=tensor(0.0013), error=tensor(0.03


Epoch 1, Train Loss: 0.0040, Test Loss: 0.0052, Test Acc: 0.00


1288it [00:07, 170.50it/s, loss=0.00401, avg_loss=0.00387] 
100%|█| 322/322 [00:01<00:00, 314.82it/s, loss=tensor(0.0016), error=tensor(0.03


Epoch 2, Train Loss: 0.0039, Test Loss: 0.0054, Test Acc: 0.00


1288it [00:06, 185.59it/s, loss=0.00129, avg_loss=0.00392] 
100%|█| 322/322 [00:00<00:00, 411.90it/s, loss=tensor(0.0010), error=tensor(0.02


Epoch 3, Train Loss: 0.0039, Test Loss: 0.0051, Test Acc: 0.00


1288it [00:06, 199.00it/s, loss=0.015, avg_loss=0.00394]   
100%|█| 322/322 [00:00<00:00, 426.46it/s, loss=tensor(0.0023), error=tensor(0.04


Epoch 4, Train Loss: 0.0039, Test Loss: 0.0061, Test Acc: 0.00


1288it [00:06, 206.32it/s, loss=0.000538, avg_loss=0.00384]
100%|█| 322/322 [00:00<00:00, 435.63it/s, loss=tensor(0.0012), error=tensor(0.03


Epoch 5, Train Loss: 0.0038, Test Loss: 0.0051, Test Acc: 0.00


1288it [00:06, 190.11it/s, loss=0.00481, avg_loss=0.00377] 
100%|█| 322/322 [00:00<00:00, 406.28it/s, loss=tensor(0.0010), error=tensor(0.02


Epoch 6, Train Loss: 0.0038, Test Loss: 0.0051, Test Acc: 0.00


1288it [00:06, 197.80it/s, loss=0.0015, avg_loss=0.00387]  
100%|█| 322/322 [00:00<00:00, 417.15it/s, loss=tensor(0.0012), error=tensor(0.03


Epoch 7, Train Loss: 0.0039, Test Loss: 0.0051, Test Acc: 0.00


1288it [00:06, 198.88it/s, loss=0.0101, avg_loss=0.00384]  
100%|█| 322/322 [00:01<00:00, 283.51it/s, loss=tensor(0.0010), error=tensor(0.02


Epoch 8, Train Loss: 0.0038, Test Loss: 0.0050, Test Acc: 0.00


1288it [00:06, 187.63it/s, loss=0.00177, avg_loss=0.00385] 
100%|█| 322/322 [00:00<00:00, 395.13it/s, loss=tensor(0.0010), error=tensor(0.02


Epoch 9, Train Loss: 0.0038, Test Loss: 0.0051, Test Acc: 0.00


1288it [00:06, 185.29it/s, loss=0.000768, avg_loss=0.00387]
100%|█| 322/322 [00:00<00:00, 386.21it/s, loss=tensor(0.0025), error=tensor(0.03


Epoch 10, Train Loss: 0.0039, Test Loss: 0.0073, Test Acc: 0.00


1288it [00:06, 203.51it/s, loss=0.00219, avg_loss=0.0039]  
100%|█| 322/322 [00:00<00:00, 423.07it/s, loss=tensor(0.0010), error=tensor(0.02


Epoch 11, Train Loss: 0.0039, Test Loss: 0.0051, Test Acc: 0.00


1288it [00:06, 205.04it/s, loss=0.00196, avg_loss=0.00384] 
100%|█| 322/322 [00:00<00:00, 323.31it/s, loss=tensor(0.0013), error=tensor(0.03


Epoch 12, Train Loss: 0.0038, Test Loss: 0.0052, Test Acc: 0.00


1288it [00:06, 207.03it/s, loss=0.00705, avg_loss=0.00385] 
100%|█| 322/322 [00:00<00:00, 390.89it/s, loss=tensor(0.0011), error=tensor(0.02


Epoch 13, Train Loss: 0.0038, Test Loss: 0.0054, Test Acc: 0.00


1288it [00:06, 203.12it/s, loss=0.00121, avg_loss=0.00383] 
100%|█| 322/322 [00:01<00:00, 267.46it/s, loss=tensor(0.0010), error=tensor(0.02


Epoch 14, Train Loss: 0.0038, Test Loss: 0.0051, Test Acc: 0.00


1288it [00:06, 204.49it/s, loss=0.0143, avg_loss=0.00381]  
100%|█| 322/322 [00:00<00:00, 419.66it/s, loss=tensor(0.0010), error=tensor(0.02


Epoch 15, Train Loss: 0.0038, Test Loss: 0.0050, Test Acc: 0.00


1288it [00:06, 204.90it/s, loss=0.00116, avg_loss=0.00377] 
100%|█| 322/322 [00:00<00:00, 376.90it/s, loss=tensor(0.0015), error=tensor(0.03


Epoch 16, Train Loss: 0.0038, Test Loss: 0.0054, Test Acc: 0.00


1288it [00:06, 188.73it/s, loss=0.00538, avg_loss=0.00379] 
100%|█| 322/322 [00:00<00:00, 353.30it/s, loss=tensor(0.0010), error=tensor(0.02


Epoch 17, Train Loss: 0.0038, Test Loss: 0.0052, Test Acc: 0.00


1288it [00:06, 201.57it/s, loss=0.00127, avg_loss=0.00378] 
100%|█| 322/322 [00:00<00:00, 427.04it/s, loss=tensor(0.0010), error=tensor(0.02


Epoch 18, Train Loss: 0.0038, Test Loss: 0.0051, Test Acc: 0.00


1288it [00:06, 189.05it/s, loss=0.000977, avg_loss=0.00376]
100%|█| 322/322 [00:00<00:00, 370.68it/s, loss=tensor(0.0015), error=tensor(0.02


Epoch 19, Train Loss: 0.0038, Test Loss: 0.0057, Test Acc: 0.00


1288it [00:06, 202.49it/s, loss=0.00139, avg_loss=0.00377] 
100%|█| 322/322 [00:00<00:00, 405.53it/s, loss=tensor(0.0010), error=tensor(0.02


Epoch 20, Train Loss: 0.0038, Test Loss: 0.0051, Test Acc: 0.00


1288it [00:06, 192.40it/s, loss=0.00263, avg_loss=0.00378] 
100%|█| 322/322 [00:00<00:00, 422.31it/s, loss=tensor(0.0011), error=tensor(0.02


Epoch 21, Train Loss: 0.0038, Test Loss: 0.0051, Test Acc: 0.00


1288it [00:06, 205.76it/s, loss=0.00979, avg_loss=0.00381] 
100%|█| 322/322 [00:00<00:00, 390.55it/s, loss=tensor(0.0011), error=tensor(0.03


Epoch 22, Train Loss: 0.0038, Test Loss: 0.0051, Test Acc: 0.00


1288it [00:06, 193.17it/s, loss=0.00116, avg_loss=0.00375] 
100%|█| 322/322 [00:00<00:00, 370.26it/s, loss=tensor(0.0010), error=tensor(0.02


Epoch 23, Train Loss: 0.0038, Test Loss: 0.0051, Test Acc: 0.00


1288it [00:06, 197.86it/s, loss=0.00128, avg_loss=0.00374] 
100%|█| 322/322 [00:00<00:00, 377.56it/s, loss=tensor(0.0011), error=tensor(0.02


Epoch 24, Train Loss: 0.0037, Test Loss: 0.0051, Test Acc: 0.00


1288it [00:06, 185.28it/s, loss=0.00853, avg_loss=0.00373] 
100%|█| 322/322 [00:00<00:00, 369.23it/s, loss=tensor(0.0029), error=tensor(0.05


Epoch 25, Train Loss: 0.0037, Test Loss: 0.0066, Test Acc: 0.00


1288it [00:06, 203.61it/s, loss=0.0022, avg_loss=0.00374]  
100%|█| 322/322 [00:00<00:00, 358.39it/s, loss=tensor(0.0012), error=tensor(0.02


Epoch 26, Train Loss: 0.0037, Test Loss: 0.0054, Test Acc: 0.00


1288it [00:06, 187.21it/s, loss=0.00354, avg_loss=0.00376] 
100%|█| 322/322 [00:00<00:00, 388.80it/s, loss=tensor(0.0010), error=tensor(0.02


Epoch 27, Train Loss: 0.0038, Test Loss: 0.0050, Test Acc: 0.00


1288it [00:06, 195.06it/s, loss=0.00196, avg_loss=0.00376] 
100%|█| 322/322 [00:00<00:00, 427.02it/s, loss=tensor(0.0016), error=tensor(0.03


Epoch 28, Train Loss: 0.0038, Test Loss: 0.0054, Test Acc: 0.00


1288it [00:06, 195.31it/s, loss=0.00762, avg_loss=0.00373] 
100%|█| 322/322 [00:00<00:00, 396.28it/s, loss=tensor(0.0010), error=tensor(0.02


Epoch 29, Train Loss: 0.0037, Test Loss: 0.0051, Test Acc: 0.00


1288it [00:06, 199.03it/s, loss=0.00216, avg_loss=0.00371] 
100%|█| 322/322 [00:00<00:00, 367.66it/s, loss=tensor(0.0018), error=tensor(0.04

Epoch 30, Train Loss: 0.0037, Test Loss: 0.0056, Test Acc: 0.00





In [35]:
model = model.eval()

In [37]:
output = model(train_loader.dataset.tensors[0][:, :, 0:1])
output.shape

torch.Size([20607, 1, 1])