In [1]:
import time
import tqdm

import numpy as np
import pandas as pd

import torch
from torch import nn, optim

from IPython.display import display
import matplotlib.pyplot as plt

from torchkan.data import timeseries
from torchkan.models import TKAT, TKAN

In [2]:
num_hidden = 64
num_heads = 4
num_embedding = 1
n_ahead = 30
sequence_length = 5 * n_ahead
sequence_length

150

In [3]:
def get_crypto_dataloaders(
    path = "torchkan/data/data.parquet", 
    n_ahead = 30,
    batch_size=16
):
    # Load California housing dataset
    df = timeseries.load_crypto(path)

    known_input_df = pd.DataFrame(
        index=df.index, 
        data=np.array([
            df.reset_index()['group'].apply(lambda x: (x.hour)).values, 
            df.reset_index()['group'].apply(lambda x: (x.dayofweek)).values
        ]).T, 
        columns = ['hour', 'dayofweek'])
    
    X_scaler, X_train, X_test, \
        X_train_unscaled, X_test_unscaled, \
            y_scaler, y_train, y_test, \
                y_train_unscaled, y_test_unscaled, \
                    y_scaler_train, y_scaler_test = \
        timeseries.generate_data_w_known_inputs(df, 
                                                known_input_df,
                                                sequence_length, 
                                                n_ahead)

    # Create data loaders (optional, if you want to batch and shuffle the data)
    train_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(X_train, y_train), 
        batch_size=batch_size, 
        shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(X_test, y_test), 
        batch_size=batch_size, 
        shuffle=False)

    return train_loader, test_loader

In [4]:
train_loader, test_loader = get_crypto_dataloaders(n_ahead=n_ahead)
[t.shape for t in train_loader.dataset.tensors], [t.shape for t in test_loader.dataset.tensors]

  return torch.tensor(data, dtype=dtype).to(device)


([torch.Size([20607, 180, 21]), torch.Size([20607, 30])],
 [torch.Size([5152, 180, 21]), torch.Size([5152, 30])])

In [5]:
shape = train_loader.dataset.tensors[0].shape
torch.split(train_loader.dataset.tensors[0], (1,) * shape[-1], -1)[0].shape

torch.Size([20607, 180, 1])

In [6]:
train_loader.dataset.tensors[0][:, :, 0:1].shape

torch.Size([20607, 180, 1])

In [7]:
model = TKAN(input_size=sequence_length+n_ahead, hidden_size=num_hidden)
model

TKAN(
  (tkan_cells): ModuleList(
    (0): TKANCell(
      (tkan_sub_layers): ModuleList(
        (0): KANLinear(
          (linear): Linear(in_features=180, out_features=180, bias=True)
          (layer_norm): LayerNorm((180,), eps=1e-05, elementwise_affine=True)
        )
      )
    )
  )
)

In [8]:
model = TKAT(
            sequence_length=sequence_length, 
            num_unknown_features=19, 
            num_known_features=2, 
            num_embedding=num_embedding, 
            num_hidden=num_hidden, 
            num_heads=num_heads, 
            n_ahead=n_ahead, 
            use_tkan=True
        )
model

[GRN] in_size: 21, hidden_size: 64, out_size: 21
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_size: 64
[GRN] in_size: 1, hidden_size: 64, out_

TKAT(
  (embedding_layer): EmbeddingLayer(
    (dense_layers): ModuleList(
      (0-20): 21 x Linear(in_features=1, out_features=1, bias=True)
    )
  )
  (vsn_past_features): VariableSelectionNetwork(
    (mlp_dense): GRN(
      (skip_layer): Linear(in_features=21, out_features=21, bias=True)
      (hidden_layer_1): Sequential(
        (0): Linear(in_features=21, out_features=64, bias=True)
        (1): ELU(alpha=1.0)
      )
      (hidden_layer_2): Linear(in_features=64, out_features=64, bias=True)
      (gate_layer): Gate(
        (dense_layer): Linear(in_features=64, out_features=21, bias=True)
        (gated_layer): Linear(in_features=64, out_features=21, bias=True)
      )
      (add_and_norm_layer): AddAndNorm(
        (norm_layer): LayerNorm((21,), eps=1e-05, elementwise_affine=True)
      )
    )
    (grn_layers): ModuleList(
      (0-20): 21 x GRN(
        (skip_layer): Linear(in_features=1, out_features=64, bias=True)
        (hidden_layer_1): Sequential(
          (0): Line

In [9]:
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total trainable parameters: {total_params}")

criterion = nn.CrossEntropyLoss()

# LBFGS is really slow
# optimizer = optim.LBFGS(model.parameters(), lr=0.01)
# Adam works with very low lr
optimizer = optim.Adam(model.parameters(), lr=0.0002)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Total trainable parameters: 52278456


In [10]:

def train(model, train_loader, criterion, optimizer, device):
    model.train()
    total_loss = 0
    correct = 0
    
    for idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        print(f"data: {data.shape}")

        if isinstance(optimizer, optim.LBFGS):
            def closure():
                optimizer.zero_grad()
                output = model(data)
                loss = criterion(output, target)
                loss.backward()
                return loss
            loss = optimizer.step(closure)
        else:
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            loss = loss.item()
            
        total_loss += loss
        
    return total_loss / len(train_loader)

def validate(model, test_loader, criterion, device):
    model.eval()
    total_loself.hidden_sizess = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            print(f"data in: {data.shape}")
            output = model(data)
            loss = criterion(output, target)
            total_loss += loss.item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    return total_loss / len(test_loader), correct / len(test_loader.dataset)    

In [11]:
train_losses = []
test_losses = []

epochs = 30
for epoch in range(epochs):
    train_loss = train(model, train_loader, criterion, optimizer, device)
    test_loss, test_accuracy = validate(model, test_loader, criterion, device)

    train_losses.append(train_loss)
    test_losses.append(test_loss)
    
    print(f'Epoch {epoch + 1}, Train Loss: {train_loss:.4f}, '
        f'Test Loss: {test_loss:.4f}, Test Acc: {test_accuracy:.2f}')

data: torch.Size([16, 180, 21])
[TKAT] Input shape: torch.Size([16, 180, 21])
[EmbeddingLayer] Output shape: torch.Size([16, 180, 1, 21])
[TKAT] past_features shape: torch.Size([16, 150, 1, 21])
[TKAT] future_features shape: torch.Size([16, 30, 1, 2])
[VariableSelectionNetwork] [past_features]	input shape: torch.Size([16, 150, 1, 21])
[VariableSelectionNetwork] [past_features-	flatten shape: torch.Size([16, 150, 21])
[VariableSelectionNetwork] [past_features-	sparse_weights shape: torch.Size([16, 150, 1, 21])
[VariableSelectionNetwork] [past_features]	len emb list: 21
[VariableSelectionNetwork] [past_features]	transformed_embedding shape: torch.Size([16, 150, 64, 21])
[VariableSelectionNetwork] [past_features]	output shape: torch.Size([16, 150, 64])
[VariableSelectionNetwork] [future_features]	input shape: torch.Size([16, 30, 1, 2])
[VariableSelectionNetwork] [future_features-	flatten shape: torch.Size([16, 30, 2])
[VariableSelectionNetwork] [future_features-	sparse_weights shape: torc

RuntimeError: The size of tensor a (30) must match the size of tensor b (150) at non-singleton dimension 0

0,1
Search Stack Overflow,Ask Bing Chat


In [None]:
model.decoder.layer.__dict__