In [300]:
import random
import copy
from functools import partial

import pandas as pd
import numpy as np

import wandb

from darts import TimeSeries
from darts.metrics.metrics import mae, rmse, mape, smape

from sklearn.preprocessing import StandardScaler, MinMaxScaler

import torch
import torch.nn as nn
import torch.optim as optim 
from torch.utils.data import Dataset, DataLoader
from torcheval.metrics.functional import binary_accuracy, binary_f1_score, binary_precision, binary_recall

In [313]:
def evaluate_regression_metrics(actual, pred):
    """Score a forecast against the ground truth with four darts metrics.

    Each input is wrapped in a ``pandas.DataFrame``, squeezed, and converted with
    ``TimeSeries.from_series`` before being handed to the metric functions.

    Args:
        actual: Ground-truth values (anything ``pd.DataFrame`` accepts).
        pred: Predicted values, laid out like ``actual``.

    Returns:
        dict with the keys "MAE", "RMSE", "MAPE" and "SMAPE", each holding the
        value returned by the corresponding darts metric.
    """
    def _as_series(values):
        # Same conversion chain for both inputs.
        return TimeSeries.from_series(pd.DataFrame(values).squeeze())

    truth = _as_series(actual)
    forecast = _as_series(pred)

    scorers = (("MAE", mae), ("RMSE", rmse), ("MAPE", mape), ("SMAPE", smape))
    return {label: scorer(truth, forecast) for label, scorer in scorers}

def evaluate_classification_metrics(actual, pred, threshold=0.5):
    """Compute binary accuracy, F1, precision and recall for thresholded predictions.

    Both tensors are flattened to 1-D, so multi-step targets of shape
    ``(batch, n_predict)`` are scored element-wise (micro-averaged) and a
    single-sample batch no longer collapses to a 0-d tensor.

    Args:
        actual: Tensor of ground-truth binary labels (0/1), any shape.
        pred: Tensor of predicted scores with the same number of elements as ``actual``.
        threshold: Scores ``>= threshold`` are counted as the positive class.

    Returns:
        dict with the keys "Accuracy", "F1", "Precision" and "Recall".
    """
    labels = actual.reshape(-1).long()
    predicted = (pred.reshape(-1) >= threshold).int()

    # torcheval's functional metrics take (input=predictions, target=labels).
    # Passing them the other way round swaps precision and recall.
    return {
        "Accuracy": binary_accuracy(predicted, labels),
        "F1": binary_f1_score(predicted, labels),
        "Precision": binary_precision(predicted, labels),
        "Recall": binary_recall(predicted, labels),
    }

In [314]:
def evaluate_test(model, test_loader, device, loss_fn, calculate_metrics):
    """Evaluate ``model`` on every batch of ``test_loader`` and log the result to W&B.

    The loss is the mean of the per-batch losses (the same convention as the
    validation loop in ``Trainer``); metrics are computed once over the
    concatenated outputs and labels of all batches. For a loader with a single
    batch this is identical to evaluating that batch alone.

    Args:
        model: Module to evaluate; it is switched to eval mode.
        test_loader: Iterable yielding ``(inputs, labels)`` tensor pairs.
        device: Device the batches are moved to before the forward pass.
        loss_fn: Callable ``loss_fn(outputs, labels.float())`` returning a scalar tensor.
        calculate_metrics: Callable ``calculate_metrics(labels, outputs)`` returning the metrics.

    Returns:
        Tuple ``(test_loss, metrics)``.

    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    model.eval()
    outputs_list = []
    labels_list = []
    total_loss = 0.0
    n_batches = 0

    with torch.no_grad():
        # Walk the whole loader instead of only its first batch.
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            total_loss += loss_fn(outputs, labels.float()).item()
            n_batches += 1

            outputs_list.append(outputs.cpu())
            labels_list.append(labels.cpu())

    if n_batches == 0:
        raise ValueError("test_loader yielded no batches; nothing to evaluate")

    test_loss = total_loss / n_batches
    outputs_concat = torch.cat(outputs_list, dim=0)
    labels_concat = torch.cat(labels_list, dim=0)

    metrics = calculate_metrics(labels_concat, outputs_concat)
    print(f'Test Loss: {test_loss}')
    print(f'Test Metrics, {metrics}')

    wandb.log({
        'test_loss': test_loss,
        'test_metrics': metrics,
    })

    return test_loss, metrics

In [315]:
class TimeSeriesDataset(Dataset):
    """Sliding-window dataset that pairs ``n_past`` inputs with ``n_predict`` targets."""

    def __init__(self, data, n_past, n_predict, n_stride=1, classification=False):
        """
        Args:
            data: Input time series data, either 1-D or with a trailing
                single-feature axis (e.g. the ``(N, 1)`` array a scikit-learn scaler returns).
            n_past: Number of past time steps as input.
            n_predict: Number of future time steps to predict.
            n_stride: Step size for moving window.
            classification: If True, convert future values into classification labels (higher/lower).
        """
        self.data = data
        self.n_past = n_past
        self.n_predict = n_predict
        self.n_stride = n_stride
        self.classification = classification
        self.samples = self._create_samples()

    def _create_samples(self):
        """Build the list of ``(past, target)`` windows over ``self.data``."""
        samples = []
        window = self.n_past + self.n_predict
        for start in range(0, len(self.data) - window + 1, self.n_stride):
            past = self.data[start : start + self.n_past]
            future = self.data[start + self.n_past : start + window]

            if self.classification:
                # Label each future step 1 if it is above the last observed value, else 0.
                last_seen = past[-1]
                labels = [1 if step > last_seen else 0 for step in future]
                samples.append((past, labels))
            else:
                samples.append((past, future))
        return samples

    def __len__(self):
        return len(self.samples)

    @staticmethod
    def _drop_feature_axis(tensor):
        """Remove a trailing axis of size 1 so windows come out as 1-D sequences."""
        if tensor.ndim > 1 and tensor.shape[-1] == 1:
            return tensor.squeeze(-1)
        return tensor

    def __getitem__(self, idx):
        """Return ``(past, target)`` tensors for window ``idx``.

        ``past`` is float32. ``target`` is int64 labels in classification mode and
        float32 values otherwise. The trailing single-feature axis is removed from
        both, so a regression target has shape ``(n_predict,)`` and batches line up
        with a model output of shape ``(batch, n_predict)``.
        """
        past, target = self.samples[idx]
        past_tensor = self._drop_feature_axis(torch.tensor(past, dtype=torch.float32))
        target_dtype = torch.long if self.classification else torch.float32
        target_tensor = self._drop_feature_axis(torch.tensor(target, dtype=target_dtype))
        return past_tensor, target_tensor

In [316]:
class Trainer:
    """Train/validate loop with early stopping, W&B logging and checkpoint upload.

    Args:
        model: Module to train; moved to ``device`` on construction.
        device: Device used for the model and every batch.
        optimizer: Optimizer already bound to ``model``'s parameters.
        loss_fn: Callable ``loss_fn(outputs, labels.float())`` returning a scalar tensor.
        train_loader: Sized iterable of ``(inputs, labels)`` training batches.
        val_loader: Sized iterable of ``(inputs, labels)`` validation batches.
        n_epochs: Maximum number of epochs.
        calculate_metrics: Callable ``calculate_metrics(labels, outputs)`` returning a dict.
        run_name: Prefix for the saved checkpoint files and W&B artifacts.
        patience: Number of consecutive non-improving epochs before training stops.
    """

    def __init__(self, model, device, optimizer, loss_fn, train_loader, val_loader, n_epochs,
                 calculate_metrics, run_name, patience=10):
        self.device = device
        self.model = model.to(self.device)
        self.optimizer = optimizer
        self.loss_fn = loss_fn
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.calculate_metrics = calculate_metrics
        self.n_epochs = n_epochs
        self.patience = patience
        self.best_val_loss = float('inf')
        self.patience_counter = 0
        self.best_model_state = None
        self.early_stopped = False
        self.run_name = run_name

    def train(self):
        """Run the training loop, save both checkpoints and return the best model.

        Returns:
            A deep copy of the model loaded with the weights of the epoch that had the
            lowest validation loss. If no epoch improved on the initial state (for
            example ``n_epochs == 0``), the copy carries the current weights.
        """
        for epoch in range(self.n_epochs):
            train_loss = self._train_epoch(epoch)
            val_loss, metrics = self._val_epoch(epoch)
            # One log call per epoch keeps losses and metrics on the same W&B step.
            wandb.log({"train_loss": train_loss, "val_loss": val_loss, **metrics})
            self._early_stopping(val_loss)
            if self.early_stopped:
                print(f"Early stopping triggered at epoch {epoch+1}")
                break

        print("Training complete, saving best and final model.")
        self._save_best_model()
        self._save_final_model()

        best_model = copy.deepcopy(self.model)
        if self.best_model_state is not None:
            best_model.load_state_dict(self.best_model_state)
        return best_model

    def _train_epoch(self, epoch):
        """Run one optimisation pass over ``train_loader``; return the mean batch loss."""
        self.model.train()
        running_loss = 0
        for inputs, labels in self.train_loader:
            inputs, labels = inputs.to(self.device), labels.to(self.device)
            self.optimizer.zero_grad()
            outputs = self.model(inputs)
            loss = self.loss_fn(outputs, labels.float())
            loss.backward()
            self.optimizer.step()
            running_loss += loss.item()
        mean_loss = running_loss / len(self.train_loader)
        print(f'Epoch {epoch+1}, Train Loss: {mean_loss}')
        return mean_loss

    def _val_epoch(self, epoch):
        """Evaluate on ``val_loader``; return ``(mean batch loss, metrics)``."""
        self.model.eval()
        running_loss = 0
        outputs_list = []
        labels_list = []
        with torch.no_grad():
            for inputs, labels in self.val_loader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                outputs = self.model(inputs)
                running_loss += self.loss_fn(outputs, labels.float()).item()
                outputs_list.append(outputs.cpu())
                labels_list.append(labels.cpu())

        outputs_concat = torch.cat(outputs_list, dim=0)
        labels_concat = torch.cat(labels_list, dim=0)

        metrics = self.calculate_metrics(labels_concat, outputs_concat)
        mean_loss = running_loss / len(self.val_loader)
        print(f'Epoch {epoch+1}, Validation Loss: {mean_loss}')
        print("Metrics", metrics)
        return mean_loss, metrics

    def _early_stopping(self, val_loss):
        """Track the best validation loss and set ``early_stopped`` once patience runs out."""
        if val_loss < self.best_val_loss:
            self.best_val_loss = val_loss
            self.patience_counter = 0
            # state_dict() returns references to the live parameters. Without a deep
            # copy, later optimizer steps would overwrite the "best" snapshot in place.
            self.best_model_state = copy.deepcopy(self.model.state_dict())
        else:
            self.patience_counter += 1
            if self.patience_counter >= self.patience:
                self.early_stopped = True

    def _artifact_path(self, kind):
        """Return the checkpoint path for ``kind`` ("best"/"final"), creating its folder."""
        import os  # local import: the notebook's import cell does not pull in os

        path = f"../artifacts/{self.run_name}_{kind}_model.pth"
        os.makedirs(os.path.dirname(path), exist_ok=True)
        return path

    def _upload_checkpoint(self, state, kind):
        """Write ``state`` to disk and log it to W&B as a model artifact."""
        path = self._artifact_path(kind)
        torch.save(state, path)
        artifact = wandb.Artifact(f"{self.run_name}_{kind}_model", type="model")
        artifact.add_file(path)
        wandb.log_artifact(artifact)

    def _save_best_model(self):
        """Save and upload the best snapshot, if any epoch produced one."""
        if self.best_model_state is not None:
            self._upload_checkpoint(self.best_model_state, "best")
            print("Best model saved to WandB.")

    def _save_final_model(self):
        """Save and upload the model's weights as they are at the end of training."""
        self._upload_checkpoint(self.model.state_dict(), "final")
        print("Final model saved to WandB.")

In [317]:
class LinearLayer(nn.Module):
    """A single affine map from an ``n_past``-long window to ``n_predict`` outputs."""

    def __init__(self, n_past, n_predict):
        super().__init__()
        # Kept under the attribute name ``fc`` so state_dict keys stay "fc.weight"/"fc.bias".
        self.fc = nn.Linear(in_features=n_past, out_features=n_predict)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return its output."""
        projected = self.fc(x)
        return projected

In [318]:
class LinearLayerClassification(nn.Module):
    """Three stacked linear layers (n_past -> 10 -> 4 -> n_predict) followed by a sigmoid.

    No activation is applied between the linear layers; only the final output
    passes through the sigmoid.
    """

    def __init__(self, n_past, n_predict):
        super().__init__()
        first_width, second_width = 10, 4
        self.fc1 = nn.Linear(n_past, first_width)
        self.fc2 = nn.Linear(first_width, second_width)
        self.fc3 = nn.Linear(second_width, n_predict)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Feed ``x`` through fc1, fc2, fc3 in turn and squash the result with the sigmoid."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = layer(hidden)
        return self.sigmoid(hidden)

In [None]:
class RNNClassifier(nn.Module):
    """Unfinished placeholder for an RNN-based classifier.

    As written, the constructor only initialises the ``nn.Module`` base class:
    ``n_past`` and ``n_predict`` are accepted but not used, and this cell defines
    no layers and no ``forward`` method.
    TODO: add the recurrent layers and ``forward`` before using this class.
    """
    def __init__(self, n_past, n_predict):
        super().__init__()

In [319]:
def main(config):
    """Run one experiment: load data, train, evaluate on the test split, log to W&B.

    Args:
        config: dict with the keys "run_name", "is_classification", "scaler"
            ("min-max", "std" or None for no scaling), "batch_size", "n_past",
            "n_predict", "n_stride", "n_epochs", plus "threshold" in classification
            mode. An optional "patience" key (default 10) sets the early-stopping
            patience.

    Raises:
        ValueError: If ``config["scaler"]`` names an unsupported scaler.
    """
    wandb.init(dir="../wandb", project="stock_time_series", name=config["run_name"], config=config)
    try:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"using {device}")
        is_classification = config["is_classification"]

        # Only the "low" column of each split is modelled.
        train_frame, val_frame, test_frame = (
            pd.read_csv(f"../data/{split}.csv", parse_dates=["timestamp"])["low"].to_frame()
            for split in ("train", "validate", "test")
        )

        scaler_name = config.get("scaler")
        if scaler_name == "min-max":
            scaler_transformer = MinMaxScaler()
        elif scaler_name == "std":
            scaler_transformer = StandardScaler()
        elif scaler_name is None:
            scaler_transformer = None
        else:
            # Fail with a clear message instead of an AttributeError on None further down.
            raise ValueError(
                f"Unsupported scaler {scaler_name!r}; expected 'min-max', 'std' or None"
            )

        if scaler_transformer is None:
            train_series = train_frame.to_numpy()
            val_series = val_frame.to_numpy()
            test_series = test_frame.to_numpy()
        else:
            # Fit on the training split only, then reuse the fitted scaler for the other splits.
            train_series = scaler_transformer.fit_transform(train_frame)
            val_series = scaler_transformer.transform(val_frame)
            test_series = scaler_transformer.transform(test_frame)

        def build_dataset(series):
            # All three splits share the same windowing parameters.
            return TimeSeriesDataset(series,
                                     n_past=config["n_past"],
                                     n_predict=config["n_predict"],
                                     n_stride=config["n_stride"],
                                     classification=is_classification)

        train_dataset = build_dataset(train_series)
        val_dataset = build_dataset(val_series)
        test_dataset = build_dataset(test_series)

        train_dataloader = DataLoader(train_dataset, batch_size=config["batch_size"], shuffle=False)
        val_dataloader = DataLoader(val_dataset, batch_size=config["batch_size"], shuffle=False)
        # The whole test split is served as a single batch.
        test_dataloader = DataLoader(test_dataset, batch_size=len(test_dataset), shuffle=False)

        if is_classification:
            model = LinearLayerClassification(n_past=config["n_past"], n_predict=config["n_predict"])
            loss_fn = nn.BCELoss()
            calculate_metrics = partial(evaluate_classification_metrics, threshold=config["threshold"])
        else:
            model = LinearLayer(n_past=config["n_past"], n_predict=config["n_predict"])
            loss_fn = nn.MSELoss()
            calculate_metrics = evaluate_regression_metrics

        optimizer = optim.Adam(model.parameters())

        trainer = Trainer(model=model,
                          device=device,
                          optimizer=optimizer,
                          loss_fn=loss_fn,
                          train_loader=train_dataloader,
                          val_loader=val_dataloader,
                          calculate_metrics=calculate_metrics,
                          n_epochs=config["n_epochs"],
                          run_name=config["run_name"],
                          patience=config.get("patience", 10))
        # train() returns the model loaded with the best validation weights;
        # that model, not the last-epoch one, is scored on the test split.
        best_model = trainer.train()

        evaluate_test(best_model, test_dataloader, device, loss_fn, calculate_metrics=calculate_metrics)
    except BaseException:
        # Close the run as failed so a crash does not leave it open in the kernel.
        wandb.finish(exit_code=1)
        raise
    wandb.finish()
    

In [320]:
def get_config():
    """Return a fresh configuration dict for a single run.

    Keys: task selection ("is_classification", "run_name"), preprocessing and
    batching ("scaler", "batch_size", "threshold"), windowing ("n_past",
    "n_predict", "n_stride") and training length ("n_epochs").
    """
    task = dict(is_classification=True, run_name="classification-test")
    preprocessing = dict(scaler="min-max", batch_size=16, threshold=0.5)
    windowing = dict(n_past=20, n_predict=1, n_stride=1)
    schedule = dict(n_epochs=5)
    return {**task, **preprocessing, **windowing, **schedule}

In [321]:
if __name__ == "__main__":
    # Seed Python, NumPy and PyTorch (CPU and CUDA) with the same value before the run.
    seed = 42
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)
    # The cuDNN flags (torch.backends.cudnn.deterministic / benchmark) are not set here.

    config = get_config()
    main(config)

using cuda
Epoch 1, Train Loss: 0.6947396462578928




Epoch 1, Validation Loss: 0.6991743456411655
Metrics {'Accuracy': tensor(0.4886), 'F1': tensor(0.), 'Precision': tensor(0.), 'Recall': tensor(0.)}
Epoch 2, Train Loss: 0.6937140119075775




Epoch 2, Validation Loss: 0.6966246834036262
Metrics {'Accuracy': tensor(0.4886), 'F1': tensor(0.), 'Precision': tensor(0.), 'Recall': tensor(0.)}
Epoch 3, Train Loss: 0.693503985135786




Epoch 3, Validation Loss: 0.6955371250606905
Metrics {'Accuracy': tensor(0.4886), 'F1': tensor(0.), 'Precision': tensor(0.), 'Recall': tensor(0.)}
Epoch 4, Train Loss: 0.6933967449972707




Epoch 4, Validation Loss: 0.6949830033206352
Metrics {'Accuracy': tensor(0.4886), 'F1': tensor(0.), 'Precision': tensor(0.), 'Recall': tensor(0.)}
Epoch 5, Train Loss: 0.6933339217016774




Epoch 5, Validation Loss: 0.6946813187560017
Metrics {'Accuracy': tensor(0.4886), 'F1': tensor(0.), 'Precision': tensor(0.), 'Recall': tensor(0.)}
Training complete, saving best and final model.
Best model saved to WandB.
Final model saved to WandB.




Test Loss: 0.6957568526268005
Test Metrics, {'Accuracy': tensor(0.4919), 'F1': tensor(0.), 'Precision': tensor(0.), 'Recall': tensor(0.)}


0,1
Accuracy,▁▁▁▁▁
F1,▁▁▁▁▁
Precision,▁▁▁▁▁
Recall,▁▁▁▁▁
test_loss,▁
train_loss,█▃▂▁▁
val_loss,█▄▂▁▁

0,1
Accuracy,0.48856
F1,0.0
Precision,0.0
Recall,0.0
test_loss,0.69576
train_loss,0.69333
val_loss,0.69468
