In [60]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.modules.linear import Linear
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [61]:
import os

# Location of the processed dataset CSV. Set the DIGITAL_ASSET_DATA_PATH environment variable to
# run the notebook on another machine; the original author's local path is kept as the fallback
# so existing runs behave exactly as before.
data_file_path = os.environ.get(
    "DIGITAL_ASSET_DATA_PATH",
    "/Users/aishwaryaiyer/Documents/GitHub/Digital-Asset-Prediction/data/processed/combined_dataset_v1.csv",
)

In [62]:
def load_data(file_path):
    """Read the dataset CSV and order its rows by asset, then by date.

    Sorting on ``symbol`` first and ``date`` second keeps every asset's
    history contiguous and chronological in the returned frame.

    Args:
        file_path: Location of the CSV, passed straight to ``pandas.read_csv``.
            The file must provide ``symbol`` and ``date`` columns.

    Returns:
        pandas.DataFrame: Every row of the file, with ``date`` parsed as
        datetimes, sorted by ``['symbol', 'date']``. Row labels are the ones
        assigned while reading (the index is not reset after sorting).
    """
    sort_keys = ['symbol', 'date']
    raw_frame = pd.read_csv(file_path, parse_dates=['date'])
    return raw_frame.sort_values(by=sort_keys)



In [63]:
class TimeSeriesDataset(torch.utils.data.Dataset):
    """Sliding-window dataset over the rows of a feature table.

    Sample ``i`` pairs the ``window_size`` consecutive rows starting at row
    ``i`` (all columns) with the ``pred_len`` values of ``target_col`` found in
    the rows immediately after that window.

    Windows are cut from consecutive rows with no notion of groups: if ``data``
    stacks several series on top of each other, the windows that straddle a
    junction mix rows from two series.

    Args:
        data: pandas DataFrame of numeric features, one row per time step.
        target_col: Name of the column in ``data`` to predict.
        window_size: Number of rows in each input window.
        pred_len: Number of target values following the window (default 1).
    """

    def __init__(self, data, target_col, window_size, pred_len=1):
        self.data = data.values
        self.target_idx = data.columns.get_loc(target_col)
        self.window_size = window_size
        self.pred_len = pred_len

    def __len__(self):
        """Return the number of complete (window, target) pairs; never negative."""
        # Clamp at zero: a table shorter than one window plus its targets would
        # otherwise give a negative length, which makes ``len()`` raise.
        return max(0, len(self.data) - self.window_size - self.pred_len + 1)

    def __getitem__(self, idx):
        """Return ``(x, y)`` for sample ``idx``.

        Returns:
            x: float32 tensor of shape ``(window_size, n_columns)``.
            y: float32 tensor of shape ``(pred_len,)``.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        n_samples = len(self)
        if idx < 0:
            idx += n_samples
        # NumPy slicing silently truncates past the end, so without this check an
        # out-of-range index would yield short/empty tensors and plain iteration
        # over the dataset (which stops on IndexError) would never terminate.
        if not 0 <= idx < n_samples:
            raise IndexError(f"index {idx} is out of range for dataset of length {n_samples}")

        window_end = idx + self.window_size
        x = self.data[idx:window_end]
        y = self.data[window_end:window_end + self.pred_len, self.target_idx]
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)



In [64]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ProbSparseAttention(nn.Module):
    """Multi-head attention that keeps only the strongest keys for each query.

    For every query position the scaled dot-product scores against all keys are
    computed; the ``top_k`` largest scores are kept and every other score is set
    to ``-inf`` before the softmax, so each query attends to at most ``top_k``
    keys.

    NOTE(review): the full ``(T_q, T_k)`` score matrix is still materialised, so
    the sparsification changes which keys contribute but does not reduce the
    cost of computing the scores.

    Args:
        d_model: Width of the input and output features. Must be divisible by
            ``n_heads``.
        n_heads: Number of attention heads.
        top_k_factor: Fraction of the key length kept per query. At least one
            key is always kept and never more than the number of keys.

    Raises:
        ValueError: If ``d_model`` is not divisible by ``n_heads``.
    """

    def __init__(self, d_model, n_heads, top_k_factor=0.5):
        super(ProbSparseAttention, self).__init__()
        # Fail at construction time: otherwise the per-head ``view`` in forward()
        # fails later with an opaque shape error.
        if d_model % n_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by n_heads ({n_heads})"
            )
        self.n_heads = n_heads
        self.d_model = d_model
        self.d_k = d_model // n_heads
        self.top_k_factor = top_k_factor

        self.q_linear = nn.Linear(d_model, d_model)
        self.k_linear = nn.Linear(d_model, d_model)
        self.v_linear = nn.Linear(d_model, d_model)
        self.out = nn.Linear(d_model, d_model)

    def forward(self, queries, keys, values, mask=None):
        """Apply top-k sparse attention.

        Args:
            queries: Tensor of shape ``(B, T_q, d_model)``.
            keys: Tensor of shape ``(B, T_k, d_model)``.
            values: Tensor of shape ``(B, T_k, d_model)``.
            mask: Accepted for signature compatibility; currently ignored.

        Returns:
            Tensor of shape ``(B, T_q, d_model)``.
        """
        B, T_q, _ = queries.size()
        # Keys/values may have a different length than the queries, so their
        # reshape must use their own sequence length.
        T_k = keys.size(1)
        H = self.n_heads

        Q = self.q_linear(queries).view(B, T_q, H, self.d_k).transpose(1, 2)
        K = self.k_linear(keys).view(B, T_k, H, self.d_k).transpose(1, 2)
        V = self.v_linear(values).view(B, T_k, H, self.d_k).transpose(1, 2)

        scores = torch.matmul(Q, K.transpose(-2, -1)) / (self.d_k ** 0.5)

        # top-k is taken along the key axis, so it is sized from (and clamped to)
        # the number of keys.
        top_k = min(T_k, max(1, int(self.top_k_factor * T_k)))
        top_scores, indices = torch.topk(scores, top_k, dim=-1)

        # Start from all -inf and write back only the kept scores; softmax then
        # assigns zero weight to every dropped key.
        mask_tensor = torch.full_like(scores, float('-inf'))
        mask_tensor.scatter_(-1, indices, top_scores)
        attn = F.softmax(mask_tensor, dim=-1)

        context = torch.matmul(attn, V)
        context = context.transpose(1, 2).contiguous().view(B, T_q, self.d_model)
        return self.out(context)

class TransformerLayer(nn.Module):
    """Encoder block: sparse self-attention followed by a position-wise MLP.

    Each sub-layer is wrapped in a residual connection and the sum is
    layer-normalised afterwards (normalisation is applied after the addition).

    Args:
        d_model: Feature width of the block's input and output.
        n_heads: Number of heads handed to ``ProbSparseAttention``.
    """

    def __init__(self, d_model, n_heads):
        super().__init__()
        ff_width = d_model * 4  # hidden width of the feed-forward sub-layer

        self.attn = ProbSparseAttention(d_model, n_heads)
        feed_forward = [
            nn.Linear(d_model, ff_width),
            nn.ReLU(),
            nn.Linear(ff_width, d_model),
        ]
        self.ff = nn.Sequential(*feed_forward)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)

    def forward(self, x):
        """Run self-attention and the MLP over ``x``; the output has ``x``'s shape."""
        # Self-attention: the same tensor serves as queries, keys and values.
        attended = self.attn(x, x, x)
        after_attn = self.norm1(x + attended)

        expanded = self.ff(after_attn)
        return self.norm2(after_attn + expanded)

class Informer(nn.Module):
    """Stack of ``TransformerLayer`` blocks with a linear read-out head.

    The input features are projected to ``hidden_dim``, passed through
    ``num_layers`` encoder blocks, and the representation of the final time
    step is mapped to ``output_dim`` values.

    Args:
        input_dim: Number of features per time step in the input.
        hidden_dim: Width used inside the encoder blocks.
        output_dim: Number of values predicted per sequence.
        num_layers: How many ``TransformerLayer`` blocks to stack (default 2).
        n_heads: Attention heads per block (default 2).
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=2, n_heads=2):
        super().__init__()
        self.input_proj = nn.Linear(input_dim, hidden_dim)

        encoder_blocks = []
        for _ in range(num_layers):
            encoder_blocks.append(TransformerLayer(hidden_dim, n_heads))
        self.layers = nn.ModuleList(encoder_blocks)

        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Predict from a batch of sequences.

        Args:
            x: Tensor indexed as ``[batch, time, feature]`` with ``input_dim``
                features.

        Returns:
            Tensor of shape ``(batch, output_dim)``.
        """
        hidden = self.input_proj(x)
        for block in self.layers:
            hidden = block(hidden)

        # Only the last time step's representation feeds the prediction head.
        last_step = hidden[:, -1, :]
        return self.fc(last_step)


In [65]:
df = load_data(data_file_path)
# Replace each asset symbol with an integer category code so it can be used as a numeric feature.
df['symbol'] = df['symbol'].astype('category').cat.codes


# Normalize and prepare dataset
features = ['symbol','open', 'high', 'low', 'close', 'volume', 'market_cap', 'daily_return', 'sp500', 'treasury_spread', 'fear_greed', 'gold_price_usd']
# Keep the statistics in named variables so predictions can be mapped back to original units later.
feature_mean = df[features].mean()
# A constant column has a standard deviation of 0 (e.g. `symbol` when the file holds a single
# asset); dividing by it would turn the whole column into NaN, so such columns are divided by 1
# instead and end up as all zeros.
feature_std = df[features].std().replace(0, 1)
# NOTE(review): the statistics are computed over every row, i.e. before any train/validation
# split is made.
df[features] = (df[features] - feature_mean) / feature_std
dataset = TimeSeriesDataset(df[features], target_col='close', window_size=14)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)



In [66]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from sklearn.metrics import mean_squared_error

def train(model, dataloader, optimizer, loss_fn, device):
    """Run one training epoch and return the mean per-sample loss.

    Args:
        model: Module to optimise; it is switched to training mode.
        dataloader: Iterable yielding ``(inputs, targets)`` tensor batches.
        optimizer: Optimizer stepping ``model``'s parameters.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar
            tensor that is the mean loss over the batch.
        device: Device the batches are moved to before the forward pass.

    Returns:
        float: Sum of ``batch_loss * batch_size`` divided by the number of
        samples actually processed.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.train()
    total_loss = 0.0
    n_seen = 0

    for inputs, targets in dataloader:
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        # squeeze() drops singleton dimensions so predictions and targets line up.
        loss = loss_fn(outputs.squeeze(), targets.squeeze())
        loss.backward()
        optimizer.step()

        batch_size = inputs.size(0)
        total_loss += loss.item() * batch_size
        n_seen += batch_size

    if n_seen == 0:
        raise ValueError("dataloader yielded no samples")

    # Divide by the samples actually seen rather than len(dataloader.dataset):
    # the two differ when the loader drops the last batch or samples a subset.
    return total_loss / n_seen


def evaluate(model, dataloader, loss_fn, device):
    """Evaluate ``model`` without gradient tracking.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        dataloader: Iterable yielding ``(inputs, targets)`` tensor batches.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar
            tensor that is the mean loss over the batch.
        device: Device the batches are moved to before the forward pass.

    Returns:
        tuple[float, float]: ``(avg_loss, rmse)``. ``avg_loss`` is the mean
        per-sample loss over the samples processed. ``rmse`` is the root mean
        squared error of the predictions; with several outputs per sample it is
        the RMSE of each output averaged over outputs.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.eval()
    total_loss = 0.0
    n_seen = 0
    all_preds = []
    all_targets = []

    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = model(inputs)
            loss = loss_fn(outputs.squeeze(), targets.squeeze())

            batch_size = inputs.size(0)
            total_loss += loss.item() * batch_size
            n_seen += batch_size

            all_preds.append(outputs.cpu())
            all_targets.append(targets.cpu())

    if n_seen == 0:
        raise ValueError("dataloader yielded no samples")

    # Divide by the samples actually seen rather than len(dataloader.dataset):
    # the two differ when the loader drops the last batch or samples a subset.
    avg_loss = total_loss / n_seen

    # RMSE is computed directly instead of through
    # mean_squared_error(..., squared=False): the `squared` argument is
    # deprecated in recent scikit-learn releases. Float64 keeps the
    # accumulation accurate.
    preds = torch.cat(all_preds).reshape(n_seen, -1).double()
    targets = torch.cat(all_targets).reshape(n_seen, -1).double()
    per_output_mse = ((preds - targets) ** 2).mean(dim=0)
    rmse = per_output_mse.sqrt().mean().item()

    return avg_loss, rmse


In [67]:
from torch.optim import Adam

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seed before the model is built: weight initialisation, the random split and DataLoader
# shuffling all draw random numbers, so without a seed every run gives different numbers.
# (Some GPU kernels can still be nondeterministic.)
SEED = 42
torch.manual_seed(SEED)

model = Informer(input_dim=len(features), hidden_dim=64, output_dim=1).to(device)
optimizer = Adam(model.parameters(), lr=1e-3)
loss_fn = nn.MSELoss()

# Split dataset
# NOTE(review): random_split assigns individual windows at random. Neighbouring windows of
# `dataset` start one row apart and therefore share all but one of their rows, so validation
# windows overlap heavily with training windows and the validation metrics are likely
# optimistic. Consider splitting by date before building the windows.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_ds, val_ds = torch.utils.data.random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SEED),
)
train_loader = DataLoader(train_ds, batch_size=16, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=16)

# Training loop
for epoch in range(1, 21):
    train_loss = train(model, train_loader, optimizer, loss_fn, device)
    val_loss, val_rmse = evaluate(model, val_loader, loss_fn, device)
    print(f"Epoch {epoch}: Train Loss={train_loss:.4f}, Val Loss={val_loss:.4f}, Val RMSE={val_rmse:.4f}")




Epoch 1: Train Loss=0.1581, Val Loss=0.0580, Val RMSE=0.2409




Epoch 2: Train Loss=0.0807, Val Loss=0.0447, Val RMSE=0.2115




Epoch 3: Train Loss=0.0505, Val Loss=0.0370, Val RMSE=0.1924




Epoch 4: Train Loss=0.0380, Val Loss=0.0377, Val RMSE=0.1941




Epoch 5: Train Loss=0.0323, Val Loss=0.0870, Val RMSE=0.2950




Epoch 6: Train Loss=0.0417, Val Loss=0.0717, Val RMSE=0.2677




Epoch 7: Train Loss=0.0239, Val Loss=0.0094, Val RMSE=0.0969




Epoch 8: Train Loss=0.0301, Val Loss=0.0150, Val RMSE=0.1227




Epoch 9: Train Loss=0.1932, Val Loss=0.0170, Val RMSE=0.1304




Epoch 10: Train Loss=0.0402, Val Loss=0.3272, Val RMSE=0.5720




Epoch 11: Train Loss=0.0282, Val Loss=0.0140, Val RMSE=0.1185




Epoch 12: Train Loss=0.0242, Val Loss=0.0121, Val RMSE=0.1102




Epoch 13: Train Loss=0.0210, Val Loss=0.1008, Val RMSE=0.3175




Epoch 14: Train Loss=0.0239, Val Loss=0.0067, Val RMSE=0.0821




Epoch 15: Train Loss=0.0128, Val Loss=0.0259, Val RMSE=0.1611




Epoch 16: Train Loss=0.0217, Val Loss=0.0055, Val RMSE=0.0743




Epoch 17: Train Loss=0.0168, Val Loss=0.0077, Val RMSE=0.0877




Epoch 18: Train Loss=0.0233, Val Loss=0.0066, Val RMSE=0.0814




Epoch 19: Train Loss=0.0176, Val Loss=0.0057, Val RMSE=0.0758
Epoch 20: Train Loss=0.0241, Val Loss=0.0082, Val RMSE=0.0904


