Simple CNN Neural Network talking in a sequence of data (could have been an RNN, but had already tried that and it did not train well) to predict the future asset price

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
%pip install torch

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

class StockDataset(Dataset):
    def __init__(self, stocks_path, bonds_path, real_estate_path):
        stocks = pd.read_csv(stocks_path).values
        bonds = pd.read_csv(bonds_path).values
        real_estate = pd.read_csv(real_estate_path).values
        self.features = torch.stack((torch.from_numpy(stocks), torch.from_numpy(bonds), torch.from_numpy(real_estate)), dim=1)
        self.targets = torch.from_numpy(stocks[20:] / stocks[:-20] - 1).float() # percent price change for stocks

    def __len__(self):
        return len(self.targets)

    def __getitem__(self, idx):
        return self.features[idx:idx+20], self.targets[idx]

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv1d(3, 16, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool1d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv1d(16, 32, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool1d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv1d(32, 64, kernel_size=3, padding=1)
        self.pool3 = nn.MaxPool1d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(64 * 2, 32)
        self.fc2 = nn.Linear(32, 1)

    def forward(self, x):
        x = x.squeeze(3)
        x = x.transpose(1,2)
        x = x.double()
        x = self.pool1(torch.relu(self.conv1(x)))
        x = self.pool2(torch.relu(self.conv2(x)))
        x = self.pool3(torch.relu(self.conv3(x)))
        x = x.view(-1, 64 * 2)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x.squeeze()

def train(model, dataloader, criterion, optimizer, device):
    model.train()
    running_loss = 0
    for inputs, targets in dataloader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
    return running_loss / len(dataloader.dataset)

def evaluate(model, dataloader, criterion, device):
    model.eval()
    running_loss = 0
    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            running_loss += loss.item() * inputs.size(0)
    return running_loss / len(dataloader.dataset)

if __name__ == '__main__':
    stocks_path = "/content/drive/My Drive/stochastic_stocks.csv"
    bonds_path = "/content/drive/My Drive/stochastic_bond.csv"
    real_estate_path = "/content/drive/My Drive/stochastic_re.csv"
    dataset = StockDataset(stocks_path, bonds_path, real_estate_path)
    train_size = int(0.9 * len(dataset))
    train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, len(dataset) - train_size])
    train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_dataloader = DataLoader(val_dataset, batch_size=32)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = CNN().to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    best_val_loss = float('inf')
    for epoch in range(50):
      train_loss = train(model, train_dataloader, criterion, optimizer, device)
      val_loss = evaluate(model, val_dataloader, criterion, device)
      print(f'Epoch {epoch+1}: train loss = {train_loss:.6f}, val loss = {val_loss:.6f}')
      if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), 'best_model.pt')


RuntimeError: ignored

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np
import pandas as pd

# Define the CNN model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=3, out_channels=6, kernel_size=3)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv1d(in_channels=6, out_channels=16, kernel_size=3)
        self.fc1 = nn.Linear(in_features=48, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=1)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.flatten(1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Define a custom dataset
class StocksDataset(Dataset):
    def __init__(self, stocks_file, bonds_file, real_estate_file):
        self.stocks_data = pd.read_csv(stocks_file, header=None)
        self.bonds_data = pd.read_csv(bonds_file, header=None)
        self.real_estate_data = pd.read_csv(real_estate_file, header=None)

    def __len__(self):
        return len(self.stocks_data) - 30

    def __getitem__(self, index):
        stocks_seq = self.stocks_data.iloc[index :index + 20].values
        bonds_seq = self.bonds_data.iloc[index :index +20].values
        real_estate_seq = self.real_estate_data.iloc[index :index + 20].values
        try:
          label = (self.stocks_data.iloc[index + 20].values - self.stocks_data.iloc[index + 30].values) / self.stocks_data.iloc[index + 20].values
        except:
          print(len(self.stocks_data))
          print(index)
        # Convert the sequences to float tensors
        stocks_seq = torch.tensor(stocks_seq, dtype=torch.float32)
        bonds_seq = torch.tensor(bonds_seq, dtype=torch.float32)
        real_estate_seq = torch.tensor(real_estate_seq, dtype=torch.float32)

        # Stack the sequences along the feature dimension
        stocks_seq = torch.tensor(stocks_seq, dtype=torch.float32)
        bonds_seq = torch.tensor(bonds_seq, dtype=torch.float32)
        real_estate_seq = torch.tensor(real_estate_seq, dtype=torch.float32)
        x = torch.stack([stocks_seq, bonds_seq, real_estate_seq], dim=0)
        x = x.squeeze(2)
        x = x.float()
        return x, label

# Initialize the dataset and data loader
stocks_file = "/content/drive/My Drive/stochastic_stocks.csv"
bonds_file = "/content/drive/My Drive/stochastic_bond.csv"
real_estate_file = "/content/drive/My Drive/stochastic_re.csv"
dataset = StocksDataset(stocks_file, bonds_file, real_estate_file)
data_loader = DataLoader(dataset, batch_size=32, shuffle=False)

# Initialize the model, loss function, and optimizer
model = CNN()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Train the model
num_epochs = 10
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(data_loader):
        optimizer.zero_grad()
        inputs = inputs.float()
        labels = labels.float()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        running_loss += loss.item()
    if i % 10 == 1:
        print(f"Epoch {epoch+1}, batch {i+1}: Loss = {running_loss/10}")
        running_loss = 0.0

  stocks_seq = torch.tensor(stocks_seq, dtype=torch.float32)
  bonds_seq = torch.tensor(bonds_seq, dtype=torch.float32)
  real_estate_seq = torch.tensor(real_estate_seq, dtype=torch.float32)


Epoch 1, batch 72: Loss = 54.20074441432953
Epoch 2, batch 72: Loss = 54.20074441432953
Epoch 3, batch 72: Loss = 54.20074441432953
Epoch 4, batch 72: Loss = 54.20074441432953
Epoch 5, batch 72: Loss = 54.20074441432953
Epoch 6, batch 72: Loss = 54.20074441432953
Epoch 7, batch 72: Loss = 54.20074441432953
Epoch 8, batch 72: Loss = 54.20074441432953
Epoch 9, batch 72: Loss = 54.20074441432953
Epoch 10, batch 72: Loss = 54.20074441432953
