In [1]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_squared_error
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split

## Load and Preprocess Data

In [2]:
### RAW
# Read every CSV found directly under the dataset folders into `stock_dict`,
# keyed by the file name without its ".csv" suffix (presumably the ticker symbol).
search_dirs = ["Dataset/Equities/", "Dataset/BondsFundsIndexes/"]
stock_dict = {}

for search_dir in search_dirs:
    for filename in os.listdir(search_dir):
        # Anything that is not a CSV (sub-folders, notes, ...) is ignored.
        if not filename.endswith('.csv'):
            continue
        stock_name = filename[:-len('.csv')]
        stock_dict[stock_name] = pd.read_csv(os.path.join(search_dir, filename))

In [35]:
### Preprocess
# For each stock: drop the 'Date' column and min-max scale every remaining column.
# The fitted scaler is kept per stock so values can be mapped back later.
# NOTE(review): each scaler is fit on the stock's full history, i.e. before the
# train/val/test split done further down — confirm this look-ahead is acceptable.
preprocessed_stocks = {}
stock_returns = {}  # declared here but not filled anywhere in this cell
minmax_scalers = {}

for stock in stock_dict:
    stock_data = stock_dict[stock].drop(columns=['Date'])

    scaler = MinMaxScaler().fit(stock_data)
    minmax_scalers[stock] = scaler

    # Re-wrap the scaled ndarray so column names and the row index survive.
    preprocessed_stock = pd.DataFrame(
        scaler.transform(stock_data),
        columns=stock_data.columns,
        index=stock_data.index,
    )
    preprocessed_stocks[stock] = preprocessed_stock
    
    

In [36]:
### Form time series
def create_time_series_data(stock_data, window_size=60):
    """
    Turn a per-day feature table into sliding-window samples for next-day prediction.

    - stock_data: DataFrame of shape (num_days, num_features); must contain a 'Close' column
    - window_size: number of consecutive days used as input for each prediction

    Return
    - X: ndarray (num_samples, window_size, num_features)
    - y: ndarray (num_samples,), the 'Close' value of the day right after each window

    num_samples is num_days - window_size; when that is not positive both
    arrays come back empty.
    """
    # Use the argument itself. The previous version read a global named `data`,
    # which only worked when the caller's loop variable happened to have that name.
    num_days = stock_data.shape[0]
    close = stock_data['Close']
    X = []
    y = []

    for i in range(num_days - window_size):
        # Input: rows i .. i+window_size-1 (all columns).
        X.append(stock_data.iloc[i:i+window_size].values)

        ### Target Variable: Closing Price of the first day after the window
        y.append(close.iloc[i+window_size])

    X = np.array(X)
    y = np.array(y)

    return X, y

# Per stock: the sliding input windows (stock_60d) and the matching
# next-day targets (stock_1d), built with the default window size.
stock_60d, stock_1d = {}, {}

for stock, data in preprocessed_stocks.items():
    windows, targets = create_time_series_data(data)
    stock_60d[stock] = windows
    stock_1d[stock] = targets



## Define Models

In [28]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR

In [29]:
class DMLPModel(nn.Module):
    """Deep multilayer perceptron applied to a flattened input window.

    Args:
        input_dim: number of values per flattened sample, e.g.
            window_size * num_features (60 * 6 in this notebook).
        hidden_dim: width of every hidden layer.
        num_hidden_layers: number of hidden Linear layers, each followed by ReLU.
        output_dim: size of the prediction for each sample.
    """
    def __init__(self, input_dim=6, hidden_dim=15, num_hidden_layers=6, output_dim=1):
        super(DMLPModel, self).__init__()
        # Remembered so forward() flattens to exactly what the first layer expects.
        self.input_dim = input_dim
        self.hidden_layers = nn.ModuleList()

        self.hidden_layers.append(nn.Linear(input_dim, hidden_dim))

        for _ in range(num_hidden_layers - 1):
            self.hidden_layers.append(nn.Linear(hidden_dim, hidden_dim))

        self.output_layer = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Flatten ``x`` to rows of ``input_dim`` values and run the MLP.

        Returns a tensor of shape (num_rows, output_dim), where num_rows is
        ``x.numel() // input_dim``.
        """
        # The flatten size used to be hard-coded to 60 * 6, which crashed for any
        # other window/feature size. reshape also accepts non-contiguous input.
        x = x.reshape(-1, self.input_dim)
        for layer in self.hidden_layers:
            x = self.relu(layer(x))
        x = self.output_layer(x)
        return x


class LSTMModel(nn.Module):
    """Stacked LSTM followed by dropout and a linear read-out of the last time step.

    Args:
        input_dim: number of features per time step.
        hidden_dim: LSTM hidden state size.
        num_layers: number of stacked LSTM layers.
        output_dim: size of the prediction for each sample.
        dropout_rate: dropout applied to the last time step's output before the
            linear layer.
        recurrent_dropout_rate: passed to ``nn.LSTM(dropout=...)``. Despite the
            name, PyTorch applies this between stacked layers, not on the
            recurrent connections, so it is ignored when ``num_layers`` is 1.
    """
    def __init__(self, input_dim=6, hidden_dim=50, num_layers=4, output_dim=1, dropout_rate=0.4, recurrent_dropout_rate=0.3):
        super(LSTMModel, self).__init__()
        # nn.LSTM warns (and does nothing) for non-zero dropout on a single layer.
        lstm_dropout = recurrent_dropout_rate if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=lstm_dropout)
        self.fc = nn.Linear(hidden_dim, output_dim)
        # Not used in forward(); kept so the module's attributes stay the same.
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Map a batch-first sequence batch to one prediction per sequence."""
        out, _ = self.lstm(x)
        # Only the output at the final time step feeds the read-out layer.
        out = self.dropout(out[:, -1, :])
        out = self.fc(out)
        return out
    
class CNNModel(nn.Module):
    """Single-channel 1-D CNN: Conv1d -> ReLU -> MaxPool1d -> three Linear layers.

    Args:
        input_dim: length of the 1-D input signal (last axis of the input).
        output_dim: size of the prediction for each sample.
        num_filters: number of convolution output channels.
        kernel_size: kernel size shared by the convolution and the max-pool.
        hidden_dim: width of the two hidden fully connected layers.
    """
    def __init__(self, input_dim=6, output_dim=1, num_filters=2, kernel_size=2, hidden_dim=2):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=num_filters, kernel_size=kernel_size)
        self.pool = nn.MaxPool1d(kernel_size=kernel_size)
        # Derive the flattened size from the layers instead of `input_dim // 2 - 1`,
        # which was only right for even input_dim with kernel_size == 2.
        conv_len = input_dim - kernel_size + 1   # Conv1d: stride 1, no padding
        pooled_len = conv_len // kernel_size     # MaxPool1d: stride == kernel_size
        self.fc1 = nn.Linear(num_filters * pooled_len, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run the network on ``x`` of shape (batch, 1, input_dim)."""
        x = self.conv1(x)
        x = self.relu(x)
        x = self.pool(x)
        x = x.view(x.size(0), -1)  # Flatten
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    
 
def RandomForestModel(n_estimators=500, max_depth=20,min_samples_split=10,min_samples_leaf=10,max_features=40, random_state=None, n_jobs=None):
    """Build an unfitted RandomForestRegressor with this notebook's defaults.

    All arguments are forwarded unchanged to ``RandomForestRegressor``.

    - random_state: seed for the forest's randomness; pass an int to make a fit
      reproducible. The default (None) keeps the previous unseeded behaviour.
    - n_jobs: number of parallel jobs; the default (None) keeps the previous behaviour.
    """
    return RandomForestRegressor(
                                    n_estimators=n_estimators,
                                    max_depth=max_depth,
                                    min_samples_split=min_samples_split,
                                    min_samples_leaf=min_samples_leaf,
                                    max_features=max_features,
                                    random_state=random_state,
                                    n_jobs=n_jobs
                                   )

def SVRModel(C=2**2, gamma=2**-3):
    """Build an unfitted support vector regressor with an RBF kernel.

    - C, gamma: forwarded unchanged to ``SVR``.
    """
    svr_params = {'kernel': 'rbf', 'C': C, 'gamma': gamma}
    return SVR(**svr_params)

## Train & Evaluate

In [50]:
def train_model(model, X_train, y_train, X_val, y_val, criterion, optimizer, num_epochs=100, patience=0, device='cpu', print_every=5):
    """Train ``model`` with one full-batch optimizer step per epoch.

    Every ``print_every`` epochs the training loss and the validation loss are
    printed.

    Args:
        model: torch module to train; it is moved to ``device``.
        X_train, y_train: training inputs and targets (tensors). Targets are
            reshaped to (-1, 1) before the loss is computed.
        X_val, y_val: validation inputs and targets, handled the same way.
        criterion: callable ``criterion(outputs, targets)`` returning the loss.
        optimizer: optimizer whose ``zero_grad()`` / ``step()`` are called once
            per epoch.
        num_epochs: number of epochs (= optimizer steps).
        patience: 0 (the default) disables early stopping. When greater than 0,
            training stops once the validation loss has failed to improve on
            more than ``patience`` consecutive validation checks, and the
            weights of the best check are restored. Checks happen only every
            ``print_every`` epochs.
        device: device the model and all tensors are moved to.
        print_every: interval, in epochs, between progress / validation reports.

    Returns:
        (model, val_loss): the trained model (left in eval mode) and its
        validation loss as a 0-dim tensor. The loss is always computed on the
        returned weights, also when ``num_epochs`` is 0.
    """
    import copy  # local import: only needed to snapshot weights for early stopping

    model.to(device)

    # Loop-invariant work: move and reshape the data once, not every epoch.
    X_train = X_train.to(device)
    y_train = y_train.to(device).view(-1, 1)
    X_val = X_val.to(device)
    y_val = y_val.to(device).view(-1, 1)

    def _validation_loss():
        """Loss of the current weights on the validation set (eval mode, no grad)."""
        model.eval()
        with torch.no_grad():
            return criterion(model(X_val), y_val)

    early_stopping = bool(patience) and patience > 0
    best_loss = float('inf')
    best_model_wts = None
    patience_counter = 0

    for epoch in range(num_epochs):
        model.train()  # validation below switches to eval mode

        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()

        if epoch % print_every != 0:
            continue

        print(f"Epoch {epoch}/{num_epochs - 1}, Training Loss: {loss:.6f}")
        val_loss = _validation_loss()
        print(f"Validation Loss: {val_loss:.6f}")

        if not early_stopping:
            continue

        if val_loss.item() < best_loss:
            best_loss = val_loss.item()
            # state_dict() returns references to the live tensors, so copy them.
            best_model_wts = copy.deepcopy(model.state_dict())
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter > patience:
                print("Early stopping")
                break

    if best_model_wts is not None:
        model.load_state_dict(best_model_wts)

    # Re-evaluate so the reported loss matches the returned weights instead of
    # whatever the last print epoch happened to measure.
    return model, _validation_loss()

In [51]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

X_train, X_test, X_val, y_train, y_val, y_test = {}, {}, {}, {}, {}, {}

# 7-1-2 split: 20% is held out for test first, then 12.5% of the remaining 80%
# (= 10% of the total) becomes the validation set.
# NOTE(review): train_test_split shuffles by default, so overlapping windows from
# the same period end up in train, validation and test. For a forecasting task a
# chronological split (shuffle=False) is usually wanted — confirm the intent.
for stock in stock_60d:
    x_train_temp, X_test[stock], y_train_temp, y_test[stock] = train_test_split(
        stock_60d[stock], stock_1d[stock], test_size=0.2, random_state=1
    )
    X_train[stock], X_val[stock], y_train[stock], y_val[stock] = train_test_split(
        x_train_temp, y_train_temp, test_size=0.125, random_state=1
    )

def to_tensor(data):
    """Copy array-like ``data`` into a float32 torch tensor."""
    return torch.tensor(data, dtype=torch.float32)

# Tensor versions of every split, keyed by stock like the ndarray dicts above.
X_train_tensors = {name: to_tensor(windows) for name, windows in X_train.items()}
y_train_tensors = {name: to_tensor(targets) for name, targets in y_train.items()}
X_val_tensors = {name: to_tensor(windows) for name, windows in X_val.items()}
y_val_tensors = {name: to_tensor(targets) for name, targets in y_val.items()}
X_test_tensors = {name: to_tensor(windows) for name, windows in X_test.items()}
y_test_tensors = {name: to_tensor(targets) for name, targets in y_test.items()}

# Sanity check on one stock's test-target count.
print(y_test_tensors['DEO'].shape)

torch.Size([492])


In [54]:
# DMLP: train one model per stock, then record its validation and test MSE.
dmlp_models = {}
num_epochs = 500
learning_rate = 0.01
input_size = 6

test_losses = []
val_losses = []

for stock in X_train_tensors:
    print("model DMLP: " + stock)
    model = DMLPModel(input_dim=60 * input_size)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    trained_model, val_loss = train_model(
        model,
        X_train_tensors[stock], y_train_tensors[stock],
        X_val_tensors[stock], y_val_tensors[stock],
        criterion, optimizer, num_epochs,
        patience=0, device=device, print_every=50,
    )
    val_losses.append(val_loss.item())

    # Test: evaluate the trained model on the held-out split, without gradients.
    trained_model.eval()
    test_inputs = X_test_tensors[stock].to(device)
    test_targets = y_test_tensors[stock].to(device).view(-1, 1)
    with torch.no_grad():
        outputs = trained_model(test_inputs)
        loss = criterion(outputs, test_targets)
    print(f"Test Loss: {loss:.6f}")
    test_losses.append(loss.item())

    dmlp_models[stock] = trained_model

print("avg val loss", np.array(val_losses).mean())
print("avg test loss", np.array(test_losses).mean())

model DMLP: DEO
Epoch 0/499, Training Loss: 0.415346
Validation Loss: 0.356953
Epoch 50/499, Training Loss: 0.003890
Validation Loss: 0.004550
Epoch 100/499, Training Loss: 0.002659
Validation Loss: 0.002903
Epoch 150/499, Training Loss: 0.001855
Validation Loss: 0.001874
Epoch 200/499, Training Loss: 0.001275
Validation Loss: 0.001192
Epoch 250/499, Training Loss: 0.000975
Validation Loss: 0.000913
Epoch 300/499, Training Loss: 0.000779
Validation Loss: 0.000739
Epoch 350/499, Training Loss: 0.000650
Validation Loss: 0.000637
Epoch 400/499, Training Loss: 0.000555
Validation Loss: 0.000573
Epoch 450/499, Training Loss: 0.000484
Validation Loss: 0.000528
Test Loss: 0.000523
model DMLP: DLR
Epoch 0/499, Training Loss: 0.215004
Validation Loss: 0.186090
Epoch 50/499, Training Loss: 0.004357
Validation Loss: 0.004797
Epoch 100/499, Training Loss: 0.002431
Validation Loss: 0.002449
Epoch 150/499, Training Loss: 0.001412
Validation Loss: 0.001474
Epoch 200/499, Training Loss: 0.000991
Valid

Epoch 50/499, Training Loss: 0.004797
Validation Loss: 0.005448
Epoch 100/499, Training Loss: 0.002785
Validation Loss: 0.003138
Epoch 150/499, Training Loss: 0.001579
Validation Loss: 0.001795
Epoch 200/499, Training Loss: 0.001805
Validation Loss: 0.001487
Epoch 250/499, Training Loss: 0.000831
Validation Loss: 0.001017
Epoch 300/499, Training Loss: 0.000881
Validation Loss: 0.001012
Epoch 350/499, Training Loss: 0.000665
Validation Loss: 0.000818
Epoch 400/499, Training Loss: 0.000741
Validation Loss: 0.001101
Epoch 450/499, Training Loss: 0.000591
Validation Loss: 0.000741
Test Loss: 0.000596
model DMLP: NTDOY
Epoch 0/499, Training Loss: 0.141819
Validation Loss: 0.113538
Epoch 50/499, Training Loss: 0.003151
Validation Loss: 0.002655
Epoch 100/499, Training Loss: 0.001684
Validation Loss: 0.001431
Epoch 150/499, Training Loss: 0.000889
Validation Loss: 0.000869
Epoch 200/499, Training Loss: 0.000675
Validation Loss: 0.000685
Epoch 250/499, Training Loss: 0.001098
Validation Loss: 

Epoch 50/499, Training Loss: 0.001606
Validation Loss: 0.001589
Epoch 100/499, Training Loss: 0.000901
Validation Loss: 0.000942
Epoch 150/499, Training Loss: 0.000601
Validation Loss: 0.000648
Epoch 200/499, Training Loss: 0.000476
Validation Loss: 0.000554
Epoch 250/499, Training Loss: 0.000399
Validation Loss: 0.000488
Epoch 300/499, Training Loss: 0.000489
Validation Loss: 0.000599
Epoch 350/499, Training Loss: 0.000568
Validation Loss: 0.000410
Epoch 400/499, Training Loss: 0.000287
Validation Loss: 0.000370
Epoch 450/499, Training Loss: 0.000411
Validation Loss: 0.000592
Test Loss: 0.000290
model DMLP: LVMUY
Epoch 0/499, Training Loss: 0.213551
Validation Loss: 0.192947
Epoch 50/499, Training Loss: 0.002406
Validation Loss: 0.002537
Epoch 100/499, Training Loss: 0.001687
Validation Loss: 0.001593
Epoch 150/499, Training Loss: 0.001296
Validation Loss: 0.001184
Epoch 200/499, Training Loss: 0.000916
Validation Loss: 0.000800
Epoch 250/499, Training Loss: 0.000672
Validation Loss: 

Epoch 50/499, Training Loss: 0.002151
Validation Loss: 0.002169
Epoch 100/499, Training Loss: 0.001231
Validation Loss: 0.001268
Epoch 150/499, Training Loss: 0.000871
Validation Loss: 0.001235
Epoch 200/499, Training Loss: 0.001026
Validation Loss: 0.001132
Epoch 250/499, Training Loss: 0.000660
Validation Loss: 0.000966
Epoch 300/499, Training Loss: 0.003557
Validation Loss: 0.011329
Epoch 350/499, Training Loss: 0.000639
Validation Loss: 0.000948
Epoch 400/499, Training Loss: 0.000576
Validation Loss: 0.000914
Epoch 450/499, Training Loss: 0.000550
Validation Loss: 0.000901
Test Loss: 0.000683
model DMLP: BRK-B
Epoch 0/499, Training Loss: 0.099988
Validation Loss: 0.061927
Epoch 50/499, Training Loss: 0.001869
Validation Loss: 0.001578
Epoch 100/499, Training Loss: 0.001195
Validation Loss: 0.001127
Epoch 150/499, Training Loss: 0.000960
Validation Loss: 0.000883
Epoch 200/499, Training Loss: 0.000733
Validation Loss: 0.000663
Epoch 250/499, Training Loss: 0.000535
Validation Loss: 

Epoch 50/499, Training Loss: 0.003337
Validation Loss: 0.003698
Epoch 100/499, Training Loss: 0.001821
Validation Loss: 0.002090
Epoch 150/499, Training Loss: 0.000919
Validation Loss: 0.001015
Epoch 200/499, Training Loss: 0.000941
Validation Loss: 0.001094
Epoch 250/499, Training Loss: 0.000560
Validation Loss: 0.000623
Epoch 300/499, Training Loss: 0.001618
Validation Loss: 0.000615
Epoch 350/499, Training Loss: 0.000478
Validation Loss: 0.000543
Epoch 400/499, Training Loss: 0.000421
Validation Loss: 0.000492
Epoch 450/499, Training Loss: 0.001084
Validation Loss: 0.000573
Test Loss: 0.000507
model DMLP: IXP
Epoch 0/499, Training Loss: 0.130774
Validation Loss: 0.097601
Epoch 50/499, Training Loss: 0.003040
Validation Loss: 0.003373
Epoch 100/499, Training Loss: 0.001326
Validation Loss: 0.001299
Epoch 150/499, Training Loss: 0.000940
Validation Loss: 0.001031
Epoch 200/499, Training Loss: 0.000721
Validation Loss: 0.000863
Epoch 250/499, Training Loss: 0.000650
Validation Loss: 0.

In [60]:
# LSTM: train one model per stock, then record its validation and test MSE.
lstm_models = {}
num_epochs = 500
learning_rate = 0.01
input_size = 6

test_losses = []
val_losses = []

for stock in X_train_tensors:
    print("model LSTM: " + stock)
    model = LSTMModel(input_dim=input_size, hidden_dim=40)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    trained_model, val_loss = train_model(
        model,
        X_train_tensors[stock], y_train_tensors[stock],
        X_val_tensors[stock], y_val_tensors[stock],
        criterion, optimizer, num_epochs,
        patience=0, device=device, print_every=20,
    )
    val_losses.append(val_loss.item())

    # Test: evaluate the trained model on the held-out split, without gradients.
    trained_model.eval()
    test_inputs = X_test_tensors[stock].to(device)
    test_targets = y_test_tensors[stock].to(device).view(-1, 1)
    with torch.no_grad():
        outputs = trained_model(test_inputs)
        loss = criterion(outputs, test_targets)
    print(f"Test Loss: {loss:.6f}")
    test_losses.append(loss.item())

    lstm_models[stock] = trained_model

print("avg val loss", np.array(val_losses).mean())
print("avg test loss", np.array(test_losses).mean())

model LSTM: DEO
Epoch 0/499, Training Loss: 0.185267
Validation Loss: 0.090330
Epoch 20/499, Training Loss: 0.010379
Validation Loss: 0.006577
Epoch 40/499, Training Loss: 0.004601
Validation Loss: 0.001534
Epoch 60/499, Training Loss: 0.003761
Validation Loss: 0.000940
Epoch 80/499, Training Loss: 0.003240
Validation Loss: 0.000865
Epoch 100/499, Training Loss: 0.003126
Validation Loss: 0.000838
Epoch 120/499, Training Loss: 0.002802
Validation Loss: 0.000806
Epoch 140/499, Training Loss: 0.002543
Validation Loss: 0.000779
Epoch 160/499, Training Loss: 0.002614
Validation Loss: 0.000741
Epoch 180/499, Training Loss: 0.002940
Validation Loss: 0.000708
Epoch 200/499, Training Loss: 0.002024
Validation Loss: 0.000650
Epoch 220/499, Training Loss: 0.002206
Validation Loss: 0.000626
Epoch 240/499, Training Loss: 0.002019
Validation Loss: 0.000519
Epoch 260/499, Training Loss: 0.002370
Validation Loss: 0.001013
Epoch 280/499, Training Loss: 0.001937
Validation Loss: 0.000690
Epoch 300/499, 

Test Loss: 0.000392
model LSTM: CME
Epoch 0/499, Training Loss: 0.295450
Validation Loss: 0.089933
Epoch 20/499, Training Loss: 0.072899
Validation Loss: 0.055010
Epoch 40/499, Training Loss: 0.071912
Validation Loss: 0.062863
Epoch 60/499, Training Loss: 0.071312
Validation Loss: 0.063063
Epoch 80/499, Training Loss: 0.028621
Validation Loss: 0.014190
Epoch 100/499, Training Loss: 0.012154
Validation Loss: 0.004982
Epoch 120/499, Training Loss: 0.006505
Validation Loss: 0.001233
Epoch 140/499, Training Loss: 0.005031
Validation Loss: 0.001073
Epoch 160/499, Training Loss: 0.003959
Validation Loss: 0.000855
Epoch 180/499, Training Loss: 0.004099
Validation Loss: 0.000809
Epoch 200/499, Training Loss: 0.003570
Validation Loss: 0.000749
Epoch 220/499, Training Loss: 0.003456
Validation Loss: 0.000833
Epoch 240/499, Training Loss: 0.003381
Validation Loss: 0.000897
Epoch 260/499, Training Loss: 0.003075
Validation Loss: 0.000731
Epoch 280/499, Training Loss: 0.002886
Validation Loss: 0.00

Epoch 480/499, Training Loss: 0.001448
Validation Loss: 0.000481
Test Loss: 0.000452
model LSTM: QCOM
Epoch 0/499, Training Loss: 0.151167
Validation Loss: 0.073165
Epoch 20/499, Training Loss: 0.015465
Validation Loss: 0.011988
Epoch 40/499, Training Loss: 0.004574
Validation Loss: 0.001124
Epoch 60/499, Training Loss: 0.003919
Validation Loss: 0.000748
Epoch 80/499, Training Loss: 0.003037
Validation Loss: 0.000842
Epoch 100/499, Training Loss: 0.002759
Validation Loss: 0.001181
Epoch 120/499, Training Loss: 0.002599
Validation Loss: 0.000802
Epoch 140/499, Training Loss: 0.002371
Validation Loss: 0.000652
Epoch 160/499, Training Loss: 0.002145
Validation Loss: 0.000631
Epoch 180/499, Training Loss: 0.001937
Validation Loss: 0.000667
Epoch 200/499, Training Loss: 0.002089
Validation Loss: 0.000697
Epoch 220/499, Training Loss: 0.002041
Validation Loss: 0.001121
Epoch 240/499, Training Loss: 0.001937
Validation Loss: 0.000605
Epoch 260/499, Training Loss: 0.001878
Validation Loss: 0.0

Epoch 460/499, Training Loss: 0.002027
Validation Loss: 0.000336
Epoch 480/499, Training Loss: 0.001886
Validation Loss: 0.000282
Test Loss: 0.000253
model LSTM: DE
Epoch 0/499, Training Loss: 0.373869
Validation Loss: 0.210640
Epoch 20/499, Training Loss: 0.022187
Validation Loss: 0.009881
Epoch 40/499, Training Loss: 0.010081
Validation Loss: 0.002125
Epoch 60/499, Training Loss: 0.006606
Validation Loss: 0.000898
Epoch 80/499, Training Loss: 0.005674
Validation Loss: 0.000943
Epoch 100/499, Training Loss: 0.005519
Validation Loss: 0.000802
Epoch 120/499, Training Loss: 0.004742
Validation Loss: 0.000766
Epoch 140/499, Training Loss: 0.004815
Validation Loss: 0.000668
Epoch 160/499, Training Loss: 0.004060
Validation Loss: 0.000811
Epoch 180/499, Training Loss: 0.003572
Validation Loss: 0.000683
Epoch 200/499, Training Loss: 0.004089
Validation Loss: 0.000751
Epoch 220/499, Training Loss: 0.003320
Validation Loss: 0.000623
Epoch 240/499, Training Loss: 0.003719
Validation Loss: 0.000

Epoch 440/499, Training Loss: 0.001491
Validation Loss: 0.000352
Epoch 460/499, Training Loss: 0.001557
Validation Loss: 0.000370
Epoch 480/499, Training Loss: 0.001446
Validation Loss: 0.000317
Test Loss: 0.000278
model LSTM: META
Epoch 0/499, Training Loss: 0.055064
Validation Loss: 0.056756
Epoch 20/499, Training Loss: 0.009368
Validation Loss: 0.007461
Epoch 40/499, Training Loss: 0.002790
Validation Loss: 0.000662
Epoch 60/499, Training Loss: 0.001953
Validation Loss: 0.000510
Epoch 80/499, Training Loss: 0.001729
Validation Loss: 0.000507
Epoch 100/499, Training Loss: 0.001599
Validation Loss: 0.000534
Epoch 120/499, Training Loss: 0.001484
Validation Loss: 0.000533
Epoch 140/499, Training Loss: 0.001289
Validation Loss: 0.000587
Epoch 160/499, Training Loss: 0.001349
Validation Loss: 0.000500
Epoch 180/499, Training Loss: 0.001479
Validation Loss: 0.000521
Epoch 200/499, Training Loss: 0.001382
Validation Loss: 0.000481
Epoch 220/499, Training Loss: 0.001348
Validation Loss: 0.0

Epoch 420/499, Training Loss: 0.001380
Validation Loss: 0.000398
Epoch 440/499, Training Loss: 0.001405
Validation Loss: 0.000437
Epoch 460/499, Training Loss: 0.001458
Validation Loss: 0.000500
Epoch 480/499, Training Loss: 0.001512
Validation Loss: 0.000398
Test Loss: 0.000307
model LSTM: LVMUY
Epoch 0/499, Training Loss: 0.236772
Validation Loss: 0.120149
Epoch 20/499, Training Loss: 0.019875
Validation Loss: 0.012058
Epoch 40/499, Training Loss: 0.007782
Validation Loss: 0.000982
Epoch 60/499, Training Loss: 0.005331
Validation Loss: 0.000410
Epoch 80/499, Training Loss: 0.005172
Validation Loss: 0.000492
Epoch 100/499, Training Loss: 0.004462
Validation Loss: 0.000370
Epoch 120/499, Training Loss: 0.003717
Validation Loss: 0.000440
Epoch 140/499, Training Loss: 0.003922
Validation Loss: 0.000744
Epoch 160/499, Training Loss: 0.003245
Validation Loss: 0.000516
Epoch 180/499, Training Loss: 0.003081
Validation Loss: 0.000356
Epoch 200/499, Training Loss: 0.003746
Validation Loss: 0.

Epoch 400/499, Training Loss: 0.001560
Validation Loss: 0.000489
Epoch 420/499, Training Loss: 0.001520
Validation Loss: 0.000546
Epoch 440/499, Training Loss: 0.001462
Validation Loss: 0.000412
Epoch 460/499, Training Loss: 0.001289
Validation Loss: 0.000427
Epoch 480/499, Training Loss: 0.001293
Validation Loss: 0.000394
Test Loss: 0.000456
model LSTM: MDLZ
Epoch 0/499, Training Loss: 0.351904
Validation Loss: 0.209859
Epoch 20/499, Training Loss: 0.060730
Validation Loss: 0.054253
Epoch 40/499, Training Loss: 0.016309
Validation Loss: 0.005731
Epoch 60/499, Training Loss: 0.009899
Validation Loss: 0.001421
Epoch 80/499, Training Loss: 0.006748
Validation Loss: 0.000767
Epoch 100/499, Training Loss: 0.006481
Validation Loss: 0.000622
Epoch 120/499, Training Loss: 0.006219
Validation Loss: 0.000591
Epoch 140/499, Training Loss: 0.005708
Validation Loss: 0.000595
Epoch 160/499, Training Loss: 0.004871
Validation Loss: 0.000638
Epoch 180/499, Training Loss: 0.004678
Validation Loss: 0.0

Epoch 380/499, Training Loss: 0.001564
Validation Loss: 0.000508
Epoch 400/499, Training Loss: 0.001357
Validation Loss: 0.000480
Epoch 420/499, Training Loss: 0.001375
Validation Loss: 0.000540
Epoch 440/499, Training Loss: 0.001315
Validation Loss: 0.000552
Epoch 460/499, Training Loss: 0.001332
Validation Loss: 0.000459
Epoch 480/499, Training Loss: 0.001250
Validation Loss: 0.000509
Test Loss: 0.000422
model LSTM: PEP
Epoch 0/499, Training Loss: 0.171937
Validation Loss: 0.078156
Epoch 20/499, Training Loss: 0.028932
Validation Loss: 0.012069
Epoch 40/499, Training Loss: 0.006627
Validation Loss: 0.001453
Epoch 60/499, Training Loss: 0.004468
Validation Loss: 0.000719
Epoch 80/499, Training Loss: 0.004130
Validation Loss: 0.000531
Epoch 100/499, Training Loss: 0.003877
Validation Loss: 0.000637
Epoch 120/499, Training Loss: 0.003461
Validation Loss: 0.000511
Epoch 140/499, Training Loss: 0.003498
Validation Loss: 0.000495
Epoch 160/499, Training Loss: 0.003512
Validation Loss: 0.00

Epoch 360/499, Training Loss: 0.001313
Validation Loss: 0.000395
Epoch 380/499, Training Loss: 0.001316
Validation Loss: 0.000397
Epoch 400/499, Training Loss: 0.001414
Validation Loss: 0.000298
Epoch 420/499, Training Loss: 0.001343
Validation Loss: 0.000520
Epoch 440/499, Training Loss: 0.001266
Validation Loss: 0.000429
Epoch 460/499, Training Loss: 0.001190
Validation Loss: 0.000287
Epoch 480/499, Training Loss: 0.001068
Validation Loss: 0.000263
Test Loss: 0.000302
model LSTM: XLV
Epoch 0/499, Training Loss: 0.183542
Validation Loss: 0.092639
Epoch 20/499, Training Loss: 0.020208
Validation Loss: 0.016817
Epoch 40/499, Training Loss: 0.007143
Validation Loss: 0.001941
Epoch 60/499, Training Loss: 0.004458
Validation Loss: 0.000718
Epoch 80/499, Training Loss: 0.004462
Validation Loss: 0.000667
Epoch 100/499, Training Loss: 0.003451
Validation Loss: 0.000656
Epoch 120/499, Training Loss: 0.003343
Validation Loss: 0.000579
Epoch 140/499, Training Loss: 0.003211
Validation Loss: 0.00

Epoch 340/499, Training Loss: 0.001751
Validation Loss: 0.000468
Epoch 360/499, Training Loss: 0.001795
Validation Loss: 0.000448
Epoch 380/499, Training Loss: 0.001782
Validation Loss: 0.000405
Epoch 400/499, Training Loss: 0.001531
Validation Loss: 0.000397
Epoch 420/499, Training Loss: 0.001607
Validation Loss: 0.000496
Epoch 440/499, Training Loss: 0.001371
Validation Loss: 0.000425
Epoch 460/499, Training Loss: 0.001428
Validation Loss: 0.000435
Epoch 480/499, Training Loss: 0.001421
Validation Loss: 0.000339
Test Loss: 0.000604
model LSTM: INDA
Epoch 0/499, Training Loss: 0.219872
Validation Loss: 0.103722
Epoch 20/499, Training Loss: 0.017360
Validation Loss: 0.011642
Epoch 40/499, Training Loss: 0.007349
Validation Loss: 0.002624
Epoch 60/499, Training Loss: 0.004624
Validation Loss: 0.000921
Epoch 80/499, Training Loss: 0.003729
Validation Loss: 0.000533
Epoch 100/499, Training Loss: 0.003766
Validation Loss: 0.000575
Epoch 120/499, Training Loss: 0.002728
Validation Loss: 0.0

Epoch 320/499, Training Loss: 0.002589
Validation Loss: 0.000659
Epoch 340/499, Training Loss: 0.002649
Validation Loss: 0.000658
Epoch 360/499, Training Loss: 0.002338
Validation Loss: 0.000597
Epoch 380/499, Training Loss: 0.002486
Validation Loss: 0.000805
Epoch 400/499, Training Loss: 0.002362
Validation Loss: 0.000567
Epoch 420/499, Training Loss: 0.002218
Validation Loss: 0.000489
Epoch 440/499, Training Loss: 0.001944
Validation Loss: 0.000538
Epoch 460/499, Training Loss: 0.001812
Validation Loss: 0.000471
Epoch 480/499, Training Loss: 0.001745
Validation Loss: 0.000541
Test Loss: 0.000485
model LSTM: XLI
Epoch 0/499, Training Loss: 0.163624
Validation Loss: 0.081267
Epoch 20/499, Training Loss: 0.015332
Validation Loss: 0.013159
Epoch 40/499, Training Loss: 0.004906
Validation Loss: 0.001018
Epoch 60/499, Training Loss: 0.003792
Validation Loss: 0.000822
Epoch 80/499, Training Loss: 0.003223
Validation Loss: 0.000688
Epoch 100/499, Training Loss: 0.003079
Validation Loss: 0.00

In [56]:
# RF: fit one random forest per stock on flattened windows and record its MSEs.
rf_models = {}
test_losses = []
val_losses = []
input_size = 6

for stock in stock_60d:
    print("model RF: " + stock)

    # The forest needs 2-D input, so each (60, input_size) window becomes one row.
    flat_dim = 60 * input_size
    train_flat = X_train[stock].reshape(-1, flat_dim)
    val_flat = X_val[stock].reshape(-1, flat_dim)
    test_flat = X_test[stock].reshape(-1, flat_dim)

    model_rf = RandomForestModel()
    model_rf.fit(train_flat, y_train[stock])

    y_train_pred_rf = model_rf.predict(train_flat)
    y_val_pred_rf = model_rf.predict(val_flat)
    y_test_pred_rf = model_rf.predict(test_flat)

    train_error_rf = mean_squared_error(y_train[stock], y_train_pred_rf)
    val_error_rf = mean_squared_error(y_val[stock], y_val_pred_rf)
    test_error_rf = mean_squared_error(y_test[stock], y_test_pred_rf)

    rf_models[stock] = model_rf
    val_losses.append(val_error_rf)
    test_losses.append(test_error_rf)

    print(f"{stock} - Train Error: {train_error_rf:.6f}, Val Error: {val_error_rf:.6f}, Test Error: {test_error_rf:.6f}")

print("avg val loss", np.array(val_losses).mean())
print("avg test loss", np.array(test_losses).mean())

model RF: DEO
DEO - Train Error: 0.000198, Val Error: 0.000344, Test Error: 0.000357
model RF: DLR
DLR - Train Error: 0.000224, Val Error: 0.000518, Test Error: 0.000369
model RF: ULTA
ULTA - Train Error: 0.000155, Val Error: 0.000314, Test Error: 0.000324
model RF: V
V - Train Error: 0.000086, Val Error: 0.000145, Test Error: 0.000143
model RF: DIS
DIS - Train Error: 0.000216, Val Error: 0.000414, Test Error: 0.000337
model RF: CME
CME - Train Error: 0.000153, Val Error: 0.000229, Test Error: 0.000371
model RF: AAPL
AAPL - Train Error: 0.000084, Val Error: 0.000149, Test Error: 0.000136
model RF: GOOGL
GOOGL - Train Error: 0.000093, Val Error: 0.000163, Test Error: 0.000147
model RF: UNH
UNH - Train Error: 0.000083, Val Error: 0.000100, Test Error: 0.000150
model RF: GS
GS - Train Error: 0.000152, Val Error: 0.000296, Test Error: 0.000269
model RF: QCOM
QCOM - Train Error: 0.000167, Val Error: 0.000342, Test Error: 0.000338
model RF: BA
BA - Train Error: 0.000187, Val Error: 0.000397,

In [57]:
# SVR: fit one support vector regressor per stock on flattened windows and
# record its train / validation / test MSE.
SVR_models = {}
test_losses = []
val_losses = []
input_size = 6

for stock in stock_60d:
    # Fixed copy-paste label: this cell trains SVR models, not random forests.
    print("model SVR: " + stock)

    # SVR needs 2-D input, so each (60, input_size) window becomes one row.
    flat_dim = 60 * input_size
    train_flat = X_train[stock].reshape(-1, flat_dim)
    val_flat = X_val[stock].reshape(-1, flat_dim)
    test_flat = X_test[stock].reshape(-1, flat_dim)

    model_svr = SVRModel()
    model_svr.fit(train_flat, y_train[stock])

    y_train_pred_svr = model_svr.predict(train_flat)
    y_val_pred_svr = model_svr.predict(val_flat)
    y_test_pred_svr = model_svr.predict(test_flat)

    train_error_svr = mean_squared_error(y_train[stock], y_train_pred_svr)
    val_error_svr = mean_squared_error(y_val[stock], y_val_pred_svr)
    test_error_svr = mean_squared_error(y_test[stock], y_test_pred_svr)

    SVR_models[stock] = model_svr

    val_losses.append(val_error_svr)
    test_losses.append(test_error_svr)

    print(f"{stock} - Train Error: {train_error_svr:.6f}, Val Error: {val_error_svr:.6f}, Test Error: {test_error_svr:.6f}")

print("avg val loss", np.mean(np.array(val_losses)))
print("avg test loss", np.mean(np.array(test_losses)))

model RF: DEO
DEO - Train Error: 0.002056, Val Error: 0.001983, Test Error: 0.002124
model RF: DLR
DLR - Train Error: 0.002124, Val Error: 0.002192, Test Error: 0.002033
model RF: ULTA
ULTA - Train Error: 0.002418, Val Error: 0.002274, Test Error: 0.002499
model RF: V
V - Train Error: 0.003521, Val Error: 0.003650, Test Error: 0.003401
model RF: DIS
DIS - Train Error: 0.002010, Val Error: 0.002147, Test Error: 0.002026
model RF: CME
CME - Train Error: 0.002437, Val Error: 0.002121, Test Error: 0.002284
model RF: AAPL
AAPL - Train Error: 0.003174, Val Error: 0.002930, Test Error: 0.002985
model RF: GOOGL
GOOGL - Train Error: 0.002535, Val Error: 0.002636, Test Error: 0.002404
model RF: UNH
UNH - Train Error: 0.003128, Val Error: 0.003182, Test Error: 0.003123
model RF: GS
GS - Train Error: 0.001692, Val Error: 0.001735, Test Error: 0.001752
model RF: QCOM
QCOM - Train Error: 0.001606, Val Error: 0.001698, Test Error: 0.001651
model RF: BA
BA - Train Error: 0.002332, Val Error: 0.002253,

### Mean test error (Next-day Closing Price Prediction)
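Note: Closing Price is min-max scaled to [0, 1] per stock (`MinMaxScaler`), so the errors below are MSE in scaled units, not in price units.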
* SVR ($2.2 \times 10^{-3}$)
* **RF ($2.8 \times 10^{-4}$)**
* LSTM ($4.6 \times 10^{-4}$)
* DMLP ($5.3 \times 10^{-4}$)