In [7]:
import pandas as pd
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Set the device to GPU if available, else CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

data_dir = './Dataset/Equities'

stock_data = {}
for file in os.listdir(data_dir):
    if file.endswith('.csv'):
        stock_name = file.split('.')[0]
        file_path = os.path.join(data_dir, file)
        stock_data[stock_name] = pd.read_csv(file_path)

for stock, data in stock_data.items():
    print(f"{stock} Data:")
    print(data.head())

BA Data:
         Date        Open        High         Low       Close   Adj Close  \
0  2014-05-27  133.089996  134.210007  132.869995  134.169998  115.965096   
1  2014-05-28  134.169998  135.149994  134.169998  134.330002  116.103371   
2  2014-05-29  134.710007  135.199997  134.429993  135.139999  116.803482   
3  2014-05-30  134.669998  135.440002  134.039993  135.250000  116.898544   
4  2014-06-02  135.500000  136.029999  134.770004  135.899994  117.460373   

    Volume  
0  3085500  
1  2609400  
2  2234100  
3  2760300  
4  2629900  
MSBHF Data:
         Date      Open      High       Low     Close  Adj Close  Volume
0  2014-05-27  6.466667  6.466667  6.466667  6.466667   4.680295       0
1  2014-05-28  6.466667  6.466667  6.466667  6.466667   4.680295       0
2  2014-05-29  6.466667  6.466667  6.466667  6.466667   4.680295       0
3  2014-05-30  6.466667  6.466667  6.466667  6.466667   4.680295       0
4  2014-06-02  6.466667  6.466667  6.466667  6.466667   4.680295       0


In [8]:
def preprocess_data(df):
    df = df.dropna()
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)
    scaler = MinMaxScaler()
    scaled_df = pd.DataFrame(scaler.fit_transform(df), columns=df.columns, index=df.index)
    return scaled_df, scaler

preprocessed_data = {}
scalers = {}
for stock, data in stock_data.items():
    preprocessed_data[stock], scalers[stock] = preprocess_data(data)

for stock, data in preprocessed_data.items():
    print(f"{stock} Preprocessed Data:")
    print(data.head())

BA Preprocessed Data:
                Open      High       Low     Close  Adj Close    Volume
Date                                                                   
2014-05-27  0.098888  0.089476  0.124918  0.113307   0.062498  0.022423
2014-05-28  0.101998  0.092221  0.128620  0.113770   0.062911  0.017774
2014-05-29  0.103554  0.092367  0.129360  0.116114   0.064999  0.014110
2014-05-30  0.103438  0.093067  0.128250  0.116432   0.065282  0.019247
2014-06-02  0.105828  0.094790  0.130328  0.118313   0.066958  0.017974
MSBHF Preprocessed Data:
                Open      High       Low     Close  Adj Close  Volume
Date                                                                 
2014-05-27  0.090584  0.089442  0.093794  0.093695   0.056429     0.0
2014-05-28  0.090584  0.089442  0.093794  0.093695   0.056429     0.0
2014-05-29  0.090584  0.089442  0.093794  0.093695   0.056429     0.0
2014-05-30  0.090584  0.089442  0.093794  0.093695   0.056429     0.0
2014-06-02  0.090584  0.08944

In [9]:
X_train, X_test, y_train, y_test = {}, {}, {}, {}
for stock in X:
    X_train[stock], X_test[stock], y_train[stock], y_test[stock] = train_test_split(X[stock], y[stock], test_size=0.2, random_state=42)

def to_tensor(data):
    return torch.tensor(data, dtype=torch.float32)

X_train_tensors = {stock: to_tensor(X_train[stock]) for stock in X_train}
X_test_tensors = {stock: to_tensor(X_test[stock]) for stock in X_test}
y_train_tensors = {stock: to_tensor(y_train[stock]) for stock in y_train}
y_test_tensors = {stock: to_tensor(y_test[stock]) for stock in y_test}

class StockLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(StockLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)
    
    def forward(self, x):
        h_0 = torch.zeros(num_layers, x.size(0), hidden_size).to(x.device)
        c_0 = torch.zeros(num_layers, x.size(0), hidden_size).to(x.device)
        out, _ = self.lstm(x, (h_0, c_0))
        out = self.fc(out[:, -1, :])
        return out

input_size = X_train_tensors[next(iter(X_train_tensors))].shape[2]
hidden_size = 50
num_layers = 2
num_epochs = 10
learning_rate = 0.001

models = {}
for stock in X_train_tensors:
    print(f"Training model for {stock}...")
    model = StockLSTM(input_size, hidden_size, num_layers)
    model = model.to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
    for epoch in range(num_epochs):
        model.train()
        outputs = model(X_train_tensors[stock].to(device))
        optimizer.zero_grad()
        loss = criterion(outputs, y_train_tensors[stock].to(device).view(-1, 1))
        loss.backward()
        optimizer.step()
        
        if (epoch+1) % 2 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
    
    models[stock] = model

def predict_next_day_return(model, data):
    model.eval()
    last_sequence = data[-seq_length:]
    last_sequence = torch.tensor(last_sequence.values, dtype=torch.float32).unsqueeze(0).to(device)
    with torch.no_grad():
        prediction = model(last_sequence)
    return prediction.item()

initial_investment = 10000  # Starting with $10,000
daily_portfolio_value = []

current_investment = initial_investment
investment_dates = preprocessed_data[next(iter(preprocessed_data))].index[seq_length:]  # Dates for investment
for date in investment_dates:
    predicted_returns = {}
    for stock, model in models.items():
        predicted_returns[stock] = predict_next_day_return(model, preprocessed_data[stock].loc[:date])

    total_predicted_return = sum(predicted_returns.values())
    allocations = {stock: predicted_return / total_predicted_return for stock, predicted_return in predicted_returns.items()}

    next_day_values = {}
    for stock, allocation in allocations.items():
        next_day_close_price = stock_data[stock].loc[date:].iloc[1]['Close']
        next_day_value = next_day_close_price * allocation * current_investment
        next_day_values[stock] = next_day_value

    current_investment = sum(next_day_values.values())
    daily_portfolio_value.append(current_investment)

plt.figure(figsize=(14, 7))
plt.plot(investment_dates, daily_portfolio_value, label='Portfolio Value')
plt.title("Dynamic Portfolio Performance Over Time")
plt.xlabel("Date")
plt.ylabel("Portfolio Value ($)")
plt.legend()
plt.show()

Training model for BA...
Epoch [2/10], Loss: 0.1126
Epoch [4/10], Loss: 0.0924
Epoch [6/10], Loss: 0.0755
Epoch [8/10], Loss: 0.0612
Epoch [10/10], Loss: 0.0501
Training model for MSBHF...
Epoch [2/10], Loss: 0.0756
Epoch [4/10], Loss: 0.0579
Epoch [6/10], Loss: 0.0428
Epoch [8/10], Loss: 0.0306
Epoch [10/10], Loss: 0.0225
Training model for V...
Epoch [2/10], Loss: 0.2574
Epoch [4/10], Loss: 0.2124
Epoch [6/10], Loss: 0.1698
Epoch [8/10], Loss: 0.1288
Epoch [10/10], Loss: 0.0903
Training model for HCA...
Epoch [2/10], Loss: 0.2845
Epoch [4/10], Loss: 0.2428
Epoch [6/10], Loss: 0.2023
Epoch [8/10], Loss: 0.1613
Epoch [10/10], Loss: 0.1185
Training model for NTDOY...
Epoch [2/10], Loss: 0.2500
Epoch [4/10], Loss: 0.2131
Epoch [6/10], Loss: 0.1773
Epoch [8/10], Loss: 0.1415
Epoch [10/10], Loss: 0.1057
Training model for ADM...
Epoch [2/10], Loss: 0.2358
Epoch [4/10], Loss: 0.1978
Epoch [6/10], Loss: 0.1591
Epoch [8/10], Loss: 0.1187
Epoch [10/10], Loss: 0.0769
Training model for LH...
Ep

TypeError: '<' not supported between instances of 'int' and 'Timestamp'