In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn

from methods.make_5min_volume_from_1_min_volume import make_5min_volume_from_1min_volume
from methods.make_single_df_test import make_single_df_from_bid_test

In [2]:
# Environment check, one value per line: the PyTorch version, the CUDA
# version reported by torch, and whether a CUDA device is usable.
print(
    torch.__version__,
    torch.version.cuda,
    torch.cuda.is_available(),
    sep="\n",
)

2.6.0+cu118
11.8
True


In [2]:
# pd.set_option('display.float_format', '{:.0f}'.format)

In [3]:
# pd.reset_option('display.float_format')

In [4]:
# Lift pandas' display truncation for columns and rows.
# NOTE: with max_rows unset, displaying a whole frame prints every row, so
# only ever display a slice such as .head().
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

base_dir = "hist_data/EURUSD/1_min/"

dir_bid = base_dir + "Bid/EURUSD_Candlestick_1_m_BID_01.01.2004-01.01.2007.csv"
dir_ask = base_dir + "Ask/EURUSD_Candlestick_1_M_ASK_01.01.2004-01.01.2007.csv"

# "Gmt time" labels look like "01.01.2004 00:00:00.000".
# NOTE(review): assumed to be day-first (DD.MM.YYYY) -- confirm against the export.
gmt_time_format = "%d.%m.%Y %H:%M:%S.%f"


def _load_candles_chronologically(csv_path):
    """Read one candle CSV indexed by "Gmt time" and order its rows by time.

    The index is kept as the original strings (so downstream helpers receive
    the same labels as before), but the row order comes from the parsed
    timestamps. Sorting the raw strings would be lexicographic, which for a
    day-first date format groups rows by day-of-month across all years
    instead of ordering them chronologically.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        DataFrame indexed by the original "Gmt time" strings, rows in
        ascending chronological order.
    """
    candles = pd.read_csv(csv_path, index_col="Gmt time")
    timestamps = pd.to_datetime(candles.index, format=gmt_time_format)
    return candles.iloc[timestamps.argsort()]


# NOTE: despite the "5min" in their names, these frames are read from the
# 1_min directory; the names are kept so other cells that use them still work.
df_5min_bid = _load_candles_chronologically(dir_bid)
df_5min_ask = _load_candles_chronologically(dir_ask)

df_joined = make_single_df_from_bid_test(df_bid=df_5min_bid, df_ask=df_5min_ask)

print(df_joined.head())

                            open    close     high      low  volume
Gmt time                                                           
01.01.2004 00:00:00.000  1.25934  1.25953  1.25960  1.25930   951.0
01.01.2004 00:01:00.000  1.25953  1.25904  1.25953  1.25901  1481.5
01.01.2004 00:02:00.000  1.25907  1.25886  1.25910  1.25881  1120.2
01.01.2004 00:03:00.000  1.25901  1.25886  1.25901  1.25886     0.0
01.01.2004 00:04:00.000  1.25901  1.25886  1.25901  1.25886     0.0


In [5]:
# Pass the joined frame through the 5-minute volume helper.
# NOTE: this rebinds df_joined to the helper's own output, so running this
# cell a second time feeds the already-transformed frame back in. Re-run the
# loading cell first if this cell needs to be executed again.
df_joined = make_5min_volume_from_1min_volume(df_joined=df_joined)

In [6]:
# Preview the first rows of the frame currently bound to df_joined.
df_joined.head()

minute_columns,1st_minute_volume,2nd_minute_volume,3rd_minute_volume,4th_minute_volume,5th_minute_volume
timestamp_5min,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2004-01-01 00:05:00,951.0,1481.5,1120.2,0.0,0.0
2004-01-01 00:10:00,1205.5,398.4,0.0,0.0,0.0
2004-01-01 00:15:00,0.0,0.0,841.3,886.4,0.0
2004-01-01 00:20:00,654.6,510.0,1349.6,1237.7,0.0
2004-01-01 00:25:00,0.0,0.0,0.0,0.0,0.0


LSTM Model Definition

In [None]:
class StockPredictor(nn.Module):
    """Stacked LSTM whose last time-step output is mapped by a linear layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the hidden state of every LSTM layer.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values predicted per input sequence.
        dropout: Dropout probability between stacked LSTM layers. Forced to
            0 when ``num_layers`` is 1, because nn.LSTM only applies dropout
            between layers and warns if it is set for a single layer.
    """

    def __init__(self, input_size=1, hidden_size=50, num_layers=2, output_size=1, dropout=0.2):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0.0,
        )

        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Predict from a batch of sequences.

        Args:
            x: Tensor of shape (batch, seq_len, input_size); the LSTM is
                built with ``batch_first=True``.

        Returns:
            Tensor of shape (batch, output_size).
        """
        # No explicit (h0, c0): nn.LSTM then starts from all-zero states that
        # match the input's device and dtype. Building them by hand with
        # torch.zeros(...) pinned them to CPU/float32 and broke the forward
        # pass as soon as the model or its input lived on CUDA or used
        # another dtype.
        out, _ = self.lstm(x)

        # Decode only the output of the last time step.
        return self.linear(out[:, -1, :])

Training the Model

In [None]:
# Initialize model, loss function and optimizer
model = StockPredictor()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training parameters
epochs = 100
batch_size = 32

# Fail early with a clear message; otherwise the inner loop never runs and the
# first progress line dies with a NameError on `loss`.
if len(X_train) == 0:
    raise ValueError("X_train is empty; nothing to train on")

# Explicitly select training mode so the LSTM dropout is active.
model.train()

# Training loop: sequential mini-batches over X_train / y_train, no shuffling.
for epoch in range(epochs):
    epoch_loss_sum = 0.0

    for i in range(0, len(X_train), batch_size):
        batch_X = X_train[i:i+batch_size]
        batch_y = y_train[i:i+batch_size]

        # Forward pass
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)

        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight by batch length so a short final batch does not skew the mean.
        epoch_loss_sum += loss.item() * len(batch_X)

    if (epoch+1) % 10 == 0:
        # Report the mean loss over the whole epoch rather than the loss of
        # whichever mini-batch happened to come last.
        epoch_loss = epoch_loss_sum / len(X_train)
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.6f}')

Evaluation and Prediction

In [None]:
# Switch to evaluation mode (disables dropout)
model.eval()

# Make predictions without recording an autograd graph: nothing is
# back-propagated here, and tracking gradients over the full train and test
# sets only costs memory.
with torch.no_grad():
    train_predict = model(X_train)
    test_predict = model(X_test)

# Inverse transform predictions and targets with `scaler`.
# .cpu() makes the NumPy conversion work when the tensors live on a CUDA
# device; it is a no-op for CPU tensors.
train_predict = scaler.inverse_transform(train_predict.cpu().numpy())
y_train_actual = scaler.inverse_transform(y_train.detach().cpu().numpy())
test_predict = scaler.inverse_transform(test_predict.cpu().numpy())
y_test_actual = scaler.inverse_transform(y_test.detach().cpu().numpy())

# The test curves are drawn to the right of the training curves on the x axis.
test_positions = range(len(y_train_actual), len(y_train_actual) + len(y_test_actual))

# Plot results
plt.figure(figsize=(12, 6))
plt.plot(y_train_actual, label='Actual Train Prices')
plt.plot(train_predict, label='Predicted Train Prices')
plt.plot(test_positions, y_test_actual, label='Actual Test Prices')
plt.plot(test_positions, test_predict, label='Predicted Test Prices')
plt.legend()
plt.show()

Future Prediction Function

In [None]:
def predict_future(model, last_sequence, future_days=30):
    """Roll the model forward step by step and return the de-scaled forecasts.

    Starting from a copy of ``last_sequence``, every step feeds the current
    window to ``model`` as a batch of one, records the scalar prediction,
    drops the oldest entry of the window and appends the prediction in its
    place.

    Args:
        model: Module called on the window; switched to eval mode here.
        last_sequence: Tensor holding the most recent window. It is cloned,
            so the caller's tensor is not modified.
            NOTE(review): assumes shape (seq_len, 1) so the (1, 1) model
            output can be concatenated along dim 0 -- confirm against caller.
        future_days: Number of forecasting steps to run.

    Returns:
        The result of the module-level ``scaler.inverse_transform`` applied
        to the predictions arranged as a single column.
    """
    model.eval()
    window = last_sequence.clone()
    forecasts = []

    # Inference only: no autograd bookkeeping for any of the steps.
    with torch.no_grad():
        for _step in range(future_days):
            next_value = model(window.unsqueeze(0))
            forecasts.append(next_value.item())
            # Slide the window: drop the oldest step, append the new prediction.
            window = torch.cat((window[1:], next_value), dim=0)

    forecast_column = np.array(forecasts).reshape(-1, 1)
    return scaler.inverse_transform(forecast_column)

# Seed the forecast with the final test window and roll the model 30 steps ahead.
last_seq = X_test[-1]
future_predictions = predict_future(model, last_seq, future_days=30)

# Draw the forecast on its own 12x6 figure.
forecast_fig, forecast_ax = plt.subplots(figsize=(12, 6))
forecast_ax.plot(future_predictions, label='Future Predictions')
forecast_ax.set_title('30-Day Stock Price Forecast')
forecast_ax.legend()
plt.show()