In [1]:
import pandas as pd
import matplotlib as plt
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import torch

In [2]:
def clean(path):
    df = pd.read_csv(path)
    df['time'] = pd.to_datetime(df['time'])
    return df

df = clean("train.csv")
df.head

<bound method NDFrame.head of                      time       low      high      open     close       volume
0     2015-11-30 12:00:00    367.50    379.42    378.16    373.00  3031.105717
1     2015-11-30 18:00:00    372.25    378.55    373.00    376.86  2652.465161
2     2015-12-01 00:00:00    375.80    378.94    376.88    378.01  1695.388592
3     2015-12-01 06:00:00    354.60    378.54    378.00    361.20  2682.538466
4     2015-12-01 12:00:00    355.00    364.86    361.21    360.14  2864.005011
...                   ...       ...       ...       ...       ...          ...
11622 2023-11-14 06:00:00  36249.50  36756.16  36642.00  36272.73  1455.611439
11623 2023-11-14 12:00:00  35855.00  36708.43  36272.63  36088.57  5289.680943
11624 2023-11-14 18:00:00  34758.64  36122.76  36090.42  35554.09  7511.702326
11625 2023-11-15 00:00:00  35358.45  35670.90  35554.10  35610.51  1929.755005
11626 2023-11-15 06:00:00  35533.29  36293.22  35610.08  36253.78  1693.626865

[11627 rows x 6 colum

In [3]:
def normalize(df):
    scaler = MinMaxScaler()
    cols = ['low', 'high', 'open', 'close', 'volume']  # adjust with your column names
    df[cols] = scaler.fit_transform(df[cols])
    return df, scaler

def reverse_norm(pred, scaler):
    predictions = pred.reshape(-1, 1)
    real_pred = scaler.inverse_transform(predictions)
    return real_pred
    
norm, scaler = normalize(df)
norm.head

<bound method NDFrame.head of                      time       low      high      open     close    volume
0     2015-11-30 12:00:00  0.005457  0.000341  0.000368  0.000293  0.029584
1     2015-11-30 18:00:00  0.005527  0.000328  0.000293  0.000349  0.025770
2     2015-12-01 00:00:00  0.005580  0.000334  0.000350  0.000366  0.016130
3     2015-12-01 06:00:00  0.005265  0.000328  0.000366  0.000120  0.026073
4     2015-12-01 12:00:00  0.005271  0.000129  0.000120  0.000104  0.027901
...                   ...       ...       ...       ...       ...       ...
11622 2023-11-14 06:00:00  0.538342  0.530274  0.531431  0.526014  0.013715
11623 2023-11-14 12:00:00  0.532483  0.529579  0.526022  0.523317  0.052333
11624 2023-11-14 18:00:00  0.516201  0.521047  0.523353  0.515490  0.074715
11625 2023-11-15 00:00:00  0.525109  0.514464  0.515499  0.516316  0.018491
11626 2023-11-15 06:00:00  0.527706  0.523530  0.516319  0.525737  0.016112

[11627 rows x 6 columns]>

In [4]:
def create_sequences(df, window_size):
    sequences = []
    df_size = len(df)
    for i in range(df_size - window_size):
        # Here, i: start of the sequence, i + window_size: end of the sequence
        sequence = df[i:i + window_size]
        label = df[i + window_size:i + window_size + 1]  # next value to be predicted
        sequences.append((sequence, label))
    return sequences

window_size = 10  # This is just an example value

# Assuming 'close' is what you want to predict
sequences = create_sequences(norm['close'].values, window_size)
sequences = [(torch.FloatTensor(seq), torch.FloatTensor(lbl)) for seq, lbl in sequences]

In [5]:
def split_sequences(sequences, test_size=0.4, val_size=0.5, random_state=42):
    X, y = zip(*sequences)  # No change here, but X and y are already tensors
    X = torch.stack(X)  # Stack all sequence tensors
    y = torch.stack(y).squeeze()  # Stack all label tensors and remove extra dimension

    # Shuffle the data
    indices = torch.randperm(X.size(0))
    X, y = X[indices], y[indices]

    # Calculate split sizes
    train_end = int(X.size(0) * (1 - test_size))
    val_end = int(train_end * (1 - val_size))

    # Split the data
    X_train, X_temp = X[:train_end], X[train_end:]
    y_train, y_temp = y[:train_end], y[train_end:]
    X_val, X_test = X_temp[:val_end], X_temp[val_end:]
    y_val, y_test = y_temp[:val_end], y_temp[val_end:]

    return X_train, X_val, X_test, y_train, y_val, y_test

X_train, X_val, X_test, y_train, y_val, y_test = split_sequences(sequences)

In [6]:
from torch.utils.data import Dataset, DataLoader, TensorDataset

# Assuming you have already run the modified split_sequences function
# and have X_train, X_val, X_test, y_train, y_val, y_test

# Create TensorDataset
train_data = TensorDataset(X_train, y_train)
val_data = TensorDataset(X_val, y_val)
test_data = TensorDataset(X_test, y_test)

# Create DataLoaders
batch_size = 64  # Adjust as necessary
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_data, batch_size=batch_size)
test_loader = DataLoader(test_data, batch_size=batch_size)

In [13]:
import torch.nn as nn

class BiLSTM(nn.Module):
    def __init__(self, input_size, hidden_layer_size, output_size):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size

        # Setting bidirectional=True doubles the output feature dimension 
        # because it concatenates the hidden states from both directions
        self.lstm = nn.LSTM(input_size, hidden_layer_size, 
                            batch_first=True, bidirectional=True)

        # Since the LSTM is bidirectional, we need to double the input feature dimension
        self.linear = nn.Linear(hidden_layer_size * 2, output_size)

    def forward(self, input_seq):
        # lstm_out shape is (batch, seq_len, num_directions * hidden_size)
        lstm_out, _ = self.lstm(input_seq.view(len(input_seq), -1, input_size))
        
        # We take the last time step's output from both directions
        predictions = self.linear(lstm_out[:, -1, :])
        return predictions
# Instantiate the model, loss function, and optimizer
num_features = 5
input_size = num_features  # number of features in your input
hidden_layer_size = 180  # number of features in hidden state
output_size = 1  # predict one feature

model = BiLSTM(input_size, hidden_layer_size, output_size)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [14]:
epochs = 500

for epoch in range(epochs):
    # Training
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        y_pred = model(inputs)
        labels = labels.view_as(y_pred)
        single_loss = criterion(y_pred, labels)
        single_loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, labels in val_loader:
            y_pred = model(inputs)
            val_loss += criterion(y_pred, labels).item()
    val_loss /= len(val_loader)

    print(f'Epoch {epoch+1} train_loss: {single_loss.item():.10f} val_loss: {val_loss:.10f}')

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1 train_loss: 0.0002138336 val_loss: 0.1033149486
Epoch 2 train_loss: 0.0001650601 val_loss: 0.1051297264
Epoch 3 train_loss: 0.0001400913 val_loss: 0.1039687841
Epoch 4 train_loss: 0.0001309434 val_loss: 0.1047549744
Epoch 5 train_loss: 0.0000737583 val_loss: 0.1036933917
Epoch 6 train_loss: 0.0000410145 val_loss: 0.1051873284
Epoch 7 train_loss: 0.0000535088 val_loss: 0.1037446519
Epoch 8 train_loss: 0.0000598358 val_loss: 0.1040683583
Epoch 9 train_loss: 0.0000948525 val_loss: 0.1054801428
Epoch 10 train_loss: 0.0000612892 val_loss: 0.1043496766
Epoch 11 train_loss: 0.0000644349 val_loss: 0.1049921801
Epoch 12 train_loss: 0.0001009529 val_loss: 0.1037126994
Epoch 13 train_loss: 0.0002575785 val_loss: 0.1045449342
Epoch 14 train_loss: 0.0000852729 val_loss: 0.1034034552
Epoch 15 train_loss: 0.0000473852 val_loss: 0.1050553957
Epoch 16 train_loss: 0.0001252121 val_loss: 0.1048035968
Epoch 17 train_loss: 0.0001397037 val_loss: 0.1039989091
Epoch 18 train_loss: 0.0000708899 val_lo

Epoch 145 train_loss: 0.0000071465 val_loss: 0.1040665324
Epoch 146 train_loss: 0.0000776760 val_loss: 0.1052016245
Epoch 147 train_loss: 0.0000541024 val_loss: 0.1029275522
Epoch 148 train_loss: 0.0000535775 val_loss: 0.1044841012
Epoch 149 train_loss: 0.0000406462 val_loss: 0.1047809406
Epoch 150 train_loss: 0.0000135275 val_loss: 0.1035545261
Epoch 151 train_loss: 0.0000205243 val_loss: 0.1045386260
Epoch 152 train_loss: 0.0000213462 val_loss: 0.1042629262
Epoch 153 train_loss: 0.0000578603 val_loss: 0.1048055375
Epoch 154 train_loss: 0.0000550599 val_loss: 0.1047439008
Epoch 155 train_loss: 0.0000118497 val_loss: 0.1039134580
Epoch 156 train_loss: 0.0000580654 val_loss: 0.1035440393
Epoch 157 train_loss: 0.0000546161 val_loss: 0.1034593135
Epoch 158 train_loss: 0.0000237207 val_loss: 0.1041427101
Epoch 159 train_loss: 0.0000908975 val_loss: 0.1045544962
Epoch 160 train_loss: 0.0000402656 val_loss: 0.1039707998
Epoch 161 train_loss: 0.0002626177 val_loss: 0.1029353787
Epoch 162 trai

Epoch 287 train_loss: 0.0001220944 val_loss: 0.1053543611
Epoch 288 train_loss: 0.0000232583 val_loss: 0.1050008409
Epoch 289 train_loss: 0.0000174439 val_loss: 0.1043428563
Epoch 290 train_loss: 0.0000292206 val_loss: 0.1052122180
Epoch 291 train_loss: 0.0000871979 val_loss: 0.1042719331
Epoch 292 train_loss: 0.0000231507 val_loss: 0.1049744725
Epoch 293 train_loss: 0.0001116695 val_loss: 0.1050421330
Epoch 294 train_loss: 0.0000205714 val_loss: 0.1045285098
Epoch 295 train_loss: 0.0000337549 val_loss: 0.1049742525
Epoch 296 train_loss: 0.0000656032 val_loss: 0.1034051082
Epoch 297 train_loss: 0.0000498414 val_loss: 0.1039215693
Epoch 298 train_loss: 0.0000434044 val_loss: 0.1039640964
Epoch 299 train_loss: 0.0000195901 val_loss: 0.1045529207
Epoch 300 train_loss: 0.0000163615 val_loss: 0.1043009444
Epoch 301 train_loss: 0.0000348103 val_loss: 0.1035740377
Epoch 302 train_loss: 0.0000611310 val_loss: 0.1046006117
Epoch 303 train_loss: 0.0000343433 val_loss: 0.1043848448
Epoch 304 trai

Epoch 429 train_loss: 0.0000392072 val_loss: 0.1046602587
Epoch 430 train_loss: 0.0000753325 val_loss: 0.1050356907
Epoch 431 train_loss: 0.0000720754 val_loss: 0.1049745478
Epoch 432 train_loss: 0.0000374565 val_loss: 0.1042229474
Epoch 433 train_loss: 0.0000423283 val_loss: 0.1051325684
Epoch 434 train_loss: 0.0000294991 val_loss: 0.1050630281
Epoch 435 train_loss: 0.0000326743 val_loss: 0.1050128575
Epoch 436 train_loss: 0.0000564384 val_loss: 0.1043697401
Epoch 437 train_loss: 0.0000537408 val_loss: 0.1043646307
Epoch 438 train_loss: 0.0000075977 val_loss: 0.1039478623
Epoch 439 train_loss: 0.0000791690 val_loss: 0.1048886260
Epoch 440 train_loss: 0.0000180019 val_loss: 0.1044350173
Epoch 441 train_loss: 0.0000318139 val_loss: 0.1049244381
Epoch 442 train_loss: 0.0000240454 val_loss: 0.1044880050
Epoch 443 train_loss: 0.0000417151 val_loss: 0.1038769976
Epoch 444 train_loss: 0.0000754268 val_loss: 0.1039575735
Epoch 445 train_loss: 0.0000364595 val_loss: 0.1042178575
Epoch 446 trai

In [15]:
model.eval()
test_loss = 0.0
predictions = []
actuals = []

with torch.no_grad():
    for inputs, labels in test_loader:
        # Forward pass
        y_pred = model(inputs)
        labels = labels.view_as(y_pred)

        # Calculate the batch loss
        loss = criterion(y_pred, labels)
        test_loss += loss.item()

        # Store predictions and actual values for further analysis if needed
        predictions.append(y_pred.numpy())  # or y_pred.cpu().numpy() if using GPU
        actuals.append(labels.numpy())

# Calculate average loss over the test set
test_loss /= len(test_loader)
print(f'Test Loss: {test_loss:.10f}')
# Test Loss: 0.0000459561

Test Loss: 0.0000408171


In [16]:
import pandas as pd
import numpy as np
import torch
from sklearn.preprocessing import MinMaxScaler

# Load and normalize the inference data
inf_data = pd.read_csv("inf.csv")
print(inf_data.tail)

# Fit a separate scaler for the 'close' feature
close_scaler = MinMaxScaler()
inf_data['close'] = close_scaler.fit_transform(inf_data[['close']])

# Prepare the initial recent data for prediction
window_size = 10  # Adjust based on your model's training
recent_data = inf_data['close'][-window_size:].values.reshape(1, window_size, 1)  # Reshape for single batch, single feature

# Recursively predict next 5 time steps
num_predictions = 16
predictions = []
bi_predictions = []

for _ in range(num_predictions):
    # Convert recent data to tensor
    recent_data_tensor = torch.FloatTensor(recent_data)

    # Perform inference
    model.eval()  # Set model to evaluation mode
    with torch.no_grad():
        predicted_next_step = model(recent_data_tensor).numpy()  # Get the model's prediction for the next step

    # Inverse transform to get actual price scale for the predicted step
    predicted_close_price = close_scaler.inverse_transform(predicted_next_step.reshape(-1, 1))[0, 0]
    predictions.append(predicted_close_price)

    # Update the recent data with the predicted value
    recent_data = np.roll(recent_data, -1, axis=1)  # Shift everything one step to the left
    recent_data[0, -1, 0] = predicted_next_step  # Insert the prediction at the end of recent_data

# predictions now contains the 5 sequential forecasted values
print("Predicted next 5 closing prices:", predictions)

<bound method NDFrame.tail of                     time       low      high      open     close       volume
0    2023-11-16 12:00:00  36260.32  37332.32  37260.42  36316.17  6840.344833
1    2023-11-16 18:00:00  35511.11  36400.18  36314.57  36161.15  7177.317828
2    2023-11-17 00:00:00  36097.28  36674.75  36161.15  36363.27  2676.160827
3    2023-11-17 06:00:00  36131.00  36473.75  36365.33  36382.68  1550.020928
4    2023-11-17 12:00:00  35869.00  36831.99  36382.69  36508.83  4979.856742
..                   ...       ...       ...       ...       ...          ...
151  2023-12-24 06:00:00  43434.62  43721.81  43517.85  43655.99   637.606020
152  2023-12-24 12:00:00  43578.33  43901.70  43658.10  43654.11   882.832758
153  2023-12-24 18:00:00  42614.17  43719.56  43656.52  43025.03  2143.310127
154  2023-12-25 00:00:00  42755.35  43241.15  43025.02  43231.21  1245.264334
155  2023-12-25 06:00:00  43056.87  43339.64  43233.08  43188.37   807.055913

[156 rows x 6 columns]>
Predicted

  recent_data[0, -1, 0] = predicted_next_step  # Insert the prediction at the end of recent_data
  recent_data[0, -1, 0] = predicted_next_step  # Insert the prediction at the end of recent_data
  recent_data[0, -1, 0] = predicted_next_step  # Insert the prediction at the end of recent_data
  recent_data[0, -1, 0] = predicted_next_step  # Insert the prediction at the end of recent_data
  recent_data[0, -1, 0] = predicted_next_step  # Insert the prediction at the end of recent_data
  recent_data[0, -1, 0] = predicted_next_step  # Insert the prediction at the end of recent_data
  recent_data[0, -1, 0] = predicted_next_step  # Insert the prediction at the end of recent_data
  recent_data[0, -1, 0] = predicted_next_step  # Insert the prediction at the end of recent_data
  recent_data[0, -1, 0] = predicted_next_step  # Insert the prediction at the end of recent_data
  recent_data[0, -1, 0] = predicted_next_step  # Insert the prediction at the end of recent_data
  recent_data[0, -1, 0] = pred