In [1]:
import pandas as pd
import numpy as np
import torch
from torch.autograd import Variable
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn

# Load the data
data = pd.read_csv('BTCUSD.csv')

# Drop missing values
data = data.dropna()

# Set the 'time' column to be the index
data.set_index('time', inplace=True)

# Normalize the features
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data)

# Convert the scaled data back into a DataFrame
scaled_data = pd.DataFrame(scaled_data, columns=data.columns, index=data.index)

# Split the data into training and testing sets
train_size = int(len(scaled_data) * 0.8)
train_data = scaled_data[:train_size]
test_data = scaled_data[train_size:]


In [2]:
# Separate the features (X) and target (y)
X_train = train_data.drop('close', axis=1)
y_train = train_data['close']
X_test = test_data.drop('close', axis=1)
y_test = test_data['close']

# Convert the dataframes to numpy arrays
X_train = X_train.values
y_train = y_train.values
X_test = X_test.values
y_test = y_test.values

# Create sequences
def create_sequences(X, y, seq_length):
    Xs, ys = [], []
    for i in range(len(X)-seq_length-1):
        Xs.append(X[i:(i+seq_length)])
        ys.append(y[i+seq_length])
    return np.array(Xs), np.array(ys)

seq_length = 5  # Choose sequence length

X_train_seq, y_train_seq = create_sequences(X_train, y_train, seq_length)
X_test_seq, y_test_seq = create_sequences(X_test, y_test, seq_length)

# Convert the sequences into PyTorch tensors
X_train_seq_tensor = Variable(torch.Tensor(X_train_seq))
y_train_seq_tensor = Variable(torch.Tensor(y_train_seq))
X_test_seq_tensor = Variable(torch.Tensor(X_test_seq))
y_test_seq_tensor = Variable(torch.Tensor(y_test_seq))




In [6]:

# Define the LSTM model
class model(nn.Module):
    def __init__(self, num_features, hidden_size, num_layers, dropout):
        super(model, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size=num_features, hidden_size=hidden_size, num_layers=num_layers, dropout=dropout, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
        c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
        out, (h_out, _) = self.lstm(x, (h_0, c_0))
        
        # Reshape the output tensor so that we can pass it to the linear layer
        out = out.reshape(-1, self.hidden_size)
        
        out = self.fc(out)
        return out.view(x.size(0), -1, 1)

# Instantiate the model
num_features = X_train_seq.shape[2]  # set this to the actual number of features in each input sequence
hidden_size = 128
num_layers = 2
dropout = 0.2

In [9]:
# Create DataLoaders
batch_size = 256 

train_dataset = TensorDataset(X_train_seq_tensor, y_train_seq_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size)

test_dataset = TensorDataset(X_test_seq_tensor, y_test_seq_tensor)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# Training loop
losses = []
num_epochs =50
for epoch in range(num_epochs):
    for i, (x_batch, y_batch) in enumerate(train_loader):
        model.train(model.self)
        optimizer.zero_grad()

        y_train_pred = model(x_batch)

        # Only keep the final output of each sequence
        y_train_pred = y_train_pred[:,-1,:]

        loss = criterion(y_train_pred, y_batch)

        loss.backward()
        optimizer.step()

#     if (epoch+1) % 10 == 0:
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}")
        losses.append(loss.item())




AttributeError: type object 'model' has no attribute 'self'