<a href="https://colab.research.google.com/github/pyagoubi/Stuff/blob/main/stockpred.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Mounting Google Drive
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
%%capture
!pip install bt

In [29]:
%matplotlib inline
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils import data
from torch.utils.data import dataloader
import bt as bt
import os, sys, itertools, urllib, io
import datetime as dt
import pandas as pd
import pandas_datareader as dr
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import Dataset

In [56]:
tech_daily_raw = pd.read_csv('/content/drive/MyDrive/stock predict/technical/1D_technical.csv')
features = ['open', 'high', 'low', 'close', 'rsi', 'adx', 'cci', 'ema', 'stoch', 'trend_macd', 'momentum_stoch', 'volatility_atr']
tech_daily = tech_daily_raw[features]

# init deterministic seed
seed_value = 1234
np.random.seed(seed_value) # set numpy seed
torch.manual_seed(seed_value); # set pytorch seed CPU

# set cpu or gpu enabled device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu').type

# init deterministic GPU seed
torch.cuda.manual_seed(seed_value)

# log type of device enabled
now = dt.datetime.utcnow().strftime("%Y%m%d-%H:%M:%S")
print('[LOG {}] notebook with \'{}\' computation enabled'.format(str(now), str(device)))





[LOG 20230516-14:28:14] notebook with 'cuda' computation enabled


In [57]:
tech_daily = tech_daily.copy()
tech_daily['return'] = np.log(tech_daily['close']) - np.log(tech_daily['close'].shift(1))

In [58]:
features = features + ['return']

In [59]:
scaler = MinMaxScaler()
tech_daily_scaled = pd.DataFrame(scaler.fit_transform(tech_daily), columns = features)

In [60]:
split_fraction = 0.9
split_row = int(tech_daily.shape[0] * split_fraction)
train_stock_data_return = tech_daily.iloc[:split_row]
valid_stock_data_return = tech_daily.iloc[split_row:]

In [61]:
time_steps = 4 # number of predictor timesteps
horizon = 1 # number of timesteps to be predicted
sequence_length = time_steps + horizon # determine sequence length

In [83]:
def preprocess_data(df, sequence_length):
    # Normalize the data
    #normalized_data = (df - df.mean()) / df.std()

    # Convert the DataFrame to a numpy array
    #data_array = normalized_data.values

    data_array = df.values

    # Split the data into sequences
    sequences = []
    for i in range(len(data_array) - sequence_length):
        sequence = data_array[i : i + sequence_length + 1]
        sequences.append(sequence)

    # Convert sequences to PyTorch tensors
    tensor_sequences = [torch.Tensor(seq) for seq in sequences]

    # Stack the tensor sequences
    stacked_sequences = torch.stack(tensor_sequences)

    return stacked_sequences


class MyDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data) - 1

    def __getitem__(self, idx):
        x = torch.Tensor(self.data[idx, :-1])  # Input features (all columns except the last one)
        y = torch.Tensor([self.data[idx + 1, -1]])  # Target variable ('return' column for the next time step)
        return x, y








In [63]:
train_stock_data_return = preprocess_data(train_stock_data_return, sequence_length) 
valid_stock_data_return = preprocess_data(valid_stock_data_return, sequence_length) 

In [85]:
train_dataset = MyDataset(train_stock_data_return)
valid_dataset = MyDataset(valid_stock_data_return)

In [65]:
import torch
import torch.nn as nn

class MyLSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(MyLSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, 64)  # First linear layer
        self.fc2 = nn.Linear(64, output_size)  # Second linear layer

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)  # Initialize hidden state
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)  # Initialize cell state
        out, _ = self.lstm(x, (h0, c0))  # LSTM layer
        out = self.fc1(out[:, -1, :])  # Extract the last time step output and pass it through the first linear layer
        out = self.fc2(out)  # Second linear layer
        return out

# Example usage
input_size = 12  # Number of input features (columns except 'return')
hidden_size = 128  # Number of hidden units in each LSTM layer
num_layers = 2  # Number of LSTM layers
output_size = 1  # Number of output units (prediction for 'return')

model = MyLSTMModel(input_size, hidden_size, num_layers, output_size)

In [86]:
import torch
import torch.nn as nn
import torch.optim as optim

def train(model, train_dataset, val_dataset, num_epochs, batch_size, learning_rate, device):
    criterion = nn.MSELoss()  # Mean Squared Error loss
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)  # Adam optimizer
    best_val_loss = float('inf')  # Initialize best validation loss

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=batch_size)

    for epoch in range(num_epochs):
        model.train()  # Set model to train mode
        train_loss = 0.0

        for inputs, targets in train_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            optimizer.zero_grad()  # Clear gradients

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            train_loss += loss.item() * inputs.size(0)

        train_loss /= len(train_dataset)

        model.eval()  # Set model to evaluation mode
        val_loss = 0.0

        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs = inputs.to(device)
                targets = targets.to(device)

                # Forward pass
                outputs = model(inputs)
                loss = criterion(outputs, targets)

                val_loss += loss.item() * inputs.size(0)

            val_loss /= len(val_dataset)

        # Display statistics
        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

        # Save the best model based on validation loss
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), 'best_model.pth')

    print("Training completed!")

# Example usage
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Assuming you have train_dataset and val_dataset objects already defined

# Initialize the model
model = MyLSTMModel(input_size, hidden_size, num_layers, output_size)
model.to(device)

# Set hyperparameters
num_epochs = 10
batch_size = 64
learning_rate = 0.001

# Train the model
train(model, train_dataset, valid_dataset, num_epochs, batch_size, learning_rate, device)


ValueError: ignored