<a href="https://colab.research.google.com/github/pyagoubi/Stuff/blob/main/stockpred.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive so that files stored under
# /content/drive/MyDrive can be read by the cells below.
# NOTE(review): this cell is Colab-specific (it imports google.colab).
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
%%capture
!pip install bt

In [None]:
%matplotlib inline
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils import data
from torch.utils.data import dataloader
#import bt as bt
import os, sys, itertools, urllib, io
import datetime as dt
import pandas as pd
import pandas_datareader as dr
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler, StandardScaler
from torch.utils.data import Dataset

In [21]:
# Load the daily technical-indicator table and keep only the modelling columns.
tech_daily_raw = pd.read_csv('/content/drive/MyDrive/stock predict/technical/1D_technical.csv')
# Alternative, wider feature set (kept for reference):
#features = ['open', 'high', 'low', 'close', 'rsi', 'adx', 'cci', 'ema', 'stoch', 'trend_macd', 'momentum_stoch', 'volatility_atr']
features = ['rsi', 'adx', 'cci', 'ema', 'stoch', 'close']
# .copy() so that later column assignments do not write into a view of the raw frame.
tech_daily = tech_daily_raw[features].copy()

# init deterministic seed
seed_value = 1234
np.random.seed(seed_value) # set numpy seed
torch.manual_seed(seed_value); # set pytorch seed CPU

# set cpu or gpu enabled device (stored as the plain string 'cuda' or 'cpu')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu').type

# init deterministic GPU seed
torch.cuda.manual_seed(seed_value)

# log type of device enabled
# datetime.utcnow() is deprecated since Python 3.12; an aware UTC timestamp
# formats to exactly the same string.
now = dt.datetime.now(dt.timezone.utc).strftime("%Y%m%d-%H:%M:%S")
print('[LOG {}] notebook with \'{}\' computation enabled'.format(str(now), str(device)))





[LOG 20230517-06:57:26] notebook with 'cuda' computation enabled


In [None]:
# Daily log return: ln(close_t) - ln(close_{t-1}). The first row has no
# predecessor, so its return is NaN.
tech_daily['return'] = np.log(tech_daily['close']).diff()

# Append the target column name only once: re-running this cell must not add a
# second 'return' entry (which would duplicate the DataFrame column below).
if 'return' not in features:
    features = features + ['return']
#features = ['return']

# .copy() keeps tech_daily an independent frame, so assigning the 'return'
# column again on a re-run does not operate on a slice of the previous frame.
tech_daily = tech_daily[features].copy()

In [22]:
#scaler = MinMaxScaler()
scaler = StandardScaler()
# Fit the scaler on the training rows only. Fitting on the whole table would
# leak the mean/variance of the validation period into the training inputs.
# The boundary below must stay in sync with `split_fraction` (0.8) used by the
# train/validation split in the next cell.
n_fit_rows = int(tech_daily.shape[0] * 0.8)
scaler.fit(tech_daily.iloc[:n_fit_rows])
# Every row (train and validation) is then transformed with the train statistics.
tech_daily_scaled = pd.DataFrame(scaler.transform(tech_daily), columns = features)

In [23]:
# Chronological split: the leading 80 % of rows train the model, the rest validate it.
split_fraction = 0.8
split_row = int(len(tech_daily) * split_fraction)
train_stock_data_return, valid_stock_data_return = (
    tech_daily_scaled.iloc[:split_row],
    tech_daily_scaled.iloc[split_row:],
)

In [24]:
# Drop the first row of the training partition: its log return is NaN because
# there is no previous close to difference against.
# The slice is rebuilt from tech_daily_scaled (rather than re-slicing the
# variable itself) so that re-running this cell never drops additional rows.
train_stock_data_return = tech_daily_scaled.iloc[1:split_row]

In [25]:
# Windowing hyper-parameters.
# NOTE(review): `sequence_length` (time_steps + horizon = 5) is what is passed
# to create_sequences as the input-window length, so every model input spans 5
# rows and the target is the row after it -- confirm that a 5-row (not 4-row)
# predictor window is intended.
time_steps = 4 # number of predictor timesteps
horizon = 1 # number of timesteps to be predicted
sequence_length = time_steps + horizon # total window length handed to create_sequences

In [26]:
import numpy as np
import torch

def create_sequences(df, seq_length, target_col=-1):
    """Cut a 2-D table into overlapping input windows and next-step targets.

    Window ``i`` consists of rows ``i .. i + seq_length - 1`` (all columns);
    its target is the value of column ``target_col`` in row ``i + seq_length``,
    i.e. one step after the window ends.

    Args:
        df: pandas DataFrame or 2-D array-like of shape (n_rows, n_features).
        seq_length: number of consecutive rows in each input window.
        target_col: positional index of the column to predict. Defaults to the
            last column (the 'return' column in this notebook).

    Returns:
        Tuple ``(X, y)`` of float64 tensors with shapes
        ``(n_rows - seq_length, seq_length, n_features)`` and
        ``(n_rows - seq_length, 1)``.

    Raises:
        ValueError: if ``seq_length`` is negative or larger than the number of
            rows, so that no window layout exists.
    """
    # Accept both DataFrames (via .values) and plain arrays.
    values = np.asarray(getattr(df, 'values', df), dtype=np.float64)

    n_rows = values.shape[0]
    n_windows = n_rows - seq_length
    if seq_length < 0 or n_windows < 0:
        raise ValueError(
            f'seq_length={seq_length} is not usable with {n_rows} rows of data'
        )

    # Row indices of every window, shape (n_windows, seq_length); fancy indexing
    # gathers all windows at once and returns a fresh (writable) array.
    window_idx = np.arange(n_windows)[:, None] + np.arange(seq_length)[None, :]
    xs = values[window_idx]

    # Target of window i is row i + seq_length; copy so the tensor does not
    # alias the caller's data.
    ys = values[seq_length:, target_col].reshape(-1, 1).copy()

    # Convert to PyTorch tensors
    X = torch.from_numpy(xs)
    y = torch.from_numpy(ys)

    return X, y

In [27]:
# Build (window, next-step target) tensors separately for each partition so
# that no window straddles the train/validation boundary.
# Each window holds `sequence_length` rows of every feature column; the target
# is the last column of the row that follows the window.
train, train_target = create_sequences(train_stock_data_return, sequence_length)
valid, valid_target = create_sequences(valid_stock_data_return, sequence_length)

In [28]:
from torch.utils.data import Dataset

class SequenceDataset(Dataset):
    """Map-style dataset that pairs each input sequence with its target.

    ``sequences`` and ``targets`` are stored as given and indexed in parallel;
    item ``idx`` is the tuple ``(sequences[idx], targets[idx])``.
    """

    def __init__(self, sequences, targets):
        self.sequences, self.targets = sequences, targets

    def __len__(self):
        # The number of samples is defined by the number of targets.
        n_samples = len(self.targets)
        return n_samples

    def __getitem__(self, idx):
        sequence = self.sequences[idx]
        target = self.targets[idx]
        return sequence, target

In [33]:
import torch
import torch.nn as nn

class MultivariateLSTM(nn.Module):
    """Stacked LSTM followed by a two-layer linear head.

    The head reads the LSTM output of the final time step only and maps it
    through ``hidden_size -> 10 -> output_size``.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        output_size: number of values predicted per sequence.

    NOTE(review): there is no non-linearity between ``fc1`` and ``fc2``, so the
    head is equivalent to a single linear map -- confirm this is intended.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)

        self.fc1 = nn.Linear(hidden_size, 10)
        # Honour `output_size`; it used to be ignored (the layer was fixed to 1).
        self.fc2 = nn.Linear(10, output_size)

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_len, input_size) to (batch, output_size)."""
        # When no initial state is passed, nn.LSTM starts from zero hidden and
        # cell states created with the input's dtype and device.
        out, _ = self.lstm(x)

        # Only the representation of the last time step feeds the head.
        out = self.fc1(out[:, -1, :])
        out = self.fc2(out)

        return out

In [30]:
class RMSELoss(nn.Module):
    """Root-mean-squared-error loss: ``sqrt(MSE(yhat, y) + eps)``.

    Args:
        eps: small constant added under the square root. The derivative of
            ``sqrt`` is infinite at 0, so without it a batch with exactly zero
            error produces NaN gradients. Pass ``eps=0.0`` for the plain RMSE.
    """

    def __init__(self, eps=1e-8):
        super().__init__()
        self.mse = nn.MSELoss()
        self.eps = eps

    def forward(self, yhat, y):
        """Return the scalar RMSE between predictions ``yhat`` and targets ``y``."""
        return torch.sqrt(self.mse(yhat, y) + self.eps)

In [None]:
from torch.utils.data import DataLoader
import copy

def _predict_in_order(model, dataset, device, batch_size):
    """Return the model outputs for ``dataset`` as a flat list, in dataset order."""
    # shuffle=False is essential: the predictions are later aligned with the
    # time index, which is impossible if they come back in random order.
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    predictions = []
    model.eval()
    with torch.no_grad():
        for sequences, _ in loader:
            outputs = model(sequences.float().to(device))
            predictions.extend(outputs.cpu().numpy().flatten().tolist())
    return predictions


def train_model(model, train, train_target, valid, valid_target, learning_rate, num_epochs,
                batch_size=32, checkpoint_path='best_model.pth'):
    """Train ``model`` with Adam on an RMSE loss and keep the best validation weights.

    After every epoch the model is evaluated on the validation set; whenever the
    validation loss improves, the weights are remembered (and written to
    ``checkpoint_path``). At the end the best weights are restored, the loss
    curves are plotted, and predictions for both sets are generated.

    Args:
        model: the ``nn.Module`` to train; moved to CUDA when available.
        train, train_target: training input windows and their targets.
        valid, valid_target: validation input windows and their targets.
        learning_rate: Adam learning rate.
        num_epochs: number of passes over the training set.
        batch_size: mini-batch size for training, validation and prediction.
        checkpoint_path: file the best ``state_dict`` is saved to; ``None``
            disables saving.

    Returns:
        ``(train_predictions, valid_predictions)``: flat lists of model outputs
        produced by the best model, each in the same order as its input windows.

    Note:
        The reported epoch losses are the mean of the per-batch RMSE values,
        which is not identical to the RMSE over the whole epoch.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    #criterion = nn.MSELoss()
    criterion = RMSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    train_dataset = SequenceDataset(train, train_target)
    val_dataset = SequenceDataset(valid, valid_target)

    # Training batches are shuffled; validation keeps chronological order.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = float('inf')
    # Stays None when no epoch improves (num_epochs == 0 or NaN losses), so the
    # summary print below can never hit an unbound variable.
    best_epoch = None

    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        model.train()
        train_epoch_losses = []
        for sequences, targets in train_loader:
            sequences = sequences.float().to(device)  # Convert to float
            targets = targets.float().to(device)  # Convert to float

            # Forward pass
            outputs = model(sequences)
            loss = criterion(outputs, targets)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_epoch_losses.append(loss.item())

        model.eval()
        val_epoch_losses = []
        with torch.no_grad():
            for sequences, targets in val_loader:
                sequences = sequences.float().to(device)  # Convert to float
                targets = targets.float().to(device)  # Convert to float

                outputs = model(sequences)
                loss = criterion(outputs, targets)
                val_epoch_losses.append(loss.item())

        train_loss = np.mean(train_epoch_losses)
        val_loss = np.mean(val_epoch_losses)

        train_losses.append(train_loss)
        val_losses.append(val_loss)

        print(f'Epoch {epoch+1}/{num_epochs}, '
              f'Train Loss: {train_loss:.4f}, '
              f'Validation Loss: {val_loss:.4f}')

        if val_loss < best_loss:
            best_loss = val_loss
            best_model_wts = copy.deepcopy(model.state_dict())
            if checkpoint_path is not None:
                torch.save(model.state_dict(), checkpoint_path)  # Save the model
            # 1-based, matching the "Epoch k/N" numbering printed above.
            best_epoch = epoch + 1

    # load best model weights
    model.load_state_dict(best_model_wts)

    print(f'Best Loss {best_loss}, Epoch = {best_epoch}')

    # Plot the train loss and validation loss per epoch
    plt.figure(figsize=(12, 8))
    plt.plot(train_losses, label='Train Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.title('Loss per epoch')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

    # generate predictions based on the best model, in chronological order
    train_predictions = _predict_in_order(model, train_dataset, device, batch_size)
    valid_predictions = _predict_in_order(model, val_dataset, device, batch_size)

    return train_predictions, valid_predictions

# Build the model and train it.
num_features = train.shape[-1]  # last axis of the window tensor = number of feature columns
# Hyper-parameters: 2 stacked LSTM layers with 40 hidden units, one predicted value per window.
model = MultivariateLSTM(input_size=num_features, hidden_size=40, num_layers=2, output_size=1)
# Returns the predictions of the best-validation-loss model for both partitions.
train_predictions, valid_predictions = train_model(model, train, train_target, valid, valid_target, learning_rate=0.001, num_epochs=100)


In [63]:
def _inverse_transform_target(scaled_values, fitted_scaler, column_index, n_columns):
    """Undo the scaling of a single column of a scaler fit on ``n_columns`` columns.

    The scaler was fit on the full feature table, so it only accepts 2-D input
    with ``n_columns`` columns. The 1-D predictions are therefore placed in
    their column of a zero-padded matrix, inverse-transformed, and read back.
    This relies on the scaler treating every column independently (true for
    StandardScaler and MinMaxScaler).

    Returns a plain Python list of un-scaled values.
    """
    scaled_values = np.asarray(scaled_values, dtype=float).ravel()
    if scaled_values.size == 0:
        return []
    padded = np.zeros((scaled_values.shape[0], n_columns))
    padded[:, column_index] = scaled_values
    return fitted_scaler.inverse_transform(padded)[:, column_index].tolist()


# The model predicts the (scaled) 'return' column.
return_column = features.index('return')
# NOTE: this cell overwrites its inputs -- run it once per training run, otherwise
# the predictions are un-scaled twice.
train_predictions = _inverse_transform_target(train_predictions, scaler, return_column, len(features))
valid_predictions = _inverse_transform_target(valid_predictions, scaler, return_column, len(features))

# The first `sequence_length` rows have no prediction (they only serve as input
# to the first window), hence the leading NaN padding.
# NOTE(review): the purpose of the trailing NaN padding is not evident from this
# cell -- confirm the intended alignment before plotting against the index.
preds = [np.nan] * sequence_length + train_predictions + [np.nan] * sequence_length




[nan, nan, nan, nan, nan]