In [2]:
import sys, os
sys.path.append(os.getcwd())

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
import plotly.graph_objects as go
import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

from utils.data_generator import dataset_generator
from utils.preprocessing import (
    minmax_normalization,
    load_data
)

# USD Stages

## Fetching Dataset

In [4]:
START_DATE="2016-01-01"
END_DATE="2023-01-01"
TICKER="USDIDR=x"

In [5]:
# PLOTTING USD-IDR
df_usd = dataset_generator(START_DATE, END_DATE, TICKER)

In [6]:
df_usd

Unnamed: 0,high,open,low,close,volume
1451520000,13800.000000,13740.000000,13725.000000,13845.000000,0.0
1451606400,13775.000000,13775.000000,13775.000000,13775.000000,0.0
1451865600,13928.000000,13845.000000,13815.000000,13775.000000,0.0
1451952000,13930.000000,13930.000000,13750.000000,13957.000000,0.0
1452038400,13960.000000,13800.000000,13795.000000,13805.000000,0.0
...,...,...,...,...,...
1672012800,15637.500000,15565.900391,15554.299805,15565.900391,0.0
1672099200,15687.299805,15607.000000,15600.299805,15607.000000,0.0
1672185600,15795.299805,15617.500000,15615.000000,15617.500000,0.0
1672272000,15789.000000,15789.000000,15598.099609,15789.000000,0.0


### Candlestick Chart

In [7]:
from utils.visualization import candle_stick, multiple_line_chart

In [8]:
candle_stick(df_usd, "USD to IDR")

### Line Chart (Close Prices)

In [9]:
multiple_line_chart([df_usd,], tickers=["USD to IDR"])

## Data Preprocessing

In [10]:
df_usd = minmax_normalization(df_usd)
df_usd



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,close,scaled
1451520000,13845.000000,-0.424333
1451606400,13775.000000,-0.461819
1451865600,13775.000000,-0.461819
1451952000,13957.000000,-0.364357
1452038400,13805.000000,-0.445754
...,...,...
1672012800,15565.900391,0.497215
1672099200,15607.000000,0.519224
1672185600,15617.500000,0.524847
1672272000,15789.000000,0.616686


In [11]:
look_back = 14
X_train, y_train, X_test, y_test = load_data(df_usd[['scaled']], look_back)

X_train size (1450, 13, 1)
y_train size (1450, 1)
X_test size (363, 13, 1)
y_test size (363, 1)


In [12]:
# generate train and test sets in torch
X_train = torch.from_numpy(X_train).type(torch.float)
y_train = torch.from_numpy(y_train).type(torch.float)
X_test = torch.from_numpy(X_test).type(torch.float)
y_test = torch.from_numpy(y_test).type(torch.float)

In [13]:
X_train.shape

torch.Size([1450, 13, 1])

### Generate Model

In [24]:
# Build model
#####################
input_dim = 1
hidden_dim = 32
num_layers = 2 
output_dim = 1

num_epochs = 100

In [17]:
config = {
    "alpha_vantage": {
        "key": "YOUR_API_KEY", # Claim your free API key here: https://www.alphavantage.co/support/#api-key
        "symbol": "IBM",
        "outputsize": "full",
        "key_adjusted_close": "5. adjusted close",
    },
    "data": {
        "window_size": 20,
        "train_split_size": 0.80,
    }, 
    "plots": {
        "show_plots": True,
        "xticks_interval": 90,
        "color_actual": "#001f3f",
        "color_train": "#3D9970",
        "color_val": "#0074D9",
        "color_pred_train": "#3D9970",
        "color_pred_val": "#0074D9",
        "color_pred_test": "#FF4136",
    },
    "model": {
        "input_size": 1, # since we are only using 1 feature, close price
        "num_lstm_layers": 2,
        "lstm_size": 32,
        "dropout": 0.2,
    },
    "training": {
        "device": "cpu", # "cuda" or "cpu"
        "batch_size": 64,
        "num_epoch": 100,
        "learning_rate": 0.01,
        "scheduler_step_size": 40,
    }
}


In [33]:
class LSTMModel(nn.Module):

    def __init__(self, input_size=1, hidden_layer_size=32, num_layers=2, output_size=1, dropout=.2):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        
        self.linear_1 = nn.Linear(input_size, hidden_layer_size)
        self.relu = nn.ReLU()
        self.lstm = nn.LSTM(hidden_layer_size, hidden_size=self.hidden_layer_size, num_layers=num_layers)
        self.dropout = nn.Dropout(dropout)
        
        self.linear_2 = nn.Linear(num_layers * hidden_layer_size, output_size)

        self.init_weights()

    def init_weights(self):
        for name, params in self.lstm.named_parameters():
            if 'bias' in name:
                nn.init.constant_(params, .0)
            elif 'weight_ih' in name:
                nn.init.kaiming_normal_(params)
            elif 'weight_hh' in name:
                nn.init.orthogonal_(params)
    
    def forward(self, x):
        batchsize = x.shape[0]

        x = self.linear_1(x)
        x = self.relu(x)

        # layer 1
        lstm_out, (h_n, c_n) = self.lstm(x)
        
        # reshape output from hidden cell into [batch, features] for `linear_2`
        x = h_n.permute(1, 0, 2).reshape(batchsize, -1)

        # layer 2
        x = self.dropout(x)
        predictions = self.linear_2(x)
        return predictions[:, -1]

In [31]:
def run_epoch(dataloader, is_training=False):
    epoch_loss = .0

    if is_training:
        model.train()
    else:
        model.eval()

    for idx, (x, y) in enumerate(dataloader):
        if is_training:
            optimizer.zero_grad()
        
        batchsize = x.shape[0]

        x = x.to(config['training']['device'])
        y = y.to(config['training']['device'])

        out = model(x)
        loss = criterion(out.contiguous(), y.contiguous())

        if is_training:
            loss.backward()
            optimizer.step()
        
        epoch_loss += (loss.detach().item() / batchsize)

    lr = scheduler.get_last_lr()[0]

    return epoch_loss, lr

In [34]:
model = LSTMModel(
    input_size=config["model"]["input_size"], 
    hidden_layer_size=config["model"]["lstm_size"],
    num_layers=config["model"]["num_lstm_layers"], 
    output_size=1, 
    dropout=config["model"]["dropout"]
)

In [26]:
nn.ReLU()


ReLU()

In [35]:
model.eval()

LSTMModel(
  (linear_1): Linear(in_features=1, out_features=32, bias=True)
  (relu): ReLU()
  (lstm): LSTM(32, 32, num_layers=2)
  (dropout): Dropout(p=0.2, inplace=False)
  (linear_2): Linear(in_features=64, out_features=1, bias=True)
)

In [49]:
predicted_train = np.array([])

for idx, x in enumerate(X_train):
    x = x.to(config["training"]["device"])
    out = model(x)
    out = out.cpu().detach().numpy()
    predicted_train = np.concatenate((predicted_train, out))

RuntimeError: permute(sparse_coo): number of dimensions in the tensor input does not match the length of the desired ordering of dimensions i.e. input.dim() = 2 is not equal to len(dims) = 3

In [47]:
for idx, data in enumerate(X_train):
    print(idx, data.shape)
    if idx == 9:
        break

0 torch.Size([13, 1])
1 torch.Size([13, 1])
2 torch.Size([13, 1])
3 torch.Size([13, 1])
4 torch.Size([13, 1])
5 torch.Size([13, 1])
6 torch.Size([13, 1])
7 torch.Size([13, 1])
8 torch.Size([13, 1])
9 torch.Size([13, 1])


In [50]:
data.to(config["training"]["device"]).shape

torch.Size([13, 1])

In [27]:
class LSTM(nn.Module):

    def __init__(self, input_size=1, hidden_layer_size=32, num_layers=2, output_size=1, dropout=0.2):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size

        self.linear_1 = nn.Linear(input_size, hidden_layer_size)
        self.relu = nn.ReLU()
    
        self.lstm = nn.LSTM(hidden_layer_size, hidden_size=self.hidden_layer_size, num_layers=num_layers, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.linear_2 = nn.Linear(num_layers*hidden_layer_size, output_size)
        
        self.init_weights()

    def init_weights(self):
        for name, param in self.lstm.named_parameters():
            if 'bias' in name:
                 nn.init.constant_(param, 0.0)
            elif 'weight_ih' in name:
                 nn.init.kaiming_normal_(param)
            elif 'weight_hh' in name:
                 nn.init.orthogonal_(param)

    def forward(self, x):
        batchsize = x.shape[0]

        # layer 1
        x = self.linear_1(x)
        x = self.relu(x)
        
        # LSTM layer
        lstm_out, (h_n, c_n) = self.lstm(x)

        # reshape output from hidden cell into [batch, features] for `linear_2`
        x = h_n.permute(1, 0, 2).reshape(batchsize, -1) 
        
        # layer 2
        x = self.dropout(x)
        predictions = self.linear_2(x)
        return predictions[:,-1]

In [28]:
model = LSTM(input_size=input_dim, hidden_layer_size=hidden_dim, output_size=output_dim, num_layers=num_layers)
criterion = torch.nn.MSELoss(reduction='mean')
optimiser = torch.optim.Adam(model.parameters(), lr=0.001)

NameError: name 'input_dim' is not defined

In [18]:
def run_epoch(dataloader, is_training=False):
    epoch_loss = 0

    if is_training:
        model.train()
    else:
        model.eval()

    for idx, (x, y) in enumerate(dataloader):
        if is_training:
            optimizer.zero_grad()

        batchsize = x.shape[0]

        x = x.to(config["training"]["device"])
        y = y.to(config["training"]["device"])

        out = model(x)
        loss = criterion(out.contiguous(), y.contiguous())

        if is_training:
            loss.backward()
            optimizer.step()

        epoch_loss += (loss.detach().item() / batchsize)

    lr = scheduler.get_last_lr()[0]

    return epoch_loss, lr

train_dataloader = X_train
val_dataloader = X_test

model = LSTM(input_size=config["model"]["input_size"], hidden_layer_size=config["model"]["lstm_size"], num_layers=config["model"]["num_lstm_layers"], output_size=1, dropout=config["model"]["dropout"])
model = model.to(config["training"]["device"])

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=config["training"]["learning_rate"], betas=(0.9, 0.98), eps=1e-9)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=config["training"]["scheduler_step_size"], gamma=0.1)

for epoch in range(config["training"]["num_epoch"]):
    loss_train, lr_train = run_epoch(train_dataloader, is_training=True)
    loss_val, lr_val = run_epoch(val_dataloader)
    scheduler.step()
    
    print('Epoch[{}/{}] | loss train:{:.6f}, test:{:.6f} | lr:{:.6f}'
              .format(epoch+1, config["training"]["num_epoch"], loss_train, loss_val, lr_train))

NameError: name 'LSTM' is not defined

In [51]:
# Train model
#####################
num_epochs = 100
hist = np.zeros(num_epochs)

# Number of steps to unroll
seq_dim =look_back-1  

for t in range(num_epochs):
    # Initialise hidden state
    # Don't do this if you want your LSTM to be stateful
    #model.hidden = model.init_hidden()
    
    # Forward pass
    y_train_pred = model(X_train)

    loss = loss_fn(y_train_pred, y_train)
    if t % 10 == 0 and t !=0:
        print("Epoch ", t, "MSE: ", loss.item())
    hist[t] = loss.item()

    # Zero out gradient, else they will accumulate between epochs
    optimiser.zero_grad()

    # Backward pass
    loss.backward()

    # Update parameters
    optimiser.step()

Epoch  10 MSE:  nan
Epoch  20 MSE:  nan
Epoch  30 MSE:  nan
Epoch  40 MSE:  nan
Epoch  50 MSE:  nan
Epoch  60 MSE:  nan
Epoch  70 MSE:  nan
Epoch  80 MSE:  nan
Epoch  90 MSE:  nan


In [46]:
model(X_train)

tensor([[nan],
        [nan],
        [nan],
        ...,
        [nan],
        [nan],
        [nan]], grad_fn=<AddmmBackward0>)

In [47]:
X_train.shape, X_test.shape

(torch.Size([1450, 13, 1]), torch.Size([363, 13, 1]))

In [49]:
y_train.shape, y_test.shape

(torch.Size([1450, 1]), torch.Size([363, 1]))

# JPY Exploration

In [54]:
## PLOTTING JPY-IDR
TICKER = "JPYIDR=X"
DF = dataset_generator(START_DATE, END_DATE, TICKER)

In [51]:
# Fungsi untuk merubah angka outlier
def detect_outlisr_jpy(column):
    return column.apply(lambda value: value / 100 if value > 10000 else ( value * 100 if value < 10 else value))


In [52]:
list_of_attr = ['open', 'close', 'volume', 'high', 'low']
DF[list_of_attr] = DF[list_of_attr].apply(detect_outlisr_jpy)

In [56]:
DF['timestamp'] = pd.to_datetime(DF.index, unit='s')
fig = go.Figure(data=[go.Candlestick(x=DF['timestamp'],
                                     open=DF['open'],
                                     high=DF['high'],
                                     low=DF['low'],
                                     close=DF['close'])])

fig.update_layout(
    title=f'Candlestick Chart {TICKER}',
    xaxis_title='Date',
    yaxis_title='Price',
    width=1000,
    height=800
)

fig.show()

In [47]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size):
        super(LSTMModel, self).__init__()
        
        self.hidden_sizes = hidden_sizes
        
        self.lstm = nn.LSTM(input_size, hidden_sizes[0], num_layers=3, batch_first=True)
        self.hidden_layers = nn.ModuleList(
            [ nn.Linear(hidden_sizes[i], hidden_sizes[i+1]) for i in range(len(hidden_sizes)-1)]
        )
        self.fc = nn.Linear(hidden_sizes[-1], output_size)
        
    def forward(self, x):
        batch_size = x.size(0)
        seq_len = x.size(1)
        
        h0 = torch.zeros(3, batch_size, self.hidden_sizes[0]).to(x.device)
        c0 = torch.zeros(3, batch_size, self.hidden_sizes[0]).to(x.device)
        
        out, _ = self.lstm(x, (h0, c0))
        
        for layer in self.hidden_layers:
            out = layer(out)
            out = torch.relu(out)
        
        out = self.fc(out[:, -1, :])  # Mengambil output terakhir dari LSTM
        
        return out
# Membangun model
input_size = 7
hidden_sizes = [3, 4, 5]
output_size = 1

model = LSTMModel(input_size, hidden_sizes, output_size)

In [11]:
model

LSTMModel(
  (lstm): LSTM(7, 3, num_layers=3, batch_first=True)
  (hidden_layers): ModuleList(
    (0): Linear(in_features=3, out_features=4, bias=True)
    (1): Linear(in_features=4, out_features=5, bias=True)
  )
  (fc): Linear(in_features=5, out_features=1, bias=True)
)

In [44]:
def train(model, train_loader, criterion, optimizer, num_epochs):
    model.train()
    
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            
            inputs = inputs.squeeze(0)
        
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
        
            optimizer.step()
        
            running_loss += loss.item()
        
        epoch_loss = running_loss / len(train_loader)
        print(f"Epoch {epoch+1} / {num_epochs}, Loss = {epoch_loss:.4f}")

In [20]:
def test(model, test_loader):
    model.eval()
    predictions = []
    targets = []
    
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            
            predictions.extend(output.squeeze().tolist())
            targets.extend(labels.squeeze().tolist())
    
    return predictions, targets

In [40]:
class ForexDataset(Dataset):
    def __init__(self, data):
        self.data = data
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        sample = self.data[index]
        inputs = torch.Tensor(sample[:-1])
        label = torch.Tensor([sample[-1]])
        return inputs, label


# Contoh penggunaan

# Data latih acak
train_data = [
    [1.2, 2.3, 3.4, 4.5, 5.6, 6.7],
    [2.1, 3.2, 4.3, 5.4, 6.5, 7.6],
    [3.4, 4.5, 5.6, 6.7, 7.8, 8.9],
    # ... tambahkan data latih lainnya
]

# Data uji acak
test_data = [
    [4.5, 5.6, 6.7, 7.8, 8.9, 9.0],
    [5.6, 6.7, 7.8, 8.9, 9.0, 9.1],
    # ... tambahkan data uji lainnya
]

# Inisialisasi dataset dan loader
train_dataset = ForexDataset(train_data)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = ForexDataset(test_data)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

In [41]:
# implementations

In [42]:
criterion = nn.MSELoss()
learning_rate = 0.001
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [48]:
num_epoch = 10
train(model, train_loader, criterion, optimizer, num_epoch)

RuntimeError: For unbatched 2-D input, hx and cx should also be 2-D but got (3-D, 3-D) tensors