## Import

In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
import torch.optim as optim
from torch.autograd import Variable

## Custom Dataset
* Stock Dataset Class
* Transform Function

In [None]:
class StockDataset(Dataset):
    """Non-overlapping windows of spreadsheet rows paired with next-row labels.

    The sheet is cut into consecutive windows of ``seq_length`` rows.
    Positional columns 3..6 of each row are the features; positional
    column 7 onwards of the *following* row is the label, so the last row
    of the sheet can only ever be used as a label. The leading
    ``train_ratio`` share of the windows is the training split and the
    remaining windows are the test split.

    Labels are reshaped to ``(windows, seq_length, 1)``, which only succeeds
    when the sheet has exactly one label column.

    Args:
        seq_length: Rows per window; must be positive.
        file_path: Workbook handed to ``pandas.read_excel``.
        train: ``True`` selects the training split, ``False`` the test split.
        transform: Optional callable applied to both the features and the
            labels returned by indexing.
        train_ratio: Fraction of the windows placed in the training split.

    Raises:
        ValueError: If ``seq_length`` is not positive or ``train_ratio`` is
            outside ``[0, 1]``.
    """

    def __init__(self, seq_length, file_path, train=True, transform=None,
                 train_ratio=0.7):
        if seq_length <= 0:
            raise ValueError(f"seq_length must be positive, got {seq_length!r}")
        if not 0.0 <= train_ratio <= 1.0:
            raise ValueError(f"train_ratio must be in [0, 1], got {train_ratio!r}")

        # read file
        df = pd.read_excel(file_path)

        # The label of row i is taken from row i + 1, so the final row cannot
        # be a feature row; max() keeps an empty sheet from going negative.
        usable_rows = max(len(df) - 1, 0)
        total_size = usable_rows // seq_length  # whole windows only
        train_size = int(total_size * train_ratio)

        # Pick the row range of the requested split once instead of slicing
        # both splits and discarding one.
        if train:
            first_row, data_size = 0, train_size
        else:
            first_row, data_size = train_size * seq_length, total_size - train_size
        last_row = first_row + data_size * seq_length

        feature_rows = df.iloc[first_row:last_row, 3:7]          # data
        label_rows = df.iloc[first_row + 1:last_row + 1, 7:]     # label (next row)

        self.features = np.array(feature_rows).reshape(data_size, seq_length, 4)
        self.labels = np.array(label_rows).reshape(data_size, seq_length, 1)
        self.transform = transform

    def __len__(self):
        """Return the number of windows in this split."""
        return len(self.labels)

    def __getitem__(self, index):
        """Return ``(features, labels)`` for ``index``.

        ``index`` is forwarded to NumPy indexing, so an integer yields one
        window and a slice yields a stack of windows.
        """
        features = self.features[index]
        labels = self.labels[index]

        if self.transform:
            features = self.transform(features)
            labels = self.transform(labels)

        return features, labels

In [None]:
# Transform function
def toTensor(x):
  """Build a new ``torch.Tensor`` from array-like ``x`` using ``torch.tensor``."""
  tensor = torch.tensor(x)
  return tensor

## Dataset & Dataloader

In [None]:
# dataset
# Both splits read the same workbook with the same window length and
# transform; only the `train` flag differs.
shared_dataset_args = dict(
    file_path='stock.xlsx',
    seq_length=5,
    transform=toTensor,
)

train_dataset = StockDataset(train=True, **shared_dataset_args)
test_dataset = StockDataset(train=False, **shared_dataset_args)

In [None]:
# Slice windows 2..8 of the training split and show the resulting shapes.
features, labels = train_dataset[2:9]
features.shape, labels.shape

(torch.Size([7, 5, 4]), torch.Size([7, 5, 1]))

In [None]:
# dataloader
# Training batches are reshuffled on every pass; test batches keep their order.
train_loader = DataLoader(train_dataset, batch_size=5, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=5, shuffle=False)

In [None]:
# Peek at the first mini-batch only: its shape, then its raw values.
for idx, (seqs, labels) in enumerate(train_loader):
    print(seqs.shape, seqs, sep='\n')
    break

torch.Size([5, 5, 4])
tensor([[[3114.7700, 3117.6100, 3092.0900, 3103.0400],
         [3090.0700, 3093.4500, 3067.6900, 3078.6100],
         [3064.8500, 3084.2100, 3056.5600, 3080.5300],
         [3078.1700, 3090.8200, 3051.5900, 3052.7900],
         [3036.7900, 3063.5600, 3016.5300, 3061.5000]],

        [[3154.4000, 3159.5400, 3140.0400, 3153.0900],
         [3148.0900, 3167.4500, 3132.0300, 3166.9800],
         [3164.6900, 3186.8400, 3162.5700, 3183.1800],
         [3183.0100, 3205.0500, 3182.8000, 3196.7000],
         [3198.9900, 3219.4100, 3198.9900, 3216.8400]],

        [[3250.2500, 3262.2200, 3246.7000, 3261.6100],
         [3246.2200, 3255.7800, 3229.1300, 3245.2200],
         [3245.8100, 3262.0900, 3221.9300, 3248.5500],
         [3247.3500, 3275.2100, 3241.1200, 3269.4500],
         [3268.9200, 3283.2400, 3262.1200, 3266.9600]],

        [[3134.0100, 3155.9900, 3131.0400, 3153.7400],
         [3146.7500, 3149.1700, 3125.3500, 3130.6700],
         [3125.5900, 3137.5900, 3117.

## LSTM Model

In [None]:
# hyper-parameters (passed to the LSTM model constructor in a later cell)
# number of feature values per time step
input_size = 4
hidden_size = 32 # width of the LSTM hidden / cell state
# number of stacked LSTM layers
num_layers = 1
num_class = 1 # size of the output layer (values produced per time step)

In [None]:
class LSTM(nn.Module):
  """BatchNorm -> LSTM -> Linear -> Sigmoid, producing one output per time step.

  Args:
    input_size: Number of features per time step.
    hidden_size: Width of the LSTM hidden and cell state.
    num_layers: Number of stacked LSTM layers.
    num_class: Number of values emitted per time step.
    seq_length: Length of the input windows. ``BatchNorm1d`` normalises over
      dimension 1 of the batch-first input, which is the time axis here, so
      the layer has to be sized to the window length. Defaults to 5.

  Input ``(batch, seq_length, input_size)`` maps to an output of shape
  ``(batch, seq_length, num_class)`` with every value in ``(0, 1)``.
  """

  def __init__(self, input_size, hidden_size, num_layers, num_class, seq_length=5):
    super(LSTM, self).__init__()

    self.hidden_size = hidden_size
    self.num_layers = num_layers

    self.normal = nn.BatchNorm1d(seq_length, affine=True)
    # batch_first: the batch is the first dimension of the input and output
    self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
    self.fc = nn.Linear(hidden_size, num_class)
    self.sigmoid = nn.Sigmoid()

  def forward(self, x):
    """Run a batch of windows through the network and return probabilities."""
    # Zero initial states sized from this module's own configuration (not
    # from notebook globals) and created on the input's device and dtype.
    # layer num, batch size, hidden size
    state_shape = (self.num_layers, x.size(0), self.hidden_size)
    h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
    c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)

    out = self.normal(x)
    out, _ = self.lstm(out, (h0, c0))  # final (h, c) states are not needed

    out = self.fc(out)
    out = self.sigmoid(out)

    return out

In [None]:
# Build the network from the hyper-parameters above; the bare name on the
# last line makes the notebook display the module summary.
model = LSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_class=num_class,
)
model

LSTM(
  (normal): BatchNorm1d(5, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (lstm): LSTM(4, 32, batch_first=True)
  (fc): Linear(in_features=32, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

In [None]:
# Smoke test: push a single mini-batch through the model and compare shapes.
# no_grad: this is only a shape check, so no autograd graph is needed.
with torch.no_grad():
    for idx, (seqs, labels) in enumerate(train_loader):
        # Named `batch` rather than `input` so the builtin input() is not
        # shadowed for the rest of the session. Cast to float32 to match the
        # model's parameters.
        batch = seqs.float()
        output = model(batch)
        print('Input Shape:', seqs.shape)
        print('Output Shape:', output.size())
        break

Input Shape: torch.Size([5, 5, 4])
Output Shape: torch.Size([5, 5, 1])


## Loss Function & Optimizer

In [None]:
# Binary cross-entropy on the model's sigmoid outputs, optimised with Adam.
loss_func = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

## Training Model

In [None]:
num_epochs = 2000
log_every = 100  # print the per-batch loss once every `log_every` epochs

# Make sure BatchNorm normalises with batch statistics even if the model was
# switched to evaluation mode by an earlier cell run.
model.train()

for epoch in range(num_epochs):
  log_this_epoch = (epoch + 1) % log_every == 0

  for idx, (seqs, labels) in enumerate(train_loader):
    # Tensors take part in autograd directly; the deprecated Variable wrapper
    # is not needed. BCELoss needs float targets of the same dtype as the
    # model output, hence the float32 casts.
    seqs = seqs.float()
    labels = labels.float()

    optimizer.zero_grad()
    outputs = model(seqs)
    loss = loss_func(outputs, labels)
    loss.backward()
    optimizer.step()

    if log_this_epoch:
      # .item() extracts the Python float without touching autograd state
      print('Epoch: %d, batch: %d, Loss: %.4f'%(epoch+1, idx+1, loss.item()))

Epoch: 100, batch: 1, Loss: 0.6633
Epoch: 100, batch: 2, Loss: 0.6745
Epoch: 100, batch: 3, Loss: 0.6709
Epoch: 100, batch: 4, Loss: 0.7088
Epoch: 100, batch: 5, Loss: 0.6450
Epoch: 200, batch: 1, Loss: 0.6605
Epoch: 200, batch: 2, Loss: 0.6229
Epoch: 200, batch: 3, Loss: 0.6982
Epoch: 200, batch: 4, Loss: 0.6500
Epoch: 200, batch: 5, Loss: 0.7155
Epoch: 300, batch: 1, Loss: 0.6665
Epoch: 300, batch: 2, Loss: 0.6383
Epoch: 300, batch: 3, Loss: 0.6433
Epoch: 300, batch: 4, Loss: 0.6773
Epoch: 300, batch: 5, Loss: 0.6861
Epoch: 400, batch: 1, Loss: 0.6353
Epoch: 400, batch: 2, Loss: 0.6617
Epoch: 400, batch: 3, Loss: 0.6743
Epoch: 400, batch: 4, Loss: 0.6759
Epoch: 400, batch: 5, Loss: 0.6323
Epoch: 500, batch: 1, Loss: 0.6827
Epoch: 500, batch: 2, Loss: 0.7038
Epoch: 500, batch: 3, Loss: 0.6479
Epoch: 500, batch: 4, Loss: 0.6198
Epoch: 500, batch: 5, Loss: 0.6691
Epoch: 600, batch: 1, Loss: 0.6419
Epoch: 600, batch: 2, Loss: 0.6698
Epoch: 600, batch: 3, Loss: 0.6369
Epoch: 600, batch: 4

## Testing Model

In [None]:
correct = 0
total = 0

# Evaluate in eval mode so BatchNorm uses the running statistics learned
# during training instead of per-batch test statistics (and does not update
# them from test data). The previous mode is restored afterwards.
was_training = model.training
model.eval()
try:
  with torch.no_grad():  # inference only: skip building the autograd graph
    for seqs, labels in test_loader:
      outputs = model(seqs.float())

      predicted = outputs.gt(0.5) # bool
      total += labels.numel()
      correct += (predicted == labels).sum().item()
finally:
  model.train(was_training)

# True division: the former `//` floored the accuracy to a whole percent.
accuracy = 100.0 * correct / total if total else float('nan')
print('Acc: %.3f %%' % accuracy)

Acc: 76.000 %
