In [2]:
import numpy as np
import pandas as pd
import datetime
import torch
import pytorch_lightning as pl
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset,DataLoader
from matplotlib import pyplot as plt
from scipy.interpolate import make_interp_spline, BSpline
from sklearn.model_selection import train_test_split

import os
# os.listdir() returns entries in arbitrary order; sort them so the printed
# listing is identical on every run and platform.
print(sorted(os.listdir("./datasets")))



['AUDUSD-2000-2020-15m.csv', 'EURCHF-2000-2020-15m.csv', 'EURJPY-2000-2020-15m.csv', 'EURUSD-2000-2020-15m.csv', 'EUR_USD Historical Data.csv', 'USDCAD-2000-2020-15m.csv', 'USDCHF-2000-2020-15m.csv', 'USDJPY-2000-2020-15m.csv']


In [3]:
class ArrayDataset(Dataset):
    """Map-style dataset serving aligned samples from several arrays.

    Item ``idx`` is a tuple holding ``data[idx]`` for every array in
    ``datasets``, each converted to a float32 ``torch.Tensor``.

    Args:
        datasets: Indexable sequence of equally long arrays
            (e.g. ``[features, targets]``).

    Raises:
        AssertionError: If the arrays do not all have the same length.
    """

    def __init__(self, datasets):
        super(ArrayDataset, self).__init__()
        # The first array defines the reference length.
        self._length = len(datasets[0])
        for i, data in enumerate(datasets):
            # `i` is already the position of `data`; report it as-is so the
            # message points at the array that actually mismatches.
            assert len(data) == self._length, (
                "All arrays must have the same length; "
                "array[0] has length %d while array[%d] has length %d."
                % (self._length, i, len(data))
            )
        self.datasets = datasets

    def __len__(self):
        """Return the number of samples (the common length of the arrays)."""
        return self._length

    def __getitem__(self, idx):
        """Return the ``idx``-th sample of every array as float32 tensors."""
        # torch.as_tensor behaves like torch.from_numpy for ndarrays and
        # additionally accepts the NumPy scalars produced by indexing a 1-D
        # array (e.g. a vector of labels).
        return tuple(torch.as_tensor(data[idx]).float()
                     for data in self.datasets)
    
class RMSELoss(nn.Module):
    """Root-mean-square error: the square root of ``nn.MSELoss``."""

    def __init__(self):
        super(RMSELoss, self).__init__()
        # Default-configured MSE criterion; forward() takes its square root.
        self.mse = nn.MSELoss()

    def forward(self, yhat, y):
        """Return ``sqrt(MSE(yhat, y))``."""
        mean_squared_error = self.mse(yhat, y)
        return mean_squared_error.sqrt()

In [4]:
class FXDataModule(pl.LightningDataModule):
    """Lightning data module producing sliding-window (source, target) pairs
    from a single CSV file.

    Args:
        data_dir (str): Path of the CSV file. It must contain a
            ``DATE_TIME`` column; every column after the first one is used
            as a feature.
        batch_size (int): Batch size of all three dataloaders.
        length (int): Stored on the instance; not used by this class.
        source_len (int): Number of time steps in each input window.
        target_len (int): Number of time steps in each target window.
        step (int): Stride between the starts of consecutive windows.
    """

    def __init__(self, data_dir, batch_size, length, source_len, target_len, step):
        # Initialise the LightningDataModule base class before adding state.
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.length = length
        self.source_len = source_len
        self.target_len = target_len
        self.step = step
        # Populated by prepare_data() / setup().
        self.data = None
        self.src = None
        self.tgt = None

    def split_sequence(self, source, target, source_len, target_len, step, target_start_next):
        """ Split sequence with sliding window into
            sequences of context features and target.
            Args:
                source (np.array): Source sequence, 2-D (time, features).
                target (np.array): Target sequence, 2-D (time, features).
                source_len (int): Length of input sequence.
                target_len (int): Length of target sequence.
                step (int): Stride between the starts of two consecutive
                        windows.
                target_start_next (bool): If True, target sequence
                        starts on the next time step of last step of source
                        sequence. If False, target sequence starts at the
                        last time step of the source sequence.
            Return:
                X (np.array): sequence of features
                y (np.array): sequence of targets
        """
        assert len(source) == len(target), \
                'Source sequence and target sequence should have the same length.'

        X, y = list(), list()
        if not target_start_next:
            # Prepending one row shifts the target by a single step, so the
            # slice taken below begins at the last step of the source window.
            target = np.vstack((np.zeros(target.shape[1], dtype=target.dtype), target))
        for i in range(0, len(source), step):
            # Find the end of this pattern:
            src_end = i + source_len
            tgt_end = src_end + target_len
            # Check if beyond the length of sequence:
            if tgt_end > len(target):
                break
            # Split sequences:
            X.append(source[i:src_end, :])
            y.append(target[src_end:tgt_end, :])
        return np.array(X), np.array(y)

    def _load_windows(self):
        """Read the CSV and build the source/target windows."""
        df = pd.read_csv(self.data_dir, parse_dates=['DATE_TIME'])
        # Drop the first column and keep the rest as features
        # (presumably the first column is DATE_TIME -- confirm against the CSV).
        self.data = df.iloc[:,1:].values
        self.src, self.tgt = self.split_sequence(
                self.data,
                self.data,
                self.source_len,
                self.target_len,
                self.step,
                True
        )

    def prepare_data(self):
        """Load the CSV and populate ``self.data``, ``self.src``, ``self.tgt``."""
        self._load_windows()

    def setup(self, stage=None):
        """Build the train / validation / test datasets.

        Args:
            stage (str, optional): Stage name passed by the Lightning
                ``Trainer``. Ignored: all three datasets are always built.
        """
        # prepare_data() is not guaranteed to have run in this process
        # (Lightning may call it on a single process only), so load the
        # windows here when they are missing.
        if self.src is None or self.tgt is None:
            self._load_windows()
        # Split data into training set and test set :
        # chronological split, the last 30 % of the windows are held out.
        test_idx = int(len(self.src) * 0.7)
        src_train, src_test, tgt_train, tgt_test \
            = self.src[:test_idx], self.src[test_idx:], self.tgt[:test_idx], self.tgt[test_idx:]
        # Split training data into train set and validation set:
        # NOTE(review): this split shuffles the windows; with step < source_len
        # neighbouring windows overlap, so validation windows share time steps
        # with training windows.
        src_train, src_val, tgt_train, tgt_val \
            = train_test_split(src_train, tgt_train, test_size=0.25, random_state=1)
        # Prepare datasets
        self.trainset = ArrayDataset([src_train, tgt_train])
        self.valset = ArrayDataset([src_val, tgt_val])
        self.testset = ArrayDataset([src_test, tgt_test])

    def train_dataloader(self):
        """Return a shuffling dataloader over the training set."""
        self.trainloader = DataLoader(
                self.trainset,
                batch_size=self.batch_size,
                shuffle=True
        )
        return self.trainloader

    def val_dataloader(self):
        """Return a non-shuffling dataloader over the validation set."""
        self.valloader = DataLoader(
                self.valset,
                batch_size=self.batch_size,
                shuffle=False
        )
        return self.valloader

    def test_dataloader(self):
        """Return a non-shuffling dataloader over the test set."""
        self.testloader = DataLoader(
                self.testset,
                batch_size=self.batch_size,
                shuffle=False
        )
        return self.testloader

In [5]:
class FXModule(pl.LightningModule):
    """Single-layer LSTM followed by a linear read-out.

    The LSTM state is kept in ``self.hidden_cell`` and carried over from one
    ``forward`` call to the next; assign a fresh pair of zero tensors to that
    attribute to reset it.

    Args:
        input_size (int): Number of features per time step.
        hidden_size (int): Size of the LSTM hidden state.
        output_size (int): Number of values produced by the linear layer.
    """

    def __init__(self,input_size=1, hidden_size=100, output_size=1):
        # nn.Module.__init__ has to run before any sub-module is assigned,
        # otherwise `self.lstm = ...` raises AttributeError.
        super().__init__()

        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size)
        self.linear = nn.Linear(hidden_size, output_size)
        # (h_0, c_0) for one layer and a batch of one sequence.
        self.hidden_cell = (torch.zeros(1,1,self.hidden_size),
                            torch.zeros(1,1,self.hidden_size))

    def forward(self, input_seq):
        """Run one sequence through the LSTM and predict from its last step.

        Args:
            input_seq (torch.Tensor): A single sequence. Dimension 0 is
                treated as time and everything else is flattened into the
                feature dimension (batch size is fixed to 1).
                NOTE(review): a (batch, seq, features) batch would be read
                with dim 0 as time -- confirm callers pass one sequence.

        Returns:
            torch.Tensor: Output of the linear layer for the last time step.
        """
        seq_len = len(input_seq)
        # The state tensors are plain attributes, so they do not follow the
        # module when it is moved to another device.
        state = tuple(s.to(input_seq.device) for s in self.hidden_cell)
        lstm_out, new_state = self.lstm(input_seq.view(seq_len, 1, -1), state)
        # Keep the values but drop the autograd history; otherwise the next
        # call would backpropagate through this call's already-freed graph.
        self.hidden_cell = tuple(s.detach() for s in new_state)
        predictions = self.linear(lstm_out.view(seq_len, -1))
        return predictions[-1]

In [9]:
from argparse import ArgumentParser

# Parse arguments:
parser = ArgumentParser()
parser = pl.Trainer.add_argparse_args(parser)
# Without `type=int` a value given on the command line stays a string.
parser.add_argument("--batch_size", default=32, type=int)
parser.add_argument("--learning_rate", default=1e-3, type=float)
# A plain flag: without an action, any supplied string (even "False") is truthy.
parser.add_argument("--cuda", default=False, action="store_true")

# Template of a CLI-driven training run, kept for reference only.
# PropagandaDetector / PropagandaDataModule are not defined in the visible
# part of this notebook.
#
# args = parser.parse_args()
#
# # Model & data module:
# detector = PropagandaDetector(hparams=args)
# prop_dm = PropagandaDataModule()
#
# # Train & valid:
# trainer = pl.Trainer.from_argparse_args(args, fast_dev_run=True)
# trainer.fit(detector, prop_dm)

fx_dm = FXDataModule(
        data_dir="./datasets/EURUSD-2000-2020-15m.csv",
        batch_size=256,
        length=100,
        source_len=192,
        target_len=4,
        step=4
)

# FXModule.__init__ accepts only input_size / hidden_size / output_size.
# The values previously listed here as num_layers=1, bidirectional=False and
# dropout=0 are nn.LSTM's defaults; lr=1e-3 is not consumed by FXModule.
fx = FXModule(
    input_size=4,
    hidden_size=256,
    output_size=4
)

fx_dm.prepare_data()
fx_dm.setup()
# One training batch: `cac` holds the source windows, `lon` the target windows.
cac, lon = next(iter(fx_dm.train_dataloader()))
print(cac.shape, lon.shape)

torch.Size([256, 192, 4]) torch.Size([256, 4, 4])
