In [None]:
import torch
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datetime import datetime
import sys

# Open an empty 15x10 figure; nothing is drawn on it in this cell.
plt.figure(figsize=(15, 10))

In [None]:
# Alternative Colab / Google Drive location of the same two files:
# bid_data = pd.read_csv('/content/drive/My Drive/Colab Notebooks/Forex/EURCAD_Candlestick_1_M_BID_01.01.2019-07.11.2020.csv')
# ask_data = pd.read_csv('/content/drive/My Drive/Colab Notebooks/Forex/EURCAD_Candlestick_1_M_ASK_01.01.2019-07.11.2020.csv')

# Read the bid and ask candle files, tag every column with its side, and
# restore the shared timestamp column to its plain name 'Gmt time'.
bid_data = (
    pd.read_csv('dataset/EURCAD_Candlestick_1_M_BID_01.01.2019-07.11.2020.csv')
    .add_suffix('_bid')
    .rename(columns={'Gmt time_bid': 'Gmt time'})
)
ask_data = (
    pd.read_csv('dataset/EURCAD_Candlestick_1_M_ASK_01.01.2019-07.11.2020.csv')
    .add_suffix('_ask')
    .rename(columns={'Gmt time_ask': 'Gmt time'})
)

# Quick sanity check of the bid side: dimensions, then the first rows.
print(bid_data.shape)
bid_data.head()

In [None]:
# Sanity-check the ask frame: print its dimensions, then display its first rows.
print(ask_data.shape)
ask_data.head()

In [None]:
def extract_datetime(dtime):
  """
  Break a '<date> <time>' string into calendar and clock components.

  The date part must be dot-separated in day.month.year order and the time
  part colon-separated in hour:minute:second order, with exactly one space
  between the two parts.

  Returns a 7-tuple
    (weekday, year, month, day, hour, minute, second)
  where weekday is datetime.weekday() (Monday == 0), the date fields are
  NumPy integers and the time fields are NumPy floats (so fractional
  seconds are kept).
  """
  date_text, time_text = dtime.split(' ')

  # Reverse day.month.year so the array reads year, month, day.
  ymd = np.array(date_text.split('.'), dtype=int)[::-1]
  hms = np.array(time_text.split(':'), dtype=float)

  year, month, day = ymd[0], ymd[1], ymd[2]
  weekday = datetime(year, month, day).weekday()

  return weekday, year, month, day, hms[0], hms[1], hms[2]
  
# Spot-check the parser on a single timestamp (row label 970000 of the bid frame).
extract_datetime(bid_data['Gmt time'][970000])

In [None]:
# Put ask and bid columns side by side. DataFrame.join aligns on the row
# index, so the bid frame's duplicate 'Gmt time' column is removed first.
# NOTE(review): this assumes both files list the same timestamps in the same
# row order - confirm, or merge on 'Gmt time' instead.
data = ask_data.join(bid_data.drop(columns=['Gmt time']))
data.head()

In [None]:
# Names for the seven values extract_datetime returns, in order.
expanded_datetime_columns = ['weekday', 'year', 'month', 'day', 'hour', 'minute', 'second']

# Parse every timestamp; dtype=int truncates the float time fields.
expanded_datetime = np.array(list(map(extract_datetime, data['Gmt time'])), dtype=int)

# Swap the raw timestamp column for the parsed parts, which go first.
data = pd.DataFrame(expanded_datetime, columns=expanded_datetime_columns).join(
    data.drop(columns='Gmt time')
)

In [None]:
# Display the assembled frame: datetime parts first, then the ask and bid columns.
data

In [None]:
# Row window (by position) used for the close-up plots.
idx_start = 200000
idx_end = 200100

# Bid vs. ask closing prices inside that window.
data.iloc[idx_start:idx_end][['Close_bid', 'Close_ask']].plot(figsize=(15, 8))

In [None]:
# Ask vs. bid volume over the same row window as the price plot.
data.iloc[idx_start:idx_end][['Volume_ask', 'Volume_bid']].plot(figsize=(15, 8))

In [None]:
from sklearn.model_selection import train_test_split

# Features: whatever is left once every price and volume column is removed.
X = data.drop(
    columns=[
        'Open_ask', 'High_ask', 'Low_ask', 'Close_ask',
        'Open_bid', 'High_bid', 'Low_bid', 'Close_bid',
        'Volume_ask', 'Volume_bid',
    ]
)

# Target: the bid-side closing price.
y = data['Close_bid']

# shuffle=False keeps the row order, so the leading rows become the training
# set and the trailing rows the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False)
print(X_train.shape)
print(y_test.shape)

In [None]:
# Inspect the training target series (the 'Close_bid' values of the training rows).
y_train

In [None]:
# Rolling mean of the training target over an 8-row window that ends at (and
# includes) the current row.
cbm10 = y_train.rolling(8).mean()

# Attach it as the 'cbm' feature. assign() returns a new frame, so nothing is
# written into a slice of the frame that train_test_split handed back.
X_train = X_train.assign(cbm=cbm10)

# The first 7 rolling values are NaN because the window is not full yet.
# Overwrite the first 10 rows with the value at position 10. This uses one
# .loc call instead of the chained `X_train['cbm'][:10] = ...`, which assigns
# into a temporary Series (SettingWithCopyWarning, and a silent no-op when
# pandas Copy-on-Write is active).
X_train.loc[X_train.index[:10], 'cbm'] = X_train['cbm'].iloc[10]

# NOTE(review): the window above contains the current row's own target, while
# predict() builds this feature from the 8 *preceding* values only. If that
# mismatch is unintended, use y_train.rolling(8).mean().shift(1) - confirm.

In [None]:
def plot_performance(preds, truth, start, forward=1000):
    """Plot predictions against the ground truth over one window.

    Args:
        preds: Sliceable sequence of predicted values.
        truth: Sliceable sequence of true values, aligned with ``preds``.
        start: Position of the first point to draw.
        forward: Number of points to draw from ``start``. Pass ``-1`` to
            draw everything from ``start`` to the end of ``preds``.
    """
    # The end of the window must come from the `preds` argument. It was
    # previously taken from a notebook-level `y_preds`, which ignored the
    # argument and failed when that global did not exist.
    end = len(preds) if forward == -1 else start + forward

    plt.figure(figsize=(15, 10))
    plt.plot(preds[start:end], label='predicted')
    plt.plot(truth[start:end], label='ground truth')
    plt.legend()

# Baseline Models

## Ada Boost

In [None]:
from sklearn.ensemble import AdaBoostRegressor

# Baseline: AdaBoost regressor with a fixed seed so the fit is repeatable.
regr = AdaBoostRegressor(
    n_estimators=200,
    learning_rate=0.5,
    loss='exponential',
    random_state=0,
)
regr.fit(X_train, y_train)

# Score on the rows the model was fitted on (not a held-out estimate).
regr.score(X_train, y_train)

In [None]:
# Add a zero-filled placeholder 'cbm' column so X_test has the same columns as
# X_train; predict() replaces the last feature of each row before scoring it.
# NOTE(review): the right-hand side is a 2-D (n, 1) array - confirm the pandas
# version in use accepts that for a single-column assignment.
X_test['cbm'] = np.zeros((y_test.shape[0], 1))
X_test

In [None]:
# Scratch check: take the first test row and set its last entry to 1.
# NOTE(review): whether this write also reaches X_test depends on pandas
# view/copy semantics - confirm nothing later relies on it.
item = X_test.iloc[0]
item.iloc[[-1]] = 1
item

In [None]:
def predict(X_test, y_train, verbose=1, model=None, window=8):
  """Roll a fitted regressor forward over X_test one row at a time.

  The last feature of every row is replaced by the mean of the `window`
  values that precede it. Those are the final training targets at first,
  then progressively the model's own earlier predictions.

  Args:
    X_test: DataFrame of numeric features whose last column is the slot for
      the rolling-mean feature. It is not modified.
    y_train: Training targets (Series or array-like); only the last `window`
      values are used to seed the rolling mean.
    verbose: When 1, print progress every 100 rows.
    model: Object with a scikit-learn style `predict(X)` method. Defaults to
      the notebook-level `regr`.
    window: Number of preceding values averaged into the last feature.

  Returns:
    1-D float array with one prediction per row of X_test.

  Raises:
    ValueError: if `window` is not positive or y_train holds fewer than
      `window` values.
  """
  if model is None:
    model = regr  # regressor fitted earlier in the notebook

  if window < 1:
    raise ValueError('window must be a positive integer')

  seed = np.asarray(y_train, dtype=float)[-window:]
  if seed.shape[0] < window:
    raise ValueError('y_train must contain at least {} values'.format(window))

  n_rows = X_test.shape[0]

  # history = [last `window` training targets | predictions made so far].
  # The slice history[i:i + window] is always the `window` values that
  # precede row i, so one loop covers both warm-up and steady state, and
  # frames shorter than `window` rows work too.
  history = np.empty(window + n_rows, dtype=float)
  history[:window] = seed

  for index in range(n_rows):
    # Explicit copy: writing the feature must never leak back into X_test
    # or hit a read-only view.
    item = X_test.iloc[index].to_numpy(dtype=float, copy=True)
    item[-1] = history[index:index + window].mean()

    # The model returns a 1-element array; take the scalar explicitly.
    history[window + index] = np.ravel(model.predict(item.reshape(1, -1)))[0]

    if verbose == 1 and index > 0 and index % 100 == 0:
      print("", end='\r{:.2f} has completed'.format((index / n_rows) * 100))
  print('')
  return history[window:].copy()

In [None]:
# Run the row-by-row forecast over the whole test set and show the first ten predictions.
y_preds = predict(X_test, y_train)
y_preds[:10]

In [None]:
# Compare predictions with the true test targets over the first 10000 positions.
plot_performance(y_preds, y_test.to_numpy(), 0, forward=10000)

## LSTM

In [None]:
# Convert the training data to float32 tensors. PyTorch layers hold float32
# parameters by default, and feeding them float64 input raises a dtype
# mismatch in the forward pass. Going through np.asarray also keeps this cell
# re-runnable: it accepts both the original pandas objects and the tensors
# produced by a previous run.
X_train = torch.from_numpy(np.asarray(X_train, dtype=np.float32))
y_train = torch.from_numpy(np.asarray(y_train, dtype=np.float32))

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F


# Features per time step fed to the recurrent layer.
# NOTE(review): presumably the 7 datetime columns plus 'cbm' - confirm.
input_size = 8
# Time steps per sample; also scales the input width of the final linear layer.
seq_length = 1
# Number of stacked recurrent layers.
num_layers = 2
# Width of the hidden state of each recurrent layer.
hidden_size = 256
# Step size handed to the Adam optimizer.
learning_rate = 0.001
# Number of passes of the training loop.
num_epochs = 3
# Use the GPU when CUDA is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class RNN(nn.Module):
    """Stacked vanilla RNN followed by a linear head giving one value per sample.

    Input is batch-first: ``(batch, seq_length, input_size)``.
    Output has shape ``(batch, 1)``.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the hidden state of every recurrent layer.
        seq_length: Time steps per sample; the linear head consumes the
            hidden states of all steps, flattened.
        num_layers: Number of stacked recurrent layers.
    """

    def __init__(self, input_size, hidden_size, seq_length, num_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # batch_first=True makes dim 0 the batch dimension, which is what
        # forward() assumes when it sizes h0 and flattens the output. With
        # the default (sequence-first) layout, h0's batch size would not
        # match the input and the forward pass would fail.
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * seq_length, 1)

    def forward(self, x):
        # Create the initial hidden state on the same device and dtype as the
        # input, so the module works wherever the caller put the data instead
        # of depending on a notebook-level `device`.
        h0 = torch.zeros(
            self.num_layers, x.size(0), self.hidden_size,
            device=x.device, dtype=x.dtype,
        )
        out, _ = self.rnn(x, h0)

        # (batch, seq_length, hidden_size) -> (batch, seq_length * hidden_size)
        out = out.reshape(out.size(0), -1)
        return self.fc(out)
    
    
# Build the network from the hyperparameters defined earlier in this cell.
# NOTE(review): the model is not moved to `device` here - confirm that is intended.
model = RNN(input_size, hidden_size, seq_length, num_layers)

        

In [None]:
# The target is a continuous price and the model ends in an unbounded linear
# layer, so this is a regression problem: use mean squared error. BCELoss
# expects probabilities in [0, 1] and rejects anything outside that range.
# The variable name is kept as-is because the training loop refers to it.
critirion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Give the inputs the batch-first layout the recurrent model consumes:
# (samples, seq_length, input_size). The previous statement was an unfinished
# expression that did not parse. Reshaping an already 3-D tensor this way is
# a no-op, so the cell can be re-run safely.
X_train = X_train.reshape(-1, seq_length, input_size)
X_train.shape

In [None]:
# Prepare the tensors once, outside the loop:
#  - float32 matches the dtype of the model parameters;
#  - inputs are laid out as (samples, seq_length, input_size);
#  - targets become a column so they line up with the (samples, 1) scores
#    (a flat target would be rejected or silently broadcast by the loss).
inputs = X_train.float().reshape(-1, seq_length, input_size)
targets = y_train.float().reshape(-1, 1)

model.train()
for epoch in range(num_epochs):

    # Full-batch forward pass and loss.
    scores = model(inputs)
    loss = critirion(scores, targets)

    # Clear old gradients, backpropagate, then update the weights.
    optimizer.zero_grad()
    loss.backward()

    optimizer.step()

    print('epoch {}/{} - loss {:.6f}'.format(epoch + 1, num_epochs, loss.item()))
    