In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import clear_output
import sys

data = pd.read_csv(r'EURUSD.txt', delimiter=',')

# Create the datetime column from the <DTYYYYMMDD> and <TIME> columns.
print("Data Loading...")

# <DTYYYYMMDD> holds the date as digits (YYYYMMDD). <TIME> holds up to six
# digits; leading zeros are missing when the column is read as a number, so
# it is left-padded to six digits. The first two digits are the hour and the
# next two the minute; the last two digits (presumably seconds) are ignored.
date_digits = data['<DTYYYYMMDD>'].astype(str)
time_digits = data['<TIME>'].astype(str).str.zfill(6)

# One vectorised parse for the whole column instead of a Python-level loop
# that builds one datetime object per row.
data['datetime'] = pd.to_datetime(date_digits + time_digits.str[:4],
                                  format='%Y%m%d%H%M')
print("Finished")

# Index = datetime --> required for resampling.
data = data.set_index(['datetime'])
index = data.index
series = pd.Series(data['<CLOSE>'], index=index)

570
Finished


In [3]:
# Aggregate the minute-by-minute rows into 1-hour bars:
# first open, highest high, lowest low and last close of every hour.
hourly_rules = {'<OPEN>': 'first',
                '<HIGH>': 'max',
                '<LOW>': 'min',
                '<CLOSE>': 'last'}

# reset_index() turns the datetime index back into an ordinary column.
data = data.resample('H').agg(hourly_rules).reset_index()

In [4]:
""" Drop weekend NaN rows"""
# Remove every row holding a NaN, then renumber the rows from 0.
# reset_index() also keeps the previous row labels in a new 'index' column.
data = data.dropna(how='any').reset_index()

In [5]:
# Rename by source column name instead of by position: the order in which
# agg() returns the price columns depends on dict ordering, so a positional
# assignment can silently attach the wrong label to a price column.
# The 'index' column left by reset_index() keeps its name.
data = data.rename(columns={'datetime': 'date',
                            '<OPEN>': 'open',
                            '<HIGH>': 'high',
                            '<LOW>': 'low',
                            '<CLOSE>': 'close'})

In [6]:
# Keep the timestamp and the four price columns; the 'index' column is dropped.
kept_columns = ['date', 'high', 'close', 'open', 'low']
data = data[kept_columns]

# Feature Construction

* <h3>SMA</h3>

In [7]:
def SMA(df, column="close", period=20):
    """Return a copy of ``df`` with a simple-moving-average column appended.

    Parameters
    ----------
    df : DataFrame holding the source column.
    column : name of the column to average (default "close").
    period : window length in rows; also used in the new column's name.

    Returns
    -------
    DataFrame equal to ``df`` plus a column named ``'SMA-<period>'``. The
    first ``period - 1`` rows of that column are NaN because a full window
    is required before a value is produced. ``df`` itself is not modified.
    """
    label = 'SMA-{}'.format(period)
    rolling_window = df[column].rolling(window=period, min_periods=period)
    moving_average = rolling_window.mean().to_frame(label)
    return df.join(moving_average)

In [9]:
# Window length (in rows) of the slowest moving average.
# NOTE(review): the visible cells pass 200 literally and never read this name — confirm it is still needed.
slow_MA_period = 200

In [8]:
# Append one moving-average column per window: SMA-5, SMA-20, SMA-50, SMA-200.
for window in (5, 20, 50, 200):
    data = SMA(data, period=window)

In [10]:
""" Drop SMA NaN rows"""
# Rows without a full SMA window hold NaN; remove them and renumber from 0.
# reset_index() inserts the previous row labels as a new first column.
data = data.dropna(how='any').reset_index()

In [11]:
# Discard the first column and keep all the others in their current order.
remaining_columns = list(data.columns)[1:]
data = data[remaining_columns]

* <h3>Crosses</h3>

In [12]:
"""
 1 - trend up
-1 - trend down
"""
# Trend flag from the SMA-5 / SMA-20 relationship, computed for all rows at once:
#   -1 where SMA-20 is above SMA-5, +1 otherwise.
sma_fast = data['SMA-5'].values
sma_slow = data['SMA-20'].values
raw_trend = pd.Series(np.where(sma_slow > sma_fast, -1.0, 1.0))

# Where both averages are equal, the flag of the previous row is carried
# forward. Leading rows with no previous flag get 0, stated explicitly rather
# than relying on index -1 of a zero-initialised array.
trend = raw_trend.where(sma_slow != sma_fast).ffill().fillna(0.0).values

data['Trend-5/20'] = trend

In [13]:
# Crosses 5-20: a cross is any row whose trend flag differs from the previous
# row's flag, whether the flag went up or down.
trend_flags = data['Trend-5/20']
close_prices = data['close']

# Row positions of the crosses ...
cross_idx = [row for row in range(1, data.shape[0])
             if trend_flags.iloc[row] != trend_flags.iloc[row-1]]
# ... and the close price at each of them.
cross_prices = [close_prices.iloc[row] for row in cross_idx]

In [14]:
def getCrossType(idx):
    """Classify the trend change between row ``idx - 1`` and row ``idx``.

    Reads the 'Trend-5/20' column of the global ``data`` frame.

    Returns 1 (long) when the flag increased, -1 (short) when it decreased,
    and None when the flag is unchanged.
    """
    flags = data['Trend-5/20']
    current, previous = flags.iloc[idx], flags.iloc[idx-1]
    if current > previous:
        return 1   # long
    if current < previous:
        return -1  # short
    return None

In [15]:
def get_features(idx, ma_hist, past_days, nbCross, past_cross_points):
    """Build the feature row for position ``idx`` of the global ``data`` frame.

    Only rows strictly before ``idx`` are read, to avoid lookahead bias.

    Parameters
    ----------
    idx : row position the features are built for.
    ma_hist : number of past rows whose SMA-5/20/50/200 values are included.
    past_days : number of past rows whose close/high/low/open are included.
    nbCross : end (exclusive) of the range of entries taken from the global
        ``cross_idx`` / ``cross_prices`` lists.
    past_cross_points : number of cross entries included.

    Returns
    -------
    numpy array of shape (1, 4*ma_hist + 4*past_days + 3*past_cross_points).

    Notes
    -----
    Row positions ``idx - shift`` below zero are passed to ``.iloc`` as
    negative positions and therefore read from the end of the frame. A
    negative cross number is reported with a printed message but is still
    used, so it reads from the end of the cross lists.
    """
    features = []

    # Past moving-average values, most recent row first.
    for shift in range(1, ma_hist+1):
        row = idx - shift
        for sma_column in ('SMA-5', 'SMA-20', 'SMA-50', 'SMA-200'):
            features.append(data[sma_column].iloc[row])

    # Past OHLC values, most recent row first.
    for shift in range(1, past_days+1):
        row = idx - shift
        for price_column in ('close', 'high', 'low', 'open'):
            features.append(data[price_column].iloc[row])

    # Past cross points. The loop variable has its own name so that it no
    # longer overwrites the ``idx`` parameter.
    for cross_no in range(nbCross-past_cross_points, nbCross):
        if cross_no < 0:
            print("Error in feat construction")
        features.append(cross_idx[cross_no])                # row position of the cross
        features.append(cross_prices[cross_no])             # close price at the cross
        features.append(getCrossType(cross_idx[cross_no]))  # 1 = long, -1 = short

    return np.array(features)[None,:]

In [16]:
def get_direction(idx):
    """Return the up/down label for the row at position ``idx``.

    The row's hour h is read from the 'date' column of the global ``data``
    frame. The label compares the close of the row (23 - h) positions after
    ``idx`` with the open of the row h positions before ``idx``.

    Returns 1 when that close is above that open, otherwise 0.

    NOTE(review): the offsets only land on hour 23 and hour 0 of the same day
    if that day has all 24 hourly rows — confirm for days with dropped rows.
    """
    hour = data['date'].iloc[idx].hour
    rows_to_day_end = 23 - hour
    rows_from_day_start = hour
    price_move = (data['close'].iloc[idx + rows_to_day_end]
                  - data['open'].iloc[idx - rows_from_day_start])
    return 1 if price_move > 0 else 0

In [17]:
#Parameters of the feature vector (passed to get_features)
ma_hist = 14  # number of past rows whose four SMA values are included
past_cross_points = 6  # number of past cross points included (3 values each)
past_days_close = 24  # number of past rows whose close/high/low/open are included

* <h3>nbCross</h3>

In [18]:
# nbCrossArray[row] is the cross count that get_features receives for that row.
# Rows up to and including the first cross keep the initial value 0.
nbCrossArray = np.zeros(data.shape[0])
nbCross = past_cross_points
for idx in range(cross_idx[0]+1, data.shape[0]):
    # The counter advances on the row right after cross number nbCross+1.
    # The bound check stops the lookup from running past the end of cross_idx
    # once the last cross is behind us; the counter then stays at its final value.
    if nbCross+1 < len(cross_idx) and idx-1 == cross_idx[nbCross+1]:
        last_cross_idx = idx-1
        nbCross += 1

    nbCrossArray[idx] = nbCross
            

# Feature matrix and normalization

In [20]:
# One sample feature row (built for row 1000); its shape (1, n_features) sizes the arrays and the network input below.
phi_example = get_features(1000, ma_hist, past_days_close, int(nbCrossArray[1000]), past_cross_points)

(1, 170)


In [21]:
nbExamples = 50000
n_features = phi_example.shape[1]

# Feature matrix: one row per example, filled for rows 0 .. nbExamples-1.
X = np.zeros((nbExamples, n_features))
for row in range(nbExamples):
    # Refresh the progress counter every 100 rows.
    if not row % 100:
        clear_output()
        print(row)
        sys.stdout.flush()

    X[row, :] = get_features(row, ma_hist, past_days_close, int(nbCrossArray[row]), past_cross_points)
    

49900


In [23]:
# Keep only the rows in which no feature is NaN.
complete_rows = ~np.isnan(X).any(axis=1)
X_n = X[complete_rows]
print(X_n.shape)

# Standardise every feature column to zero mean and unit standard deviation.
# NOTE(review): the statistics are taken over all rows, including those later
# used as the test set — confirm this is intended.
column_mean = X_n.mean(axis=0)
column_std = X_n.std(axis=0)
X_n = (X_n - column_mean) / column_std

(50000, 170)


In [25]:
# Label column: one 0/1 direction label per example, stored as floats.
Y = np.array([get_direction(row) for row in range(nbExamples)], dtype=float).reshape(-1, 1)
print(Y.shape)

# Apply the same NaN-row filter as for X_n so features and labels stay aligned.
Y_n = Y[~np.isnan(X).any(axis=1)]
print(Y_n.shape)

(50000, 1)
(50000, 1)


# NN model

In [132]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import RMSprop, Adam

In [133]:
# Network: n_features inputs -> 150 ReLU units -> 1 sigmoid output.
model = Sequential([
    Dense(150, input_shape=phi_example.squeeze().shape),
    Activation('relu'),
    Dense(1),
    Activation('sigmoid'),
])

# Adam optimiser with learning rate 0.001; mean-squared-error loss; accuracy reported.
adam = Adam(lr=0.001)
model.compile(optimizer=adam, loss='mse', metrics=['accuracy'])

# NN Training

In [28]:
# Rows before trainset_idx (the first 80%) are used for training, the rest for testing.
n_rows = X_n.shape[0]
trainset_idx = int(n_rows*0.8)
print(trainset_idx)

40000


## Supervised learning (SL) experiment

In [129]:
# Fit the model on the training rows, starting right after cross number
# past_cross_points, in their original order (no shuffling).
first_train_row = cross_idx[past_cross_points]+1
train_inputs = X_n[first_train_row:trainset_idx, :]
train_labels = Y_n[first_train_row:trainset_idx, :]

model.fit(train_inputs, train_labels, epochs=100, batch_size=24*2, verbose=0, shuffle=False)
print("Fine training...")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

__Test set error__

In [131]:
# Evaluate the model on the rows from trainset_idx onwards and print the
# second reported metric as a percentage.
test_inputs = X_n[trainset_idx:, :]
test_labels = Y_n[trainset_idx:, :]
scores = model.evaluate(test_inputs, test_labels)

metric_name = model.metrics_names[1]
print("\n%s: %.2f%%" % (metric_name, scores[1]*100))

acc: 64.35%


# NN + TD(0) learning

In [173]:
def generator(batchsize):
    """Yield (inputs, targets) training batches forever, with bootstrapped targets.

    Batches are read sequentially from the global ``X_n``, starting at the row
    after cross number ``past_cross_points``. For every row in a batch:

    * if the row's hour (from ``data['date']``) is 23, the target is the
      row's label in ``Y_n``;
    * otherwise the target is the current ``model`` prediction for the
      following row.

    After each batch the start position advances by ``batchsize`` and goes
    back to the first row once it reaches ``trainset_idx``.

    Parameters
    ----------
    batchsize : number of rows per yielded batch.

    Yields
    ------
    tuple (batch_input, batch_target) with shapes (batchsize, n_features)
    and (batchsize, 1).

    NOTE(review): the same row position indexes ``data`` and ``X_n``, which
    assumes no rows were removed when X_n was filtered — confirm.
    NOTE(review): a batch that starts less than ``batchsize`` rows before
    ``trainset_idx`` reads rows at or beyond it — confirm this is acceptable.
    """
    first_row = cross_idx[past_cross_points]+1
    n_features = phi_example.shape[1]
    idx = first_row

    while True:
        # Allocate fresh arrays for every batch. Yielding the same two buffers
        # each time and overwriting them in place means that batches already
        # handed to the consumer (fit_generator queues them) would all refer
        # to the same memory and change under it.
        batch_input = np.zeros((batchsize, n_features))
        batch_target = np.zeros((batchsize, 1))

        for i in range(batchsize):
            row = idx + i
            if data['date'].iloc[row].hour == 23:
                # The actual label is used for rows at hour 23.
                target = Y_n[row, 0]
            else:
                # Bootstrap: the model's own estimate for the next row.
                next_features = X_n[row+1, :][None, :]
                target = model.predict(next_features, batch_size=1)

            batch_input[i, :] = X_n[row, :]
            batch_target[i, :] = target

        idx += batchsize
        if idx >= trainset_idx:
            idx = first_row

        yield (batch_input, batch_target)

        

In [170]:
# Rows per generated batch: 24*2 = 48, the same value as the batch_size given to model.fit above.
batchsize=24*2

In [175]:
# Create the batch generator; nothing is computed until the first batch is requested.
g = generator(batchsize)

In [176]:
# Train on the generated batches. Floor division keeps steps_per_epoch an
# integer even where `/` on two ints returns a float.
model.fit_generator(g, steps_per_epoch=(trainset_idx - cross_idx[past_cross_points])//batchsize, epochs=100, verbose=0)

<keras.callbacks.History at 0x7f9abb657310>

<h4>Test set error</h4>

In [178]:
# Evaluate the model on the held-out rows (trainset_idx onwards) and print
# the second reported metric as a percentage.
held_out_X = X_n[trainset_idx:, :]
held_out_Y = Y_n[trainset_idx:, :]
scores = model.evaluate(held_out_X, held_out_Y)

reported_metric, reported_value = model.metrics_names[1], scores[1]
print("\n%s: %.2f%%" % (reported_metric, reported_value*100))

acc: 57.49%
