In [90]:
import pandas as pd
import numpy as np

from sklearn import preprocessing
from sklearn.metrics import mean_squared_error, classification_report
import matplotlib.pylab as plt
import datetime as dt
import time

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.recurrent import LSTM, GRU
from keras.layers import Convolution1D, MaxPooling1D

%matplotlib inline  

np.set_printoptions(suppress=True)

def parser(x):
    return pd.datetime.strptime(x, '%Y.%m.%d')

### Import file

In [35]:
df = pd.read_csv('EURUSD.csv', header=None, 
                  names = ['date', 'dummy', 'open', 'high', 'low','close','volume'], parse_dates=[0], date_parser=parser)

#drop columns
df.drop(df.loc[:,['dummy','high','low','volume']].head(0).columns, axis=1, inplace=True)

#create day of week field
df['weekday']= df['date'].dt.dayofweek
df.head()
print (df['weekday'].value_counts())

#select from Monday to Friday only
df1=df.loc[df['weekday'].isin([0,1,2,3,4])]

#distribution by field
print (df1['weekday'].value_counts())

2    490
0    489
1    488
3    487
4    487
6    245
Name: weekday, dtype: int64
2    490
0    489
1    488
3    487
4    487
Name: weekday, dtype: int64


In [40]:
df1['close_open']=df1['close'] - df1['open']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


### Preparing Train and Test

In [159]:
data =df1['close_open'].values
train=30

X, Y = [], []
for i in range(0, len(data)-train):
    try:
        x_i = data[i:i+train]
        y_i = data[i+train]
        
        if y_i > 0.:
            y_i = [1.,0.]
        else:
            y_i = [0.,1.]
            
    except:
        break

    X.append(x_i)
    Y.append(y_i)
    
X, Y = np.array(X), np.array(Y)


percentage=0.9
X_train = X[0:int(len(X) * percentage)]
Y_train = Y[0:int(len(X) * percentage)]

X_test = X[int(len(X) * percentage):]
Y_test = Y[int(len(X) * percentage):]


In [160]:
print(X.shape)
print(Y.shape)
print(X_train.shape)
print(Y_train.shape)
print(X_test.shape)
print(Y_test.shape)

(2411, 30)
(2411, 2)
(2169, 30)
(2169, 2)
(242, 30)
(242, 2)


### Distribution by direction(Y)

In [143]:
import collections
print (collections.Counter(Y_train.ravel()))
print (collections.Counter(Y_test.ravel()))

Counter({-1.0: 1090, 1.0: 1073, 0.0: 6})
Counter({1.0: 123, -1.0: 119})


col_0,-1.0
row_0,Unnamed: 1_level_1
-1.0,119
1.0,123


### Model Training

In [161]:

X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], EMB_SIZE))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], EMB_SIZE))

In [118]:
print(X.shape)
print(Y.shape)
print(X_train.shape)
print(Y_train.shape)
print(X_test.shape)
print(Y_test.shape)

(2411, 30)
(2411, 1)
(2169, 30, 1)
(2169, 1)
(242, 30, 1)
(242, 1)


In [162]:
layers=[1,30,60,2]
model = Sequential()

model.add(LSTM(
    input_dim=layers[0],
    output_dim=layers[1],
    return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(
    layers[2],
    return_sequences=False))
model.add(Dropout(0.2))

model.add(Dense(
    output_dim=layers[3]))
model.add(Activation("softmax"))

start = time.time()
model.compile(optimizer='adam', 
              loss='mse',
              metrics=['accuracy'])
print("> Compilation Time : ", time.time() - start)

  import sys
  import sys


> Compilation Time :  0.030005216598510742


  app.launch_new_instance()


In [163]:
model.fit(
    X_train,
    Y_train,
    batch_size=30,
    nb_epoch=1,
    validation_split=0.05)



Train on 2060 samples, validate on 109 samples
Epoch 1/1


<keras.callbacks.History at 0x1b200f60>

In [164]:
predicted = model.predict(X_test)
predicted = np.reshape(predicted, (predicted.size,))

In [165]:
pd.crosstab(Y_test.ravel(),predicted)

col_0,0.494419038296,0.494422733784,0.494423151016,0.494425624609,0.494426459074,0.494427472353,0.494427740574,0.494428098202,0.494428902864,0.494429200888,...,0.505570828915,0.505571067333,0.505571842194,0.505572199821,0.505572617054,0.505573570728,0.505574345589,0.505576908588,0.505577206612,0.505581021309
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.0,1,1,1,1,0,0,0,1,1,2,...,0,0,0,1,1,1,0,0,0,0
1.0,0,0,0,0,1,1,1,0,0,0,...,1,1,1,0,0,0,1,1,1,1
