In [1]:
import numpy as np
import pandas as pd
import pandas_datareader as pdr
from datetime import datetime
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from pprint import pprint
%matplotlib inline

In [2]:
# Date range of the price history to download.
start = datetime(year=2004, month=1, day=1)
end = datetime(year=2017, month=1, day=1)

# Fraction of the data reserved for the test split.
test_size = 0.25

In [3]:
# Download the daily AMZN history and keep only the price columns used below.
# The columns are selected by name, in a fixed order, because the next cell
# relabels them positionally as Open/High/Low/Close; relying on whatever order
# the data source returns could silently mislabel them.
amzn = pdr.get_data_yahoo('AMZN', start, end)
amzn = amzn[['Open', 'High', 'Low', 'Adj Close']].copy()

In [4]:
# Positional rename: the four remaining columns are assumed to be in the
# order Open, High, Low, Adj Close, so the adjusted close becomes "Close".
amzn.columns = ['Open', 'High', 'Low', 'Close'] # Adj Close -> Close

In [5]:
# Preview the first rows of the prepared price frame.
amzn.head()

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2003-12-31,52.82,53.48,52.400002,52.619999
2004-01-02,52.759998,53.130001,51.43,51.900002
2004-01-05,52.0,53.599998,51.369999,53.27
2004-01-06,53.110001,53.5,52.740002,53.029999
2004-01-07,52.18,52.66,50.939999,51.900002


### Compute daily percentage changes

In [6]:
# Row-over-row fractional change of every column. NaN values (e.g. the first
# row, which has no predecessor) are replaced with 0.
amzn_pct = amzn.pct_change().fillna(0)

In [7]:
# Number of leading rows used to fit the scaler: the (1 - test_size) share of
# all rows, truncated to an integer.
scale_to_idx = int((1-test_size)*len(amzn_pct))
scale_to_idx

2455

In [8]:
# Fit the scaler on the first `scale_to_idx` rows only, then standardise every
# row with those statistics.
# NOTE: this overwrites `amzn_pct`, so re-running the cell rescales data that
# has already been scaled.
scaler = StandardScaler().fit(amzn_pct.iloc[:scale_to_idx])
amzn_pct = pd.DataFrame(
    scaler.transform(amzn_pct),
    index=amzn.index,
    columns=amzn.columns,
)

In [9]:
# Preview the first rows of the scaled percentage changes.
amzn_pct.head()

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2003-12-31,-0.040432,-0.041358,-0.041659,-0.040392
2004-01-02,-0.08258,-0.302049,-0.781902,-0.539149
2004-01-05,-0.574893,0.311018,-0.088311,0.921802
2004-01-06,0.751574,-0.115673,1.024803,-0.204617
2004-01-07,-0.690136,-0.666784,-1.406451,-0.817113


### Check they are the same length

In [10]:
# Displays 0 when the price frame and the percentage-change frame have the
# same number of rows.
len(amzn) - len(amzn_pct)

0

In [11]:
class TradeDTO:
    """Plain container describing one labelled trade sample.

    Attributes:
        pct_data: date-indexed frame of percentage changes that forms the
            input window of the sample (``__repr__`` reads the first and
            last index entries).
        enter_day: row of the price frame for the entry day; its ``name``
            must be a timestamp.
        exit_day: row of the price frame for the exit day; its ``name``
            must be a timestamp.
        direction: ``TradeDTO.BUY`` or ``TradeDTO.SELL``.
        hold_days: number of days the position is held.
        price_diff: price difference between exit and entry.
    """

    BUY = 'buy'
    SELL = 'sell'

    # Trades held for more than this many days are labelled long ('L');
    # everything else is labelled short ('S').
    LONG_HOLD_THRESHOLD = 5

    def __init__(self, pct_data, enter_day, exit_day, direction, hold_days, price_diff):
        """Store the sample's input window and trade description unchanged."""
        self.pct_data = pct_data
        self.enter_day = enter_day
        self.exit_day = exit_day
        self.direction = direction
        self.hold_days = hold_days
        self.price_diff = price_diff

    def __repr__(self):
        """Return a one-line summary: window span, entry/exit dates, trade."""
        return 'From {} to {} | enter {} | exit {} | {} | {} hold days | {} diff'.format(
            self.pct_data.iloc[0].name.date(),
            self.pct_data.iloc[-1].name.date(),
            self.enter_day.name.date(),
            self.exit_day.name.date(),
            self.direction,
            self.hold_days,
            self.price_diff)

    def to_label(self):
        """Return the class label ``'<direction>_L'`` or ``'<direction>_S'``.

        The suffix is 'L' when ``hold_days`` exceeds ``LONG_HOLD_THRESHOLD``
        and 'S' otherwise.
        """
        hold_class = 'L' if self.hold_days > self.LONG_HOLD_THRESHOLD else 'S'
        return self.direction + '_' + hold_class

### Parameters for trade collecting

In [12]:
# Number of past days that form the input window of each sample.
window = 30

# Bounds, in days, of the future range searched for an exit day.
min_hold_days, max_hold_days = 1, 10
hold_days_diff = max_hold_days - min_hold_days

In [None]:
# Build one labelled sample per day. The input is the trailing window of
# scaled percentage changes; the trade is defined by the future day, inside
# the allowed holding range, whose close is furthest from today's close.
positions = []
for i in range(window, len(amzn) - max_hold_days):
    # Input features: the `window` previous days plus day i itself.
    pct_data_part = amzn_pct.iloc[i - window:i + 1]
    current_day = amzn.iloc[i]

    # Candidate exit days i+min_hold_days .. i+max_hold_days-1 (the slice end
    # is exclusive). NOTE(review): a hold of exactly max_hold_days is never
    # considered -- confirm that this is intended.
    check_data_part = amzn.iloc[i + min_hold_days:i + max_hold_days]

    # idxmax() always returns the index *label* of the largest absolute move.
    # np.argmax() on a Series returned a label only in old pandas releases;
    # newer ones return a position, which breaks the .loc lookup below.
    close_moves = (check_data_part['Close'] - current_day['Close']).abs()
    max_ev_date = close_moves.idxmax()
    target_day = check_data_part.loc[max_ev_date]

    # Holding period in rows, derived from the position inside the candidate
    # slice instead of re-slicing the whole price frame on every iteration.
    day_diff = min_hold_days + check_data_part.index.get_loc(max_ev_date)
    price_diff = target_day['Close'] - current_day['Close']
    # A zero price difference is classified as SELL.
    direction = TradeDTO.BUY if price_diff > 0 else TradeDTO.SELL

    trade = TradeDTO(pct_data_part, current_day, target_day, direction, day_diff, price_diff)
    positions.append(trade)

In [None]:
# Show only a small sample; printing every collected trade floods the output.
pprint(positions[:10])

# Train Keras NN

### Setup X_train, X_test, etc

In [None]:
# Stack the per-trade input windows into a single array (one entry per trade).
X = np.array([trade.pct_data.values for trade in positions])
X.shape

In [None]:
# Binarize the per-trade class labels into indicator columns.
labels = [trade.to_label() for trade in positions]
lb = LabelBinarizer()
y = lb.fit_transform(labels)
y.shape

In [None]:
# Chronological split. Samples are overlapping windows of a time series, so
# the default shuffling would put near-identical windows in both train and
# test. It would also mix in rows outside the leading `scale_to_idx` rows
# that the scaler was fit on. shuffle=False keeps the first (1 - test_size)
# share of samples for training and the remainder for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_size, shuffle=False)

The number of rows used to fit the scaler should equal the training-set size plus the window size.

In [None]:
# Displays True when the number of training samples plus the window length
# equals the number of rows the scaler was fit on.
X_train.shape[0]+window == scale_to_idx

### Build model

In [None]:
# Training hyper-parameters passed to model.fit().
batch_size, epochs = 16, 100

In [None]:
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.losses import mean_squared_error, categorical_crossentropy

In [None]:
# Convolutional front end: four same-padded 1-D convolutions with growing
# filter counts and kernel widths. Only the first layer declares the input
# shape, which is that of a single sample.
model = Sequential()
model.add(Conv1D(32, 3, activation='relu', padding='same', input_shape=X[0].shape))
for n_filters, kernel_width in ((64, 4), (128, 5), (256, 6)):
    model.add(Conv1D(n_filters, kernel_width, activation='relu', padding='same'))

In [None]:
# Classifier head: flatten the convolutional features, then four shrinking
# dense blocks (Dense -> ReLU -> Dropout 0.2).
# NOTE: this cell appends to the existing `model`; re-running it without
# rebuilding the model first stacks the layers a second time.
model.add(Flatten())

for units in (128, 64, 32, 16):
    model.add(Dense(units))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))

# Output layer: one unit per label column, softmax over the classes.
model.add(Dense(y.shape[1]))
model.add(Activation('softmax'))

model.compile(
    loss=categorical_crossentropy,
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Fit the model
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=epochs, batch_size=batch_size)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=3)
print("Accuracy: %.2f%%" % (scores[1]*100))