In [13]:
import copy
import warnings
import pywt
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, Reshape
from model import Company, StockLog, IndexLog, start_engine
from sqlalchemy import asc
from datetime import date, datetime
import matplotlib.pyplot as plt
from workalendar.oceania import NewSouthWales
%matplotlib inline
warnings.filterwarnings("ignore")

In [14]:
# Database engine handed to every pd.read_sql call in the processing cell.
engine = start_engine()

# Earliest date kept in each company's frame; rows dated before it are dropped.
start = date(2009, 7, 1)

In [35]:
# Hypers

# -- Windowing ---------------------------------------------------------------
# Number of future trading days the network outputs per sample; the same
# number of trailing rows is held out of the training split.
DAYS = 20
# Used in two roles by the processing cell: the number of past rows in each
# input window AND the mini-batch size passed to model.fit.
BATCH_SIZE = 20

# -- Network / optimisation --------------------------------------------------
LSTM_UNITS = 100   # units in each of the two LSTM layers
DROPOUT = 0.25     # dropout rate applied after each LSTM layer
EPOCHS = 20        # passes over the training windows

# Not referenced by the cells visible in this notebook section.
TRAIN_LIMIT = 1.0

In [38]:
# Company codes excluded from this run.
diff = ['WOW', 'ABC', 'SYD', 'IAG', 'AGL']

# Every company whose code is not in the exclusion list above.
companies = list(filter(lambda c: c.code not in diff, Company().query().all()))

# Single-company run, kept for debugging:
# companies = [Company().query().get('ABC')]

In [39]:
# For every selected company: assemble a feature frame (price history plus
# index series), denoise the closing price with a Haar wavelet, train a
# two-layer LSTM to forecast the next DAYS values of the denoised series, and
# store the forecast as new StockLog rows dated on the following working days.

# Sector display name (the value held in Company.sector) -> the code that the
# sector's series carries in the index log. Loop-invariant, so built once.
sectors = {'Consumer Discretionary': 'discretionary',
           'Consumer Staples': 'staples',
           'Energy': 'energy',
           'Financials': 'financials',
           'Health Care': 'healthcare',
           'Industrials': 'industrials',
           'Information Technology': 'infotech',
           'Materials': 'materials',
           'Telecommunication Services': 'telecom',
           'Utilities': 'utilities',
           'Real Estate': 'realestate'}

# The index log query has no per-company filter; load it once instead of
# re-reading the whole table on every iteration.
index_log = IndexLog().query()
fi = pd.read_sql(index_log.statement, engine)

# Index columns whose gaps are forward-filled from the last known value.
fill_columns = ['prime', 'world', 'pacific', 'axvi', 'sector', 'aud_usd', 'twi']

# Fewest rows that still yield one training sample (BATCH_SIZE inputs + DAYS
# targets) after the last DAYS rows are held out of the training split.
min_rows = BATCH_SIZE + 2 * DAYS

cal = NewSouthWales()

for company in companies:

    print('Processing', company.code)

    # Price history of this company, ordered and indexed by date.
    stock_log = StockLog().query().filter(StockLog.company==company)
    df = pd.read_sql(stock_log.statement, engine)
    df = df.sort_values(by='date').set_index('date')

    # Keep every index series except the sector series of *other* sectors.
    # An unmapped company.sector raises KeyError here, as it did before.
    sector_column = sectors[company.sector]
    sectors_cpy = set(sectors.values()) - {sector_column}
    indicies = set(fi['index']) - sectors_cpy

    # NOTE(review): `indicies` is a set, so the order in which the index
    # columns are appended is not guaranteed to be stable between kernel
    # sessions, yet the positional column drop further down depends on it.
    # Confirm the intended columns and switch to name-based selection.
    for idx in indicies:
        new = (fi.loc[fi['index'] == idx]
                 .rename(columns={'value': idx})
                 .sort_values(by='date')
                 .set_index('date'))
        # Drop the first remaining column so only the value series is merged
        # (presumably the index-code column; verify against the table schema).
        new = new.drop(new.columns[[0]], axis=1)
        df = df.merge(new, how='outer', left_index=True, right_index=True)

    df = df.rename(columns={sector_column: 'sector'})

    # The outer merge adds dates that have no price row; discard those and
    # any rows with a zero opening price.
    df = df.drop(df[df.opening == 0].index)
    df = df.dropna(subset=['opening'])

    # Forward-fill the index series. Done as one frame-level assignment:
    # the chained `df[col].interpolate(method='pad', inplace=True)` form is
    # deprecated and does not write back to `df` under pandas copy-on-write.
    df[fill_columns] = df[fill_columns].ffill()

    df = df.loc[df.index >= start]

    if len(df) < min_rows:
        print('Skipping', company.code, '- not enough history')
        continue

    # Wavelet denoising of the closing price: 4-level Haar decomposition,
    # hard-threshold every coefficient band at its own standard deviation,
    # then reconstruct.
    x = np.array(df['closing'])
    coeffs = pywt.wavedec(x, 'haar', level=4)
    coeffs = [pywt.threshold(c, np.std(c), mode="hard") for c in coeffs]
    tx = pywt.waverec(coeffs, 'haar')
    # For odd-length input the reconstruction is one sample longer and the
    # extra sample sits at the END (the transform pads the tail), so trim the
    # tail; trimming the head would shift the whole series by one day.
    df['dwt'] = tx[:len(x)]

    # NOTE(review): columns are removed by position; which columns these are
    # depends on the StockLog schema and on the merge order above - confirm.
    # The last remaining column ('dwt') is the prediction target.
    df.drop(df.columns[[0, 1, 2, 3, 4, 5, 6, -13, -12, -11, -10]], axis=1, inplace=True)
    df.dropna(inplace=True)
    dataset = df.values

    if dataset.shape[0] < min_rows:
        print('Skipping', company.code, '- not enough history')
        continue

    # Data split: the last DAYS rows are held out of training; `testing`
    # additionally carries the BATCH_SIZE rows that precede them.
    cut_off = dataset.shape[0] - DAYS
    training = dataset[:cut_off, :]
    testing = dataset[cut_off - BATCH_SIZE:, :]

    # Scale Training: the scaler is fitted on the training rows only.
    scaler = MinMaxScaler(feature_range=(0, 1))
    train_scaled = scaler.fit_transform(training)
    test_scaled = scaler.transform(testing)

    # Batch up data: inputs are BATCH_SIZE consecutive rows of all feature
    # columns, targets are the next DAYS values of the last column.
    # The upper bound is inclusive of the final full target window (+1).
    x_train, y_train = [], []
    for i in range(BATCH_SIZE, len(training) - DAYS + 1):
        x_train.append(train_scaled[i - BATCH_SIZE:i, :-1])
        y_train.append(train_scaled[i:i + DAYS, -1])
    x_train, y_train = np.array(x_train), np.array(y_train)

    # Define model
    model = Sequential()
    model.add(LSTM(units=LSTM_UNITS, return_sequences=True, input_shape=(x_train.shape[1], x_train.shape[2])))
    model.add(Dropout(DROPOUT))
    model.add(LSTM(units=LSTM_UNITS))
    model.add(Dropout(DROPOUT))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(DAYS))

    # Run training
    print('Training model')
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(x_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=0)

    # Forecast from the most recent BATCH_SIZE rows, i.e. the window that ends
    # on the last known day, so the DAYS outputs line up with the DAYS working
    # days that follow it. (A window ending one row earlier would put the
    # last known day itself in the first output slot.)
    x_test = test_scaled[np.newaxis, -BATCH_SIZE:, :-1]
    y_test = model.predict(x_test)

    # Undo the min-max scaling of the target column only:
    # scaled = raw * scale_ + min_  =>  raw = (scaled - min_) / scale_.
    # Cast first so the stored values are float64 regardless of model dtype.
    prediction = (y_test[0].astype(np.float64) - scaler.min_[-1]) / scaler.scale_[-1]

    # The next DAYS working days after the last row used for the forecast.
    last_date = max(df.index)
    forecast_dates = [cal.add_working_days(last_date, offset)
                      for offset in range(1, DAYS + 1)]

    print('Writing to database')
    for forecast_date, value in zip(forecast_dates, prediction):
        sl = StockLog()
        sl.date = forecast_date
        sl.code = company.code
        sl.prediction = value
        sl.save()

Processing AWC
Training model
Writing to database
Processing ALU
Training model
Writing to database
Processing NAB
Training model
Writing to database
Processing AMC
Training model
Writing to database
Processing AMP
Training model
Writing to database
Processing ANN
Training model
Writing to database
Processing ANZ
Training model
Writing to database
Processing APA
Training model
Writing to database
Processing ALL
Training model
Writing to database
Processing ASX
Training model
Writing to database
Processing AST
Training model
Writing to database
Processing BOQ
Training model
Writing to database
Processing BEN
Training model
Writing to database
Processing BHP
Training model
Writing to database
Processing BSL
Training model
Writing to database
Processing BLD
Training model
Writing to database
Processing BXB
Training model
Writing to database
Processing CTX
Training model
Writing to database
Processing CGF
Training model
Writing to database
Processing CHC
Training model
Writing to database


KeyboardInterrupt: 