In [1]:
import pandas as pd
from bqplot import ColorScale, ColorAxis, DateScale, LinearScale, Axis, Lines, Figure
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
init_notebook_mode(connected=True)
from ipywidgets import Label, VBox, HBox, Layout, Accordion, Dropdown, widgets, interact, Button, Output
from bqplot.market_map import MarketMap
import os
from datetime import datetime
from iexfinance.stocks import get_historical_data
import numpy as np
import matplotlib.pyplot as plt
from pandas import Series
import warnings
from IPython.display import clear_output
warnings.simplefilter("ignore", DeprecationWarning)

## RETRIEVE AND CLEAN DATA

In [94]:
# Read the ETF reference sheet and discard rows that are exact duplicates.
etf_data = pd.read_excel('../us_etfs.xlsx').drop_duplicates()
# Lower-case every header so the ticker column is reachable as 'ticker'.
etf_data.columns = [header.lower() for header in etf_data.columns]
# Ticker symbols as a plain list, in sheet order.
etf_list = etf_data['ticker'].tolist()
def get_iex_data(stock_list, start, end):
    """Fetch historical data for every symbol and tag each frame with its ticker.

    Parameters
    ----------
    stock_list : iterable
        Symbols passed one at a time to ``get_historical_data``.
    start, end
        Forwarded unchanged to ``get_historical_data``.

    Returns
    -------
    list of pandas.DataFrame
        One frame per symbol, in input order. Gaps are filled with
        ``DataFrame.interpolate()`` and a constant ``'ticker'`` column holds
        the symbol.
    """
    def fetch_one(symbol):
        history = get_historical_data(symbol, start, end, output_format='pandas')
        frame = pd.DataFrame(history).interpolate()
        frame['ticker'] = symbol
        return frame

    return [fetch_one(symbol) for symbol in stock_list]

In [162]:
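# NOTE(review): `full` is read by later cells, but its only visible assignment is the
# disabled fetch below; re-enable it (or load a saved copy) before a Restart & Run All.
# full = get_iex_data(etf_list, start=None, end=None)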

In [97]:
def lstm_clean_data(data):
    """Return cleaned copies of the per-ticker frames with a next-row target.

    For each frame: move the index into a ``'date'`` column, drop rows with
    any missing value, parse ``'date'`` as datetimes, index by it again, and
    add ``'Reg_Target'`` = the ``'close'`` of the following row (NaN on the
    last row).

    The input list and its frames are left untouched. Previously the list
    elements were overwritten in place, so re-running the cell on the same
    list dropped one more trailing row each time (the NaN ``'Reg_Target'``
    row was removed by ``dropna`` on the next pass).

    Parameters
    ----------
    data : list of pandas.DataFrame
        Frames whose index becomes a ``'date'`` column on ``reset_index()``
        and that contain a ``'close'`` column.

    Returns
    -------
    list of pandas.DataFrame
        New frames, in the same order as ``data``.
    """
    cleaned = []
    for raw in data:
        frame = (
            raw.reset_index()
            .dropna()
            .assign(date=lambda f: pd.to_datetime(f['date']))
            .set_index('date')
        )
        # shift(-1) pulls the following row's close onto the current row.
        frame['Reg_Target'] = frame['close'].shift(-1)
        cleaned.append(frame)
    return cleaned

def add_past(etf_list, times):
    """Add trailing close-price changes to every frame, in place.

    For each ``n`` in ``times`` a column named ``'{n}day return'`` is written
    holding ``close[t] - close[t - n]`` rounded to 3 decimals, so a price rise
    is positive. The value is an absolute price change, not a percentage.

    The previous version negated the difference, which reported gains as
    negative numbers on date-ascending data.

    Parameters
    ----------
    etf_list : list of pandas.DataFrame
        Frames with a ``'close'`` column; each frame is modified in place.
    times : iterable of int
        Look-back distances in rows.

    Returns
    -------
    list of pandas.DataFrame
        The same list object that was passed in.
    """
    for frame in etf_list:
        for n in times:
            frame['{}day return'.format(n)] = frame['close'].diff(periods=n).round(3)
    return etf_list

In [98]:
# Clean every per-ticker frame and attach the next-row close as 'Reg_Target'.
# NOTE(review): `full` has no active assignment in the visible cells - confirm it is defined first.
clean_full = lstm_clean_data(full)
# Add close-price changes over 1, 5, 21 and 252 rows ('1day return' ... '252day return').
data = add_past(clean_full, [1, 5, 21, 252])
# inp = add_past(clean_full, [1,2,3,4,5])

## RUN MODELS

In [7]:
from keras.models import Sequential
from keras.layers import Activation, Dense, LSTM, Dropout
from sklearn.preprocessing import MinMaxScaler
import timeit


Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.

Using TensorFlow backend.


In [8]:
def lstm_time_test_split(X, n_past, date):
    """Split one ticker's frame at ``date`` and scale it for the LSTM.

    Rows with ``'date' < date`` form the training set. Of the remaining rows,
    all but the last form the test set and the last row is the hold-out input.
    Features are every column except ``'date'``, ``'Reg_Target'``,
    ``'ticker'`` and the four ``'...day return'`` columns. Both scalers are
    fitted on training rows only.

    Parameters
    ----------
    X : pandas.DataFrame
        Frame whose ``reset_index()`` yields a ``'date'`` column.
    n_past : int
        Size of the middle (time-step) axis. The 2-D feature matrix is
        reshaped to ``(rows, n_past, features)``, which only succeeds for
        ``n_past == 1``.
    date
        Split point compared against the ``'date'`` column.

    Returns
    -------
    tuple
        ``(ticker, x_train, x_test, x_holdout, y_train, y_test, scaler,
        y_scaler)`` where ``scaler`` was fitted on the training features and
        ``y_scaler`` on the training targets.
    """
    X = X.reset_index()
    ticker = X['ticker'].iloc[0]
    non_features = ['date', 'Reg_Target', 'ticker', '1day return', '5day return', '21day return', '252day return']

    before = X[X['date'] < date]
    after = X[X['date'] >= date]

    def as_sequences(matrix):
        # (rows, features) -> (rows, n_past, features)
        return np.reshape(matrix, (matrix.shape[0], n_past, matrix.shape[1]))

    # Features: fit on the training window, then apply to every window.
    scaler = MinMaxScaler()
    train_features = before.drop(columns=non_features)
    scaler.fit(train_features)
    later_features = after.drop(columns=non_features)
    x_train = as_sequences(scaler.transform(train_features))
    x_test = as_sequences(scaler.transform(later_features[:-1]))
    x_holdout = as_sequences(scaler.transform(later_features[-1:]))

    # Targets: the final row is excluded from the test targets, matching x_test.
    y_scaler = MinMaxScaler()
    y_train = np.array(before['Reg_Target']).ravel().astype('float').reshape(-1, 1)
    y_scaler.fit(y_train)
    y_train = y_scaler.transform(y_train)
    y_test = np.array(after['Reg_Target']).ravel().astype('float')[:-1].reshape(-1, 1)
    y_test = y_scaler.transform(y_test)

    return ticker, x_train, x_test, x_holdout, y_train, y_test, scaler, y_scaler

def build_step_model(x_train, y_train, epoc):
    """Build, compile and fit the two-layer LSTM regressor.

    Architecture: LSTM(50, returning sequences) -> Dropout(0.2) -> LSTM(100)
    -> Dropout(0.2) -> Dense(1) -> ReLU, compiled with MSE loss and the Adam
    optimizer.

    Parameters
    ----------
    x_train : array
        Training inputs; axes 1 and 2 of its shape define ``input_shape``.
    y_train : array
        Training targets.
    epoc : int
        Number of training epochs.

    Returns
    -------
    tuple
        ``(model, history)`` where ``history`` is whatever ``model.fit``
        returns.
    """
    timesteps, n_features = x_train.shape[1], x_train.shape[2]
    model = Sequential([
        LSTM(50, input_shape=(timesteps, n_features), return_sequences=True),
        Dropout(0.2),
        LSTM(100, return_sequences=False),
        Dropout(0.2),
        Dense(1),
        Activation('relu'),
    ])
    model.compile(loss='mse', optimizer='adam')
    # shuffle=False keeps the batches in row order.
    history = model.fit(
        x_train,
        y_train,
        epochs=epoc,
        batch_size=64,
        validation_split=.07,
        verbose=2,
        shuffle=False,
    )
    return model, history

def yield_preds(model, scaler, x_test, x_holdout, y_test):
    """Predict on the test and hold-out inputs and undo the target scaling.

    Parameters
    ----------
    model
        Object with a ``predict`` method.
    scaler
        Object with an ``inverse_transform`` method.
    x_test, x_holdout
        Inputs passed to ``model.predict``.
    y_test
        Accepted for call compatibility; not used. The previous version
        inverse-transformed it into a local that was never read.

    Returns
    -------
    tuple
        ``(preds, today_pred)``: the inverse-transformed predictions for
        ``x_test`` and for ``x_holdout``.
    """
    preds = scaler.inverse_transform(model.predict(x_test))
    today_pred = scaler.inverse_transform(model.predict(x_holdout))
    return preds, today_pred

def run_all_lstms(data, split, epoc):
    """Train one LSTM per ticker frame and collect its predictions.

    Each frame is split with ``lstm_time_test_split(frame, 1, split)``, a
    model is fitted with ``build_step_model`` and predictions come from
    ``yield_preds``.

    Results are gathered in dicts and turned into frames once, instead of
    inserting one column at a time into an empty DataFrame. Empty ``data``
    now returns two empty frames rather than failing on ``data[0]``.

    Parameters
    ----------
    data : list of pandas.DataFrame
        Per-ticker frames. Every ticker must yield the same number of test
        predictions, otherwise building the result frame raises ValueError.
    split
        Split point forwarded to ``lstm_time_test_split``.
    epoc : int
        Number of epochs forwarded to ``build_step_model``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(out, tomorrow)``. ``out`` has one column per ticker with the test
        predictions, indexed by the last ``len(out)`` index labels of
        ``data[0]``. ``tomorrow`` has one column per ticker with the hold-out
        prediction.
    """
    start = timeit.default_timer()
    test_preds = {}
    next_preds = {}
    for i, frame in enumerate(data):
        ticker, x_train, x_test, x_holdout, y_train, y_test, scaler, y_scaler = lstm_time_test_split(frame, 1, split)
        print('Model #: {}'.format(i))
        model, history = build_step_model(x_train, y_train, epoc)
        preds, future = yield_preds(model, y_scaler, x_test, x_holdout, y_test)
        test_preds[ticker] = preds.flatten()
        next_preds[ticker] = future.flatten()
    out = pd.DataFrame(test_preds)
    tomorrow = pd.DataFrame(next_preds)
    # With no rows there are no labels to attach (and data[0] may not exist).
    if len(data) > 0 and len(out) > 0:
        out = out.set_index(data[0].index[-len(out):])
    stop = timeit.default_timer()
    print('Time: ', stop - start)
    return out, tomorrow

## MAKE DASHBOARD INPUTS

In [99]:
def make_yesterday_mapframe(ref):
    """Join the last row of every ticker frame onto the ETF reference table.

    Parameters
    ----------
    ref : list of pandas.DataFrame
        Per-ticker frames containing ``'ticker'`` and ``'Reg_Target'``
        columns.

    Returns
    -------
    pandas.DataFrame
        The notebook-level ``etf_data`` merged with the last rows on
        ``'ticker'`` (default inner join), without the ``'Reg_Target'``
        column.
    """
    # reset_index() turns the index into a column so it survives in the row.
    latest_rows = [frame.reset_index().iloc[-1] for frame in ref]
    yesterday_inputs = pd.DataFrame(latest_rows)
    merged = pd.merge(etf_data, yesterday_inputs, on='ticker')
    return merged.drop(columns='Reg_Target')

In [117]:
# One row per ticker: reference-sheet fields joined with that ticker's last data row.
mapframe_y = make_yesterday_mapframe(data)
# Relabel the columns positionally for display. Seventeen names are assigned in order,
# so the merged frame must have exactly seventeen columns in this order.
# NOTE(review): presumably the first seven names are the spreadsheet's columns and the
# rest the per-ticker fields - confirm against us_etfs.xlsx.
mapframe_y.columns = ['ticker','Fund Name','Issuer','Expense Ratio','Total Assets','spread (%)','Segment','date','Open','High','Low','Previous Close',
                    'volume','1 Day return', '1 Week return', '1 Month return','1 Year return']

In [102]:
def make_daily_mapframe(ref, value):
    """Collect one column from every ticker frame into a wide frame.

    Parameters
    ----------
    ref : list of pandas.DataFrame
        Per-ticker frames with a ``'ticker'`` column.
    value : str
        Name of the column to collect from each frame.

    Returns
    -------
    pandas.DataFrame
        One column per ticker, named after the first ``'ticker'`` value of
        its frame. Columns are assigned one by one, so pandas aligns every
        later Series to the index established by the first frame.
    """
    mapframe = pd.DataFrame()
    for frame in ref:
        symbol = frame['ticker'].iloc[0]
        mapframe[symbol] = frame[value]
    return mapframe

In [103]:
# Wide frame of 'close' values: one column per ticker.
mapframe_daily_close = make_daily_mapframe(data, 'close')

In [123]:
# Keep the frames whose ticker belongs to the ETF universe. The previous test
# (`etf_list in list(i['ticker'])`) asked whether the whole list was one of the
# ticker values, which is never true, so the result was always empty.
df = [frame for frame in full if frame['ticker'].isin(etf_list).any()]

In [13]:
# Load predictions from CSV and index them by the 'date' column.
# NOTE(review): presumably a saved run_all_lstms result - confirm; run_all_lstms is not called in the visible cells.
mapframe_preds = pd.read_csv('important_backup.csv').set_index('date')

In [126]:
def explore(ticker):
    """Show a candlestick chart for one ticker.

    Picks the first frame in the notebook-level ``full`` list whose
    ``'ticker'`` column contains ``ticker`` and plots its open/high/low/close
    values against ``'date'``. The x axis carries 1-month, 6-month and "all"
    range buttons and no range slider.

    Parameters
    ----------
    ticker : str
        Symbol to plot.

    Raises
    ------
    IndexError
        If no frame in ``full`` contains ``ticker``.
    """
    matches = [frame for frame in full if ticker in list(frame['ticker'])]
    df = matches[0]

    range_buttons = [
        dict(count=1, label='1m', step='month', stepmode='backward'),
        dict(count=6, label='6m', step='month', stepmode='backward'),
        dict(step='all'),
    ]
    xaxis = dict(
        rangeselector=dict(buttons=range_buttons),
        rangeslider=dict(visible=False),
    )
    layout = go.Layout(title='{} Price (USD)'.format(ticker), xaxis=xaxis)

    candles = go.Candlestick(
        x=df.reset_index()['date'],
        open=df['open'],
        high=df['high'],
        low=df['low'],
        close=df['close'],
    )
    iplot(go.Figure(data=[candles], layout=layout))
# Render a widget for choosing a ticker from etf_list; explore is re-run with the chosen value.
interact(explore, ticker=etf_list)

<function __main__.explore>

In [161]:
# Colour scale used by the map and by the ColorAxis created in on_d1_change.
col = ColorScale()
segments = mapframe_y['Segment'].values
# `issuers` is not referenced again in this cell.
issuers = mapframe_y['Issuer'].values
# NOTE(review): names come from etf_list while groups/ref_data come from mapframe_y
# (an inner merge) - presumably both have the same length and order; confirm.
market_map1 = MarketMap(names=etf_list,      
                       # data shown for each tile: reference rows, initial grouping and grid size
                       ref_data=mapframe_y, groups=segments, cols=50, row_groups=8,
                       tooltip_fields=['Fund Name', 'Segment', 'Issuer', 'Expense Ratio', 'Total Assets', 'Previous Close', '1 Day return', '1 Week return', '1 Month return', '1 Year return'],
                       scales={'color': col},
                       layout={'min_width': '1500px', 'min_height': '1100px'})
market_map1.font_style = {'font-size': '8px', 'fill':'black'}
market_map1.title = 'ETF Market Map'
# Output area that receives the candlestick plots drawn by on_box_change.
out = Output()

# drop1 picks the column used for colouring, drop2 the column used for grouping.
drop1 = Dropdown(options=['Groups', '1 Day return', '1 Week return', '1 Month return', '1 Year return'], value= 'Groups', description='Color By:', disabled=False,)
drop2 = Dropdown(options=['Segment', 'Issuer'], value='Segment', description='Group By:', disabled=False,)
button1 = Button(description='Clear Plots')
selectors = HBox([drop1, drop2, button1])


def on_d1_change(change):
    """Recolour the market map when the "Color By" dropdown value changes.

    Any option other than ``'Groups'`` names a column of ``mapframe_y``; its
    values become the map's colour data and a matching colour axis is shown.
    ``'Groups'`` has no column in ``mapframe_y`` (indexing it raised
    KeyError), so it instead clears the colour data and the axis.

    Parameters
    ----------
    change : dict
        Change notification; only ``type == 'change'`` notifications for the
        ``'value'`` trait are acted on.
    """
    if change['type'] == 'change' and change['name'] == 'value':
        if change['new'] == 'Groups':
            # NOTE(review): empty colour data is expected to fall back to group colours - confirm in bqplot.
            market_map1.color = []
            market_map1.axes = []
        else:
            market_map1.color = mapframe_y[change['new']]
            market_map1.axes = [ColorAxis(scale=col, label=change['new'], visible=True)]
        
def on_d2_change(change):
    """Regroup the market map when the "Group By" dropdown value changes.

    Parameters
    ----------
    change : dict
        Change notification; anything other than a ``type == 'change'``
        notification for the ``'value'`` trait is ignored.
    """
    is_value_change = change['type'] == 'change' and change['name'] == 'value'
    if not is_value_change:
        return
    market_map1.groups = mapframe_y[change['new']]
        
def on_box_change(change):
    """Plot a candlestick chart for the most recently selected map tile.

    The last entry of the new selection is looked up in the notebook-level
    ``full`` list and its chart is rendered into the ``out`` widget.

    An empty or missing selection is now ignored; previously it raised
    IndexError on ``change['new'][-1]``.

    Parameters
    ----------
    change : dict
        Change notification whose ``'new'`` entry is the current selection.

    Raises
    ------
    IndexError
        If no frame in ``full`` contains the selected ticker.
    """
    selected = change['new']
    # Nothing selected means there is no last element to plot.
    if selected is None or len(selected) == 0:
        return
    ticker = selected[-1]
    with out:
        df = [i for i in full if ticker in list(i['ticker'])][-1]
        trace = go.Candlestick(x=df.reset_index()['date'], open=df['open'], high=df['high'], low=df['low'], close=df['close'])
        layout = go.Layout(title='{} Price (USD)'.format(ticker), xaxis=dict(
                rangeselector=dict(
                buttons=list([
                    dict(count=1, label='1m', step='month', stepmode='backward'),
                    dict(count=6, label='6m', step='month', stepmode='backward'),
                    dict(step='all')
                ])),
                rangeslider=dict(visible=False)))
        fig = go.Figure(data=[trace], layout=layout)
        iplot(fig)
    
def clear_plots(click):
    """Clear everything shown in the ``out`` widget.

    ``click`` is the argument supplied by the button's click callback; it is
    not used.
    """
    out.clear_output()
    
# observe() is called without a trait filter here; the handlers themselves
# act only on 'value' changes.
drop1.observe(on_d1_change)
drop2.observe(on_d2_change)
button1.on_click(clear_plots)
# React only to changes of the map's 'selected' trait.
market_map1.observe(on_box_change, 'selected')
# Stack controls, map and plot output; the bare expression displays the dashboard.
first_page = VBox([selectors, market_map1, out])
first_page