In [None]:
import os.path
from os import path
import requests
from datetime import datetime, date, time, timezone
# Credentials are read from the environment so that real secrets never have to
# be typed into (and saved or committed with) the notebook.  Each value falls
# back to the empty string when its variable is not set.
client_id = os.environ.get('TDA_CLIENT_ID', '') #your client id would go here
apikey = os.environ.get('TDA_API_KEY', '') #your API key goes here
callback_url = os.environ.get('TDA_CALLBACK_URL', '') #your callback URL goes here.
    
#The TD Ameritrade API expresses datetimes as milliseconds since the Unix epoch.
#We need to be able to convert standard datetimes into this form, if only
#so that we can request the appropriate date ranges.
def unix_time_millis(mydt):
    """Convert a datetime to milliseconds since the Unix epoch.

    Parameters
    ----------
    mydt : datetime.datetime
        A naive datetime is interpreted as UTC.  A timezone-aware datetime
        is converted using its own UTC offset.

    Returns
    -------
    float
        Milliseconds elapsed since 1970-01-01 00:00:00 UTC.
    """
    # Naive and aware datetimes cannot be subtracted from one another, so the
    # epoch is built with the same awareness as the input.
    if mydt.tzinfo is not None and mydt.utcoffset() is not None:
        epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
    else:
        epoch = datetime(1970, 1, 1)
    return (mydt - epoch).total_seconds() * 1000.0

# unix_time_millis treats a naive datetime as UTC, whereas datetime.now()
# returns naive *local* wall-clock time.  Take the current instant in UTC and
# drop the tzinfo so the printed epoch is correct on any machine.
now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
print(datetime.now())
print(unix_time_millis(now_utc))

In [None]:
#Here, I have compiled a CSV file of nasdaq traded stocks.  It is from this that I will make a list of the stocks that I want
#to request historical data for.
import csv 
import sys 
from collections import defaultdict
file_name = "nasdaqtraded.csv"
# newline='' is the csv module's recommended way to open files so that quoted
# fields containing line breaks are parsed correctly.
with open(file_name, 'r', newline='') as f:
    data = list(csv.reader(f, delimiter=',')) #reads csv into a list of lists

# The second column of every row is used as the ticker symbol.  Blank or
# single-column rows are skipped instead of raising IndexError.
# NOTE(review): if the CSV starts with a header row, its second cell ends up in
# `stocks` as well - confirm against the file.
stocks = [row[1] for row in data if len(row) > 1]

In [None]:
from joblib import load,dump
#We can form our payload for our request ahead of time based on what type of data we are trying to get.
# Query parameters shared by every request; 'startDate' is filled in per request.
payload = {'apikey':apikey,
           'frequencyType':'minute',
           'frequency':'30',
           'startDate':'',
           'needExtendedHoursData':'true'}

# The per-symbol files are written into this directory, so make sure it exists.
os.makedirs("stonks", exist_ok=True)

for symbol in stocks:
    # A file that already exists means this symbol was downloaded earlier.
    if path.exists("stonks/"+symbol+"30"):
        continue
    # Six parallel lists: [datetime(ms), volume, open, close, high, low]
    stocks30 = [[],[],[],[],[],[]]

    endpoint = r"https://api.tdameritrade.com/v1/marketdata/{}/pricehistory".format(symbol)

    #grab the 30min candles for the stock.  Set the date range to the appropriate epochs.  To cover
    #larger time periods, we must make multiple requests.
    for t in range(1556179200000,1578664377362,4000000000):
        # Build the parameters from the shared payload (the original referenced
        # an undefined `payload1`, which raised NameError on the first request).
        params = dict(payload, startDate=str(t))
        content = requests.get(url=endpoint, params=params, timeout=30)
        # Fail loudly on an HTTP error instead of a KeyError on 'candles' below.
        content.raise_for_status()
        history = content.json()

        for entry in history.get('candles', []):
            stocks30[0].append(int(entry['datetime']))
            stocks30[1].append(float(entry['volume']))
            stocks30[2].append(float(entry['open']))
            stocks30[3].append(float(entry['close']))
            stocks30[4].append(float(entry['high']))
            stocks30[5].append(float(entry['low']))

    #We dump the historical data of each requested stock into its own file for use later.
    dump(stocks30, "stonks/"+symbol+'30')


In [None]:
#Catalysts such as news, earning reports, dividends, etc are important for stock prediction.
#I manually scraped that information from within the TDAmeritrade GUI, and saved it into a CSV.  Now I can load that CSV into
#Pandas and save it in a more useful format
import pandas as pd
file_name = "Calendar.csv"
data = pd.read_csv(file_name)

#any given news event has an effect on stock prices both before and after the event.  So, we assign a range around the event of
#time periods that a given event will be applicable to
EVENT_WINDOW_SECONDS = 60000  # half-width of the window around each event, in seconds

event_times = pd.to_datetime(data['datetime'])
if event_times.dt.tz is not None:
    # Normalise timezone-aware values to naive UTC so the subtraction below is valid.
    event_times = event_times.dt.tz_convert('UTC').dt.tz_localize(None)

# Whole seconds since the Unix epoch.  Dividing a timedelta by one second does
# not depend on the datetime64 storage unit, which is not guaranteed to be
# nanoseconds (the original `astype('int64')//1e9` assumed it was).  The cast
# keeps the float dtype that the original floor-division by 1e9 produced.
data['datetime'] = ((event_times - pd.Timestamp("1970-01-01")) // pd.Timedelta(seconds=1)).astype('float64')
data['range1'] = data['datetime'] - EVENT_WINDOW_SECONDS
data['range2'] = data['datetime'] + EVENT_WINDOW_SECONDS

from collections import defaultdict
from joblib import load,dump
# Build {symbol: {(window_start, midpoint, window_end): event_type}} from the
# calendar frame and persist it with joblib.
sdict = defaultdict()
symbol_col = data['symbol']
start_col = data['range1']
end_col = data['range2']
type_col = data['type']
for row_pos in range(len(data)):
    window_start = start_col.iloc[row_pos]
    window_end = end_col.iloc[row_pos]
    # The midpoint of the window is the (integer) time of the event itself.
    midpoint = int((window_start + window_end) / 2)
    ticker = symbol_col.iloc[row_pos]
    if ticker not in sdict:
        sdict[ticker] = defaultdict()
    per_symbol = sdict[ticker]
    per_symbol[(window_start, midpoint, window_end)] = type_col.iloc[row_pos]

dump(sdict,"eventdict")

In [None]:
#At this point I began loading each file of stock data, and saving it in a dataframe with its relevant news data.
#It is at this point that we also compute potentially useful features for experimenting with.
#There is likely some confusing stuff in here - it was a large project with many challenges along the way.
#If someone actually reads through all this and is curious, feel free to email me with your questions and I will
#try to answer as best I can.

import csv 
import sys 
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from datetime import *
from collections import defaultdict
from joblib import load,dump  
import os.path
from os import path
file_name = "nasdaqtraded.csv"
# newline='' is the csv module's recommended way to open files.
with open(file_name, 'r', newline='') as f:
    data = list(csv.reader(f, delimiter=',')) #reads csv into a list of lists

# Keep only the symbols whose raw 30-minute history has been downloaded but
# whose feature dataframe has not been built yet.
stocks = []
for row in data:
    if len(row) < 2:
        # Blank/short row: there is no symbol column to read.
        continue
    symb = row[1]
    has_history = path.exists('stonks/'+symb+'30')
    already_built = path.exists("dataframes2/"+symb+"df")
    if has_history and not already_built:
        stocks.append(symb)
    
    
def _side_flags(closes, line):
    """Return (above, below) 0/1 lists telling whether each close is strictly above `line`.

    Any comparison with NaN is False, so bars where `line` is NaN are flagged
    as "below".
    """
    above = [1 if c > v else 0 for c, v in zip(closes, line)]
    below = [1 - flag for flag in above]
    return above, below


def _cross_touch_flags(opens, highs, lows, closes, line):
    """Classify every bar against an indicator line.

    Returns four 0/1 lists ``(touch_up, touch_down, cross_up, cross_down)``:

    * cross up   - open at/below the line, close above it
    * cross down - open above the line, close at/below it
    * touch up   - open and close at/below the line, but low <= line < high
    * touch down - open and close above the line, but low <= line < high

    A bar whose line value is NaN gets 0 in all four lists.
    """
    touch_up, touch_down, cross_up, cross_down = [], [], [], []
    for o, h, l, c, v in zip(opens, highs, lows, closes, line):
        opened_below = o <= v
        opened_above = o > v
        closed_below = c <= v
        closed_above = c > v
        straddles = l <= v and h > v
        cross_up.append(int(opened_below and closed_above))
        cross_down.append(int(opened_above and closed_below))
        touch_up.append(int(opened_below and closed_below and straddles))
        touch_down.append(int(opened_above and closed_above and straddles))
    return touch_up, touch_down, cross_up, cross_down


def _bounce_confirm_flags(closes, line, touch_up, touch_down, cross_up, cross_down):
    """Flag bouncebacks and confirmations relative to an indicator line.

    Returns four 0/1 lists ``(conf_up, conf_down, bounce_up, bounce_down)``.
    The first two bars are always 0.  For bar ``i >= 2``:

    * bounceback   - bar i-1 touched the line and bar i neither touches nor
                     crosses it; up when close[i] > line[i], otherwise down.
    * confirmation - bar i-1 crossed the line, bar i-2 closed on the side the
                     cross came from, and bar i closes on the new side.
    """
    n = len(closes)
    conf_up, conf_down = [0] * n, [0] * n
    bounce_up, bounce_down = [0] * n, [0] * n
    for i in range(2, n):
        touched_prev = touch_up[i-1] == 1 or touch_down[i-1] == 1
        quiet_now = (touch_up[i] == 0 and touch_down[i] == 0
                     and cross_up[i] == 0 and cross_down[i] == 0)
        if touched_prev and quiet_now:
            if closes[i] > line[i]:
                bounce_up[i] = 1
            else:
                bounce_down[i] = 1
        elif closes[i-2] < line[i-2] and cross_up[i-1] == 1 and closes[i] > line[i]:
            conf_up[i] = 1
        elif closes[i-2] > line[i-2] and cross_down[i-1] == 1 and closes[i] < line[i]:
            conf_down[i] = 1
    return conf_up, conf_down, bounce_up, bounce_down


# Calendar event type -> name of the dataframe column that flags it.
EVENT_COLUMNS = {'Earnings': 'earnings',
                 'Econoday event': 'econoday',
                 'Futures Liquidation': 'futures',
                 'Split': 'split',
                 'Dividend': 'dividend',
                 'Conference Call': 'confcall'}

eventdict = load("eventdict")

# Bookkeeping that the loop below relies on.  None of these was initialised
# before, so the cell raised NameError on a fresh kernel.
found = set()   # symbols that are already done or were rejected
placed = []     # symbols whose dataframe was written by this run
tsize = 0       # total number of rows written by this run

# The dataframes are written into this directory, so make sure it exists.
os.makedirs("dataframes2", exist_ok=True)

for symbol in stocks:

    if symbol in found:
        continue

    newpath = "dataframes2/"+symbol+"df"
    if path.exists(newpath):
        found.add(symbol)
        continue
    # Six parallel lists: [datetime(ms), volume, open, close, high, low]
    stocks30 = load('stonks/'+symbol+'30')

    # Too little history to be useful.
    if len(stocks30[1]) < 2000:
        found.add(symbol)
        continue

    df2 = pd.DataFrame()

    df2['vol'] = stocks30[1]
    df2['volpct'] = df2['vol'].pct_change()
    df2['open'] = stocks30[2]
    df2['close'] = stocks30[3]
    df2['high'] = stocks30[4]
    df2['low'] = stocks30[5]

    epochs_ms = [int(x) for x in stocks30[0]]
    df2['epoch'] = epochs_ms
    # fromtimestamp() converts to the local time zone of the machine running
    # the notebook, so 'hour'/'minute' depend on where this is executed.
    bar_times = [datetime.fromtimestamp(ms/1000) for ms in epochs_ms]
    df2['hour'] = [bt.hour for bt in bar_times]
    df2['minute'] = [bt.minute for bt in bar_times]

    # Drop bars with a zero price, then bars with NaN/inf anywhere (this also
    # removes the first bar and bars that follow a zero-volume bar, via 'volpct').
    nonzero_prices = (df2[['open', 'close', 'high', 'low']] != 0).all(axis=1)
    df2 = df2[nonzero_prices]
    df2 = df2.replace([np.inf, -np.inf], np.nan).dropna()
    df2 = df2.reset_index(drop=True)

    df2['mean'] = (df2['open']+df2['close'])/2
    df2['highdiffclose'] = (df2['high']-df2['close'])/df2['close']
    df2['lowdiffclose'] = (df2['close']-df2['low'])/df2['close']

    #calculate vwap over a rolling 200-bar window
    price_volume = (df2['high']+df2['low']+df2['close'])/3*df2['vol']
    df2['vwap'] = price_volume.rolling(200).sum()/df2['vol'].rolling(200).sum()

    #calculate RSI
    # NOTE(review): the values below are 1/14 * SMA14[i] + 13/14 * SMA14[i-1],
    # which is not the recursive Wilder average the original comments describe
    # ("AvgUt = 1/14 * Ut + 13/14 * AvgUt-1").  The computation is kept exactly
    # as it was so previously generated features stay comparable.
    delta = df2['close'].diff()
    gains = delta.mask(delta <= 0, 0)
    losses = delta.mask(delta >= 0, 0)
    avg_gain = gains.rolling(14).mean()
    avg_loss = losses.rolling(14).mean().abs()
    smooth_gain = avg_gain*1/14 + avg_gain.shift(1)*13/14
    smooth_loss = avg_loss*1/14 + avg_loss.shift(1)*13/14
    # The first 14 bars are forced to 0 (so their RSI is 0/0 -> NaN).
    smooth_gain.iloc[:14] = 0
    smooth_loss.iloc[:14] = 0
    rs = smooth_gain/smooth_loss
    df2['rsi'] = 100 - 100/(1+rs)

    df2['closepct'] = df2['close'].pct_change()

    #percent change trends
    sizelist = [9,30,90,200]
    for size in sizelist:
        df2['median'+str(size)] = df2['closepct'].rolling(size).median()
        df2['pctchange'+str(size)] = df2['close'].pct_change(size)
        # 'highest'/'lowest' are overwritten on every pass; the saved frame
        # ends up holding the values for the last window size.
        df2['highest'] = df2['high'].rolling(size).max()
        df2['lowest'] = df2['low'].rolling(size).min()
        df2['hdc'+str(size)] = (df2['highest']-df2['close'])/df2['close']
        df2['ldc'+str(size)] = (df2['close']-df2['lowest'])/df2['close']

    df2['sma3'] = df2['close'].rolling(3).mean()
    df2['sma9'] = df2['close'].rolling(9).mean()

    #find touches, crosses, etc for vwap, sma3, sma9
    close = list(df2['close'])
    high = list(df2['high'])
    low = list(df2['low'])
    myopen = list(df2['open'])
    # Insertion order here fixes the column order of the saved dataframe.
    lines = {'sma3': list(df2['sma3']),
             'sma9': list(df2['sma9']),
             'vwap': list(df2['vwap'])}

    for name, line in lines.items():
        above, below = _side_flags(close, line)
        df2['closeabove'+name] = above
        df2['closebelow'+name] = below

    #remember, touches mean open and close on same side but high and low not on same side
    #remember, crosses means open and close on opp sides
    touch_cross = {name: _cross_touch_flags(myopen, high, low, close, line)
                   for name, line in lines.items()}
    for name in lines:
        df2['tu'+name] = touch_cross[name][0]
        df2['td'+name] = touch_cross[name][1]
    for name in lines:
        df2['cu'+name] = touch_cross[name][2]
        df2['cd'+name] = touch_cross[name][3]

    #confirmations and bouncebacks
    bounce_confirm = {name: _bounce_confirm_flags(close, line, *touch_cross[name])
                      for name, line in lines.items()}

    #now we can place events
    event_flags = {column: [0]*len(df2) for column in EVENT_COLUMNS.values()}
    for i, epoch_ms in enumerate(df2['epoch']):
        t = epoch_ms/1000
        for (r1, m, r2), mytype in eventdict.get(symbol, {}).items():
            if r1 < t < r2 and mytype in EVENT_COLUMNS:
                # +1 at or after the event time, -1 inside the window before it.
                event_flags[EVENT_COLUMNS[mytype]][i] = 1 if t >= m else -1

    # Target for bar i = closepct[i+1] + closepct[i+2]; the last two bars get 0.
    forward = list(df2['closepct'].rolling(2).sum()) + [0,0]
    df2['targets'] = forward[2:]

    for name in lines:
        df2['confup'+name] = bounce_confirm[name][0]
        df2['confdown'+name] = bounce_confirm[name][1]
    for name in lines:
        df2['bbup'+name] = bounce_confirm[name][2]
        df2['bbdown'+name] = bounce_confirm[name][3]

    for column in EVENT_COLUMNS.values():
        df2[column] = event_flags[column]

    # NOTE(review): the dataset-building cell further down reads the columns
    # 'vwapdiffclose', 'targets1' and 'targets5', none of which is created
    # here - confirm their intended definitions and add them.
    dump(df2, "dataframes2/"+symbol+"df")
    placed.append(symbol)

    tsize += len(df2)
    

In [None]:
#Now I form datasets for my neural network.  I'll make a testing set, and a training set.  
#I am limited by memory, but there is no reason a person could not make many training sets and alternate
#data between batches.
#The datasets are in the form of numpy arrays, since they will be used with keras/tensorflow.
#My labels array has three rows, each row corresponds to a different "prediction range" - meaning that we
#will only want to use one of them at a time.
import csv 
import sys 
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from datetime import *
from collections import defaultdict
from joblib import load,dump  
import os.path
from os import path
file_name = "nasdaqtraded.csv"
# newline='' is the csv module's recommended way to open files.
with open(file_name, 'r', newline='') as f:
    data = list(csv.reader(f, delimiter=',')) #reads csv into a list of lists

# Keep only the symbols for which a feature dataframe has been saved.  Blank or
# single-column rows are skipped instead of raising IndexError.
stocks = [row[1] for row in data
          if len(row) > 1 and path.exists("dataframes2/"+row[1]+"df")]

    
    
    
    
import random

random.shuffle(stocks)

# NOTE(review): the original cell could not run: `for repeat in range(2):` had
# no body, `found`, `total` and `jj` were never defined, and `snum`/`total`
# never advanced.  The loop below is a reconstruction of the apparent intent
# (1000 stocks x 60 windows = 60000 rows per dataset) - confirm.
N_SETS = 2
STOCKS_PER_SET = 1000
SAMPLES_PER_STOCK = 60
SET_SIZE = STOCKS_PER_SET*SAMPLES_PER_STOCK   # 60000 rows per dataset
WINDOW = 500                                  # bars per sample

# Dataframe columns copied into each array, in channel order.
# NOTE(review): 'vwapdiffclose', 'targets1' and 'targets5' are not written by
# the feature cell in this notebook - confirm the saved frames contain them.
TARGET_COLUMNS = ('targets', 'targets1', 'targets5')
PA_COLUMNS = ('closepct', 'highdiffclose', 'lowdiffclose', 'vwapdiffclose')
CAT_COLUMNS = ('earnings', 'econoday', 'futures', 'split', 'confcall')  # 'dividend' was not copied originally either
TIME_COLUMNS = ('hour', 'minute')

pa = np.zeros((SET_SIZE,WINDOW,4)) #closepct,hdc,ldc,vdc,
cat = np.zeros((SET_SIZE,WINDOW,5)) #earnings, econoday, futures, split, confcall
time = np.zeros((SET_SIZE,WINDOW,2)) #hour, minute
targets = np.zeros((SET_SIZE,3)) #targets,targets1,targets5

found = set()   # symbols already sampled into a dataset, or rejected
os.makedirs("datasets", exist_ok=True)

for repeat in range(N_SETS):
    total = 0   # next free row in the arrays
    snum = 0    # stocks sampled into the current dataset
    for symbol in stocks:
        if symbol in found:
            continue

        df = load("dataframes2/"+symbol+"df")
        if len(df) < 1000:
            found.add(symbol)
            continue

        arr3 = np.array(df['targets'])[200:-500]
        candidates = np.arange(200,arr3.shape[0]-500)
        if candidates.size < SAMPLES_PER_STOCK:
            # Not enough distinct start positions; np.random.choice would raise.
            found.add(symbol)
            continue
        indexes = np.random.choice(candidates, SAMPLES_PER_STOCK, replace=False)

        # Convert each column once per stock rather than once per sample.
        target_cols = [np.array(df[name]) for name in TARGET_COLUMNS]
        pa_cols = [np.array(df[name]) for name in PA_COLUMNS]
        cat_cols = [np.array(df[name]) for name in CAT_COLUMNS]
        time_cols = [np.array(df[name]) for name in TIME_COLUMNS]

        for z in indexes:
            # Inputs are the 500 bars z+200 .. z+699; the label is read at bar z+700.
            window = slice(z+200, z+700)
            for k, column in enumerate(target_cols):
                targets[total,k] = column[z+700]
            for k, column in enumerate(pa_cols):
                pa[total,:,k] = column[window]
            for k, column in enumerate(cat_cols):
                cat[total,:,k] = column[window]
            for k, column in enumerate(time_cols):
                time[total,:,k] = column[window]
            total += 1

        found.add(symbol)
        snum += 1
        if snum == STOCKS_PER_SET:
            dump(pa,"datasets/pa"+str(repeat))
            dump(cat,"datasets/cat"+str(repeat))
            dump(time,"datasets/time"+str(repeat))
            dump(targets,"datasets/target"+str(repeat))
            break
    else:
        # Ran out of stocks before the dataset was full; nothing is written.
        print("dataset", repeat, "not written: only", snum, "of", STOCKS_PER_SET, "stocks available")
    
   

In [None]:
#Now we can finally get to deep learning.  I used google cloud for training this network, so some of the code may be
#specific to using google cloud.
import tensorflow as tf
import numpy as np
import pandas as pd
from joblib import load,dump
#!pip3 install git+git://github.com/keras-team/keras.git --upgrade --no-deps
from keras.layers import Input, Dense, Concatenate, Dropout, BatchNormalization, LSTM, Bidirectional, Flatten, Permute, RepeatVector, Multiply, Lambda, Reshape, Cropping1D, Cropping2D, Conv1D, Layer
from keras.models import Model
from keras import optimizers
from keras import regularizers
from keras.initializers import Ones
from keras import backend as K
#It may seem strange, but I trim my data to 59968 samples.  I did this in an earlier iteration
#of the notebook, because I was experimenting with wackier model designs,
#and the designs I created were breaking Keras in a way where it couldn't handle
#the inputs if they were not an exact multiple of my batch size.
N_SAMPLES = 59968

targets = load('target2') #targets,targets1,targets5
targets = targets[:,0]
targets = targets.reshape((-1,1))
pa = load('pa2') #closepct,hdc,ldc,vdc,
cat = load('cat2') #'earnings', 'econoday', 'futures', 'split', 'dividend', 'confcall',
times = load('time2') #create on the fly

#At this point I made my time data sequence a one-hot encoded sequence.  It seems I never actually used an embedding layer for
#this, and that's something I may want to try in the future
# Channel layout of `time` (same as the original element-by-element loop):
#   0      - hour < 12
#   0..11  - (hour - 12) for hour >= 12
#   12     - hour >= 12
#   13     - minute == 30
# NOTE(review): hour 12 sets channel 0, which is also the "hour < 12" flag -
# confirm this overlap is intended.
hours = times[:N_SAMPLES,:,0].astype(int)
minutes = times[:N_SAMPLES,:,1].astype(int)
del times

is_pm = hours >= 12
time = np.zeros((N_SAMPLES,500,14))
time[:,:,0] = ~is_pm
pm_rows, pm_steps = np.nonzero(is_pm)
time[pm_rows, pm_steps, hours[is_pm]-12] = 1
time[is_pm, 12] = 1
time[minutes == 30, 13] = 1
del hours, minutes, is_pm, pm_rows, pm_steps

pa = pa[:N_SAMPLES]
cat = cat[:N_SAMPLES]

#I will use binary crossentropy because the model is more likely to learn if the labels are simple
# 1.0 where the target return is positive, else 0.0.  (The original sliced
# `labels` before it was created, which raised NameError.)
labels = (targets[:N_SAMPLES] > 0).astype(float)


In [None]:
# Build and compile the Keras functional model: three sequence inputs of 500
# timesteps (price action, event flags, one-hot time) plus a constant input.
# `memone` is an array of ones that is passed to `mem_input` at fit time.
memone = np.ones((59968,1))
pa_input = Input(shape=(500,4))
cat_input = Input(shape=(500,5))
time_input = Input(shape=(500,14))
# NOTE(review): mem_input is listed as a model input below but is not connected
# to any layer that feeds `output`.
mem_input = Input(shape=(1,))

# Encode the event and time sequences with small LSTMs (4 units per timestep),
# then concatenate them with the raw price-action channels.
cat_encoding = LSTM(4, activation='relu', return_sequences=True, dropout=0.5)(cat_input)
time_encoding = LSTM(4, activation='relu', return_sequences=True, dropout=0.5)(time_input)
mainseq = Concatenate()([pa_input, cat_encoding, time_encoding])
activations = Bidirectional(LSTM(16, activation='relu', return_sequences=True))(mainseq)
# Attention: one tanh score per timestep, flattened to 500 values, passed
# through a softmax Dense(500), then repeated 32 times and permuted so it can
# be multiplied element-wise with `activations`.
# (32 presumably = 2 x 16 units of the bidirectional LSTM - confirm.)
attention = Dense(1,activation='tanh')(activations)
attention = Flatten()(attention)
attention = Dense(500, activation='softmax')(attention)
attention = RepeatVector(32)(attention)
attention = Permute([2, 1])(attention)
result = Multiply()([activations, attention])
result = Flatten()(result)
result = Dense(256,activation='relu')(result)
result = BatchNormalization()(result)

# Second branch: crop 495 steps from the start of the sequence, keeping only
# the last 5 timesteps, and summarise them with another bidirectional LSTM.
last5 = Cropping1D((495,0))(mainseq)
last5 = Bidirectional(LSTM(16,activation='relu'))(last5)
total = Concatenate()([result,last5])
total = Dense(64, activation='relu')(total)
# NOTE(review): `context` is never used - the output layer is built on `total`,
# so this BatchNormalization is not part of the model.  Confirm whether
# `output` was meant to take `context`.
context = BatchNormalization()(total)
output = Dense(1, activation='sigmoid')(total)
  
model = Model(inputs=[pa_input,cat_input,time_input,mem_input], outputs=output)

sgd = optimizers.SGD(lr=0.02, decay=1e-7, momentum=0.9, nesterov=True)

# Binary classification: the labels are 1 when the target return is positive.
model.compile(optimizer=sgd,
              loss='binary_crossentropy',
              metrics=['accuracy'])    
    
    

In [None]:
# Train for 25 epochs in batches of 64; the inputs are passed in the same order
# as the model's input list.
training_inputs = [pa, cat, time, memone]
model.fit(training_inputs, labels, epochs=25, batch_size=64)

#Actual best result on Google Cloud was around 52.5% for both training and validation.  Sounds bad, but I think
#it's actually pretty decent.