In [1]:
# Test of Git. Commit 10

# Instructions
# Download the Jupyter notebook containing the code: LeopardA.ipynb
# and open it (on Colorado) in VS Code.
# Create a python environment (in Anaconda), or use the existing environment on Anaconda on Colorado named: envLeopard
# Install the packages listed below (also shown as commented-out %pip lines in the imports cell) into that environment:
# 	# pip install yfinance
# 	# pip install tensorflow
# 	# pip install scikit-learn
# 	# pip install matplotlib
#   # pip install google.auth
#   # pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib

# Copy the model file: modelLeopardA.keras
# and the source data file: LeopardA.csv
# into a local directory, if necessary updating the corresponding directories in the code to point to them.
# Scroll down to the bottom of the code, and update variable iRunMain = 0
# Run the notebook by clicking "Run All" at the top of VS Code.
# Next, change iRunMain to equal 1, 
# Next run the second from last field by clicking debug cell, then place a red breakpoint marker just above: print(oPredicted)
# Then run debug cell on the last field, to start the program running properly.

# It will take a minute or so to complete, running the existing model to generate predictions on the 200 Test records.

# Once it hits the breakpoint, we can take a look at the running variables.

# The question is: Can the code detect the pattern that I have put in the source data?

# First look at array: nbxTest4D
# Within this, go to record [14], and look at the 20 items within this. The lowest item ([19]) = -0.00051344
# and the other items at lower numbers (corresponding to data further to the left) are all less than 0.001

# Next go to record [15], and look at the 20 items. Here item [19] = -0.019608
# which is a decline of 2% in the way I've done it.
# This is the pattern that should be detected: every time there is a decline of 2%, there should be a corresponding increase of 10% seven time periods later.

# So is this predicted?

# To see whether this is the case, go to array: nPredictedFinal
# and go to record [14]. Look at the 20 records within it, and note that the values are all small (<0.01).
# Now go to record [15], and look at the 20 records there. There we can see that up to record [5] the values are small. But from [6] onwards it is 0.0984 - i.e. around 0.1 or 10%. So the prediction has worked.

In [2]:

# LeopardF sample data documentation
# **********************************
# 
# So for price, it is 29-30	(7 forwards)
# For Social1, it is 38-39	(10 forwards)
# For Social2 it is 02-03		(15 forwards)
# 
# Source file: LeopardF_100000_sample_data.csv
# contains sample data with patterns I have created manually, for 10 stocks each with 10,000 Runs. So 100,000 rows in total, plus the header row.
# It contains the following columns:
# 
# viShareId			Integer ID from 10 to 19
# viRunId				Integer run ID from 1 to 10000
# vsTicker			S1 to S9
# vdtDateTime			datetime
# vfPrice				price as float. Note this is only used as a label in the script (no calculation is done on it).
# vfPriceChange		Ratio of the previous price to the next, minus one. This has been calculated in the source data. So values are like 0.0025 or 0.1 or -0.004
# vfSocial1Change		Social ratio 1
# vfSocial2Change		Social ratio 2
# 
# This is loaded into a pandas DataFrame, and the 6 numerical columns (all but vsTicker and vdtDateTime) are loaded into a numpy array: nAAll2D
# 
# This is split into training and test sets with 9800 and 200 records each.
# X and Y datasets are created from these, where each row in X is made up of not just the current record, but the previous 20 records, and each row in Y is the cumulative total of each of the following 20 records.
# Each training row is therefore training on the effect on the share price at every point from 1 to 20 in the future.
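# So the X train dataset nBXtrain3D has shape (97600, 20, 3)	(with the 3 being price change and social 1 and social 2), and the Y train dataset nBYtrain3D has shape (97600, 20, 1) - just the cumulative pricechange. (97600 = 10 stocks x 9760 windows, because 40 of each stock's 9800 training records cannot be used as primary records, for the same reason as in the test set below.)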
# Note that in both X and Y datasets, the numbering goes from earliest to latest. So X[19] is closest to the X value (looking backwards), and Y[0] is closest to the Y value (looking forwards).
# 
# 
# Once the model is trained on the training data, the predictions are run on the test data.
# The test X dataset nBXAlltest4D has shape (10,160,20,6). Note this is 160 not 200 because we need to go 20 forward and 20 back from each data point to get all the data we need - so 40 rows have to be rejected as primary test records.
# The 6 records are: stock, run, price, pricechange, social1, social2
# 
# 
# When the predictions are run, these predictions are saved into a copy of the test X dataset nBXAlltest4D, named nPredictions4D. This has the predicted value saved in the 7th field, and the actual cumulative value (from Y) in the 8th field.
# This is a slight conceptual bodge because it implies in error there is a connection between the pricechange and socials values in X, and the predicted values and actual values in Y.
# 
# I am defining the four dimensions as being:
# 	Stock	(0-9)
# 	Run		(0-159)
# 	Spread	(0-19)
# 	Asset	(0-7)
# 
# nPredictions4D[StockIndex(0-9), RunIndex(0-159), twenty spread (0-19), asset]
# where the above values are:
# 
# 	0	viShareId			Integer ID from 10 to 19
# 	1	viRunId				Integer run ID from 1 to 10000
# 	2	vfPrice				price as float. Note this is only used as a label in the script (no calculation is done on it).
# 	3	vfPriceChange		Ratio of the previous price to the next, minus one. An X value. This has been calculated in the source data. So values are like 0.0025 or 0.1 or -0.004
# 	4	vfSocial1Change		Social ratio 1. An X value.
# 	5	vfSocial2Change		Social ratio 2. An X value.
# 	6	Prediction
# 	7	Actual Y
# 
# Note that StockIndex and RunIndex are not the same as StockId and RunId. This is because you can have gaps in the StockId and RunId (due to missing data etc) but you cannot have gaps in StockIndex and RunIndex - which must start at 0 and rise without gaps.
# 
# This is all best explained with examples.
# 
# 
# Price spike
# ***********
# Open the above csv file, and scroll down to line 29852.
# This has data for the third stock (stockID=12 referenced as 2), for run 29852. And we can see here that pricechange is 0.02 - which is much higher than the typical random values of circa 0.001 etc.
# Consistent with the pattern in this data, 7 lines later at run 29859 the pricechange value spikes to 0.1
# We want therefore to see that the model has predicted this rise, and how this prediction is stored in nPredictions4D along with the actual Y value.
# 
# Looking at nPredictions4D (which has shape (10,160,20,8)), we want to look at the first record where the 0.02 change first appears as an X value
# So take line 29852, subtract 1 for the header row to give 29851, then subtract 20000 to give 9851 to have just the rows for this stock
# , and then subtract 9800 to remove the training data (leaving only the 200 for the test set) gives 51
# And then subtract 20, to get to 31 to reflect the fact that the data has to start (and end) 20 items in to the data, in order to ensure there are always 20 runs prior for X, and following for Y.
# So we want to find the 31st record in nPredictions4D. Which is referenced as (30) as we start from 0.
# And as the 0.02 record appears for the first time, it will be present at the most recent point in the 20, which is referenced as 19.
# 
# So let's run:
#   fPrint(nPredictions4D[2, 30, 19, 3])	#0.02
# to verify that this is indeed the 0.02 value that triggers the detection of the price change. It is.
# 
# (note that I use function fPrint, that I have defined earlier in the script, to do the printing, in order to stop the use of exponents E01 etc in the output). So: fPrint(nPredictions4D[2, 30, 6, :])
# 
# Next move 7 runs forward, to verify that the latest pricechange is indeed 0.1:
#   fPrint(nPredictions4D[2, 37, 19, 3])	#0.1
# 
# Next go back to nPredictions4D[2,30,19,3], and now look at the actual value (Y value) of the price for 6 and 7 spreads into the future (these are spreadindexes 5 and 6), and verify that the second is around 0.1 higher than the first.
#   fPrint(nPredictions4D[2,30,5,7])	#0.0004791183753658501
#   fPrint(nPredictions4D[2,30,6,7])	#0.10052703021290244
# These output:
# 	0.0004791183753658501
# 	0.10052703021290244
# So the difference between these (remember they are cumulative) is around 0.1
# 
# Next, is this change predicted? To see this, run on index 6 (predicted values) rather than 7 (actual Y values):
#   fPrint(nPredictions4D[2,30,5,6])	#0.0011756770545616746
#   fPrint(nPredictions4D[2,30,6,6])	#0.09948894381523132
# so yes it is.
# 
# Finally let's go back one run to before the 0.02 rise, and verify that the rise is NOT predicted.
# (note we have to increment the spread by one)
#   fPrint(nPredictions4D[2,29,6,6])	#0.0052693127654492855
#   fPrint(nPredictions4D[2,29,7,6])	#0.008663389831781387
# 
# and that of course the Y value do still have the rise:
#   fPrint(nPredictions4D[2,29,6,7])	#0.0204887007428729
#   fPrint(nPredictions4D[2,29,7,7])	#0.12253757081716032
# 
# 
# Social1 spike
# *************
# The same principle applies to social spike 1 (a little later than the price spike example).
# So in the above CSV file go to line 29861
# Here we can see that vfSocial1Change is shown as: -0.019608000000000
# (which is higher than the typical values in this column).
# Then go forward 10 lines to 29871 and see that there is a price spike of -0.090909090909091
# 
# So to see the first social change input X values, let's run the same first query, but on run 39 instead of 30, and on asset index 4 (vfSocial1Change) instead of 3:
# 	fPrint(nPredictions4D[2, 39, 19, 4])	#-0.019608
# Then 10 lines further down, look at the price change:
# 	fPrint(nPredictions4D[2, 49, 19, 3])  #-0.090909090909091
# 
# Now go back to fPrint(nPredictions4D[2, 39, 19, 4]), and now look at the actual value (Y value) of the price for 9 and 10 spreads into the future (these are spreadindex values 8 and 9), and verify that the second is around 0.1 lower than the first.
#   fPrint(nPredictions4D[2, 39, 8, 7])   #0.021082481702138667
#   fPrint(nPredictions4D[2, 39, 9, 7])   #-0.07174319845260135
# So yes the change is here, with the difference being around -0.1 as they are cumulative.
# 
# Next, is this change predicted? To see this, run the above on index 6 (predicted values) rather than 7 (actual Y values):
#   fPrint(nPredictions4D[2, 39, 8, 6])   #0.010913881473243237
#   fPrint(nPredictions4D[2, 39, 9, 6])   #-0.07884889841079712
# so yes it is (remember we are interested in the difference here, as the values are cumulative).
# 
# Finally let's go back one run to before the social change, and verify that the rise is NOT predicted.
# (note we have to increment the spread by one)
#   fPrint(nPredictions4D[2, 38, 9, 6])   #0.014400970190763474
#   fPrint(nPredictions4D[2, 38, 10, 6])  #0.016602344810962677
# and indeed it is not. Note that it is the -difference- between the above two we are interested in here.
# 
# and that of course the Y value from that Run (38) does still have the 0.1 drop:
#   fPrint(nPredictions4D[2, 38, 9, 7])   #0.02024992907694667
#   fPrint(nPredictions4D[2, 38, 10, 7])  #-0.07250006447550317
# 
# 
# Social2 spike
# *************
# Social spike 2 is seen in the CSV file on line 29825
# where we can see that vfSocial2Change is shown as:	-0.019608000000000
# Then go forward 15 lines to line 29840, and see that there is a price spike of: -0.090909090909091
# 
# So to see the first social change on line 29825, run the following to see the input X:
#   fPrint(nPredictions4D[2, 3, 19, 5])	#-0.019608
#   fPrint(nPredictions4D[2, 18, 19, 3])	#-0.090909090909091
# 
# Now go back to fPrint(nPredictions4D[2, 3, 19, 5]), and now look at the actual value (Y value) of the price for 14 and 15 spreads into the future (these are spreadindex values 13 and 14), and verify that the second is around 0.1 lower than the first.:
#   fPrint(nPredictions4D[2, 3, 13, 7])   #-0.0005855898365195733
#   fPrint(nPredictions4D[2, 3, 14, 7])   #-0.09144144530592702
# 
# Next, is this change predicted? To see this, run on index 6 (predicted values) rather than 7 (actual Y values):
#   fPrint(nPredictions4D[2, 3, 13, 6])   #0.030499937012791634
#   fPrint(nPredictions4D[2, 3, 14, 6])   #-0.060842156410217285
# so yes it is (remember we are interested in the difference here, as the values are cumulative).
# 
# Finally let's go back one run to before the social change, and verify that the rise is NOT predicted.
# (note we have to increment the spread by one)
#   fPrint(nPredictions4D[2, 2, 14, 6])   #0.018615230917930603
#   fPrint(nPredictions4D[2, 2, 15, 6])   #0.018593236804008484
# and indeed it is not. Note that it is the -difference- between the above two we are interested in here.
# 
# and that of course the Y value from that Run (2) does still have the 0.1 drop:
#   fPrint(nPredictions4D[2, 2, 14, 7])   #-0.0008223257331689826
#   fPrint(nPredictions4D[2, 2, 15, 7])   #-0.09165665975742654

In [3]:
# %pip install yfinance
# %pip install tensorflow
# %pip install scikit-learn
# %pip install matplotlib
# %pip install pyodbc
# %pip install google.auth
# %pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib

import pandas as pd
import numpy as np
import yfinance as yf
import pyodbc, struct
import datetime
import tensorflow as tf
from sklearn.metrics import mean_absolute_percentage_error # max_error #mean_absolute_percent_error
import matplotlib.pyplot as plt
import keras.models
import keras.layers
import keras.layers
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Conv1D
from keras.layers import MaxPooling1D
from keras.layers import Flatten
from keras.layers import TimeDistributed
from keras.layers import RepeatVector
from keras.layers import Dropout


def fBuildModel(n_outputs, n_timesteps, n_features):
    """Build and compile the Conv1D + LSTM sequence-to-sequence regression network.

    The input shape is declared once with an explicit Input layer instead of
    passing `input_shape=` to individual layers: that keyword is only
    meaningful on the first layer of a Sequential model, and recent Keras
    versions emit a warning whenever a layer receives it.

    Args:
        n_outputs: length of the predicted sequence (number of future steps).
        n_timesteps: number of past steps in each input sample.
        n_features: number of values per past step.

    Returns:
        A compiled Sequential model that maps input of shape
        (batch, n_timesteps, n_features) to output of shape (batch, n_outputs, 1).
        It is compiled with the 'adam' optimizer and mean-squared-error loss.
    """
    model = Sequential()
    model.add(keras.layers.Input(shape=(n_timesteps, n_features)))
    # Two stacked convolutions extract local patterns along the time axis;
    # padding='same' keeps the sequence length unchanged.
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', padding='same'))
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', padding='same'))
    # Halve the time axis, then collapse everything into one feature vector.
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    # Present that vector once per output step so the LSTMs can decode a sequence from it.
    model.add(RepeatVector(n_outputs))
    model.add(LSTM(200, activation='relu', return_sequences=True))
    model.add(LSTM(200, activation='relu', return_sequences=True))
    # The same dense head is applied to every output step; it produces one value per step.
    model.add(TimeDistributed(Dense(100, activation='relu')))
    model.add(TimeDistributed(Dense(1)))
    # compile the model
    model.compile(optimizer='adam', loss='mse')
    return model


def fPrint(nInputArray):
    """Print a value, rendering plain numpy arrays without exponent clutter.

    Objects whose type is exactly numpy.ndarray are formatted with
    np.array_str using 8 digits of precision and suppress_small=True.
    Anything else (including numpy scalars and ndarray subclasses) is
    printed via str().
    """
    if type(nInputArray) is np.ndarray:
        print(np.array_str(nInputArray, precision=8, suppress_small=True))
        return
    print(str(nInputArray))


In [4]:
def fGetSpread(nSpreadX3D, iDepthX, iDepthY) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
  """Cut per-stock records into look-back (X) and look-forward (Y) windows.

  For every stock and every anchor record i that satisfies
  iDepthX + 1 <= i <= records_per_stock - iDepthY, the X window holds records
  [i - iDepthX, i) and the Y window holds records [i, i + iDepthY). Both
  windows run from earliest to latest.

  Args:
    nSpreadX3D: array indexed [stock, record, column] with at least 6 columns.
      Column 3 is read as the price change and columns 4-5 as the two social
      changes; columns 0-2 are only carried through (fMain passes share id,
      run id and price there).
    iDepthX: number of records in each X window.
    iDepthY: number of records in each Y window.

  Returns:
    A 6-tuple (nXAll4D, nX4D, nX3D, nYAll4D, nY4D, nY3D):
      nXAll4D  [stock, window, iDepthX, all columns]  raw X windows
      nX4D     [stock, window, iDepthX, 3]            columns 3-5 of the X windows
      nX3D     [stock*window, iDepthX, 3]             nX4D with stock/window merged
      nYAll4D  [stock, window, iDepthY, 5]            columns 0-3 of the Y windows
                                                      plus the cumulative change
      nY4D     [stock, window, iDepthY, 1]            cumulative price change,
                                                      cumprod(1 + change) - 1
      nY3D     [stock*window, iDepthY, 1]             nY4D with stock/window merged

  Raises:
    ValueError: if the input is not 3-D with at least 6 columns, or if there
      are too few records per stock for even one window.
  """
  if nSpreadX3D.ndim != 3 or nSpreadX3D.shape[2] < 6:
    raise ValueError("fGetSpread expects a [stock, record, column] array with at least 6 columns")

  iNumRecordsPerStockLocal = nSpreadX3D.shape[1]

  # Anchor records that have a full X window behind them and a full Y window ahead.
  listAnchors = [iRecord for iRecord in range(iNumRecordsPerStockLocal)
                 if iRecord >= iDepthX + 1 and iRecord <= iNumRecordsPerStockLocal - iDepthY]
  if not listAnchors:
    raise ValueError("fGetSpread: " + str(iNumRecordsPerStockLocal)
                     + " records per stock is too few for iDepthX=" + str(iDepthX)
                     + " and iDepthY=" + str(iDepthY))

  # Each slice covers all stocks at once; stacking on axis 1 gives [stock, window, depth, column].
  nXAll4D = np.stack([nSpreadX3D[:, iRecord - iDepthX:iRecord, :] for iRecord in listAnchors], axis=1)
  nYAll4Draw = np.stack([nSpreadX3D[:, iRecord:iRecord + iDepthY, :] for iRecord in listAnchors], axis=1)

  # Model inputs: price change plus the two social changes (copy, so it is independent of nXAll4D).
  nX4D = nXAll4D[:, :, :, 3:6].copy()
  nX3D = nX4D.reshape(nX4D.shape[0]*nX4D.shape[1], nX4D.shape[2], nX4D.shape[3])

  # Model targets: compound the per-step price changes along each Y window,
  # i.e. (1+c0), (1+c0)(1+c1), ... and convert back to a change by subtracting 1.
  nYPriceChanges3D = nYAll4Draw[:, :, :, 3]
  nYCumulative3D = np.cumprod(nYPriceChanges3D + 1, axis=2) - 1
  nYCumulative4D = nYCumulative3D[..., np.newaxis]

  # Columns 0-3 of the Y windows with the cumulative change appended as column 4.
  nYAll4D = np.concatenate((nYAll4Draw[:, :, :, 0:4], nYCumulative4D), axis=-1)
  nY4D = nYCumulative4D
  nY3D = nY4D.reshape(nY4D.shape[0]*nY4D.shape[1], nY4D.shape[2], nY4D.shape[3])

  return nXAll4D, nX4D, nX3D, nYAll4D, nY4D, nY3D


In [5]:
def fMain(iAction, sIsAzureYN, sModelFileName
          , sSourceFileName, sPredictionsFileName, sTradesFileName, fPredictTarget
          , iEpochs, iNumTestSplitRecordsPerStock, iDepthX, iDepthY):
  """Run one stage of the Leopard pipeline, selected by iAction.

  Every call reads the source CSV, builds the test windows, loads the saved
  model, loads the saved predictions file, reports how many predictions reach
  fPredictTarget and replays the saved trades file. Depending on iAction it
  additionally does the following first:

    1   train a new model on the training split and save it to the model file.
    2   predict every test window and save the result to the predictions file.
    3   derive trades from the predictions and save them to the trades file.
    33  (legacy) predict the test windows of stock index 2 only, into the local
        array nPredictedFinal, for inspection in a debugger.
    any other value (e.g. 0): nothing extra, files are only read.

  Because the model, predictions and trades files are always loaded, each of
  them must already exist unless the selected action creates it.

  Args:
    iAction: stage selector, see above.
    sIsAzureYN: "Y" to use the Azure relative paths, "N" for the Windows paths.
    sModelFileName: file name of the saved Keras model.
    sSourceFileName: file name of the source CSV.
    sPredictionsFileName: file name of the saved predictions array (.npy).
    sTradesFileName: file name of the saved trades array (.npy).
    fPredictTarget: a trade is taken when a prediction is >= this value.
    iEpochs: number of training epochs (used when iAction == 1).
    iNumTestSplitRecordsPerStock: number of trailing records per stock held out as the test split.
    iDepthX: number of past records in each model input window.
    iDepthY: number of future records in each model target window.

  Returns:
    None.

  Raises:
    ValueError: if sIsAzureYN is neither "Y" nor "N".
  """
  # Step 1: Resolve the four file locations for the selected environment.
  if sIsAzureYN == "Y":
    sSourceFileFullPath = "files/repoLeopard/sourcefiles/" + sSourceFileName
    sModelFileFullPath = "files/repoLeopard/modelfiles/" + sModelFileName
    sPredictionsFileFullPath = "files/repoLeopard/predictionsfiles/" + sPredictionsFileName
    sTradesFileFullPath = "files/repoLeopard/tradesfiles/" + sTradesFileName
  elif sIsAzureYN == "N":
    sSourceFileFullPath = "C:\\FILES\\IB_API_Dev\\Tiger\\repoLeopard\\sourcefiles\\" + sSourceFileName
    sModelFileFullPath = "C:\\FILES\\IB_API_Dev\\Tiger\\repoLeopard\\modelfiles\\" + sModelFileName
    sPredictionsFileFullPath = "C:\\FILES\\IB_API_Dev\\Tiger\\repoLeopard\\Predictionsfiles\\" + sPredictionsFileName
    sTradesFileFullPath = "C:\\FILES\\IB_API_Dev\\Tiger\\repoLeopard\\Tradesfiles\\" + sTradesFileName
  else:
    # Previously this fell through with undefined paths and failed later with an unrelated error.
    raise ValueError("sIsAzureYN must be 'Y' or 'N', got: " + repr(sIsAzureYN))

  # Step 2: Load the six numeric columns and arrange them as [stock, record, column].
  dfSourceFile = pd.read_csv(sSourceFileFullPath)
  dfSourceFileNumbersOnly = dfSourceFile.filter(['viShareId','viRunId','vfPrice','vfPriceChange','vfSocial1Change','vfSocial2Change'], axis=1)
  nAAll2D = dfSourceFileNumbersOnly.to_numpy()

  iNumRecords = nAAll2D.shape[0]
  iNumStocks = np.unique(nAAll2D[:, 0]).shape[0]
  iNumRecordsPerStock = iNumRecords // iNumStocks
  # NOTE(review): the reshape relies on the rows being grouped by stock, with the
  # same number of rows for every stock - confirm this holds for the source file.
  nAAll3D = nAAll2D.reshape(iNumStocks, iNumRecordsPerStock, 6)

  # Step 3: Split each stock into leading training records and trailing test records.
  iLenATrain = iNumRecordsPerStock - iNumTestSplitRecordsPerStock # e.g. 10,000 - 200 = 9,800
  nATrain3D = nAAll3D[:,0:iLenATrain,:]
  nATest3D = nAAll3D[:,iLenATrain:,:]

  nBXAlltest4D, _, nBXtest3D, nBYAlltest4D, _, nBYtest3D = fGetSpread(nATest3D, iDepthX, iDepthY)

  # Step 4 (iAction 1): Train the model on the training windows and save it.
  # The training windows and the untrained model are only needed here, so they
  # are only built here.
  if iAction == 1:
    _, _, nBXtrain3D, _, _, nBYtrain3D = fGetSpread(nATrain3D, iDepthX, iDepthY)

    # The model predicts iDepthY future steps from iDepthX past steps of 3
    # features (price change, social 1, social 2).
    oModel = fBuildModel(iDepthY, iDepthX, 3)  #n_outputs, n_timesteps, n_features

    # X holds the input windows; Y holds the cumulative price change over each target window.
    oModelHistory = oModel.fit(nBXtrain3D, nBYtrain3D, epochs=iEpochs, verbose=1, validation_data=(nBXtest3D, nBYtest3D))
    oModel.save(sModelFileFullPath)
    history_dict = oModelHistory.history
    loss_values = history_dict['loss']
    val_loss_values = history_dict['val_loss']
    epochs = range(1, len(loss_values) + 1)
    plt.plot(epochs, loss_values, 'bo', label='Training loss')
    plt.plot(epochs, val_loss_values, 'b', label='Validation loss')
    plt.title('Training and validation loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

  oModelLoaded = keras.models.load_model(sModelFileFullPath)

  if iAction == 33: #Not used now
    # Generate the predictions for the test windows of stock index 2 only, one
    # window at a time, and collect them into nPredictedFinal [window, step].
    oPredicted = []
    nBXtesting3D = nBXAlltest4D[2,:,:,3:6]

    for i in range(nBXtesting3D.shape[0]):
      print("Row: " , str(i))
      oRow2D = nBXtesting3D[i]
      oRowReshaped3D = oRow2D.reshape(1,nBXtesting3D.shape[1],3)

      oRowPredicted = oModelLoaded.predict(oRowReshaped3D)
      oPredicted.append(oRowPredicted)

    nPredicted = np.array(oPredicted)
    nPredictedFinal = nPredicted.reshape(nPredicted.shape[0],nPredicted.shape[2])

  # Step 5 (iAction 2): Generate the predictions from the test part of the source
  # file, and save them into value 6 in nPredictions4D (which is a copy of
  # nBXAlltest4D), with value 7 having the Y actual values.
  if iAction == 2:
    print("ModelFit3")
    nPredictions4Draw = nBXAlltest4D.copy()
    nZeros = np.zeros(shape=(nBXAlltest4D.shape[0],nBXAlltest4D.shape[1],nBXAlltest4D.shape[2],2))
    nPredictions4D = np.append(nPredictions4Draw, nZeros, axis=3)

    for iStockIndex in range(nBXAlltest4D.shape[0]):
      nBXAlltestOneStock3D = nBXAlltest4D[iStockIndex,:,:,:]
      print("Row: " , str(iStockIndex))
      for iRunIndex in range(nBXAlltestOneStock3D.shape[0]):
        nBXAlltestOneStockOneRunRecord2D = nBXAlltestOneStock3D[iRunIndex,:,:]
        nPredictInput2D = nBXAlltestOneStockOneRunRecord2D[:,3:6]
        nPredictInput3D = nPredictInput2D.reshape(1,nPredictInput2D.shape[0],3)
        # Column 4 of the Y array is the cumulative price change (the training target).
        nPredictTarget = nBYAlltest4D[iStockIndex,iRunIndex,:,4]
        nPredictions4D[iStockIndex,iRunIndex,:,7] = nPredictTarget
        oRowPredictedRaw = oModelLoaded.predict(nPredictInput3D)
        oRowPredicted = oRowPredictedRaw.reshape(oRowPredictedRaw.shape[1])
        nPredictions4D[iStockIndex,iRunIndex,:,6] = oRowPredicted
        print(  "Row: " , str(iStockIndex)
              , " Run: ", str(iRunIndex)
              , " Prediction: ", str(nPredictions4D[iStockIndex,iRunIndex,6,6])
              , " Target: ", str(nPredictions4D[iStockIndex,iRunIndex,6,7])
              )

    np.save(sPredictionsFileFullPath, nPredictions4D)

  nPredictions4D = np.load(sPredictionsFileFullPath)

  # Spot checks against the sample data (uncomment to run):

  # fPrint(nPredictions4D[2, 30, 6, :])
  # fPrint(nPredictions4D[2, 30, 19, 3])  #0.02
  # fPrint(nPredictions4D[2, 37, 19, 3])  #0.1

  # fPrint(nPredictions4D[2,30,5,7])  #0.0004791183753658501
  # fPrint(nPredictions4D[2,30,6,7])  #0.10052703021290244

  # fPrint(nPredictions4D[2,30,5,6])  #0.0011756770545616746
  # fPrint(nPredictions4D[2,30,6,6])  #0.09948894381523132

  # fPrint(nPredictions4D[2,29,6,6])	#0.0052693127654492855
  # fPrint(nPredictions4D[2,29,7,6])	#0.008663389831781387

  # fPrint(nPredictions4D[2,29,6,7])	#0.0204887007428729
  # fPrint(nPredictions4D[2,29,7,7])	#0.12253757081716032

  # print("Social1 spike:")

  # fPrint(nPredictions4D[2, 39, 19, 4])	#-0.019608
  # fPrint(nPredictions4D[2, 49, 19, 3])  #-0.090909090909091

  # fPrint(nPredictions4D[2, 39, 8, 7])   #0.021082481702138667
  # fPrint(nPredictions4D[2, 39, 9, 7])   #-0.07174319845260135

  # fPrint(nPredictions4D[2, 39, 8, 6])   #0.010913881473243237
  # fPrint(nPredictions4D[2, 39, 9, 6])   #-0.07884889841079712

  # fPrint(nPredictions4D[2, 38, 9, 6])   #0.014400970190763474
  # fPrint(nPredictions4D[2, 38, 10, 6])  #0.016602344810962677

  # fPrint(nPredictions4D[2, 38, 9, 7])   #0.02024992907694667
  # fPrint(nPredictions4D[2, 38, 10, 7])  #-0.07250006447550317

  # print("Social2 spike:")

  # fPrint(nPredictions4D[2, 3, 19, 5])	  #-0.019608
  # fPrint(nPredictions4D[2, 18, 19, 3])	#-0.090909090909091

  # fPrint(nPredictions4D[2, 3, 13, 7])   #-0.0005855898365195733
  # fPrint(nPredictions4D[2, 3, 14, 7])   #-0.09144144530592702

  # fPrint(nPredictions4D[2, 3, 13, 6])   #0.030499937012791634
  # fPrint(nPredictions4D[2, 3, 14, 6])   #-0.060842156410217285

  # fPrint(nPredictions4D[2, 2, 14, 6])   #0.018615230917930603
  # fPrint(nPredictions4D[2, 2, 15, 6])   #0.018593236804008484

  # fPrint(nPredictions4D[2, 2, 14, 7])   #-0.0008223257331689826
  # fPrint(nPredictions4D[2, 2, 15, 7])   #-0.09165665975742654

  # Step 6 (iAction 3): Generate the trades that would occur if a trade is done
  # wherever the prediction is >= fPredictTarget.
  if iAction == 3:
    print("Generating Trades..")
    oTrades = []

    for iStockIndex in range(nPredictions4D.shape[0]):
      print("Processing StockI: ")
      print(iStockIndex)
      nPredictionsStock3D = nPredictions4D[iStockIndex,:,:,:]
      iRunIndex = 0
      # A while loop is needed for the runs because a trade jumps the run index forward.
      while iRunIndex < nPredictionsStock3D.shape[0]:
        for iSpreadIndex in range(nPredictionsStock3D.shape[1]):
          fPrediction = nPredictionsStock3D[iRunIndex,iSpreadIndex,6]
          fActual = nPredictionsStock3D[iRunIndex,iSpreadIndex,7]
          if fPrediction >= fPredictTarget:
            print("Stock: ", str(iStockIndex), " Run: "
                  , iRunIndex, " Spread: ", iSpreadIndex
                  , " fPrediction: ", fPrediction, " fActual: ", fActual)
            oTrades.append([iStockIndex, iRunIndex, iSpreadIndex, -1, -1, -1, fPrediction, fActual])
            iRunIndex = iRunIndex + iSpreadIndex #move forward to next trade potential
            break # only the first qualifying spread of a run is traded
        iRunIndex = iRunIndex + 1

    nTrades2D = np.array(oTrades)
    np.save(sTradesFileFullPath, nTrades2D)

  # Step 7: Report how many individual predictions reach the target.
  iPredictionsOverTarget = int(np.count_nonzero(nPredictions4D[:,:,:,6] >= fPredictTarget))
  print("fPredictTarget: ", fPredictTarget, " nPredictionsOverTarget: ", iPredictionsOverTarget)

  # Step 8: Replay the saved trades. The totals are accumulated here only (not
  # also while generating the trades), so each trade is counted exactly once and
  # the printed trade index always starts at 0.
  nTrades2D = np.load(sTradesFileFullPath)

  fProfit = float(0)  # set the datatype to float
  fProfitAbs = float(0)
  iTrades = 0
  for nTrades1D in nTrades2D:
    fPrediction = nTrades1D[6]
    fProfit = fProfit + nTrades1D[7]
    fProfitAbs = fProfitAbs + abs(nTrades1D[7])
    print(iTrades, fPrediction, nTrades1D[7])
    iTrades += 1

  # fProfit, fProfitAbs and iTrades are not returned; they are available for
  # inspection in a debugger at this point.
  print("end")
  print("end")  # presumably kept as a second line to place a breakpoint on



In [7]:
# Set to 0 to define everything without running the pipeline, 1 to run it.
iRunMain = 1

if iRunMain == 1:
  output = fMain(
      # iAction: 0 to read the files only, 1 to train the model,
      #          2 to run the predictions, 3 to generate the trades to be run,
      #          33 to generate nPredictedFinal, a numpy array of the predictions
      iAction=33,
      sIsAzureYN="N",    # Y for Azure, N for Windows
      sModelFileName="modelLeopardF.keras",
      sSourceFileName="LeopardF.csv",
      sPredictionsFileName="PredictionsF.npy",
      sTradesFileName="TradesF.npy",
      fPredictTarget=0,
      iEpochs=5,
      iNumTestSplitRecordsPerStock=200,
      iDepthX=20,
      iDepthY=20,
  )

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(**kwargs)


Row:  0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
Row:  1
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
Row:  2
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
Row:  3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
Row:  4
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step
Row:  5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
Row:  6
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
Row:  7
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
Row:  8
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
Row:  9
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
Row:  10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
Row:  11
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
Row:  12
[1m1/1[0m [32m━━━━━━━━━━━━━━