In [1]:
import datetime
import Model
import numpy as np
import pandas as pd
import math
from multiprocessing import Pool
import os
import warnings
import time
import matplotlib.pyplot as plt

warnings.simplefilter('ignore', FutureWarning)

# positive numbers, start should be lower than end
# a rebalance index represents the month before the rebalancing takes place
# so returns are calculated starting at rebalanceIndex + 1
def rebalanceIndexes(startIndex, endIndex, step=3):
    """Return the rebalance indexes in [startIndex, endIndex), largest first.

    Args:
        startIndex: smallest index to include.
        endIndex: exclusive upper bound of the range.
        step: spacing between consecutive indexes. Defaults to 3, which is
            the spacing every existing caller relies on.

    Returns:
        A list of ints ``startIndex, startIndex + step, ...`` below
        ``endIndex``, in descending order. Empty when ``startIndex`` is not
        lower than ``endIndex``.
    """
    return list(range(startIndex, endIndex, step))[::-1]


# get ln returns for an equally balanced portfolio of stocks
def getReturns(portfolio, index, length):
    """Average the per-stock returns of an equally weighted portfolio.

    For every stock in ``portfolio`` the 'Last Price' data between the
    offsets ``-index + 1`` and ``-index + length`` is fetched with
    ``Model.retrieveData`` and passed to ``Model.rateOfReturn``. Returns that
    come back as NaN are left out of the average.

    Returns:
        The mean of the non-NaN returns, or 0 when there are none (which
        includes an empty portfolio). Progress is printed along the way.
    """
    firstOffset = -1 * index + 1
    lastOffset = -1 * index + length
    print(portfolio)
    print(firstOffset)
    print(lastOffset)

    returnSum = 0
    missing = 0
    for ticker in portfolio:
        prices = Model.retrieveData(ticker, 'Last Price', firstOffset, lastOffset, [])
        tickerReturn = Model.rateOfReturn(prices)
        print(ticker + ": " + str(tickerReturn))
        if math.isnan(tickerReturn):
            # No usable return for this stock: count it and move on.
            missing += 1
            continue
        returnSum += tickerReturn

    usable = len(portfolio) - missing
    total = returnSum / usable if usable != 0 else 0
    print("Start Date: " + str(Model.convertIndexToDate(firstOffset)))
    print("Total Return: " + str(total))
    return total


# make a portfolio from the stocks whose predicted probability is higher than
# a threshold (0.7 unless the caller overrides it) and return its return
def makePortfolio(treeTuple, probabilityThreshold=0.7):
    """Select stocks by predicted probability and return the portfolio return.

    Args:
        treeTuple: pair whose first item is the rebalance index and whose
            second item is the fitted model handed to
            ``Model.predict_probabilities``.
        probabilityThreshold: a stock is kept when entry 1 of its
            probability row is strictly greater than this value. Defaults to
            0.7, so single-argument callers (e.g. ``pool.map``) are unchanged.

    Returns:
        ``getReturns(stocks, rebalanceIndex, 3)`` for the selected stocks, or
        0 when no stock clears the threshold.
    """
    featureList = [
        'EPS Growth', 'Volatility 180 D', 'Trailing EPS', 'Price to Cash Flow',
        'EPS', 'Volume', 'Return on Assets', 'Price to Book', 'Dividend Yield',
        'Total Debt to Total Equity', 'Return on Invested Capital',
        'Return on Common Equity',
    ]
    rebalanceIndex = treeTuple[0]
    # Features are read from the 12 index positions ending at the rebalance index.
    addedStocks, probabilities = Model.predict_probabilities(
        treeTuple[1],
        startIndex=-1 * rebalanceIndex - 11,
        endIndex=-1 * rebalanceIndex,
        features=featureList,
        sector="Financials",
    )
    # Kept as a tuple so getReturns receives the same kind of sequence as before.
    stocks = tuple(
        stock
        for stock, classProbabilities in zip(addedStocks, probabilities)
        if classProbabilities[1] > probabilityThreshold
    )
    if not stocks:
        print("No portfolio, probabilities lower than threshold of " + str(probabilityThreshold))
        return 0
    return getReturns(stocks, rebalanceIndex, 3)

In [None]:
def poolBuild(trainIndexes):
    """Build one model for the given training indexes and print the build time.

    ``trainIndexes`` is forwarded as ``indexes`` to
    ``Model.buildWithIndexesTripleClass`` together with the fixed feature
    list, target, lengths, sector and percentile settings below. The object
    returned by that call is returned unchanged.
    """
    started = time.time()
    featureList = [
        'EPS Growth', 'Volatility 180 D', 'Trailing EPS', 'Price to Cash Flow',
        'EPS', 'Volume', 'Return on Assets', 'Price to Book', 'Dividend Yield',
        'Total Debt to Total Equity', 'Return on Invested Capital',
        'Return on Common Equity',
    ]
    forest = Model.buildWithIndexesTripleClass(
        modelType=Model.randomForestClassifier,
        indexes=trainIndexes,
        target='Rate of Return',
        features=featureList,
        featureLength=12,
        targetLength=3,
        sector="Financials",
        percentileTarget=90,
        percentileAvoid=10,
    )
    elapsed = time.time() - started
    print("Build time: " + str(elapsed) + " seconds.")
    return forest

In [None]:
# One array of training indexes per rebalance index i in [4, 28). Each array
# steps by targetLength from (end - maxLength) up to, but excluding,
# end = -(targetLength + featureLength) - i.
maxLength = 200
targetLength = 3
featureLength = 12
indexes = [
    np.arange(
        -(targetLength + featureLength) - i - maxLength,
        -(targetLength + featureLength) - i,
        targetLength,
    )
    for i in rebalanceIndexes(4, 28)
]
print(len(indexes))
print(indexes)

In [None]:
# Build one model per training window in parallel. The with-block terminates
# the worker processes as soon as map() has returned, so repeated runs of the
# notebook do not accumulate idle pools.
print("# of multiprocess cpus: " + str(os.cpu_count()))
with Pool(os.cpu_count()) as pool:
    forestList = pool.map(poolBuild, indexes)
print(forestList)

In [None]:
# Run makePortfolio for every (rebalance index, model) pair in parallel.
# zip() pairs the i-th rebalance index with the i-th entry of forestList, so
# forestList must have been built from the windows of this same (4, 28) range.
# The with-block terminates the workers once map() has returned.
print("# of multiprocess cpus: " + str(os.cpu_count()))
with Pool(os.cpu_count()) as pool:
    returnsList = pool.map(makePortfolio, zip(rebalanceIndexes(4, 28), forestList))
print(returnsList)

In [None]:
# One array of training indexes per rebalance index i in [28, 52). Each array
# steps by targetLength from (end - maxLength) up to, but excluding,
# end = -(targetLength + featureLength) - i.
maxLength = 200
targetLength = 3
featureLength = 12
indexes = [
    np.arange(
        -(targetLength + featureLength) - i - maxLength,
        -(targetLength + featureLength) - i,
        targetLength,
    )
    for i in rebalanceIndexes(28, 52)
]
print(len(indexes))
print(indexes)

In [None]:
# Build one model per training window in parallel. The with-block terminates
# the worker processes as soon as map() has returned, so repeated runs of the
# notebook do not accumulate idle pools.
print("# of multiprocess cpus: " + str(os.cpu_count()))
with Pool(os.cpu_count()) as pool:
    forestList = pool.map(poolBuild, indexes)
print(forestList)

In [None]:
# Run makePortfolio for every (rebalance index, model) pair in parallel.
# zip() pairs the i-th rebalance index with the i-th entry of forestList, so
# forestList must have been built from the windows of this same (28, 52) range.
# The with-block terminates the workers once map() has returned.
print("# of multiprocess cpus: " + str(os.cpu_count()))
with Pool(os.cpu_count()) as pool:
    returnsListTemp = pool.map(makePortfolio, zip(rebalanceIndexes(28, 52), forestList))
print(returnsListTemp)
# NOTE: extend() appends on every run, so re-running this cell adds this
# window's returns to returnsList a second time.
returnsList.extend(returnsListTemp)

In [None]:
# One array of training indexes per rebalance index i in [52, 76). Each array
# steps by targetLength from (end - maxLength) up to, but excluding,
# end = -(targetLength + featureLength) - i.
maxLength = 200
targetLength = 3
featureLength = 12
indexes = [
    np.arange(
        -(targetLength + featureLength) - i - maxLength,
        -(targetLength + featureLength) - i,
        targetLength,
    )
    for i in rebalanceIndexes(52, 76)
]
print(len(indexes))
print(indexes)

In [None]:
# Build one model per training window in parallel. The with-block terminates
# the worker processes as soon as map() has returned, so repeated runs of the
# notebook do not accumulate idle pools.
print("# of multiprocess cpus: " + str(os.cpu_count()))
with Pool(os.cpu_count()) as pool:
    forestList = pool.map(poolBuild, indexes)
print(forestList)

In [None]:
# Run makePortfolio for every (rebalance index, model) pair in parallel.
# zip() pairs the i-th rebalance index with the i-th entry of forestList, so
# forestList must have been built from the windows of this same (52, 76) range.
# The with-block terminates the workers once map() has returned.
print("# of multiprocess cpus: " + str(os.cpu_count()))
with Pool(os.cpu_count()) as pool:
    returnsListTemp = pool.map(makePortfolio, zip(rebalanceIndexes(52, 76), forestList))
print(returnsListTemp)
# NOTE: extend() appends on every run, so re-running this cell adds this
# window's returns to returnsList a second time.
returnsList.extend(returnsListTemp)

In [None]:
# One array of training indexes per rebalance index i in [76, 100). Each array
# steps by targetLength from (end - maxLength) up to, but excluding,
# end = -(targetLength + featureLength) - i.
maxLength = 200
targetLength = 3
featureLength = 12
indexes = [
    np.arange(
        -(targetLength + featureLength) - i - maxLength,
        -(targetLength + featureLength) - i,
        targetLength,
    )
    for i in rebalanceIndexes(76, 100)
]
print(len(indexes))
print(indexes)

In [None]:
# Build one model per training window in parallel. The with-block terminates
# the worker processes as soon as map() has returned, so repeated runs of the
# notebook do not accumulate idle pools.
print("# of multiprocess cpus: " + str(os.cpu_count()))
with Pool(os.cpu_count()) as pool:
    forestList = pool.map(poolBuild, indexes)
print(forestList)

In [None]:
# Run makePortfolio for every (rebalance index, model) pair in parallel.
# zip() pairs the i-th rebalance index with the i-th entry of forestList, so
# forestList must have been built from the windows of this same (76, 100) range.
# The with-block terminates the workers once map() has returned.
print("# of multiprocess cpus: " + str(os.cpu_count()))
with Pool(os.cpu_count()) as pool:
    returnsListTemp = pool.map(makePortfolio, zip(rebalanceIndexes(76, 100), forestList))
print(returnsListTemp)
# NOTE: extend() appends on every run, so re-running this cell adds this
# window's returns to returnsList a second time.
returnsList.extend(returnsListTemp)

In [None]:
# One array of training indexes per rebalance index i in [100, 124). Each array
# steps by targetLength from (end - maxLength) up to, but excluding,
# end = -(targetLength + featureLength) - i.
maxLength = 200
targetLength = 3
featureLength = 12
indexes = [
    np.arange(
        -(targetLength + featureLength) - i - maxLength,
        -(targetLength + featureLength) - i,
        targetLength,
    )
    for i in rebalanceIndexes(100, 124)
]
print(len(indexes))
print(indexes)

In [None]:
# Build one model per training window in parallel. The with-block terminates
# the worker processes as soon as map() has returned, so repeated runs of the
# notebook do not accumulate idle pools.
print("# of multiprocess cpus: " + str(os.cpu_count()))
with Pool(os.cpu_count()) as pool:
    forestList = pool.map(poolBuild, indexes)
print(forestList)

In [None]:
# Run makePortfolio for every (rebalance index, model) pair in parallel.
# zip() pairs the i-th rebalance index with the i-th entry of forestList, so
# forestList must have been built from the windows of this same (100, 124) range.
# The with-block terminates the workers once map() has returned.
print("# of multiprocess cpus: " + str(os.cpu_count()))
with Pool(os.cpu_count()) as pool:
    returnsListTemp = pool.map(makePortfolio, zip(rebalanceIndexes(100, 124), forestList))
print(returnsListTemp)
# NOTE: extend() appends on every run, so re-running this cell adds this
# window's returns to returnsList a second time.
returnsList.extend(returnsListTemp)

In [None]:
# One array of training indexes per rebalance index i in [124, 148). Each array
# steps by targetLength from (end - maxLength) up to, but excluding,
# end = -(targetLength + featureLength) - i.
maxLength = 200
targetLength = 3
featureLength = 12
indexes = [
    np.arange(
        -(targetLength + featureLength) - i - maxLength,
        -(targetLength + featureLength) - i,
        targetLength,
    )
    for i in rebalanceIndexes(124, 148)
]
print(len(indexes))
print(indexes)

In [None]:
# Build one model per training window in parallel. The with-block terminates
# the worker processes as soon as map() has returned, so repeated runs of the
# notebook do not accumulate idle pools.
print("# of multiprocess cpus: " + str(os.cpu_count()))
with Pool(os.cpu_count()) as pool:
    forestList = pool.map(poolBuild, indexes)
print(forestList)

In [None]:
# Run makePortfolio for every (rebalance index, model) pair in parallel.
# zip() pairs the i-th rebalance index with the i-th entry of forestList, so
# forestList must have been built from the windows of this same (124, 148) range.
# The with-block terminates the workers once map() has returned.
print("# of multiprocess cpus: " + str(os.cpu_count()))
with Pool(os.cpu_count()) as pool:
    returnsListTemp = pool.map(makePortfolio, zip(rebalanceIndexes(124, 148), forestList))
print(returnsListTemp)
# NOTE: extend() appends on every run, so re-running this cell adds this
# window's returns to returnsList a second time.
returnsList.extend(returnsListTemp)

In [None]:
# One array of training indexes per rebalance index i in [148, 172). Each array
# steps by targetLength from (end - maxLength) up to, but excluding,
# end = -(targetLength + featureLength) - i.
maxLength = 200
targetLength = 3
featureLength = 12
indexes = [
    np.arange(
        -(targetLength + featureLength) - i - maxLength,
        -(targetLength + featureLength) - i,
        targetLength,
    )
    for i in rebalanceIndexes(148, 172)
]
print(len(indexes))
print(indexes)

In [None]:
# Build one model per training window in parallel. The with-block terminates
# the worker processes as soon as map() has returned, so repeated runs of the
# notebook do not accumulate idle pools.
print("# of multiprocess cpus: " + str(os.cpu_count()))
with Pool(os.cpu_count()) as pool:
    forestList = pool.map(poolBuild, indexes)
print(forestList)

In [None]:
# Run makePortfolio for every (rebalance index, model) pair in parallel.
# zip() pairs the i-th rebalance index with the i-th entry of forestList, so
# forestList must have been built from the windows of this same (148, 172) range.
# The with-block terminates the workers once map() has returned.
print("# of multiprocess cpus: " + str(os.cpu_count()))
with Pool(os.cpu_count()) as pool:
    returnsListTemp = pool.map(makePortfolio, zip(rebalanceIndexes(148, 172), forestList))
print(returnsListTemp)
# NOTE: extend() appends on every run, so re-running this cell adds this
# window's returns to returnsList a second time.
returnsList.extend(returnsListTemp)

In [None]:
# One array of training indexes per rebalance index i in [172, 196). Each array
# steps by targetLength from (end - maxLength) up to, but excluding,
# end = -(targetLength + featureLength) - i.
maxLength = 200
targetLength = 3
featureLength = 12
indexes = [
    np.arange(
        -(targetLength + featureLength) - i - maxLength,
        -(targetLength + featureLength) - i,
        targetLength,
    )
    for i in rebalanceIndexes(172, 196)
]
print(len(indexes))
print(indexes)

In [None]:
# Build one model per training window in parallel. The with-block terminates
# the worker processes as soon as map() has returned, so repeated runs of the
# notebook do not accumulate idle pools.
print("# of multiprocess cpus: " + str(os.cpu_count()))
with Pool(os.cpu_count()) as pool:
    forestList = pool.map(poolBuild, indexes)
print(forestList)

In [None]:
# Run makePortfolio for every (rebalance index, model) pair in parallel.
# zip() pairs the i-th rebalance index with the i-th entry of forestList, so
# forestList must have been built from the windows of this same (172, 196) range.
# The with-block terminates the workers once map() has returned.
print("# of multiprocess cpus: " + str(os.cpu_count()))
with Pool(os.cpu_count()) as pool:
    returnsListTemp = pool.map(makePortfolio, zip(rebalanceIndexes(172, 196), forestList))
print(returnsListTemp)
# NOTE: extend() appends on every run, so re-running this cell adds this
# window's returns to returnsList a second time.
returnsList.extend(returnsListTemp)

In [None]:
# Summary of the returns gathered over all rebalance windows: the raw list,
# its mean, standard deviation and sum, then an 8-bin histogram.
print(returnsList)
meanReturn = np.mean(returnsList)
stdReturn = np.std(returnsList)
totalReturn = sum(returnsList)
print("Mean: " + str(meanReturn))
print("Std Dev: " + str(stdReturn))
print("Total Sum: " + str(totalReturn))
plt.hist(returnsList, bins=8, facecolor='green', alpha=0.75)