In [1]:
import datetime
import Model
import numpy as np
import pandas as pd
import math
from multiprocessing import Pool
import os
import warnings
import time
import matplotlib.pyplot as plt
from importlib import reload

reload(Model)
warnings.simplefilter('ignore', FutureWarning)

# Both startIndex and endIndex are positive integers, and startIndex should be lower than endIndex.
# A rebalance index represents the month before the rebalancing takes place,
# so returns are calculated starting at rebalanceIndex + 1.
def rebalanceIndexes(startIndex, endIndex, step=3):
    """Return the rebalance indexes from startIndex up to, but not including, endIndex.

    Args:
        startIndex: first rebalance index (inclusive).
        endIndex: upper bound (exclusive).
        step: spacing between consecutive rebalance indexes. Defaults to 3,
            the spacing that used to be hard-coded here.

    Returns:
        A list of integers startIndex, startIndex + step, ... that are below
        endIndex; empty when startIndex >= endIndex (for a positive step).

    Raises:
        ValueError: if step is 0 (raised by range).
    """
    return list(range(startIndex, endIndex, step))


# get ln returns for an equally balanced portfolio of stocks
def getReturns(portfolio, index, length):
	"""Average the per-stock returns of a portfolio over one holding window.

	The window runs from ``-1 * index + 1`` to ``-1 * index + length`` and is
	passed unchanged to ``Model.retrieveData`` for the 'Last Price' field.
	Stocks whose return is NaN are left out of the average.

	Args:
		portfolio: sized iterable of ticker strings.
		index: rebalance index the window is anchored to.
		length: offset added to ``-1 * index`` to obtain the window end.

	Returns:
		The mean of the non-NaN returns, or 0 when no stock produced a
		usable return (including an empty portfolio).
	"""
	# Window bounds are the same for every stock, so compute them once.
	firstIndex = -1 * index + 1
	lastIndex = -1 * index + length
	print(portfolio)
	print(firstIndex)
	print(lastIndex)

	accumulated = 0
	missing = 0
	for ticker in portfolio:
		prices = Model.retrieveData(ticker, 'Last Price', firstIndex, lastIndex, [])
		tickerReturn = Model.rateOfReturn(prices)
		print(ticker + ": " + str(tickerReturn))
		if math.isnan(tickerReturn):
			missing += 1
		else:
			accumulated += tickerReturn

	# Only stocks with a real (non-NaN) return count towards the average.
	counted = len(portfolio) - missing
	total = accumulated / counted if counted != 0 else 0
	print("Start Date: " + str(Model.convertIndexToDate(firstIndex)))
	print("Total Return: " + str(total))
	return total


# make a portfolio with predicted probabilities higher than a hardcoded threshold
def makePortfolio(treeTuple):
	"""Pick stocks whose predicted probability beats a fixed threshold and return their average return.

	Args:
		treeTuple: ``(rebalanceIndex, model)`` pair; element 0 is the rebalance
			index and element 1 is passed to ``Model.predict_probabilities``.

	Returns:
		The value of ``getReturns`` for the selected stocks over a length-3
		window, or 0 when no stock exceeds the threshold.
	"""
	rebalanceIndex = treeTuple[0]
	forest = treeTuple[1]
	probabilityThreshold = 0.8
	featureList = [
		'EPS Growth',
		'Volatility 180 D',
		'Trailing EPS',
		'Price to Cash Flow',
		'EPS',
		'Volume',
		'Return on Assets',
		'Price to Book',
		'Dividend Yield',
		'Total Debt to Total Equity',
		'Return on Invested Capital',
		'Return on Common Equity',
	]
	# Features come from the window ending at the rebalance index and starting 11 indexes earlier.
	addedStocks, probabilities = Model.predict_probabilities(
		forest,
		startIndex = -1 * rebalanceIndex - 11,
		endIndex = -1 * rebalanceIndex,
		features = featureList,
		sector = "Real Estate",
	)
	# Entry [1] of each probability row is compared with the threshold;
	# presumably that is the probability of the target class - confirm against Model.
	selected = tuple(
		stock
		for stock, classProbabilities in zip(addedStocks, probabilities)
		if classProbabilities[1] > probabilityThreshold
	)
	if not selected:
		print("No portfolio, probabilities lower than threshold of " + str(probabilityThreshold))
		return 0
	return getReturns(selected, rebalanceIndex, 3)

In [2]:
# Window parameters, expressed in the same index units as the rebalance indexes.
maxLength = 200
targetLength = 3
featureLength = 12

# Build one array of negative indexes per rebalance index.
indexes = []
for i in rebalanceIndexes(4,56):
    # Each array stops (exclusive) targetLength + featureLength indexes before -i
    # and starts maxLength indexes earlier, stepping by targetLength.
    windowStop = -1 * (targetLength + featureLength) - i
    windowStart = windowStop + maxLength * -1
    indexes.append(np.arange(windowStart, windowStop, targetLength))
print(len(indexes))
print(indexes)

18
[array([-219, -216, -213, -210, -207, -204, -201, -198, -195, -192, -189,
       -186, -183, -180, -177, -174, -171, -168, -165, -162, -159, -156,
       -153, -150, -147, -144, -141, -138, -135, -132, -129, -126, -123,
       -120, -117, -114, -111, -108, -105, -102,  -99,  -96,  -93,  -90,
        -87,  -84,  -81,  -78,  -75,  -72,  -69,  -66,  -63,  -60,  -57,
        -54,  -51,  -48,  -45,  -42,  -39,  -36,  -33,  -30,  -27,  -24,
        -21]), array([-222, -219, -216, -213, -210, -207, -204, -201, -198, -195, -192,
       -189, -186, -183, -180, -177, -174, -171, -168, -165, -162, -159,
       -156, -153, -150, -147, -144, -141, -138, -135, -132, -129, -126,
       -123, -120, -117, -114, -111, -108, -105, -102,  -99,  -96,  -93,
        -90,  -87,  -84,  -81,  -78,  -75,  -72,  -69,  -66,  -63,  -60,
        -57,  -54,  -51,  -48,  -45,  -42,  -39,  -36,  -33,  -30,  -27,
        -24]), array([-225, -222, -219, -216, -213, -210, -207, -204, -201, -198, -195,
       -192, -189

In [3]:
# Fit one model per array in `indexes`; forestList keeps them in the same order.
print("# of multiprocess cpus: " + str(os.cpu_count()))
sector = "Real Estate"
featureList = [
	'EPS Growth',
	'Volatility 180 D',
	'Trailing EPS',
	'Price to Cash Flow',
	'EPS',
	'Volume',
	'Return on Assets',
	'Price to Book',
	'Dividend Yield',
	'Total Debt to Total Equity',
	'Return on Invested Capital',
	'Return on Common Equity',
]
forestList = []
for ind in indexes:
	randForest = Model.buildWithIndexesTripleClass(
		modelType = Model.randomForestClassifier,
		indexes = ind,
		target = 'Rate of Return',
		features = featureList,
		featureLength = 12,
		targetLength = 3,
		sector = sector,
		percentileTarget = 90,
		percentileAvoid = 10,
		verbose = True,
	)
	forestList.append(randForest)
print(forestList)

# of multiprocess cpus: 8
Finished data retrieval, starting model training. Time taken: 122.68200778961182 seconds.
Finished fitting. Time taken: 31.89304494857788 seconds.
Finished data retrieval, starting model training. Time taken: 124.17559504508972 seconds.
Finished fitting. Time taken: 31.329559087753296 seconds.
Finished data retrieval, starting model training. Time taken: 123.41144800186157 seconds.
Finished fitting. Time taken: 31.686346769332886 seconds.
Finished data retrieval, starting model training. Time taken: 124.28566312789917 seconds.
Finished fitting. Time taken: 31.28397512435913 seconds.
Finished data retrieval, starting model training. Time taken: 120.69742798805237 seconds.
Finished fitting. Time taken: 31.81396722793579 seconds.
Finished data retrieval, starting model training. Time taken: 135.81085991859436 seconds.
Finished fitting. Time taken: 31.965649127960205 seconds.
Finished data retrieval, starting model training. Time taken: 117.495924949646 seconds.
F

In [4]:
# Evaluate every (rebalance index, fitted model) pair in parallel worker processes.
print("# of multiprocess cpus: " + str(os.cpu_count()))
# pool.map blocks until every result is back, so leaving the with-block afterwards
# is safe; it terminates the workers instead of leaving them alive for the rest
# of the kernel session (the pool was previously never closed).
with Pool(os.cpu_count()) as pool:
	returnsList = pool.map(makePortfolio, zip(rebalanceIndexes(4,56), forestList))
print(returnsList)

# of multiprocess cpus: 8
('HRI', 'TGH')
-1
-3
HRI: 0.10481964619700435
TGH: 0.06535518104412752
Start Date: 2017-11-30
Total Return: 0.08508741362056593
('ACTG', 'ALSN', 'BECN', 'CMPR', 'FTV', 'GNRC', 'GTLS', 'HEES', 'HRI', 'KLXI', 'PLOW', 'PRLB', 'THR', 'TITN', 'ULH')
BECN: 0.16248708185688576
-6
-4
ACTG: 0.325422400434628
ALSN: 0.20166488737073784
CMPR: 0.16607167635190567
FTV: 0.1063451008270544
GNRC: 0.2546383788724089
GTLS: 0.25407686097681603
HEES: 0.3359866229173636
HRI: 0.13784467296820502
KLXI: 0.13483425650321923
PLOW: 0.18399160370483392
PRLB: 0.19489308502835812
THR: 0.26394617518680663
TITN: 0.143462535328291
ULH: 0.26329061932762166
Start Date: 2017-08-31
Total Return: 0.2085970638436757
('AVAV', 'CAI', 'DSKE', 'GMS')
-9
-7
AVAV: 0.19676782204637977
CAI: 0.30939273314498905
DSKE: 0.25565996333577123
GMS: -0.08428726133700293
Start Date: 2017-05-31
Total Return: 0.16938331429753428
('ABCO', 'CAI', 'OFLX')
Total Return: 0.26761241272311126
-12
-10
ABCO: 0.30260772266845626