In [1]:
import datetime
import Model
import numpy as np
import pandas as pd
import math
from multiprocessing import Pool
import os
import warnings
import time
import matplotlib.pyplot as plt
from importlib import reload

reload(Model)
warnings.simplefilter('ignore', FutureWarning)

# startIndex and endIndex are positive numbers; startIndex should be lower than endIndex.
# A rebalance index represents the month before the rebalancing takes place,
# so returns are calculated starting at rebalanceIndex + 1.
def rebalanceIndexes(startIndex, endIndex, step=3):
    """Return the evenly spaced rebalance indexes in [startIndex, endIndex).

    Parameters
    ----------
    startIndex : int
        First rebalance index (inclusive).
    endIndex : int
        Upper bound of the sequence (exclusive).
    step : int, optional
        Distance between consecutive rebalance indexes. Defaults to 3, the
        spacing that used to be hard-coded, so existing two-argument calls
        produce the same result as before.

    Returns
    -------
    list of int
        ``startIndex, startIndex + step, ...`` for every value below endIndex.
        Empty when startIndex is not lower than endIndex.
    """
    return list(range(startIndex, endIndex, step))


# get ln returns for an equally balanced portfolio of stocks
def getReturns(portfolio, index, length):
    """Average the per-stock returns of `portfolio`, weighting every stock equally.

    For each stock, 'Last Price' data is fetched through Model.retrieveData for
    the offsets ``-index + 1`` to ``-index + length`` and turned into a single
    return by Model.rateOfReturn (the file's comment describes these as ln
    returns). Stocks whose return is NaN are left out of the average.

    Parameters
    ----------
    portfolio : sequence of str
        Stock identifiers; must support len() and iteration.
    index : int
        Rebalance index; negated to form the offsets passed to Model.
    length : int
        Added to the negated index to form the last offset.

    Returns
    -------
    The mean of the non-NaN returns, or 0 when no stock produced a usable
    return (including an empty portfolio).

    Progress is printed along the way: the portfolio, both offsets, each
    stock's return, the start date and the averaged total.
    """
    firstOffset = -1 * index + 1
    lastOffset = -1 * index + length

    runningSum = 0
    print(portfolio)
    print(firstOffset)
    print(lastOffset)
    skipped = 0

    for ticker in portfolio:
        prices = Model.retrieveData(ticker, 'Last Price', firstOffset, lastOffset, [])
        tickerReturn = Model.rateOfReturn(prices)
        print(ticker + ": " + str(tickerReturn))
        if math.isnan(tickerReturn):
            # no usable return for this stock; exclude it from the average
            skipped += 1
            continue
        runningSum += tickerReturn

    counted = len(portfolio) - skipped
    # guard against dividing by zero when every stock was skipped
    total = runningSum / counted if counted != 0 else 0

    print("Start Date: {!s}".format(Model.convertIndexToDate(firstOffset)))
    print("Total Return: {!s}".format(total))
    return total


# make a portfolio from the stocks whose predicted probability is higher than a
# threshold (0.8 unless overridden) and return that portfolio's average return
def makePortfolio(treeTuple, probabilityThreshold=0.8, sector="Consumer Staples", features=None):
    """Select stocks by predicted probability and return the portfolio's return.

    Parameters
    ----------
    treeTuple : tuple
        ``(rebalanceIndex, model)``. Element 0 is the rebalance index and
        element 1 is the fitted model handed to Model.predict_probabilities.
        A single tuple argument is kept so the function can be used directly
        with ``Pool.map``.
    probabilityThreshold : float, optional
        A stock is kept only when the value at position 1 of its probability
        entry is strictly greater than this. Defaults to 0.8 (the value that
        used to be hard-coded).
        NOTE(review): position 1 is presumably the "target" class of the
        triple-class model - confirm against Model.
    sector : str, optional
        Sector passed to Model.predict_probabilities. Defaults to
        "Consumer Staples".
    features : list of str, optional
        Feature names passed to Model.predict_probabilities. Defaults to the
        twelve features previously hard-coded here.

    Returns
    -------
    The value returned by getReturns for the selected stocks over a length of
    3, or 0 when no stock clears the threshold.

    To override the keyword arguments under ``Pool.map``, wrap the function
    with ``functools.partial``.
    """
    if features is None:
        # built per call so the default list is never shared between calls
        features = ['EPS Growth', 'Volatility 180 D', 'Trailing EPS', 'Price to Cash Flow', 'EPS', 'Volume',
                    'Return on Assets', 'Price to Book', 'Dividend Yield', 'Total Debt to Total Equity',
                    'Return on Invested Capital', 'Return on Common Equity']

    rebalanceIndex = treeTuple[0]
    model = treeTuple[1]

    addedStocks, probabilities = Model.predict_probabilities(
        model,
        startIndex=-1 * rebalanceIndex - 11,
        endIndex=-1 * rebalanceIndex,
        features=features,
        sector=sector,
    )

    # keep only the stock names; a tuple matches what the zip(*...) unpacking produced before
    selected = tuple(
        stock
        for stock, stockProbabilities in zip(addedStocks, probabilities)
        if stockProbabilities[1] > probabilityThreshold
    )

    if len(selected) == 0:
        print("Start Date: " + str(Model.convertIndexToDate(-1 * rebalanceIndex + 1)))
        print("No portfolio, probabilities lower than threshold of " + str(probabilityThreshold))
        return 0
    return getReturns(selected, rebalanceIndex, 3)

In [2]:
# Window parameters are the same for every rebalance index, so set them once.
maxLength = 200
targetLength = 3
featureLength = 12

# One array per rebalance index: values spaced targetLength apart, ending just
# before -(targetLength + featureLength) - i and reaching maxLength further back.
# NOTE(review): these are presumably the training sample indexes for each model - confirm.
indexes = []
for i in rebalanceIndexes(4, 56):
    windowStop = -(targetLength + featureLength) - i
    windowStart = windowStop - maxLength
    indexes.append(np.arange(windowStart, windowStop, targetLength))

print(len(indexes))
print(indexes)

18
[array([-219, -216, -213, -210, -207, -204, -201, -198, -195, -192, -189,
       -186, -183, -180, -177, -174, -171, -168, -165, -162, -159, -156,
       -153, -150, -147, -144, -141, -138, -135, -132, -129, -126, -123,
       -120, -117, -114, -111, -108, -105, -102,  -99,  -96,  -93,  -90,
        -87,  -84,  -81,  -78,  -75,  -72,  -69,  -66,  -63,  -60,  -57,
        -54,  -51,  -48,  -45,  -42,  -39,  -36,  -33,  -30,  -27,  -24,
        -21]), array([-222, -219, -216, -213, -210, -207, -204, -201, -198, -195, -192,
       -189, -186, -183, -180, -177, -174, -171, -168, -165, -162, -159,
       -156, -153, -150, -147, -144, -141, -138, -135, -132, -129, -126,
       -123, -120, -117, -114, -111, -108, -105, -102,  -99,  -96,  -93,
        -90,  -87,  -84,  -81,  -78,  -75,  -72,  -69,  -66,  -63,  -60,
        -57,  -54,  -51,  -48,  -45,  -42,  -39,  -36,  -33,  -30,  -27,
        -24]), array([-225, -222, -219, -216, -213, -210, -207, -204, -201, -198, -195,
       -192, -189

In [3]:
print("# of multiprocess cpus: {!s}".format(os.cpu_count()))

sector = "Consumer Staples"
featureList = [
    'EPS Growth',
    'Volatility 180 D',
    'Trailing EPS',
    'Price to Cash Flow',
    'EPS',
    'Volume',
    'Return on Assets',
    'Price to Book',
    'Dividend Yield',
    'Total Debt to Total Equity',
    'Return on Invested Capital',
    'Return on Common Equity',
]

# Keyword arguments shared by every model build; only `indexes` changes per iteration.
forestOptions = dict(
    modelType=Model.randomForestClassifier,
    target='Rate of Return',
    features=featureList,
    featureLength=12,
    targetLength=3,
    sector=sector,
    percentileTarget=90,
    percentileAvoid=10,
    verbose=True,
)

# Build one model per index array, in the same order as `indexes`.
forestList = []
for ind in indexes:
    randForest = Model.buildWithIndexesTripleClass(indexes=ind, **forestOptions)
    forestList.append(randForest)

print(forestList)

# of multiprocess cpus: 8
Finished data retrieval, starting model training. Time taken: 9.837730169296265 seconds.
Finished fitting. Time taken: 2.1970858573913574 seconds.
Finished data retrieval, starting model training. Time taken: 8.885336875915527 seconds.
Finished fitting. Time taken: 2.191037178039551 seconds.
Finished data retrieval, starting model training. Time taken: 7.857598066329956 seconds.
Finished fitting. Time taken: 2.169656991958618 seconds.
Finished data retrieval, starting model training. Time taken: 7.819164276123047 seconds.
Finished fitting. Time taken: 2.1707987785339355 seconds.
Finished data retrieval, starting model training. Time taken: 7.905659198760986 seconds.
Finished fitting. Time taken: 2.204190254211426 seconds.
Finished data retrieval, starting model training. Time taken: 7.894398927688599 seconds.
Finished fitting. Time taken: 2.424528121948242 seconds.
Finished data retrieval, starting model training. Time taken: 8.744706869125366 seconds.
Finishe

In [4]:
# Evaluate every (rebalance index, model) pair in parallel, one task per pair.
# The pool is managed by a with-block so its worker processes are always
# released; previously the pool was never closed or joined, leaving the
# workers alive after the cell finished.
with Pool(os.cpu_count()) as pool:
    print("# of multiprocess cpus: " + str(os.cpu_count()))
    returnsList = pool.map(makePortfolio, zip(rebalanceIndexes(4,56), forestList))
    # map() has already collected every result; close and join so the workers
    # exit normally before the with-block's terminate() runs.
    pool.close()
    pool.join()
print(returnsList)

# of multiprocess cpus: 8
('VG',)
-3
-1
VG: 0.09459551120145715
Start Date: 2017-11-30
Total Return: 0.09459551120145715
Start Date: 2017-08-31
No portfolio, probabilities lower than threshold of 0.8
('HCOM', 'OOMA')
-9
-7
HCOM: 0.1518732932815796
OOMA: -0.017910926566529817
Start Date: 2017-05-31
Total Return: 0.0669811833575249
('WIFI',)
-12
-10
WIFI: 0.2698801920967968
Start Date: 2017-02-28
Total Return: 0.2698801920967968
Start Date: 2016-11-30
No portfolio, probabilities lower than threshold of 0.8
('VG',)
-18
-16
VG: 0.16612687087397404
Start Date: 2016-08-31
Total Return: 0.16612687087397404
('S', 'TMUS')
-21
-19
S: 0.4798236754267111
TMUS: 0.0804024328473858
Start Date: 2016-05-31
Total Return: 0.28011305413704846
('IRDM',)
-24
-22
IRDM: 0.15229366908004538
Start Date: 2016-02-29
Total Return: 0.15229366908004538
Start Date: 2015-11-30
No portfolio, probabilities lower than threshold of 0.8
Start Date: 2015-08-31
No portfolio, probabilities lower than threshold of 0.8
Start Da