In [1]:
import datetime
import Model
import numpy as np
import pandas as pd
import math
from multiprocessing import Pool
import os
import warnings
import time
import matplotlib.pyplot as plt
from importlib import reload

# Re-execute the already-imported Model module so that changes made to its
# source since the first import are picked up without restarting the kernel.
reload(Model)
# Hide FutureWarning messages for everything that runs after this cell.
warnings.simplefilter('ignore', FutureWarning)

def rebalanceIndexes(startIndex, endIndex, step=3):
    """Return the list of rebalance indexes between two month indexes.

    A rebalance index represents the month before the rebalancing takes
    place, so returns are calculated starting at rebalanceIndex + 1.

    Parameters
    ----------
    startIndex : int
        First rebalance index (inclusive). Expected to be positive and lower
        than ``endIndex``.
    endIndex : int
        Upper bound (exclusive).
    step : int, optional
        Spacing between consecutive rebalance indexes. Defaults to 3, the
        value that used to be hard-coded, so existing two-argument calls
        behave exactly as before.

    Returns
    -------
    list of int
        ``startIndex, startIndex + step, ...`` up to but excluding
        ``endIndex``; empty when ``startIndex >= endIndex``.

    Raises
    ------
    ValueError
        If ``step`` is 0 (raised by ``range``).
    """
    return list(range(startIndex, endIndex, step))


def getReturns(portfolio, index, length):
	"""Average the per-stock returns of an equally weighted portfolio.

	The original note on this function describes the values as ln returns;
	the actual computation is delegated to Model.rateOfReturn on the
	'Last Price' series fetched by Model.retrieveData.

	portfolio -- sized iterable of ticker strings
	index     -- rebalance index; the price window starts at -index + 1
	length    -- the price window ends at -index + length

	Stocks whose return is NaN are left out of the average. When no stock
	contributes (empty portfolio or every return NaN) the result is 0.
	Progress is printed along the way.
	"""
	windowStart = -1 * index + 1
	windowEnd = -1 * index + length

	print(portfolio)
	print(windowStart)
	print(windowEnd)

	# Accumulate by hand (rather than with sum()) so the floating-point
	# result is the plain left-to-right addition.
	runningTotal = 0
	skipped = 0
	for ticker in portfolio:
		prices = Model.retrieveData(ticker, 'Last Price', windowStart, windowEnd, [])
		tickerReturn = Model.rateOfReturn(prices)
		print(ticker + ": " + str(tickerReturn))
		if math.isnan(tickerReturn):
			skipped += 1
			continue
		runningTotal += tickerReturn

	contributing = len(portfolio) - skipped
	total = runningTotal / contributing if contributing != 0 else 0

	print("Start Date: " + str(Model.convertIndexToDate(windowStart)))
	print("Total Return: " +str(total))
	return total


def makePortfolio(treeTuple, probabilityThreshold = 0.8):
	"""Pick the stocks a fitted model rates above a threshold and return their average return.

	treeTuple            -- (rebalanceIndex, model) pair; only elements 0 and 1
	                        are read, which is the shape produced by
	                        zip(rebalanceIndexes(...), forestList).
	probabilityThreshold -- a stock is kept when column 1 of its predicted
	                        probability row is strictly greater than this
	                        value. Defaults to 0.8, the value that used to be
	                        hard-coded, so single-argument callers (such as
	                        pool.map) are unaffected.

	Returns the value of getReturns() for the selected stocks over a
	3-index window, or 0 (after printing a notice) when no stock clears
	the threshold.
	"""
	rebalanceIndex = treeTuple[0]
	model = treeTuple[1]
	featureList = ['EPS Growth', 'Volatility 180 D', 'Trailing EPS', 'Price to Cash Flow', 'EPS', 'Volume', 'Return on Assets', 'Price to Book', 'Dividend Yield', 'Total Debt to Total Equity', 'Return on Invested Capital', 'Return on Common Equity']
	# Features are taken from the 12 indexes ending at the rebalance index.
	addedStocks, probabilities = Model.predict_probabilities(model, startIndex = -1 * rebalanceIndex - 11, endIndex = -1 * rebalanceIndex, features = featureList, sector = "Health Care")
	# NOTE(review): column 1 is presumably the "target" class of the
	# triple-class model -- confirm against Model.predict_probabilities.
	stocks = tuple(stock for stock, probability in zip(addedStocks, probabilities) if probability[1] > probabilityThreshold)
	if not stocks:
		print("No portfolio, probabilities lower than threshold of " + str(probabilityThreshold))
		return 0
	return getReturns(stocks, rebalanceIndex, 3)

In [2]:
# One array of sample indexes per rebalance index; these arrays are what the
# model-building cell later passes as `indexes`.
maxLength = 200      # distance between the first index of an array and its exclusive end
targetLength = 3     # spacing between consecutive indexes inside an array
featureLength = 12

indexes = []
for i in rebalanceIndexes(4,56):
    # Exclusive upper bound of the array: shifted back by the target and
    # feature lengths plus the rebalance index itself.
    windowEnd = -(targetLength + featureLength) - i
    indexes.append(np.arange(windowEnd - maxLength, windowEnd, targetLength))

print(len(indexes))
print(indexes)

18
[array([-219, -216, -213, -210, -207, -204, -201, -198, -195, -192, -189,
       -186, -183, -180, -177, -174, -171, -168, -165, -162, -159, -156,
       -153, -150, -147, -144, -141, -138, -135, -132, -129, -126, -123,
       -120, -117, -114, -111, -108, -105, -102,  -99,  -96,  -93,  -90,
        -87,  -84,  -81,  -78,  -75,  -72,  -69,  -66,  -63,  -60,  -57,
        -54,  -51,  -48,  -45,  -42,  -39,  -36,  -33,  -30,  -27,  -24,
        -21]), array([-222, -219, -216, -213, -210, -207, -204, -201, -198, -195, -192,
       -189, -186, -183, -180, -177, -174, -171, -168, -165, -162, -159,
       -156, -153, -150, -147, -144, -141, -138, -135, -132, -129, -126,
       -123, -120, -117, -114, -111, -108, -105, -102,  -99,  -96,  -93,
        -90,  -87,  -84,  -81,  -78,  -75,  -72,  -69,  -66,  -63,  -60,
        -57,  -54,  -51,  -48,  -45,  -42,  -39,  -36,  -33,  -30,  -27,
        -24]), array([-225, -222, -219, -216, -213, -210, -207, -204, -201, -198, -195,
       -192, -189

In [3]:
# Fit one classifier per index array built in the previous cell.
print("# of multiprocess cpus: " + str(os.cpu_count()))

sector = "Health Care"
featureList = [
	'EPS Growth', 'Volatility 180 D', 'Trailing EPS', 'Price to Cash Flow',
	'EPS', 'Volume', 'Return on Assets', 'Price to Book', 'Dividend Yield',
	'Total Debt to Total Equity', 'Return on Invested Capital',
	'Return on Common Equity',
]

# Keyword arguments that are identical for every fit; only `indexes` varies.
forestSettings = dict(
	modelType = Model.randomForestClassifier,
	target = 'Rate of Return',
	features = featureList,
	featureLength = 12,
	targetLength = 3,
	sector = sector,
	percentileTarget = 90,
	percentileAvoid = 10,
	verbose = True,
)

forestList = []
for ind in indexes:
	randForest = Model.buildWithIndexesTripleClass(indexes = ind, **forestSettings)
	forestList.append(randForest)

print(forestList)

# of multiprocess cpus: 8
Finished data retrieval, starting model training. Time taken: 141.92156386375427 seconds.
Finished fitting. Time taken: 21.719910860061646 seconds.
Finished data retrieval, starting model training. Time taken: 150.30966687202454 seconds.
Finished fitting. Time taken: 18.168582916259766 seconds.
Finished data retrieval, starting model training. Time taken: 151.14323616027832 seconds.
Finished fitting. Time taken: 11.82422399520874 seconds.
Finished data retrieval, starting model training. Time taken: 125.5440309047699 seconds.
Finished fitting. Time taken: 22.381747007369995 seconds.
Finished data retrieval, starting model training. Time taken: 144.76649284362793 seconds.
Finished fitting. Time taken: 13.604840993881226 seconds.
Finished data retrieval, starting model training. Time taken: 123.99750399589539 seconds.
Finished fitting. Time taken: 12.983163118362427 seconds.
Finished data retrieval, starting model training. Time taken: 116.27580213546753 seconds

In [4]:
# Evaluate every (rebalance index, fitted forest) pair in worker processes.
print("# of multiprocess cpus: " + str(os.cpu_count()))

# The pool is managed by a `with` block so its worker processes are shut down
# as soon as the results are in; previously the pool was never closed, which
# left the workers alive and piled up more of them on every re-run of the cell.
# pool.map blocks until all results are available, so nothing is lost when the
# block exits.
# NOTE: zip() stops at the shorter input, so forestList is expected to hold
# one model per rebalance index.
with Pool(os.cpu_count()) as pool:
    returnsList = pool.map(makePortfolio, zip(rebalanceIndexes(4,56), forestList))

print(returnsList)

# of multiprocess cpus: 8
('EDIT', 'ENTA', 'GTS')
-3
-1
EDIT: 0.23478320199270186
ENTA: 0.5368630680379205
GTS: -0.21246885024834494
Start Date: 2017-11-30
Total Return: 0.18639247326075914
('ABBV', 'ALGN', 'ASMB', 'EDIT', 'ENTA', 'JUNO', 'MDGL')
Total Return: 0.29066121413827334
-6
-4
ABBV: 0.18110346240714303
ALGN: 0.3017001403317492
ASMB: 0.08482652544427438
EDIT: 0.1610967913107073
ENTA: 0.14746213907382844
JUNO: 0.08452464333515586
MDGL: 1.073914797065055
Start Date: 2017-08-31
('ACOR', 'ALR', 'CCXI', 'CORT', 'CUTR', 'ENSG', 'ENTA', 'FGEN', 'JUNO', 'LMAT', 'PODD')
Total Return: 0.23615714562430298
-9
-7
ACOR: 0.45033686228533965
ALR: 0.24983378523724786
CCXI: 0.42337603467469465
CORT: 0.09763846956391609
CUTR: 0.1332572322389809
ENSG: 0.19645765857449726
ENTA: 0.23661621580412984
FGEN: 0.26119364683308
JUNO: 0.20200037888059041
LMAT: 0.16576953061712807
PODD: 0.18124878715772796
Start Date: 2017-05-31
('ALGN', 'BEAT', 'CORT', 'ENZ', 'GMED', 'LMAT', 'MASI', 'MDSO', 'MDXG', 'SUPN', 