In [1]:
import datetime
import Model
import numpy as np
import pandas as pd
import math
from multiprocessing import Pool
import os
import warnings
import time
import matplotlib.pyplot as plt
from importlib import reload

# Re-import the project-local Model module so that edits made to it since the
# first import are picked up without restarting the kernel.
reload(Model)
# Suppress FutureWarning messages so they do not clutter the cell outputs.
warnings.simplefilter('ignore', FutureWarning)

# Both indexes are positive numbers, and startIndex should be lower than endIndex.
# A rebalance index represents the month before the rebalancing takes place,
# so returns are calculated starting at rebalanceIndex + 1.
def rebalanceIndexes(startIndex, endIndex, step=3):
    """Return the evenly spaced rebalance indexes in [startIndex, endIndex).

    Args:
        startIndex: First rebalance index (inclusive).
        endIndex: Upper bound (exclusive). When it is not greater than
            startIndex the result is an empty list.
        step: Distance between consecutive rebalance indexes. Defaults to 3,
            the spacing that was previously hard-coded.

    Returns:
        A list of ints: startIndex, startIndex + step, ... below endIndex.

    Raises:
        ValueError: If step is 0 (raised by range()).
    """
    return list(range(startIndex, endIndex, step))


# get ln returns for an equally balanced portfolio of stocks
def getReturns(portfolio, index, length):
    """Average the per-stock returns of an equally weighted portfolio.

    For every stock the 'Last Price' data between the offsets
    ``-index + 1`` and ``-index + length`` is fetched through
    Model.retrieveData and turned into a return by Model.rateOfReturn.
    Stocks whose return is NaN are left out of the average.

    Args:
        portfolio: Sized iterable of ticker strings.
        index: Rebalance index; it is negated to build the data offsets.
        length: Added to ``-index`` to obtain the end offset of the window.

    Returns:
        The mean of the non-NaN returns, or 0 when no stock produced a
        usable return (including an empty portfolio).
    """
    firstOffset = -1 * index + 1
    lastOffset = -1 * index + length

    # Diagnostic output: the portfolio and the offsets that bound the window.
    print(portfolio)
    print(firstOffset)
    print(lastOffset)

    runningTotal = 0
    skipped = 0
    for ticker in portfolio:
        prices = Model.retrieveData(ticker, 'Last Price', firstOffset, lastOffset, [])
        tickerReturn = Model.rateOfReturn(prices)
        print(ticker + ": " + str(tickerReturn))
        if math.isnan(tickerReturn):
            # No usable return for this stock; keep it out of the average.
            skipped += 1
            continue
        runningTotal += tickerReturn

    usable = len(portfolio) - skipped
    total = runningTotal / usable if usable != 0 else 0

    print("Start Date: " + str(Model.convertIndexToDate(firstOffset)))
    print("Total Return: " + str(total))
    return total


# make a portfolio with predicted probabilities higher than a hardcoded threshold
def makePortfolio(treeTuple, probabilityThreshold=0.8, sector="Energy", features=None, featureLength=12, targetLength=3):
    """Select stocks by predicted probability and return the portfolio's return.

    Only the first positional argument is required, so the function can still
    be used directly with Pool.map over (rebalanceIndex, model) tuples.

    Args:
        treeTuple: Sequence whose item 0 is the rebalance index and whose
            item 1 is the fitted model handed to Model.predict_probabilities.
        probabilityThreshold: A stock is selected when the second entry of its
            probability row is strictly greater than this value.
            NOTE(review): presumably that entry is the probability of the
            target class -- confirm against Model.predict_probabilities.
        sector: Sector name forwarded to Model.predict_probabilities.
        features: Feature names forwarded to Model.predict_probabilities.
            None selects the default twelve-feature list below.
        featureLength: Size of the feature window; the prediction window
            runs from ``-index - (featureLength - 1)`` to ``-index``.
        targetLength: Window length forwarded to getReturns.

    Returns:
        The value returned by getReturns for the selected stocks, or 0 when
        no stock exceeds the threshold.
    """
    if features is None:
        features = [
            'EPS Growth', 'Volatility 180 D', 'Trailing EPS', 'Price to Cash Flow',
            'EPS', 'Volume', 'Return on Assets', 'Price to Book', 'Dividend Yield',
            'Total Debt to Total Equity', 'Return on Invested Capital',
            'Return on Common Equity',
        ]

    rebalanceIndex = treeTuple[0]
    model = treeTuple[1]

    # With the default featureLength of 12 this reproduces the former
    # hard-coded window of (-index - 11, -index).
    endIndex = -1 * rebalanceIndex
    startIndex = endIndex - (featureLength - 1)

    addedStocks, probabilities = Model.predict_probabilities(
        model,
        startIndex=startIndex,
        endIndex=endIndex,
        features=features,
        sector=sector,
    )

    # Keep the tickers whose second probability entry clears the threshold.
    selected = tuple(
        stock
        for stock, stockProbabilities in zip(addedStocks, probabilities)
        if stockProbabilities[1] > probabilityThreshold
    )
    if not selected:
        print("No portfolio, probabilities lower than threshold of " + str(probabilityThreshold))
        return 0
    return getReturns(selected, rebalanceIndex, targetLength)

In [2]:
# Window parameters are identical for every rebalance index, so they are
# defined once up front.
maxLength = 200      # span covered by each window of indexes
targetLength = 3     # spacing between consecutive indexes in a window
featureLength = 12   # together with targetLength, the gap kept before -i

indexes = []
for i in rebalanceIndexes(4, 56):
    # Each window stops (exclusive) targetLength + featureLength before -i
    # and reaches maxLength further back from that point.
    windowStop = -1 * (targetLength + featureLength) - i
    windowStart = windowStop - maxLength
    indexes.append(np.arange(windowStart, windowStop, targetLength))

print(len(indexes))
print(indexes)

18
[array([-219, -216, -213, -210, -207, -204, -201, -198, -195, -192, -189,
       -186, -183, -180, -177, -174, -171, -168, -165, -162, -159, -156,
       -153, -150, -147, -144, -141, -138, -135, -132, -129, -126, -123,
       -120, -117, -114, -111, -108, -105, -102,  -99,  -96,  -93,  -90,
        -87,  -84,  -81,  -78,  -75,  -72,  -69,  -66,  -63,  -60,  -57,
        -54,  -51,  -48,  -45,  -42,  -39,  -36,  -33,  -30,  -27,  -24,
        -21]), array([-222, -219, -216, -213, -210, -207, -204, -201, -198, -195, -192,
       -189, -186, -183, -180, -177, -174, -171, -168, -165, -162, -159,
       -156, -153, -150, -147, -144, -141, -138, -135, -132, -129, -126,
       -123, -120, -117, -114, -111, -108, -105, -102,  -99,  -96,  -93,
        -90,  -87,  -84,  -81,  -78,  -75,  -72,  -69,  -66,  -63,  -60,
        -57,  -54,  -51,  -48,  -45,  -42,  -39,  -36,  -33,  -30,  -27,
        -24]), array([-225, -222, -219, -216, -213, -210, -207, -204, -201, -198, -195,
       -192, -189

In [3]:
print(f"# of multiprocess cpus: {os.cpu_count()}")

sector = "Energy"
featureList = [
    'EPS Growth',
    'Volatility 180 D',
    'Trailing EPS',
    'Price to Cash Flow',
    'EPS',
    'Volume',
    'Return on Assets',
    'Price to Book',
    'Dividend Yield',
    'Total Debt to Total Equity',
    'Return on Invested Capital',
    'Return on Common Equity',
]

# Build one model per window of indexes produced by the previous cell; the
# resulting list lines up positionally with those windows.
forestList = []
for ind in indexes:
    randForest = Model.buildWithIndexesTripleClass(
        modelType=Model.randomForestClassifier,
        indexes=ind,
        target='Rate of Return',
        features=featureList,
        featureLength=12,
        targetLength=3,
        sector=sector,
        percentileTarget=90,
        percentileAvoid=10,
        verbose=True,
    )
    forestList.append(randForest)

print(forestList)

# of multiprocess cpus: 8
Finished data retrieval, starting model training. Time taken: 47.33082985877991 seconds.
Finished fitting. Time taken: 8.013718843460083 seconds.
Finished data retrieval, starting model training. Time taken: 50.008206844329834 seconds.
Finished fitting. Time taken: 9.531250238418579 seconds.
Finished data retrieval, starting model training. Time taken: 49.33250188827515 seconds.
Finished fitting. Time taken: 7.889972925186157 seconds.
Finished data retrieval, starting model training. Time taken: 49.70875310897827 seconds.
Finished fitting. Time taken: 7.923579216003418 seconds.
Finished data retrieval, starting model training. Time taken: 50.18398070335388 seconds.
Finished fitting. Time taken: 8.370191812515259 seconds.
Finished data retrieval, starting model training. Time taken: 50.862263679504395 seconds.
Finished fitting. Time taken: 7.6772801876068115 seconds.
Finished data retrieval, starting model training. Time taken: 48.42533206939697 seconds.
Finish

In [4]:
# Evaluate every (rebalance index, model) pair in parallel worker processes.
# The pool is shut down in a finally-block so that re-running this cell does
# not leave a fresh set of worker processes behind each time.
pool = Pool(os.cpu_count())
try:
    print("# of multiprocess cpus: " + str(os.cpu_count()))
    returnsList = pool.map(makePortfolio, zip(rebalanceIndexes(4,56), forestList))
finally:
    # close() + join() lets the workers finish and exit normally, rather than
    # killing them with terminate() (which is what `with Pool(...)` does).
    pool.close()
    pool.join()
print(returnsList)

# of multiprocess cpus: 8
No portfolio, probabilities lower than threshold of 0.8
('BBG', 'CLR', 'CVI', 'FET', 'NGS', 'PBF', 'RICE', 'XEC')
Total Return: 0.21234648264067077
-6
-4
BBG: 0.5000656006520037
CLR: 0.18246895159118504
CVI: 0.24804110293593684
FET: 0.21622310846963577
NGS: 0.16378930866502683
PBF: 0.2016300017654542
RICE: 0.027043692928508722
XEC: 0.15951009411761508
Start Date: 2017-08-31
('PBF',)
Total Return: 0.16430305129127598
-9
-7
PBF: 0.16430305129127598
Start Date: 2017-05-31
No portfolio, probabilities lower than threshold of 0.8
('LPG', 'NNA')
Total Return: 0.3166056159165611
-15
-13
LPG: 0.4826383733537478
NNA: 0.15057285847937435
Start Date: 2016-11-30
('CLD', 'EPE', 'JONE', 'REI', 'SLCA', 'WLL')
CLD: 0.39979937057248716
-18
-16
EPE: -0.146092618497496
JONE: 0.3921471219360937
REI: -0.07209387278202062
SLCA: 0.16281188021409676
WLL: 0.1224967979008138
Start Date: 2016-08-31
Total Return: 0.1431781132239958
('CLD', 'ECR', 'REN', 'RSPP')
Total Return: 0.40064969525