In [1]:
import datetime
import Model
import numpy as np
import pandas as pd
import math
from multiprocessing import Pool
import os
import warnings
import time
import matplotlib.pyplot as plt
from importlib import reload

reload(Model)
warnings.simplefilter('ignore', FutureWarning)

# positive numbers, start should be lower than end
# a rebalance index represents the month before the rebalancing takes place
# so returns are calculated starting at rebalanceIndex + 1
def rebalanceIndexes(startIndex, endIndex, step=3):
    """Return the rebalance indexes between startIndex and endIndex.

    Args:
        startIndex: first rebalance index (inclusive).
        endIndex: upper bound (exclusive); nothing is returned when it is
            not greater than startIndex.
        step: spacing between consecutive rebalance indexes. Defaults to 3,
            the spacing this function always used before the parameter
            existed, so existing two-argument calls are unaffected.

    Returns:
        A list of ints: startIndex, startIndex + step, ... below endIndex.

    Raises:
        ValueError: if step is 0 (raised by range).
    """
    return list(range(startIndex, endIndex, step))


# get ln returns for an equally balanced portfolio of stocks
def getReturns(portfolio, index, length):
	"""Average the per-stock rate of return over a portfolio, skipping NaNs.

	For every stock the 'Last Price' data between the offsets
	``-index + 1`` and ``-index + length`` is fetched through
	Model.retrieveData and turned into a single figure by
	Model.rateOfReturn. Stocks whose figure is NaN are left out of the
	average. Progress is printed along the way.

	Args:
		portfolio: sized iterable of stock identifiers (strings).
		index: rebalance index; returns start one step after it.
		length: added to ``-index`` to obtain the end offset.

	Returns:
		The mean of the non-NaN returns, or 0 when there are none
		(empty portfolio or every stock returned NaN).
	"""
	firstOffset = -index + 1
	lastOffset = -index + length
	print(portfolio)
	print(firstOffset)
	print(lastOffset)

	runningSum = 0
	missing = 0
	for ticker in portfolio:
		prices = Model.retrieveData(ticker, 'Last Price', firstOffset, lastOffset, [])
		tickerReturn = Model.rateOfReturn(prices)
		print(ticker + ": " + str(tickerReturn))
		if math.isnan(tickerReturn):
			# no usable return for this stock: count it and leave it out of the mean
			missing += 1
			continue
		runningSum += tickerReturn

	usable = len(portfolio) - missing
	# guard against dividing by zero when nothing contributed
	total = runningSum / usable if usable != 0 else 0
	print("Start Date: " + str(Model.convertIndexToDate(firstOffset)))
	print("Total Return: " + str(total))
	return total


# make a portfolio of the stocks whose predicted probability exceeds a threshold
def makePortfolio(treeTuple, probabilityThreshold = 0.8, sector = "Materials", features = None, holdingLength = 3):
	"""Select stocks with a trained model and return the portfolio's return.

	Args:
		treeTuple: sequence whose element 0 is the rebalance index and whose
			element 1 is the trained model handed to
			Model.predict_probabilities.
		probabilityThreshold: a stock is kept when the second entry of its
			probability row is strictly greater than this value.
			Defaults to 0.8 (the previously hard-coded threshold).
		sector: sector passed to Model.predict_probabilities.
			Defaults to "Materials" (previously hard-coded).
		features: feature names passed to Model.predict_probabilities.
			None selects the twelve-feature list this function always used.
		holdingLength: length passed on to getReturns. Defaults to 3
			(previously hard-coded).

	Returns:
		The value of getReturns for the selected stocks, or 0 when no stock
		clears the threshold.

	All extra parameters have defaults, so single-argument callers such as
	pool.map(makePortfolio, ...) behave exactly as before.
	"""
	if features is None:
		features = ['EPS Growth', 'Volatility 180 D', 'Trailing EPS', 'Price to Cash Flow', 'EPS', 'Volume', 'Return on Assets', 'Price to Book', 'Dividend Yield', 'Total Debt to Total Equity', 'Return on Invested Capital', 'Return on Common Equity']
	rebalanceIndex = treeTuple[0]
	model = treeTuple[1]
	# NOTE(review): the 11 is presumably featureLength - 1 (a 12-step feature window) - confirm
	addedStocks, probabilities = Model.predict_probabilities(model, startIndex = -1 * rebalanceIndex - 11, endIndex = -1 * rebalanceIndex, features = features, sector = sector)
	# entry [1] of each probability row is compared; presumably the "target" class - confirm against Model
	selected = tuple(stock for stock, probability in zip(addedStocks, probabilities) if probability[1] > probabilityThreshold)
	if len(selected) == 0:
		print("Start Date: " + str(Model.convertIndexToDate(-1 * rebalanceIndex + 1)))
		print("No portfolio, probabilities lower than threshold of " + str(probabilityThreshold))
		return 0
	return getReturns(selected, rebalanceIndex, holdingLength)

In [2]:
# Window sizes are the same for every rebalance index, so set them once.
maxLength = 200      # span covered by each index array
targetLength = 3     # spacing between consecutive entries of an array
featureLength = 12

# Build one array of (negative) indexes per rebalance index.
indexes = []
for i in rebalanceIndexes(4,56):
    # every array stops just before this offset and starts maxLength earlier
    windowEnd = -(targetLength + featureLength) - i
    indexes.append(np.arange(windowEnd - maxLength, windowEnd, targetLength))
print(len(indexes))
print(indexes)

18
[array([-219, -216, -213, -210, -207, -204, -201, -198, -195, -192, -189,
       -186, -183, -180, -177, -174, -171, -168, -165, -162, -159, -156,
       -153, -150, -147, -144, -141, -138, -135, -132, -129, -126, -123,
       -120, -117, -114, -111, -108, -105, -102,  -99,  -96,  -93,  -90,
        -87,  -84,  -81,  -78,  -75,  -72,  -69,  -66,  -63,  -60,  -57,
        -54,  -51,  -48,  -45,  -42,  -39,  -36,  -33,  -30,  -27,  -24,
        -21]), array([-222, -219, -216, -213, -210, -207, -204, -201, -198, -195, -192,
       -189, -186, -183, -180, -177, -174, -171, -168, -165, -162, -159,
       -156, -153, -150, -147, -144, -141, -138, -135, -132, -129, -126,
       -123, -120, -117, -114, -111, -108, -105, -102,  -99,  -96,  -93,
        -90,  -87,  -84,  -81,  -78,  -75,  -72,  -69,  -66,  -63,  -60,
        -57,  -54,  -51,  -48,  -45,  -42,  -39,  -36,  -33,  -30,  -27,
        -24]), array([-225, -222, -219, -216, -213, -210, -207, -204, -201, -198, -195,
       -192, -189

In [3]:
print("# of multiprocess cpus: " + str(os.cpu_count()))
sector = "Materials"
featureList = [
	'EPS Growth', 'Volatility 180 D', 'Trailing EPS', 'Price to Cash Flow',
	'EPS', 'Volume', 'Return on Assets', 'Price to Book', 'Dividend Yield',
	'Total Debt to Total Equity', 'Return on Invested Capital', 'Return on Common Equity',
]

# Settings shared by every model; only the index array differs between builds.
forestSettings = dict(
	modelType = Model.randomForestClassifier,
	target = 'Rate of Return',
	features = featureList,
	featureLength = 12,
	targetLength = 3,
	sector = sector,
	percentileTarget = 90,
	percentileAvoid = 10,
	verbose = True,
)

# Train one model per index array, keeping them in the same order as `indexes`.
forestList = []
for ind in indexes:
	randForest = Model.buildWithIndexesTripleClass(indexes = ind, **forestSettings)
	forestList.append(randForest)
print(forestList)

# of multiprocess cpus: 8
Finished data retrieval, starting model training. Time taken: 24.109548091888428 seconds.
Finished fitting. Time taken: 6.226154088973999 seconds.
Finished data retrieval, starting model training. Time taken: 24.90213704109192 seconds.
Finished fitting. Time taken: 7.146939992904663 seconds.
Finished data retrieval, starting model training. Time taken: 25.920135974884033 seconds.
Finished fitting. Time taken: 6.6256632804870605 seconds.
Finished data retrieval, starting model training. Time taken: 31.664777994155884 seconds.
Finished fitting. Time taken: 7.552135944366455 seconds.
Finished data retrieval, starting model training. Time taken: 30.601070880889893 seconds.
Finished fitting. Time taken: 6.923440933227539 seconds.
Finished data retrieval, starting model training. Time taken: 30.310789346694946 seconds.
Finished fitting. Time taken: 6.318671941757202 seconds.
Finished data retrieval, starting model training. Time taken: 27.520404815673828 seconds.
Fi

In [4]:
# Evaluate every (rebalance index, trained model) pair in parallel.
# The pool is used as a context manager so its worker processes are always
# shut down, even if makePortfolio raises; previously the pool was never
# closed. pool.map blocks until all results are in, so leaving the block
# cannot cut off pending work.
with Pool(os.cpu_count()) as pool:
    print("# of multiprocess cpus: " + str(os.cpu_count()))
    # zip pairs the i-th rebalance index with the i-th model in forestList
    returnsList = pool.map(makePortfolio, zip(rebalanceIndexes(4,56), forestList))
print(returnsList)

# of multiprocess cpus: 8
No portfolio, probabilities lower than threshold of 0.8
('DYN', 'PCYO')
Total Return: 0.1630161563740733
-6
-4
DYN: 0.2788855343224448
PCYO: 0.04714677842570181
Start Date: 2017-08-31
('AGR', 'CPN', 'DYN', 'PEGI')
Total Return: 0.07533229766104022
-9
-7
AGR: -0.00022014309389950526
CPN: 0.11249454095167177
DYN: 0.08236537327850835
PEGI: 0.10668941950788025
Start Date: 2017-05-31
('GNE', 'PEGI', 'SPKE', 'TERP')
Total Return: 0.20240560105701716
-12
-10
GNE: 0.37755631999108785
PEGI: 0.05747902942461236
SPKE: 0.284900429489904
TERP: 0.08968662532246441
Start Date: 2017-02-28
('NRG', 'ORA')
Total Return: 0.2466026675592763
-15
-13
NRG: 0.377445391295939
ORA: 0.11575994382261356
Start Date: 2016-11-30
No portfolio, probabilities lower than threshold of 0.8
No portfolio, probabilities lower than threshold of 0.8
('AWK', 'DYN', 'ORA')
Total Return: 0.267757494743117
-24
-22
AWK: 0.11555215593652068
DYN: 0.5590487337665566
ORA: 0.12867159452627375
Start Date: 2016-02