In [10]:
import datetime
import Model
import numpy as np
import pandas as pd
import math
from multiprocessing import Pool
import os
import warnings
import time
import matplotlib.pyplot as plt
from importlib import reload

# Re-import Model so that edits made to the module since it was first imported take effect.
reload(Model)
# Suppress FutureWarning messages for the rest of the session.
warnings.simplefilter('ignore', FutureWarning)

# positive numbers, start should be lower than end
# a rebalance index represents the month before the rebalancing takes place
# so returns are calculated starting at rebalanceIndex + 1
def rebalanceIndexes(startIndex, endIndex, step=3):
    """Return the rebalance indexes between startIndex and endIndex, largest first.

    Args:
        startIndex: first (smallest) rebalance index; a positive number lower
            than endIndex.
        endIndex: exclusive upper bound of the rebalance indexes.
        step: spacing between consecutive rebalance indexes. Defaults to 3,
            the value that was previously hard-coded.

    Returns:
        A list of indexes startIndex, startIndex + step, ... (all below
        endIndex) in descending order.
    """
    return list(range(startIndex, endIndex, step))[::-1]


# get ln returns for an equally balanced portfolio of stocks
def getReturns(portfolio, index, length):
    """Average the per-stock rate of return over an equally weighted portfolio.

    For every stock the 'Last Price' series between the data indexes
    (-index + 1) and (-index + length) is fetched through Model.retrieveData
    and converted to a return with Model.rateOfReturn. Stocks whose return is
    NaN are left out of the average.

    Args:
        portfolio: iterable of stock tickers (strings).
        index: rebalance index; returns start one step after it.
        length: offset added to -index to get the last data index used.

    Returns:
        The mean of the non-NaN returns, or 0 when no stock has a usable return.
    """
    firstIndex = -1 * index + 1
    lastIndex = -1 * index + length
    print(portfolio)
    print(firstIndex)
    print(lastIndex)

    runningSum = 0
    usable = 0
    for stock in portfolio:
        prices = Model.retrieveData(stock, 'Last Price', firstIndex, lastIndex, [])
        indReturn = Model.rateOfReturn(prices)
        print(stock + ": " + str(indReturn))
        if math.isnan(indReturn):
            # Missing data for this stock: exclude it from the average.
            continue
        runningSum += indReturn
        usable += 1

    # Guard against dividing by zero when every stock was skipped.
    total = runningSum/usable if usable != 0 else 0
    print("Start Date: " + str(Model.convertIndexToDate(firstIndex)))
    print("Total Return: " +str(total))
    return total


# make a portfolio with predicted probabilities higher than a hardcoded threshold
def makePortfolio(treeTuple):
    """Select the stocks a trained model scores above the threshold and return their average return.

    Args:
        treeTuple: pair whose first item is the rebalance index and whose
            second item is the trained model passed to
            Model.predict_probabilities.

    Returns:
        0 when no stock clears the probability threshold; otherwise the value
        of getReturns for the selected stocks with a length of 3.
    """
    rebalanceIndex = treeTuple[0]
    trainedModel = treeTuple[1]
    probabilityThreshold = 0.8
    featureList = [
        'EPS Growth', 'Volatility 180 D', 'Trailing EPS', 'Price to Cash Flow',
        'EPS', 'Volume', 'Return on Assets', 'Price to Book', 'Dividend Yield',
        'Total Debt to Total Equity', 'Return on Invested Capital',
        'Return on Common Equity',
    ]

    addedStocks, probabilities = Model.predict_probabilities(
        trainedModel,
        startIndex = -1 * rebalanceIndex - 11,
        endIndex = -1 * rebalanceIndex,
        features = featureList,
        sector = "Financials",
    )

    # Keep a stock when the second entry of its probability row beats the
    # threshold (presumably the probability of the target class - TODO confirm).
    selected = [pair for pair in zip(addedStocks, probabilities) if pair[1][1] > probabilityThreshold]
    if not selected:
        print("No portfolio, probabilities lower than threshold of " + str(probabilityThreshold))
        return 0

    stocks = tuple(pair[0] for pair in selected)
    return getReturns(stocks, rebalanceIndex, 3)

In [2]:
def poolBuild(trainIndexes):
    """Build one triple-class random forest for the given training indexes and print the build time.

    Args:
        trainIndexes: the indexes handed to Model.buildWithIndexesTripleClass
            as its `indexes` argument.

    Returns:
        Whatever Model.buildWithIndexesTripleClass returns (the trained model).
    """
    startedAt = time.time()
    buildArguments = dict(
        modelType = Model.randomForestClassifier,
        indexes = trainIndexes,
        target = 'Rate of Return',
        features = [
            'EPS Growth', 'Volatility 180 D', 'Trailing EPS', 'Price to Cash Flow',
            'EPS', 'Volume', 'Return on Assets', 'Price to Book', 'Dividend Yield',
            'Total Debt to Total Equity', 'Return on Invested Capital',
            'Return on Common Equity',
        ],
        featureLength = 12,
        targetLength = 3,
        sector = "Financials",
        percentileTarget = 90,
        percentileAvoid = 10,
    )
    forest = Model.buildWithIndexesTripleClass(**buildArguments)
    print("Build time: " + str(time.time()-startedAt) + " seconds.")
    return forest

In [3]:
# Build one array of training indexes per rebalance index in [4, 28).
maxLength = 300
targetLength = 3
featureLength = 12
indexes = []
for i in rebalanceIndexes(4,28):
    # Each window stops (targetLength + featureLength) before -i, starts
    # maxLength earlier, and advances in steps of targetLength.
    windowStop = -1 * (targetLength + featureLength) - i
    indexes.append(np.arange(windowStop - maxLength, windowStop, targetLength))
print(len(indexes))
print(indexes)

8
[array([-340, -337, -334, -331, -328, -325, -322, -319, -316, -313, -310,
       -307, -304, -301, -298, -295, -292, -289, -286, -283, -280, -277,
       -274, -271, -268, -265, -262, -259, -256, -253, -250, -247, -244,
       -241, -238, -235, -232, -229, -226, -223, -220, -217, -214, -211,
       -208, -205, -202, -199, -196, -193, -190, -187, -184, -181, -178,
       -175, -172, -169, -166, -163, -160, -157, -154, -151, -148, -145,
       -142, -139, -136, -133, -130, -127, -124, -121, -118, -115, -112,
       -109, -106, -103, -100,  -97,  -94,  -91,  -88,  -85,  -82,  -79,
        -76,  -73,  -70,  -67,  -64,  -61,  -58,  -55,  -52,  -49,  -46,
        -43]), array([-337, -334, -331, -328, -325, -322, -319, -316, -313, -310, -307,
       -304, -301, -298, -295, -292, -289, -286, -283, -280, -277, -274,
       -271, -268, -265, -262, -259, -256, -253, -250, -247, -244, -241,
       -238, -235, -232, -229, -226, -223, -220, -217, -214, -211, -208,
       -205, -202, -199, -196, -1

In [4]:
# Train one forest per window in parallel. The context manager shuts the
# worker processes down once map() has returned; previously the pool was never
# closed, so every run of this cell left its workers alive.
with Pool(os.cpu_count()) as pool:
    print("# of multiprocess cpus: " + str(os.cpu_count()))
    forestList = pool.map(poolBuild, indexes)
print(forestList)

# of multiprocess cpus: 8
Finished data retrieval, starting model training.
Finished data retrieval, starting model training.
Finished data retrieval, starting model training.
Finished data retrieval, starting model training.
Finished data retrieval, starting model training.
Finished data retrieval, starting model training.
Finished data retrieval, starting model training.
Finished data retrieval, starting model training.
Finished fitting.
Build time: 3680.7723479270935 seconds.
Finished fitting.
Build time: 3684.011799097061 seconds.
Finished fitting.
Finished fitting.
Build time: 3686.2608730793 seconds.
Build time: 3687.0983028411865 seconds.
Finished fitting.
Build time: 3699.268303871155 seconds.
Finished fitting.
Finished fitting.
Build time: 3702.9995131492615 seconds.
Build time: 3703.768177986145 seconds.
Finished fitting.
Build time: 3736.223797082901 seconds.
[RandomForestClassifier(bootstrap=True, class_weight='balanced',
            criterion='gini', max_depth=None, max_fe

In [11]:
# Evaluate each (rebalance index, forest) pair in parallel. The context manager
# shuts the worker processes down once map() has returned; previously the pool
# was never closed, so every run of this cell left its workers alive.
with Pool(os.cpu_count()) as pool:
    print("# of multiprocess cpus: " + str(os.cpu_count()))
    returnsList = pool.map(makePortfolio, zip(rebalanceIndexes(4,28), forestList))
print(returnsList)

# of multiprocess cpus: 8
('AGM', 'AMBC', 'DFS', 'EIG', 'ENVA', 'GWB', 'HASI', 'IVR', 'LTXB', 'NAVI', 'PB', 'SAMG', 'SFBS', 'SMMF', 'TCBI', 'TSC')
-24
-22
AGM: 0.2260361280302603
AMBC: 0.06097126229595862
DFS: 0.19243113259889633
EIG: 0.06755091017005022
ENVA: 0.42148375726380194
GWB: 0.24746940058671374
HASI: 0.09680614356116779
IVR: 0.1276565212131997
LTXB: 0.32879698181477357
NAVI: 0.23288358972295908
PB: 0.26587620391307576
SAMG: 0.17742558618174753
SFBS: 0.29883713302947656
SMMF: 0.34799446607147333
TCBI: 0.34872508528800816
TSC: 0.10652553201660764
Start Date: 2016-02-29
Total Return: 0.22171686460988566
('EARN', 'GNBC', 'LTXB', 'MRLN', 'TREE', 'UBFO', 'WSBF')
-21
-19
EARN: 0.08482782978591841
GNBC: 0.1628798751793692
LTXB: 0.057736090769568094
MRLN: 0.19151400497811855
TREE: 0.19307434857021644
UBFO: 0.11864555025007317
WSBF: 0.07961762796469118
Start Date: 2016-05-31
Total Return: 0.12689933249970786
('AMTD', 'CARO', 'ESNT', 'ETFC', 'FBMS', 'FCFP', 'FFNW', 'FGBI', 'GHL', 'HBMD'

In [12]:
# Build one array of training indexes per rebalance index in [28, 52).
maxLength = 300
targetLength = 3
featureLength = 12
indexes = []
for i in rebalanceIndexes(28,52):
    # Each window stops (targetLength + featureLength) before -i, starts
    # maxLength earlier, and advances in steps of targetLength.
    windowStop = -1 * (targetLength + featureLength) - i
    indexes.append(np.arange(windowStop - maxLength, windowStop, targetLength))
print(len(indexes))
print(indexes)

8
[array([-364, -361, -358, -355, -352, -349, -346, -343, -340, -337, -334,
       -331, -328, -325, -322, -319, -316, -313, -310, -307, -304, -301,
       -298, -295, -292, -289, -286, -283, -280, -277, -274, -271, -268,
       -265, -262, -259, -256, -253, -250, -247, -244, -241, -238, -235,
       -232, -229, -226, -223, -220, -217, -214, -211, -208, -205, -202,
       -199, -196, -193, -190, -187, -184, -181, -178, -175, -172, -169,
       -166, -163, -160, -157, -154, -151, -148, -145, -142, -139, -136,
       -133, -130, -127, -124, -121, -118, -115, -112, -109, -106, -103,
       -100,  -97,  -94,  -91,  -88,  -85,  -82,  -79,  -76,  -73,  -70,
        -67]), array([-361, -358, -355, -352, -349, -346, -343, -340, -337, -334, -331,
       -328, -325, -322, -319, -316, -313, -310, -307, -304, -301, -298,
       -295, -292, -289, -286, -283, -280, -277, -274, -271, -268, -265,
       -262, -259, -256, -253, -250, -247, -244, -241, -238, -235, -232,
       -229, -226, -223, -220, -2

In [None]:
# Train one forest per window in parallel. The context manager shuts the
# worker processes down once map() has returned; previously the pool was never
# closed, so every run of this cell left its workers alive.
with Pool(os.cpu_count()) as pool:
    print("# of multiprocess cpus: " + str(os.cpu_count()))
    forestListTemp = pool.map(poolBuild, indexes)
print(forestListTemp)
# Put the batch just built in front of the forests from earlier batches.
forestList = forestListTemp + forestList

In [None]:
# Evaluate the newest forests (the first 8 entries of forestList) in parallel.
# The context manager shuts the worker processes down once map() has returned;
# previously the pool was never closed, so every run left its workers alive.
with Pool(os.cpu_count()) as pool:
    print("# of multiprocess cpus: " + str(os.cpu_count()))
    returnsListTemp = pool.map(makePortfolio, zip(rebalanceIndexes(28,52), forestList[0:8]))
print(returnsListTemp)
# Put the returns just computed in front of those from earlier batches.
returnsList = returnsListTemp + returnsList

In [None]:
# Build one array of training indexes per rebalance index in [52, 76).
# NOTE(review): maxLength is 200 here, while every other window-building cell
# in this notebook uses 300 - confirm whether the shorter window is intended.
maxLength = 200
targetLength = 3
featureLength = 12
indexes = []
for i in rebalanceIndexes(52,76):
    # Each window stops (targetLength + featureLength) before -i, starts
    # maxLength earlier, and advances in steps of targetLength.
    windowStop = -1 * (targetLength + featureLength) - i
    indexes.append(np.arange(windowStop - maxLength, windowStop, targetLength))
print(len(indexes))
print(indexes)

In [None]:
# Train one forest per window in parallel. The context manager shuts the
# worker processes down once map() has returned; previously the pool was never
# closed, so every run of this cell left its workers alive.
with Pool(os.cpu_count()) as pool:
    print("# of multiprocess cpus: " + str(os.cpu_count()))
    forestListTemp = pool.map(poolBuild, indexes)
print(forestListTemp)
# Put the batch just built in front of the forests from earlier batches.
forestList = forestListTemp + forestList

In [None]:
# Evaluate the newest forests (the first 8 entries of forestList) in parallel.
# The context manager shuts the worker processes down once map() has returned;
# previously the pool was never closed, so every run left its workers alive.
with Pool(os.cpu_count()) as pool:
    print("# of multiprocess cpus: " + str(os.cpu_count()))
    returnsListTemp = pool.map(makePortfolio, zip(rebalanceIndexes(52,76), forestList[0:8]))
print(returnsListTemp)
# Put the returns just computed in front of those from earlier batches.
returnsList = returnsListTemp + returnsList

In [None]:
# Build one array of training indexes per rebalance index in [76, 100).
maxLength = 300
targetLength = 3
featureLength = 12
indexes = []
for i in rebalanceIndexes(76,100):
    # Each window stops (targetLength + featureLength) before -i, starts
    # maxLength earlier, and advances in steps of targetLength.
    windowStop = -1 * (targetLength + featureLength) - i
    indexes.append(np.arange(windowStop - maxLength, windowStop, targetLength))
print(len(indexes))
print(indexes)

In [None]:
# Train one forest per window in parallel. The context manager shuts the
# worker processes down once map() has returned; previously the pool was never
# closed, so every run of this cell left its workers alive.
with Pool(os.cpu_count()) as pool:
    print("# of multiprocess cpus: " + str(os.cpu_count()))
    forestListTemp = pool.map(poolBuild, indexes)
print(forestListTemp)
# Put the batch just built in front of the forests from earlier batches.
forestList = forestListTemp + forestList

In [None]:
# Evaluate the newest forests (the first 8 entries of forestList) in parallel.
# The context manager shuts the worker processes down once map() has returned;
# previously the pool was never closed, so every run left its workers alive.
with Pool(os.cpu_count()) as pool:
    print("# of multiprocess cpus: " + str(os.cpu_count()))
    returnsListTemp = pool.map(makePortfolio, zip(rebalanceIndexes(76,100), forestList[0:8]))
print(returnsListTemp)
# Put the returns just computed in front of those from earlier batches.
returnsList = returnsListTemp + returnsList

In [None]:
# Build one array of training indexes per rebalance index in [100, 124).
maxLength = 300
targetLength = 3
featureLength = 12
indexes = []
for i in rebalanceIndexes(100,124):
    # Each window stops (targetLength + featureLength) before -i, starts
    # maxLength earlier, and advances in steps of targetLength.
    windowStop = -1 * (targetLength + featureLength) - i
    indexes.append(np.arange(windowStop - maxLength, windowStop, targetLength))
print(len(indexes))
print(indexes)

In [None]:
# Train one forest per window in parallel. The context manager shuts the
# worker processes down once map() has returned; previously the pool was never
# closed, so every run of this cell left its workers alive.
with Pool(os.cpu_count()) as pool:
    print("# of multiprocess cpus: " + str(os.cpu_count()))
    forestListTemp = pool.map(poolBuild, indexes)
print(forestListTemp)
# Put the batch just built in front of the forests from earlier batches.
forestList = forestListTemp + forestList

In [None]:
# Evaluate the newest forests (the first 8 entries of forestList) in parallel.
# The context manager shuts the worker processes down once map() has returned;
# previously the pool was never closed, so every run left its workers alive.
with Pool(os.cpu_count()) as pool:
    print("# of multiprocess cpus: " + str(os.cpu_count()))
    returnsListTemp = pool.map(makePortfolio, zip(rebalanceIndexes(100,124), forestList[0:8]))
print(returnsListTemp)
# Put the returns just computed in front of those from earlier batches.
returnsList = returnsListTemp + returnsList

In [None]:
# Build one array of training indexes per rebalance index in [124, 148).
maxLength = 300
targetLength = 3
featureLength = 12
indexes = []
for i in rebalanceIndexes(124,148):
    # Each window stops (targetLength + featureLength) before -i, starts
    # maxLength earlier, and advances in steps of targetLength.
    windowStop = -1 * (targetLength + featureLength) - i
    indexes.append(np.arange(windowStop - maxLength, windowStop, targetLength))
print(len(indexes))
print(indexes)

In [None]:
# Train one forest per window in parallel. The context manager shuts the
# worker processes down once map() has returned; previously the pool was never
# closed, so every run of this cell left its workers alive.
with Pool(os.cpu_count()) as pool:
    print("# of multiprocess cpus: " + str(os.cpu_count()))
    forestListTemp = pool.map(poolBuild, indexes)
print(forestListTemp)
# Put the batch just built in front of the forests from earlier batches.
forestList = forestListTemp + forestList

In [None]:
# Evaluate the newest forests (the first 8 entries of forestList) in parallel.
# The context manager shuts the worker processes down once map() has returned;
# previously the pool was never closed, so every run left its workers alive.
with Pool(os.cpu_count()) as pool:
    print("# of multiprocess cpus: " + str(os.cpu_count()))
    returnsListTemp = pool.map(makePortfolio, zip(rebalanceIndexes(124,148), forestList[0:8]))
print(returnsListTemp)
# Put the returns just computed in front of those from earlier batches.
returnsList = returnsListTemp + returnsList

In [None]:
# Build one array of training indexes per rebalance index in [148, 172).
maxLength = 300
targetLength = 3
featureLength = 12
indexes = []
for i in rebalanceIndexes(148,172):
    # Each window stops (targetLength + featureLength) before -i, starts
    # maxLength earlier, and advances in steps of targetLength.
    windowStop = -1 * (targetLength + featureLength) - i
    indexes.append(np.arange(windowStop - maxLength, windowStop, targetLength))
print(len(indexes))
print(indexes)

In [None]:
# Train one forest per window in parallel. The context manager shuts the
# worker processes down once map() has returned; previously the pool was never
# closed, so every run of this cell left its workers alive.
with Pool(os.cpu_count()) as pool:
    print("# of multiprocess cpus: " + str(os.cpu_count()))
    forestListTemp = pool.map(poolBuild, indexes)
print(forestListTemp)
# Put the batch just built in front of the forests from earlier batches.
forestList = forestListTemp + forestList

In [None]:
# Evaluate the newest forests (the first 8 entries of forestList) in parallel.
# The context manager shuts the worker processes down once map() has returned;
# previously the pool was never closed, so every run left its workers alive.
with Pool(os.cpu_count()) as pool:
    print("# of multiprocess cpus: " + str(os.cpu_count()))
    returnsListTemp = pool.map(makePortfolio, zip(rebalanceIndexes(148,172), forestList[0:8]))
print(returnsListTemp)
# Put the returns just computed in front of those from earlier batches.
returnsList = returnsListTemp + returnsList

In [None]:
# Build one array of training indexes per rebalance index in [172, 196).
maxLength = 300
targetLength = 3
featureLength = 12
indexes = []
for i in rebalanceIndexes(172,196):
    # Each window stops (targetLength + featureLength) before -i, starts
    # maxLength earlier, and advances in steps of targetLength.
    windowStop = -1 * (targetLength + featureLength) - i
    indexes.append(np.arange(windowStop - maxLength, windowStop, targetLength))
print(len(indexes))
print(indexes)

In [None]:
# Train one forest per window in parallel. The context manager shuts the
# worker processes down once map() has returned; previously the pool was never
# closed, so every run of this cell left its workers alive.
with Pool(os.cpu_count()) as pool:
    print("# of multiprocess cpus: " + str(os.cpu_count()))
    forestListTemp = pool.map(poolBuild, indexes)
print(forestListTemp)
# Put the batch just built in front of the forests from earlier batches.
forestList = forestListTemp + forestList

In [None]:
# Evaluate the newest forests (the first 8 entries of forestList) in parallel.
# The context manager shuts the worker processes down once map() has returned;
# previously the pool was never closed, so every run left its workers alive.
with Pool(os.cpu_count()) as pool:
    print("# of multiprocess cpus: " + str(os.cpu_count()))
    returnsListTemp = pool.map(makePortfolio, zip(rebalanceIndexes(172,196), forestList[0:8]))
print(returnsListTemp)
# Put the returns just computed in front of those from earlier batches.
returnsList = returnsListTemp + returnsList

In [None]:
# Summarise the returns gathered for every rebalance index: the raw list, its
# mean, standard deviation (np.std default, i.e. population form) and sum,
# followed by an 8-bin histogram.
print(returnsList)
returnsMean = np.mean(returnsList)
returnsStdDev = np.std(returnsList)
returnsTotal = sum(returnsList)
print("Mean: " + str(returnsMean))
print("Std Dev: " + str(returnsStdDev))
print("Total Sum: " + str(returnsTotal))
plt.hist(returnsList, bins=8, facecolor='green', alpha=0.75)