# Defining a target with multiple stocks
## Goal
* Quantify how good our models have to be if we have one model for each stock in the Dow Jones
* In this setup, each model tries to predict whether the asset's price in exactly 20 minutes will be higher than its price at the next minute.

## Main takeaways



In [None]:
# IPython automagic `cd`: switch the working directory to the project checkout.
# NOTE(review): presumably needed so the `modeling` package imports resolve;
# the path is specific to one machine - adjust before running elsewhere.
cd ~/Desktop/MyProjects/moneyManager/

In [None]:
import pandas as pd
import random
import numpy as np
from datetime import datetime
from modeling.loadTimeSeries import loadTimeSeries
from modeling.loadTimeSeries import getListOfAvailableStocks
from modeling.loadTimeSeries import loadPriceTimeSeries
from modeling.marketSimulator import marketSimulator

In [None]:
priceTimeSeries = loadPriceTimeSeries()

# Perfect-foresight labels, one boolean series per stock: an entry is True
# when the price 20 rows ahead is strictly greater than the price 1 row ahead
# (assumes one row per market minute - TODO confirm against loadPriceTimeSeries).
bestPossiblePredictions = {
    stock: priceTimeSeries[stock].shift(-20) > priceTimeSeries[stock].shift(-1)
    for stock in priceTimeSeries
}


def simulateStrategyMultipleTimes(precision, recall, availableStocks, numSimulations):
    """
       Run the random strategy simulation `numSimulations` times.

       Each run builds a fresh strategy with strategySimulatorWithMultipleStocks
       (default time window), feeds it to marketSimulator and records the last
       value of the simulator output together with the number of position
       changes in that strategy.

       Returns a pair of lists (final values, position-change counts), both of
       length `numSimulations` and in run order.
    """
    finalValues, positionChanges = [], []
    for _ in range(numSimulations):
        simulated = strategySimulatorWithMultipleStocks(precision, recall, availableStocks)
        finalValues.append(marketSimulator(simulated).values[-1])
        positionChanges.append(countPositionChanges(simulated))
    return finalValues, positionChanges
    
def strategySimulatorWithMultipleStocks(precision, recall, availableStocks, startTime=datetime(2019,1,1), endTime=datetime(2020,1,1)):
    """
       Simulate a per-minute position series driven by one noisy model per stock.

       For every stock in `availableStocks`, "buy" signals are drawn at random
       from the module-level `bestPossiblePredictions` so that the drawn set has
       the requested `precision` and `recall`: true positives are sampled among
       the True entries and false positives among the remaining entries, both
       without replacement so the counts are met exactly.

       At each minute the position moves to a randomly chosen stock among those
       signalling at that minute, or stays unchanged when none signals. A second
       pass then forces every position to be held for at least 20 consecutive
       minutes.

       Parameters
           precision: fraction of emitted signals that are correct, in (0, 1].
           recall: fraction of the True entries that get signalled, in [0, 1].
           availableStocks: sequence of tickers; each must be a key of
               `bestPossiblePredictions`.
           startTime, endTime: half-open window [startTime, endTime) of market
               minutes to simulate.

       Returns
           pd.Series indexed by the market minutes inside the window, holding
           the ticker owned at each minute.

       Raises
           ValueError: if precision/recall are outside their valid ranges, or if
               a stock does not have enough True/False entries to draw the
               requested number of true/false positives.

       We are loading/using all bestPossiblePredictions ... maybe we should not.
       NOTE(review): signal positions are offsets into the full
       bestPossiblePredictions series but are used as offsets into the
       [startTime, endTime) window, so this probably only works with the
       default startTime and endTime - confirm against the loaded data.
    """
    if not 0 < precision <= 1:
        raise ValueError("precision must be in (0, 1], got %r" % (precision,))
    if not 0 <= recall <= 1:
        raise ValueError("recall must be in [0, 1], got %r" % (recall,))

    availableMarketMinutes = loadTimeSeries('GS').index
    strategyTimes = [t for t in availableMarketMinutes if startTime <= t < endTime]
    possibleMoves = [[] for _ in strategyTimes]

    for stock in availableStocks:
        # assumes the prediction series is boolean - TODO confirm
        labels = bestPossiblePredictions[stock].values
        positiveIdx = [i for i, flag in enumerate(labels) if flag]
        negativeIdx = [i for i, flag in enumerate(labels) if not flag]

        # Counting from the index list avoids the KeyError that
        # value_counts()[True] raises when a stock has no True entry.
        allPositives = len(positiveIdx)
        predictedPositives = int(allPositives*recall/precision)
        truePositives = int(precision*predictedPositives)
        falsePositives = predictedPositives - truePositives
        if truePositives > len(positiveIdx) or falsePositives > len(negativeIdx):
            raise ValueError(
                "precision=%r and recall=%r are not achievable for %r: need %d true "
                "and %d false positives" % (precision, recall, stock, truePositives, falsePositives)
            )

        # Sample without replacement: duplicated indices would silently lower
        # the realised recall and bias the per-minute random choice below.
        TP = random.sample(positiveIdx, truePositives)
        FP = random.sample(negativeIdx, falsePositives)
        for p in TP + FP:
            possibleMoves[p].append(stock)

    strategy = [None] * len(strategyTimes)
    strategy[0] = random.choice(availableStocks)
    for i in range(1, len(strategy)):
        # follow a random signalling stock, otherwise keep the previous position
        strategy[i] = random.choice(possibleMoves[i]) if possibleMoves[i] else strategy[i-1]

    # force our positions to be held for at least 20 minutes
    minimumHold = 20
    held = 1  # minutes the current position has been held so far
    for i in range(1, len(strategy)):
        if strategy[i] == strategy[i-1]:
            held += 1
        elif held < minimumHold:
            # too early to switch: overwrite the move and keep holding
            strategy[i] = strategy[i-1]
            held += 1
        else:
            # the new position has now been held for one minute, exactly like
            # the initial position at index 0
            held = 1

    return pd.Series(strategy, index=strategyTimes)


def countPositionChanges(strategy):
    """
       Count how many times the held position changes along `strategy`.

       `strategy` is any iterable of positions in time order (a list or a
       pd.Series, whatever its index). Elements are compared positionally, so
       the result does not depend on the Series index labels.

       Returns the number of consecutive pairs whose positions differ (0 for
       an empty or single-element strategy).
    """
    # list() iterates the values of a Series, which avoids label-based
    # integer lookups such as strategy[i] on a non-default index.
    positions = list(strategy)
    return sum(1 for previous, current in zip(positions, positions[1:]) if previous != current)



# Use the following in test_marketSimulator.py
def testCaseForModelSimulator(STOCK):
    """
       Buy-and-hold baseline: hold `STOCK` at every timestamp of its 2019
       series and run that constant position through marketSimulator starting
       from an initial amount of 1.

       Returns the last value of the simulator output.
    """
    windowStart, windowEnd = datetime(2019,1,1), datetime(2020,1,1)
    consolidated = loadTimeSeries(STOCK, windowStart, windowEnd).consolidated
    alwaysHold = pd.Series([STOCK for _ in range(len(consolidated))], index=consolidated.index)
    return marketSimulator(alwaysHold, initialAmount=1).values[-1]

In [None]:
# Same recall and stock universe for both runs; only the precision differs
# (0.6 for the "strategy" run, 0.5 for the "random" run).
stratOutcome, stratMoves = simulateStrategyMultipleTimes(
    precision=0.6,
    recall=0.02,
    availableStocks=['GS', 'MSFT', 'IBM'],
    numSimulations=200,
)
randOutcome, randMoves = simulateStrategyMultipleTimes(
    precision=0.5,
    recall=0.02,
    availableStocks=['GS', 'MSFT', 'IBM'],
    numSimulations=200,
)

In [None]:
# plt.title below needs pyplot in scope; import it in this cell so the cell
# works without relying on any other cell having imported it.
import matplotlib.pyplot as plt

# Overlaid histograms of the final outcome of every simulation run,
# one series per precision setting.
dr = pd.DataFrame({'stratOutcome':stratOutcome, 'randOutcome': randOutcome})
dr.plot.hist(bins=20, alpha=0.5);
plt.title('Performance of strategies with precision=0.5 and precision=0.6')

In [None]:
# plt.title below needs pyplot in scope; import it in this cell so the cell
# works without relying on any other cell having imported it.
import matplotlib.pyplot as plt

# Overlaid histograms of the number of position changes in every simulation
# run, one series per precision setting.
dm = pd.DataFrame({'stratMoves':stratMoves, 'randMoves': randMoves})
dm.plot.hist(bins=20, alpha=0.5);
plt.title('Number of movements of strategies with precision=0.5 and precision=0.6')

In [None]:
# Buy-and-hold reference for each of the three stocks used above
# (evaluated in the order GS, IBM, MSFT).
winGS, winIBM, winMSFT = (
    testCaseForModelSimulator(ticker) for ticker in ('GS', 'IBM', 'MSFT')
)

In [None]:
# Show the three buy-and-hold results side by side, in the order GS, IBM, MSFT.
print(winGS, winIBM, winMSFT)

In [None]:
# Redraw the outcome histogram with coarser bins on a wider figure.
import matplotlib.pyplot as plt
plt.close('all')  # close every open figure before plotting again
dr.plot.hist(bins=10, alpha=0.5);
fig = plt.gcf()  # current figure, i.e. the one the histogram was just drawn on
fig.set_size_inches(10, 5)
plt.show()

In [None]:
# Row-wise gap between the two outcome columns (rows are paired by run number).
difference = dr['stratOutcome'] - dr['randOutcome']