# MC030 - Projeto final de Graduação
## Análise e Previsão do Mercado de Ações

## Descrição do modelo

A partir de um conjunto de cotações diárias de dólar para real (USD-BRL), desejamos modelar o comportamento do câmbio usando uma série de indicadores do mercado financeiro, de modo a classificar cada dia como um momento de compra ou um momento de venda.

# ...

# Código

### Definindo endpoints da API (https://www.alphavantage.co/documentation/)

In [1]:
import os

# Alpha Vantage API key.
# NOTE(review): a literal key is committed to the notebook. It is kept only as a
# fallback so existing runs keep working; set ALPHAVANTAGE_API_KEY to override it
# and consider rotating the committed key.
apiToken = os.environ.get('ALPHAVANTAGE_API_KEY', '3GX5M109KQK9B6HO')
# Common prefix of every request; callers append "&function=..." and its parameters.
baseUrl = 'https://www.alphavantage.co/query?&outputsize=full&apikey=' + apiToken
# Ready-made URL for the full daily USD -> BRL exchange-rate series.
usdBrlUrl = 'https://www.alphavantage.co/query?function=FX_DAILY&from_symbol=USD&to_symbol=BRL&outputsize=full&apikey=' + apiToken
# Sample daily stock URL (MSFT) that uses the public "demo" key.
stockUrl = 'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=MSFT&outputsize=full&apikey=demo'

### Obtendo dados da API

In [2]:
import requests
import json
import time

# Number of API requests counted since the counter was last reset.
callsMade = 0

def incrementAndVerifyCallsMade():
    """Count one more API request, pausing first when four were already counted.

    When the counter equals 4 a message is printed, the process sleeps for
    65 seconds and the counter restarts; in every case the request about to
    be made is then counted.
    """
    global callsMade
    windowExhausted = (callsMade == 4)
    if windowExhausted:
        print('Made 5 requests to the API in the last minute - sleeping for 1 minute')
        time.sleep(65)
    # After a pause the counter restarts at 1 (the request that follows).
    callsMade = 1 if windowExhausted else callsMade + 1
    
def parsePriceRetrieved(stocksRetrieved):
    """Map every date of a retrieved time series to its opening price.

    stocksRetrieved: dict of date -> candle dict; every candle must contain
    the key '1. open' (a KeyError is raised otherwise).
    Returns a dict of date -> the candle's '1. open' value, left unconverted.
    """
    return {day: candle['1. open'] for day, candle in stocksRetrieved.items()}
def parseIndicatorRetrieved(indicatorsRetrieved):
    """Flatten a retrieved technical-indicator series into {date: {symbol: value}}.

    indicatorsRetrieved: dict of date -> {seriesName: value}.
    The series 'MACD_Signal' and 'MACD_Hist' are dropped and 'Chaikin A/D' is
    renamed to 'AD'. Every other series of a date is kept, so indicators that
    report several series per date retain all of them (previously each series
    overwrote the date's dict and only the last one survived).
    Dates whose series are all dropped do not appear in the result.
    """
    skippedSymbols = ('MACD_Signal', 'MACD_Hist')
    indicatorValues = {}
    for date, indicator in indicatorsRetrieved.items():
        for indicatorSymbol, indicatorValue in indicator.items():
            if indicatorSymbol in skippedSymbols:
                continue
            if indicatorSymbol == 'Chaikin A/D':
                indicatorSymbol = 'AD'
            # Accumulate into the date's dict instead of replacing it.
            indicatorValues.setdefault(date, {})[indicatorSymbol] = indicatorValue
    return indicatorValues
# Returns a dict {date: opening price} for one stock symbol.
def getStockPrice(stockSymbol):
    """Download the daily series of `stockSymbol` and return its opening prices.

    Builds the TIME_SERIES_DAILY request from `baseUrl`, counts the call with
    `incrementAndVerifyCallsMade`, and parses the 'Time Series (Daily)' entry
    of the JSON response with `parsePriceRetrieved`.
    Raises KeyError when the response has no 'Time Series (Daily)' entry.
    """
    queryString = '&function=TIME_SERIES_DAILY&symbol=' + stockSymbol
    requestUrl = baseUrl + queryString
    print('Fetching ' + requestUrl)
    incrementAndVerifyCallsMade()
    payload = requests.get(requestUrl).json()
    dailySeries = payload['Time Series (Daily)']
    openingPrices = parsePriceRetrieved(dailySeries)
    print('Retrieved and parsed '+ stockSymbol)
    return openingPrices
# Returns a dict {date: opening price} for one currency pair.
def getForexPrice(fromSymbol, toSymbol):
    """Download the daily FX series for fromSymbol -> toSymbol and return its opening prices.

    Builds the FX_DAILY request from `baseUrl`, counts the call with
    `incrementAndVerifyCallsMade`, and parses the 'Time Series FX (Daily)'
    entry of the JSON response with `parsePriceRetrieved`.
    Raises KeyError when the response has no 'Time Series FX (Daily)' entry.
    """
    forexUrl = baseUrl + '&function=FX_DAILY&from_symbol=' + fromSymbol + '&to_symbol=' + toSymbol
    print('Fetching ' + forexUrl)
    incrementAndVerifyCallsMade()
    forexRetrieved = requests.get(forexUrl).json()
    # Parse only the price series: the top-level response also carries a
    # metadata entry whose values have no '1. open' key, so handing the whole
    # response to the parser (as before) raised KeyError.
    forex = parsePriceRetrieved(forexRetrieved['Time Series FX (Daily)'])
    print('Retrieved and parsed '+ fromSymbol + toSymbol)
    return forex
# Extra query-string parameters appended for each supported indicator function.
# Functions that are not listed are requested without extra parameters.
_TIME_PERIOD_10 = '&time_period=10'
_FAST_SLOW_MA = '&fastperiod=12&slowperiod=26&matype=0'
_INDICATOR_EXTRA_PARAMS = {
    'SMA': _TIME_PERIOD_10,
    'EMA': _TIME_PERIOD_10,
    'WMA': _TIME_PERIOD_10,
    'DEMA': _TIME_PERIOD_10,
    'TEMA': _TIME_PERIOD_10,
    'TRIMA': _TIME_PERIOD_10,
    'KAMA': _TIME_PERIOD_10,
    'MAMA': '&fastlimit=0.01&slowlimit=0.01',
    'T3': _TIME_PERIOD_10,
    'MACD': '&fastperiod=12&slowperiod=26&signalperiod=9',
    'MACDEXT': '&fastperiod=12&slowperiod=26&signalperiod=9&fastmatype=0&slowmatype=0&signalmatype=0',
    'STOCH': '&fastkperiod=5&slowkperiod=3&slowdperiod=3&slowkmatype=0&slowdmatype=0',
    'STOCHF': '&fastkperiod=5&fastdperiod=3&fastdmatype=0',
    'RSI': _TIME_PERIOD_10,
    'STOCHRSI': '&time_period=10&fastkperiod=5&fastdperiod=3&fastdmatype=0',
    'WILLR': _TIME_PERIOD_10,
    'ADX': _TIME_PERIOD_10,
    'ADXR': _TIME_PERIOD_10,
    'APO': _FAST_SLOW_MA,
    'PPO': _FAST_SLOW_MA,
    'MOM': _TIME_PERIOD_10,
    'BOP': '',
    'CCI': _TIME_PERIOD_10,
    'CMO': _TIME_PERIOD_10,
    'ROC': _TIME_PERIOD_10,
    'ROCR': _TIME_PERIOD_10,
    'AROON': '&time_period=14',
    'AROONOSC': _TIME_PERIOD_10,
    'MFI': _TIME_PERIOD_10,
    'TRIX': _TIME_PERIOD_10,
    'ULTOSC': '&timeperiod1=7&timeperiod2=14&timeperiod3=28',
    'DX': _TIME_PERIOD_10,
    'AD': '',
    'OBV': '',
}
# TODO: finish this table for the remaining indicator functions.


# Returns {date: {indicatorSymbol: indicatorValue}} for one indicator function.
def getIndicator(indicatorFunction, symbol):
    """Download one daily technical indicator for `symbol` and parse it.

    indicatorFunction: API function name (e.g. 'SMA', 'MACD', 'AD').
    symbol: ticker (or concatenated currency pair) the indicator is computed for.

    The request always uses interval=daily and series_type=open, plus the
    function-specific parameters from `_INDICATOR_EXTRA_PARAMS`. The call is
    counted with `incrementAndVerifyCallsMade` and the
    'Technical Analysis: <name>' entry of the JSON response is parsed with
    `parseIndicatorRetrieved`.
    Raises KeyError when the response lacks that entry.
    """
    indicatorUrl = baseUrl + '&interval=daily&series_type=open&function=' + indicatorFunction + '&symbol=' + symbol
    indicatorUrl += _INDICATOR_EXTRA_PARAMS.get(indicatorFunction, '')
    # The AD series is looked up under the name "Chaikin A/D" in the response.
    responseName = 'Chaikin A/D' if indicatorFunction == 'AD' else indicatorFunction
    print('Fetching ' + indicatorUrl)
    incrementAndVerifyCallsMade()
    indicatorRetrieved = requests.get(indicatorUrl).json()
    indicators = parseIndicatorRetrieved(indicatorRetrieved['Technical Analysis: ' + responseName])
    print('Retrieved and parsed '+ responseName)
    return indicators
# Returns {date: {indicatorSymbol: indicatorValue}} merged over all requested functions.
def getIndicatorList(indicatorFunctions, symbol):
    """Fetch several indicators for `symbol` and merge them per date.

    indicatorFunctions: iterable of indicator function names, fetched in order
    with `getIndicator`.
    Returns a dict date -> {indicatorSymbol: value}; when two fetches report
    the same symbol for a date, the later one wins.
    """
    merged = {}
    for requested in indicatorFunctions:
        perDate = getIndicator(requested, symbol)
        for day, values in perDate.items():
            # A date is only created once it has at least one value to store.
            if values:
                merged.setdefault(day, {}).update(values)
    return merged

In [3]:
# Keep only valid data - the dates present in both the price dict and the indicator dict
def filterData(prices, indicators, indicatorFunctions=None):
    """Restrict prices and indicators to the dates they have in common.

    prices: dict date -> price.
    indicators: dict date -> indicator values.
    indicatorFunctions: unused; kept (now optional) so both the 2-argument and
    the 3-argument call styles found in this file work.
    Returns (filteredPrices, filteredIndicators), each preserving the order of
    its source dict.
    """
    filteredPrices = {date: price for date, price in prices.items() if date in indicators}
    filteredIndicators = {date: values for date, values in indicators.items() if date in prices}
    return filteredPrices, filteredIndicators

def getData(stockSymbol, indicatorFunctions):
    """Fetch prices and indicators for a stock and keep only their common dates.

    Calls `getStockPrice`, then `getIndicatorList`, then `filterData`.
    Returns (prices, indicators) as produced by `filterData`.
    """
    rawPrices = getStockPrice(stockSymbol)
    rawIndicators = getIndicatorList(indicatorFunctions, stockSymbol)
    commonPrices, commonIndicators = filterData(rawPrices, rawIndicators, indicatorFunctions)
    return commonPrices, commonIndicators

def getForexData(fromForexSymbol, toForexSymbol, indicatorFunctions):
    """Fetch prices and indicators for a currency pair and keep only their common dates.

    Indicators are requested for the concatenated pair
    (fromForexSymbol + toForexSymbol).
    Returns (forexPrices, indicators) as produced by `filterData`.
    """
    forexPrices = getForexPrice(fromForexSymbol, toForexSymbol)
    indicators = getIndicatorList(indicatorFunctions, fromForexSymbol + toForexSymbol)
    # filterData takes three arguments; the previous 2-argument call raised TypeError.
    forexPrices, indicators = filterData(forexPrices, indicators, indicatorFunctions)
    return forexPrices, indicators

### Métodos de anotação de dados

In [4]:
import collections

def annotateBuyOrSellTwoClasses(prices):
    """Label dates 'B' or 'S' by comparing consecutive entries of `prices`.

    prices: dict date -> price (anything `float()` accepts), walked in its
    stored order.
    Each date is labelled from the entry that follows it: 'B' when the
    following price is higher, 'S' when it is lower, and the most recent label
    (initially '') when both are equal. The final entry gets no label, except
    that a single-entry dict maps its only date to ''.
    Returns a dict date -> label.
    """
    labels = {}
    entries = list(prices.items())
    if not entries:
        return labels
    refDate, refPrice = entries[0][0], float(entries[0][1])
    carried = ''
    # The first entry is compared with itself, which only seeds labels[refDate].
    for nextDate, rawPrice in entries:
        nextPrice = float(rawPrice)
        if nextPrice > refPrice:
            carried = 'B'
        elif nextPrice < refPrice:
            carried = 'S'
        labels[refDate] = carried
        refDate, refPrice = nextDate, nextPrice
    return labels

def annotateBuyOrSellEightClasses(prices):
    """Label dates with one of eight classes from the ratio of consecutive prices.

    prices: dict date -> price (anything `float()` accepts), walked in its
    stored order.
    Each date is labelled from the entry that follows it, using
    ratio = following price / own price:
      higher price: 'B0' (ratio <= 1.005), 'B1' (<= 1.01), 'B2' (<= 1.02), 'B3' (> 1.02)
      lower price:  'S0' (ratio >= .995), 'S1' (>= .99), 'S2' (>= .98), 'S3' (< .98)
      equal prices: the most recent label (initially '').
    The final entry gets no label, except that a single-entry dict maps its
    only date to ''.
    Returns a dict date -> label.
    """
    labels = {}
    entries = list(prices.items())
    if not entries:
        return labels
    refDate, refPrice = entries[0][0], float(entries[0][1])
    carried = ''
    # The first entry is compared with itself, which only seeds labels[refDate].
    for nextDate, rawPrice in entries:
        nextPrice = float(rawPrice)
        if nextPrice > refPrice:
            ratio = nextPrice / refPrice
            if ratio <= 1.005:
                carried = 'B0'
            elif ratio <= 1.01:
                carried = 'B1'
            elif ratio <= 1.02:
                carried = 'B2'
            elif ratio > 1.02:
                carried = 'B3'
        elif nextPrice < refPrice:
            ratio = nextPrice / refPrice
            if ratio >= .995:
                carried = 'S0'
            elif ratio >= .99:
                carried = 'S1'
            elif ratio >= .98:
                carried = 'S2'
            elif ratio < .98:
                carried = 'S3'
        labels[refDate] = carried
        refDate, refPrice = nextDate, nextPrice
    return labels

def annotateMaxProfit(prices):
    """Label only the dates where the price direction flips.

    prices: dict date -> price (anything `float()` accepts), walked in its
    stored order.
    A date is labelled 'B' when the following entry's price is higher and the
    last label emitted was not 'B', and 'S' when it is lower and the last label
    emitted was not 'S'. All other dates are left out of the result.

    Prices are converted with `float()` before comparing, as the other
    annotators do: comparing the raw strings ordered them lexicographically
    (e.g. '100.0' < '99.0').
    Returns a dict date -> 'B' | 'S'.
    """
    dateAnnotated = {}
    currentStatus = ''
    previousDate = None
    previousPrice = None
    for date, rawPrice in prices.items():
        price = float(rawPrice)
        # The first entry has no predecessor to compare against.
        if previousPrice is not None:
            if price > previousPrice and currentStatus != 'B':
                dateAnnotated[previousDate] = 'B'
                currentStatus = 'B'
            elif price < previousPrice and currentStatus != 'S':
                dateAnnotated[previousDate] = 'S'
                currentStatus = 'S'
        previousDate, previousPrice = date, price
    return dateAnnotated

### Normalização dos dados

In [5]:
# Dúvida - como faremos a normalização?
# Ideia: normalizarmos de acordo com o preço da ação no momento

# Exemplo: temos o valor de uma ação atualmente em R$3.00, mas sua média dos últimos 10 dias é de R$2.90,
# logo esta ação está aumentando de valor. Se normalizarmos todos os indicadores de acordo com o preço atual
# da ação podemos ter mais sucesso, pois no momento de atuação do modelo tudo que ele terá acesso será o preço 
# atual da ação e seus indicadores... Normalizar de acordo com um teto e piso de valores acho que pode dar ruim.

import copy

# Normalizes a set of indicators by the stock price
def normalizeIndicators(indicatorSet, stockPrice):
    """Divide every indicator value by the stock price.

    indicatorSet: dict indicator name -> value (anything `float()` accepts).
    stockPrice: price used as the divisor (anything `float()` accepts).
    Returns a new dict indicator name -> float(value) / float(stockPrice).
    """
    return {
        name: float(rawValue)/float(stockPrice)
        for name, rawValue in indicatorSet.items()
    }

# Returns the normalized indicators: {date: {'indicators': {'SMA': 0.89, 'EMA': 0.93}, 'annotation': 'B'}}
def getIndicatorsNormalizedAndAnnotaded(stockPrices, indicators, dateAnnotated):
    """Attach price-normalized indicators to every annotated date.

    stockPrices: dict date -> price.
    indicators: dict date -> {indicator name: value}.
    dateAnnotated: dict date -> annotation; its dates drive the result and must
    exist in both other dicts (KeyError otherwise).
    Returns dict date -> {'indicators': normalized dict, 'annotation': label}.
    """
    print('Normalizing and annotating indicators')
    return {
        day: {
            'indicators': normalizeIndicators(indicators[day], stockPrices[day]),
            'annotation': label,
        }
        for day, label in dateAnnotated.items()
    }

# Deprecated
def normalizeIndicatorsOld(indicatorsAnnotated):
    """Deprecated normalization that divides every 'B' indicator value by 1000.

    indicatorsAnnotated: dict with keys 'B' and 'S', each a list of
    {indicator name: numeric value} dicts.
    Prints each 'B' indicator name and each 'S' indicator set as it goes.
    Returns {'B': [[{name: value/1000}, ...], ...], 'S': []}; the 'S' sets are
    only printed, never normalized.

    Fixes: the result list was indexed with the undefined name `B` (NameError)
    and the built dict was never returned.
    """
    normalizedIndicators = {'B': [], 'S': []}
    for indicatorSet in indicatorsAnnotated['B']:
        normalizedIndicatorSet = []
        for indicator, value in indicatorSet.items():
            print(indicator)
            normalizedIndicatorSet.append({indicator: value/1000})
        normalizedIndicators['B'].append(normalizedIndicatorSet)
    for indicators in indicatorsAnnotated['S']:
        print(indicators)
    return normalizedIndicators

### Machine Learning

In [6]:
import keras
# Adam optimizer instance used when compiling the MLP and bidirectional-LSTM models below.
# NOTE(review): the same instance is passed to every model those builders compile —
# confirm that sharing one optimizer object across models is intended.
adam = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)

def getModelMLP(n_steps, n_features, n_outputs):
    """Build and compile a two-hidden-layer MLP over flattened windows.

    n_steps, n_features: the input vector has n_steps*n_features entries.
    n_outputs: size of the output layer; more than one output selects
    softmax + categorical cross-entropy, otherwise sigmoid + binary
    cross-entropy.
    Returns the compiled model (module-level `adam` optimizer, accuracy metric).
    """
    multiclass = n_outputs > 1
    activation = 'softmax' if multiclass else 'sigmoid'
    loss = 'categorical_crossentropy' if multiclass else 'binary_crossentropy'
    layers = [
        keras.layers.Dense(64, activation='relu', input_shape=(n_steps*n_features,)),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(n_outputs, activation=activation),
    ]
    model = keras.models.Sequential(layers)
    model.compile(loss=loss, optimizer=adam, metrics=['accuracy'])
    return model

def getModelBidirectionalLSTM(n_steps, n_features, n_outputs):
    """Build and compile a bidirectional LSTM classifier over windows.

    n_steps, n_features: each input sample has shape (n_steps, n_features).
    n_outputs: size of the output layer; more than one output selects
    softmax + categorical cross-entropy, otherwise sigmoid + binary
    cross-entropy.
    Returns the compiled model (module-level `adam` optimizer, accuracy metric).
    """
    # (output activation, loss) pair for the classification head.
    if n_outputs > 1:
        head = ('softmax', 'categorical_crossentropy')
    else:
        head = ('sigmoid', 'binary_crossentropy')
    activation, loss = head

    model = keras.models.Sequential()
    recurrent = keras.layers.LSTM(64, activation='relu')
    model.add(keras.layers.Bidirectional(recurrent, input_shape=(n_steps, n_features)))
    model.add(keras.layers.Dense(n_outputs, activation=activation))
    model.compile(loss=loss, optimizer=adam, metrics=['accuracy'])
    return model

def getModelCNN(n_steps, n_features, n_outputs=1):
    """Build and compile a 1-D convolutional classifier over windows.

    n_steps, n_features: each input sample has shape (n_steps, n_features).
    n_outputs: size of the output layer (default 1); more than one output
    selects softmax + categorical cross-entropy, otherwise sigmoid + binary
    cross-entropy. The body previously read `n_outputs` without receiving it,
    which raised NameError; it is now a parameter in the same position the
    sibling model builders use.
    Returns the compiled model (optimizer given by the string 'adam', accuracy metric).
    """
    model = keras.models.Sequential()
    model.add(keras.layers.Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(n_steps, n_features)))
    model.add(keras.layers.Conv1D(filters=64, kernel_size=2, activation='relu'))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(50, activation='relu'))
    if n_outputs > 1:
        activation = 'softmax'
        loss = 'categorical_crossentropy'
    else:
        activation = 'sigmoid'
        loss = 'binary_crossentropy'
    model.add(keras.layers.Dense(n_outputs, activation=activation))
    model.compile(loss=loss, optimizer='adam', metrics=['accuracy'])
    return model

ModuleNotFoundError: No module named 'keras'

### Análise de dados

In [None]:
import itertools
import matplotlib.pyplot as plt

from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.utils.multiclass import unique_labels

def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """
    Print and plot the confusion matrix of `y_pred` against `y_true`.

    y_true, y_pred: label collections handed to sklearn's `confusion_matrix`
        and `unique_labels`.
    classes: class names, indexed with the array returned by `unique_labels`
        (callers in this file pass a NumPy array of strings).
    normalize: when True, every row of the matrix is divided by its row sum.
    title: plot title; a default depending on `normalize` is used when falsy.
    cmap: matplotlib colormap; `None` falls back to 'Blues'.

    The matrix is printed, drawn with matplotlib and shown with `plt.show()`.
    There is no return statement, so the function returns None.

    Relies on a global name `np` (NumPy) that is not imported in this cell.
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    # Only use the labels that appear in the data
    classes = classes[unique_labels(y_true, y_pred)]
    if normalize:
        # Divide each row by its sum (a column vector via np.newaxis).
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    # NOTE(review): when normalize=True, cm was already row-normalized above, so
    # this is trace/sum of the normalized matrix — confirm that is intended.
    accuracy = np.trace(cm) / np.sum(cm).astype('float')
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    # One tick per class on both axes.
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)


    # Cells above this threshold get white text, the others black text.
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        if normalize:
            plt.text(j, i, "{:0.4f}".format(cm[i, j]),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")
        else:
            plt.text(j, i, "{:,}".format(cm[i, j]),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")


    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    plt.show()

### Main

In [None]:
# Cheat-sheet of the available symbols (reference lists only)
__indicatorSymbols = ['SMA', 'EMA', 'WMA', 'DEMA', 'TEMA', 'TRIMA', 'KAMA', 'MAMA',
                      'T3', 'MACD', 'MACDEXT', 'STOCH', 'STOCHF', 'RSI', 'STOCHRSI', 'WILLR', 
                      'ADX', 'ADXR', 'APO', 'PPO', 'MOM', 'BOP', 'CCI', 'CMO', 'ROC', 'ROCR',
                      'AROON', 'AROONOSC', 'MFI', 'TRIX', 'ULTOSC', 'DX', 'MINUS_DI', 'PLUS_DI',
                      'MINUS_DM', 'PLUS_DM', 'BBANDS', 'MIDPOINT', 'SAR', 'TRANGE', 'ATR',
                      'NATR', 'AD', 'ADOSC', 'OBV', 'HT_TRENDLINE', 'HT_SINE', 'HT_TRENDMODE',
                      'HT_DCPERIOD', 'HT_DCPHASE', 'HT_PHASOR']
__forexSymbols = ['BRL', 'EUR']
__stockSymbols = ['MSFT']

# Symbols used by the model: three alternative indicator sets, of which only
# `indicatorFunctions` is fetched below.
indicatorFunctions = ['SMA', 'EMA', 'WMA']
indicatorFunctions2 = ['SMA', 'EMA', 'MACD', 'RSI', 'ADX', 'CCI', 'AD', 'OBV']
indicatorFunctions3 = ['SMA', 'EMA', 'MACD', 'RSI', 'ADX', 'CCI', 'AD', 'OBV', 'MOM', 'WILLR', 'ADOSC']
# fromForexSymbol = 'USD'
# toForexSymbol = 'BRL'
stockSymbol = 'MSFT'

# Fetch prices and indicators for the stock, restricted to their common dates
# (this performs network requests through getData).
stockPrices, indicators = getData(stockSymbol, indicatorFunctions)
# stockPrices, indicators2 = getData(stockSymbol, indicatorFunctions2)
# stockPrices, indicators3 = getData(stockSymbol, indicatorFunctions3)
# forexPrices, indicators = getForexData(fromForexSymbol, toForexSymbol, indicatorFunctions)
# dateAnnotated = annotateMaxProfit(stockPrices)

In [None]:
import pandas as pd

# One-column frame (column 0) of prices, indexed by date.
df = pd.DataFrame.from_dict(stockPrices, orient='index')

# Ratio of each row's price to the price of the row that follows it in the
# frame's current order (shift(-1) moves the next row's value up by one).
div = df[0].astype('float').div(df[0].astype('float').shift(-1))

In [None]:
# Sort the price frame by its date index in place, then plot the prices as floats.
df.sort_index(inplace=True)
df[0].astype('float').plot()

In [None]:
# Sort the price-ratio series by its date index in place, then plot it.
div.sort_index(inplace=True)
div.plot()

In [None]:
# Summary statistics of the price ratios at the listed percentiles.
div.describe(percentiles=[.01,.1,.25,.5,.75,.9,.95,.99])

In [None]:
# Label the dates with the two-class scheme and count how often each label occurs.
# Note: this rebinds `df`, replacing the price frame built earlier.
dateAnnotatedTwo = annotateBuyOrSellTwoClasses(stockPrices)
df = pd.DataFrame.from_dict(dateAnnotatedTwo, orient='index')
df[0].value_counts()

In [None]:
# Label the dates with the eight-class scheme and count how often each label occurs.
# Note: this rebinds `df` again.
dateAnnotatedEight = annotateBuyOrSellEightClasses(stockPrices)
df = pd.DataFrame.from_dict(dateAnnotatedEight, orient='index')
df[0].value_counts()

In [None]:
# Combine price-normalized indicators with each labelling scheme.
normalizedIndicatorsTwo = getIndicatorsNormalizedAndAnnotaded(stockPrices, indicators, dateAnnotatedTwo)
normalizedIndicatorsEight = getIndicatorsNormalizedAndAnnotaded(stockPrices, indicators, dateAnnotatedEight)

# One row per date, with the columns 'indicators' (a dict) and 'annotation'.
dataTwo = pd.DataFrame.from_dict(normalizedIndicatorsTwo, orient='index')
dataEight = pd.DataFrame.from_dict(normalizedIndicatorsEight, orient='index')

In [None]:
# Expand the per-row 'indicators' dicts into one column per indicator, join them
# back on the date index, and drop the original dict column. The resulting
# column order is: indicator columns first, 'annotation' last.
dataTwo = pd.DataFrame.merge(pd.DataFrame(dataTwo.indicators.values.tolist(), index=dataTwo.index), dataTwo, left_index=True, right_index=True, how='outer').drop(columns=['indicators'])
dataEight = pd.DataFrame.merge(pd.DataFrame(dataEight.indicators.values.tolist(), index=dataEight.index), dataEight, left_index=True, right_index=True, how='outer').drop(columns=['indicators'])

In [None]:
# Preview the first rows of both frames.
dataTwo.head(), dataEight.head()

In [None]:
# Two-class target: 'S' -> 0 and every other annotation (including '') -> 1.
dataTwo.annotation = dataTwo.annotation.apply(lambda x: 0 if x == 'S' else 1)

# Eight-class target ids: 0-3 are the buy classes, 4-7 the sell classes.
# NOTE(review): an annotation that is not a key of eight_dict (e.g. '') raises
# KeyError in the lookup below.
eight_dict = {"B3":0, "B2": 1, "B1": 2, "B0": 3, "S0": 4, "S1": 5, "S2": 6, "S3": 7}
dataEight.annotation = dataEight.annotation.apply(lambda x: eight_dict[x])

In [None]:
# Order both frames by their date index before windowing.
dataTwo = dataTwo.sort_index()
dataEight = dataEight.sort_index()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Number of consecutive rows in each input window.
n_steps = 4
# Number of feature columns: every column except the trailing 'annotation'.
# Derived from the data (it was hard-coded to 3) so it stays in sync with the
# indicator set that was fetched.
n_indicators = dataTwo.shape[1] - 1

batch_size = 32
train_split = .8


def _buildWindows(frame, window, n_classes=None):
    """Slice `frame` into overlapping windows of `window` consecutive rows.

    Features are all columns but the last; the target of a window is the last
    column of the window's final row, one-hot encoded over `n_classes` when
    `n_classes` is given.
    Returns (sequences, flattened, targets) as NumPy arrays, where `sequences`
    holds the 2-D windows and `flattened` the same windows as 1-D vectors.
    """
    sequences, flattened, targets = [], [], []
    for start in range(len(frame) - window + 1):
        stop = start + window
        features = frame.iloc[start:stop, :-1].values
        label = frame.iloc[stop - 1, -1]
        sequences.append(features)
        flattened.append(features.flatten())
        targets.append(label if n_classes is None else np.eye(n_classes)[label])
    return np.array(sequences), np.array(flattened), np.array(targets)


# X[model kind][number of classes] and y[number of classes].
X = {"mlp": {}, "lstm": {}}
y = {}

X["lstm"][2], X["mlp"][2], y[2] = _buildWindows(dataTwo, n_steps)
X["lstm"][8], X["mlp"][8], y[8] = _buildWindows(dataEight, n_steps, n_classes=8)

In [None]:
# Preview the first rows of the two-class frame.
dataTwo.head()

In [None]:
# First window in its 2-D (LSTM) form.
X["lstm"][2][0]

In [None]:
# First window in its flattened (MLP) form.
X["mlp"][2][0]

In [None]:
# Number of windows built for the two-class problem.
len(X["lstm"][2])

In [None]:
from sklearn.model_selection import train_test_split

def _chronologicalSplit(values, fraction):
    """Split `values` at int(len(values) * fraction), keeping the original order."""
    cut = int(len(values) * fraction)
    return values[:cut], values[cut:]


# Each container is indexed as [model kind][number of classes].
X_train = {"mlp": {}, "lstm": {}}
X_test = {"mlp": {}, "lstm": {}}
y_train = {"mlp": {}, "lstm": {}}
y_test = {"mlp": {}, "lstm": {}}

for n_classes in (2, 8):
    # LSTM data: ordered split, the first `train_split` share is the training set.
    # Every cut point is computed from the array being split; the eight-class
    # test targets were previously cut using len(y[2]).
    X_train["lstm"][n_classes], X_test["lstm"][n_classes] = _chronologicalSplit(X["lstm"][n_classes], train_split)
    y_train["lstm"][n_classes], y_test["lstm"][n_classes] = _chronologicalSplit(y[n_classes], train_split)

    # MLP data: shuffled split with a fixed seed.
    # NOTE(review): the windows overlap, so a shuffled split places near-identical
    # samples in both sets — confirm this is acceptable for the MLP evaluation.
    (X_train["mlp"][n_classes], X_test["mlp"][n_classes],
     y_train["mlp"][n_classes], y_test["mlp"][n_classes]) = train_test_split(
        X["mlp"][n_classes], y[n_classes], train_size=train_split, random_state=0)

In [None]:
# One MLP per labelling scheme: a single output for two classes, eight
# outputs for the eight-class scheme.
MLP_2 = getModelMLP(n_steps, n_indicators, 1)
MLP_8 = getModelMLP(n_steps, n_indicators, 8)

# Bidirectional LSTM counterparts with the same output sizes.
LSTM_2 = getModelBidirectionalLSTM(n_steps, n_indicators, 1)
LSTM_8 = getModelBidirectionalLSTM(n_steps, n_indicators, 8)

In [None]:
# Display the four model objects.
MLP_2, LSTM_2, MLP_8, LSTM_8

In [None]:
# Layer-by-layer summary of the two-class MLP.
MLP_2.summary()

In [None]:
# Layer-by-layer summary of the eight-class MLP.
MLP_8.summary()

In [None]:
# Layer-by-layer summary of the two-class bidirectional LSTM.
LSTM_2.summary()

In [None]:
# Layer-by-layer summary of the eight-class bidirectional LSTM.
LSTM_8.summary()

In [None]:
# Prediction store indexed as preds[model kind][number of classes]; each entry
# starts as an empty list and is replaced by the model output after predict().
preds = {
    "mlp": {2: [], 8:[]},
    "lstm": {2: [], 8:[]}
}

In [None]:
# Train the two-class MLP and plot its loss curves.
# validation_split=.2 holds out part of the training data, so the curve
# labelled 'test' is the validation loss, not the loss on X_test.
history_mlp_2 = MLP_2.fit(X_train["mlp"][2], y_train["mlp"][2], epochs=300, verbose=1, validation_split=.2, batch_size=batch_size)
plt.plot(history_mlp_2.history['loss'], label='train')
plt.plot(history_mlp_2.history['val_loss'], label='test')
plt.legend()
plt.show()

In [None]:
# Predict on the MLP test set, round the sigmoid outputs to 0/1, and report.
# plot_confusion_matrix has no return statement, so `conf` is None.
preds["mlp"][2] = MLP_2.predict(X_test["mlp"][2], batch_size=batch_size)
print(classification_report(y_test["mlp"][2], np.rint(preds["mlp"][2]).astype(np.int64), target_names=["S","B"], digits=5))
conf = plot_confusion_matrix(y_test["mlp"][2], np.rint(preds["mlp"][2]).astype(np.int64), classes=np.array(["S","B"]))

In [None]:
# Train the eight-class MLP and plot its loss curves.
# validation_split=.2 holds out part of the training data, so the curve
# labelled 'test' is the validation loss, not the loss on X_test.
history_mlp_8 = MLP_8.fit(X_train["mlp"][8], y_train["mlp"][8], epochs=300, verbose=1, validation_split=.2, batch_size=batch_size)
plt.plot(history_mlp_8.history['loss'], label='train')
plt.plot(history_mlp_8.history['val_loss'], label='test')
plt.legend()
plt.show()

In [None]:
# Calculando lucro/prejuízo total, supondo que inicialmente temos US$1000 em carteira
import copy

# Predicted class id (0-7) for every test sample of the eight-class LSTM.
classificationArray = list(np.argmax(preds["lstm"][8], axis=1))

prices = copy.deepcopy(stockPrices)

# Drop the oldest dates until one price remains per prediction.
# NOTE(review): this assumes the predictions correspond to the most recent
# dates, in chronological order — confirm against how the test set was built.
for date in sorted(stockPrices.keys()):
    if len(prices) == len(classificationArray):
        break
    del prices[date]

orderedDates = sorted(prices.keys())

initialMoney = 1000.00
currentMoney = initialMoney
currentStocks = 0.0

for index, date in enumerate(orderedDates):
    stockPrice = float(prices[date])
    classification = classificationArray[index]
    # Buy case: class ids 0-3 are the buy classes (B3..B0) of eight_dict.
    if currentMoney != 0 and classification in (0, 1, 2, 3):
        currentStocks = currentMoney/stockPrice
        currentMoney = 0.0
    # Sell case: class ids 4-7 are the sell classes (S0..S3) of eight_dict.
    elif currentStocks != 0 and classification in (4, 5, 6, 7):
        currentMoney = currentStocks*stockPrice
        currentStocks = 0.0

firstPrice = float(prices[orderedDates[0]])
lastPrice = float(prices[orderedDates[-1]])

# Final wealth is the cash held plus any open position valued at the last price.
# Previously finalMoney was only assigned when no cash was held, so ending the
# simulation in cash raised NameError.
finalMoney = currentMoney + currentStocks*lastPrice

growthPercentage = 100.0*((finalMoney-initialMoney)/initialMoney)
timeLenInYears = len(prices)/252.0

print("[DEBUG] Current money:\t   US$ " + str(round(currentMoney, 2)))
print("[DEBUG] Current stocks:\t   " + str(round(currentStocks, 2)) + "\t| Last stock-price: US$ " + str(round(lastPrice, 2)))
# The first date is index 0 (index 1 was printed before), and the float prices
# must be converted before being concatenated to a string.
print("First date:\t   " + orderedDates[0] + "\t| Initial stock value: US$ " + str(round(firstPrice, 2)))
print("Last date:\t   " + orderedDates[-1] + "\t| Final stock value: US$ " + str(round(lastPrice, 2)))
print("Initial money:    US$ " + str(round(initialMoney, 2)) + " | Final money: US$ " + str(round(finalMoney, 2)))
print("Final profit:\t   US$ " + str(round(finalMoney-initialMoney, 2)) + " over " + str(round(timeLenInYears, 2)) + " years")
print("Growth percentage: " + str(round(growthPercentage, 2)) + "%")
print("Growth per year:   US$ " + str(round((finalMoney-initialMoney)/timeLenInYears, 2)))

In [None]:
# Predict on the MLP test set and evaluate the eight-class problem.
preds["mlp"][8] = MLP_8.predict(X_test["mlp"][8], batch_size=batch_size)
# Turn one-hot targets and softmax outputs into class ids with argmax.
# Rounding the probabilities first (np.rint) zeroes every row in which no class
# exceeds 0.5, and argmax of an all-zero row is 0, i.e. a spurious "B3".
y_true_mlp_8 = y_test["mlp"][8].argmax(axis=1)
y_pred_mlp_8 = preds["mlp"][8].argmax(axis=1)
# `labels` pins the id -> name mapping even when a class is absent from the test data.
print(classification_report(y_true_mlp_8, y_pred_mlp_8, labels=list(eight_dict.values()), target_names=list(eight_dict.keys()), digits=5))
# plot_confusion_matrix has no return statement, so `conf` is None.
conf = plot_confusion_matrix(y_true_mlp_8, y_pred_mlp_8, classes=np.array(list(eight_dict.keys())))

In [None]:
# Train the two-class bidirectional LSTM and plot its loss curves.
# validation_split=.2 holds out part of the training data, so the curve
# labelled 'test' is the validation loss, not the loss on X_test.
history_lstm_2 = LSTM_2.fit(X_train["lstm"][2], y_train["lstm"][2], epochs=200, verbose=1, validation_split=.2, batch_size=batch_size)
plt.plot(history_lstm_2.history['loss'], label='train')
plt.plot(history_lstm_2.history['val_loss'], label='test')
plt.legend()
plt.show()

In [None]:
# Predict on the LSTM test set, round the sigmoid outputs to 0/1, and report.
# plot_confusion_matrix has no return statement, so `conf` is None.
preds["lstm"][2] = LSTM_2.predict(X_test["lstm"][2], batch_size=batch_size)
print(classification_report(y_test["lstm"][2], np.rint(preds["lstm"][2]).astype(np.int64), target_names=["S","B"], digits=5))
conf = plot_confusion_matrix(y_test["lstm"][2], np.rint(preds["lstm"][2]).astype(np.int64), classes=np.array(["S","B"]))

In [None]:
# Train the eight-class bidirectional LSTM and plot its loss curves.
# validation_split=.2 holds out part of the training data, so the curve
# labelled 'test' is the validation loss, not the loss on X_test.
history_lstm_8 = LSTM_8.fit(X_train["lstm"][8], y_train["lstm"][8], epochs=300, verbose=1, validation_split=.2, batch_size=batch_size)
plt.plot(history_lstm_8.history['loss'], label='train')
plt.plot(history_lstm_8.history['val_loss'], label='test')
plt.legend()
plt.show()

In [None]:
# Predict on the LSTM test set and evaluate the eight-class problem.
preds["lstm"][8] = LSTM_8.predict(X_test["lstm"][8], batch_size=batch_size)
# Turn one-hot targets and softmax outputs into class ids with argmax.
# Rounding the probabilities first (np.rint) zeroes every row in which no class
# exceeds 0.5, and argmax of an all-zero row is 0, i.e. a spurious "B3".
y_true_lstm_8 = y_test["lstm"][8].argmax(axis=1)
y_pred_lstm_8 = preds["lstm"][8].argmax(axis=1)
# `labels` pins the id -> name mapping even when a class is absent from the test data.
print(classification_report(y_true_lstm_8, y_pred_lstm_8, labels=list(eight_dict.values()), target_names=list(eight_dict.keys()), digits=5))
# plot_confusion_matrix has no return statement, so `conf` is None.
conf = plot_confusion_matrix(y_true_lstm_8, y_pred_lstm_8, classes=np.array(list(eight_dict.keys())))