In [1]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
# !pip install mlrose
# Register the standalone `six` package under the name `sklearn.externals.six`
# BEFORE importing mlrose. Presumably mlrose imports `six` from that legacy path,
# which may not exist in the installed scikit-learn — TODO confirm.
import six
import sys
sys.modules['sklearn.externals.six'] = six
import mlrose as ml

In [2]:
# Price table shared by fitness_function and visualiza_alocacao; every column
# after the first one is treated as an asset series.
dataset_original = pd.read_csv('acoes.csv')
# Amount of money that fitness_function spreads across the assets.
dinheiro_total = 5000

In [5]:
def alocacao_ativos(dataset, dinheiro_total, seed = 0, melhores_pesos = None):
  """Simulate how an initial amount evolves when split across the assets.

  Every asset column is divided by its first row, then multiplied by its
  weight and by ``dinheiro_total``, so each cell holds the money value of that
  position on that row. The input frame is never modified.

  Parameters
  ----------
  dataset : pandas.DataFrame
    Must contain a 'Date' column. The first column is skipped and every other
    column is treated as an asset series, so 'Date' is expected to come first.
  dinheiro_total : number
    Amount of money to distribute on the first row.
  seed : int
    When different from 0, seeds NumPy's global RNG before random weights are
    drawn. The value 0 means "do not seed".
  melhores_pesos : sequence of float, optional
    One weight per asset column, used exactly as given (not renormalized).
    When omitted or empty, random weights that sum to 1 are drawn.

  Returns
  -------
  tuple
    ``(dataset, datas, acoes_pesos, valor_final)`` where ``dataset`` holds the
    weighted asset columns plus 'soma valor' (row total) and 'taxa retorno'
    (percentage change of the total versus the previous row, 0 on the first
    row); ``datas`` is the 'Date' column; ``acoes_pesos`` lists each asset with
    its weight in percent; ``valor_final`` is the last 'soma valor'.

  Raises
  ------
  ValueError
    If the number of weights differs from the number of asset columns.
  """
  dataset = dataset.copy()

  if seed != 0:
    np.random.seed(seed)

  colunas = dataset.columns[1:]

  if melhores_pesos is not None and len(melhores_pesos) > 0:
    # Convert to an array so that `pesos * 100` scales the values instead of
    # repeating a Python list 100 times.
    pesos = np.asarray(melhores_pesos, dtype = float)
  else:
    pesos = np.random.random(len(colunas))
    pesos = pesos / pesos.sum()

  if len(pesos) != len(colunas):
    raise ValueError(
      'expected %d weights (one per asset column), got %d' % (len(colunas), len(pesos)))

  for i, acao in enumerate(colunas):
    # Positional .iloc[0] so the normalization does not depend on the index labels.
    dataset[acao] = dataset[acao] / dataset[acao].iloc[0] * pesos[i] * dinheiro_total

  # Sum only the asset columns: the string 'Date' column is still present here
  # and must not take part in the row total.
  dataset['soma valor'] = dataset[colunas].sum(axis = 1)

  datas = dataset['Date']
  dataset = dataset.drop(labels = ['Date'], axis = 1)

  # Vectorized return computation; avoids chained assignment
  # (`dataset[col][i] = ...`), which does not write back under Copy-on-Write.
  soma_valor = dataset['soma valor'].to_numpy(dtype = float)
  taxa_retorno = np.zeros(len(soma_valor))
  taxa_retorno[1:] = (soma_valor[1:] / soma_valor[:-1] - 1) * 100
  dataset['taxa retorno'] = taxa_retorno

  acoes_pesos = pd.DataFrame(data = {'Ações': colunas, 'Pesos': pesos * 100})

  return dataset, datas, acoes_pesos, dataset['soma valor'].iloc[-1]

In [6]:
# Baseline portfolio: random weights (seed 10) applied to the shared price table
# and capital, so this cell and fitness_function always work on the same inputs.
dataset, datas, acoes_pesos, soma_valor = alocacao_ativos(dataset_original, dinheiro_total, seed = 10)

In [7]:
# Show the simulated portfolio: weighted asset values, their row total
# ('soma valor') and the percentage change of that total ('taxa retorno').
dataset

Unnamed: 0,AZUL4.SA,COGN3.SA,IGTA3.SA,MRFG3.SA,VVAR3.SA,soma valor,taxa retorno
0,1442.782435,38.817252,1185.261345,1400.664041,932.474927,5000.000000,0.000000
1,1478.851962,39.073003,1187.059893,1440.826413,990.754610,5136.565881,2.731318
2,1480.784304,38.447834,1187.059893,1423.255435,990.754610,5120.302077,-0.316628
3,1471.766953,39.953923,1219.074517,1465.928015,979.098629,5175.822037,1.084310
4,1540.041446,40.209671,1219.434309,1523.661484,1023.391202,5346.738112,3.302202
...,...,...,...,...,...,...,...
1069,2430.830912,10.116356,1444.256258,4784.346994,2855.704465,11525.254986,-2.246141
1070,2433.407123,9.889022,1422.313614,4726.613645,2834.723744,11426.947147,-0.852978
1071,2445.645167,9.974272,1421.234375,4726.613645,2879.016428,11482.483886,0.486016
1072,2445.645167,9.889022,1433.464695,4899.814173,2962.939091,11751.752148,2.345035


In [8]:
def fitness_function(solucao):
  """Score a candidate allocation by the mean/std ratio of its returns.

  The candidate is first rescaled so its entries sum to 1, so only the
  proportions between entries matter. The module-level ``dataset_original``
  and ``dinheiro_total`` are read but never modified.

  Parameters
  ----------
  solucao : array-like of float
    One non-normalized weight per asset column of ``dataset_original``.

  Returns
  -------
  float
    mean('taxa retorno') / std('taxa retorno') * sqrt(246), where
    'taxa retorno' is the percentage change of the portfolio total between
    consecutive rows (0 on the first row). NaN when the weights sum to 0.
  """
  solucao = np.asarray(solucao, dtype = float)
  pesos = solucao / solucao.sum()

  # drop() returns a new frame, so the shared table is left untouched.
  precos = dataset_original.drop(labels = ['Date'], axis = 1)

  # Each column divided by its first row, then scaled by weight and capital.
  valores = precos / precos.iloc[0] * pesos * dinheiro_total
  soma_valor = valores.sum(axis = 1).to_numpy(dtype = float)

  # Vectorized: this function runs once per optimizer evaluation, and the
  # chained assignment it replaces does not write back under Copy-on-Write.
  taxa_retorno = np.zeros(len(soma_valor))
  taxa_retorno[1:] = (soma_valor[1:] / soma_valor[:-1] - 1) * 100
  taxa_retorno = pd.Series(taxa_retorno)

  # 246 is presumably the number of trading days per year used to annualize
  # the ratio — TODO confirm.
  sharpe_ratio = taxa_retorno.mean() / taxa_retorno.std() * np.sqrt(246)

  return sharpe_ratio

In [9]:
# Draw one random weight per asset column with a fixed seed, then rescale the
# weights so they sum to 1.
np.random.seed(10)
pesos = np.random.random(len(dataset_original.columns) - 1)
pesos = pesos / pesos.sum()
pesos

array([0.28855649, 0.00776345, 0.23705227, 0.28013281, 0.18649499])

In [10]:
# Score of the random allocation, as a sanity check of the fitness function.
fitness_function(pesos)

0.6946528176291948

In [11]:
def visualiza_alocacao(solucao):
  """Print one line per weight: the asset name followed by the weight times 100.

  Asset names are the columns of the module-level ``dataset_original`` after
  the first one, matched to ``solucao`` by position.
  """
  nomes_acoes = dataset_original.columns[1:]
  for posicao, peso in enumerate(solucao):
    print(nomes_acoes[posicao], peso * 100)

In [12]:
# Random allocation expressed as a percentage per asset.
visualiza_alocacao(pesos)

AZUL4.SA 28.855648693014757
COGN3.SA 0.7763450461717195
IGTA3.SA 23.705226900893685
MRFG3.SA 28.013280810153606
VVAR3.SA 18.64949854976623


In [13]:
# Wrap the plain Python function in the object the mlrose problems take as fitness_fn.
fitness = ml.CustomFitness(fitness_function)

In [14]:
# One continuous variable in [0, 1] per asset column; the optimizer looks for the
# highest fitness. The length follows the data instead of a hard-coded 5.
problema_maximizacao = ml.ContinuousOpt(length = len(dataset_original.columns) - 1, fitness_fn = fitness,
                                            maximize = True, min_val = 0, max_val = 1)

In [15]:
# Same search space, but the optimizer looks for the lowest fitness (worst
# allocation). The length follows the data instead of a hard-coded 5.
problema_minimizacao = ml.ContinuousOpt(length = len(dataset_original.columns) - 1, fitness_fn = fitness,
                                            maximize = False, min_val = 0, max_val = 1)

## Hill Climb

In [29]:
# Hill climbing on the maximization problem; random_state makes the run repeatable.
melhor_solucao, melhor_custo = ml.hill_climb(problema_maximizacao, random_state = 1)
melhor_solucao, melhor_custo

(array([0.        , 0.        , 0.        , 0.50233257, 0.14675589]),
 0.8619845488219066)

In [30]:
# Rescale the raw state so the weights sum to 1. fitness_function applies the
# same rescaling internally, so the score reported above still corresponds.
melhor_solucao = melhor_solucao / melhor_solucao.sum()
melhor_solucao, melhor_solucao.sum()

(array([0.        , 0.        , 0.        , 0.77390464, 0.22609536]), 1.0)

In [31]:
# Best hill-climbing allocation, as a percentage per asset.
visualiza_alocacao(melhor_solucao)

AZUL4.SA 0.0
COGN3.SA 0.0
IGTA3.SA 0.0
MRFG3.SA 77.3904638456643
VVAR3.SA 22.6095361543357


In [32]:
# Final portfolio value for the best hill-climbing weights, computed on the same
# price table and capital that the fitness function uses.
_, _, _, soma_valor = alocacao_ativos(dataset_original, dinheiro_total, melhores_pesos = melhor_solucao)
soma_valor

17092.033719382267

In [33]:
# Hill climbing on the minimization problem, to find a worst-case allocation for comparison.
pior_solucao, pior_custo = ml.hill_climb(problema_minimizacao, random_state = 1)
pior_solucao, pior_custo

(array([0.        , 0.72032449, 0.        , 0.        , 0.        ]),
 -0.302660352831205)

In [34]:
# Rescale the raw state so the weights sum to 1.
pior_solucao = pior_solucao / pior_solucao.sum()
pior_solucao, pior_solucao.sum()

(array([0., 1., 0., 0., 0.]), 1.0)

In [35]:
# Worst allocation found, as a percentage per asset.
visualiza_alocacao(pior_solucao)

AZUL4.SA 0.0
COGN3.SA 100.0
IGTA3.SA 0.0
MRFG3.SA 0.0
VVAR3.SA 0.0


In [36]:
# Final portfolio value for the worst weights, computed on the same price table
# and capital that the fitness function uses.
_, _, _, soma_valor = alocacao_ativos(dataset_original, dinheiro_total, melhores_pesos = pior_solucao)
soma_valor

1299.4143455238882

## Simulated Annealing

In [37]:
# Simulated annealing on the maximization problem, then rescale the raw state so
# the weights sum to 1.
melhor_solucao, melhor_custo = ml.simulated_annealing(problema_maximizacao, random_state = 1)
melhor_solucao = melhor_solucao / melhor_solucao.sum()
melhor_solucao, melhor_custo

(array([0.        , 0.        , 0.        , 0.71428571, 0.28571429]),
 0.860591766385172)

In [38]:
# Best simulated-annealing allocation, as a percentage per asset.
visualiza_alocacao(melhor_solucao)

AZUL4.SA 0.0
COGN3.SA 0.0
IGTA3.SA 0.0
MRFG3.SA 71.42857142857143
VVAR3.SA 28.571428571428577


In [39]:
# Final portfolio value for the simulated-annealing weights, computed on the same
# price table and capital that the fitness function uses.
_, _, _, soma_valor = alocacao_ativos(dataset_original, dinheiro_total, melhores_pesos = melhor_solucao)
soma_valor

17017.53676517086

## Algoritmo Genético


In [51]:
# Maximization problem for the genetic algorithm. min_val = 0.1 keeps every raw
# value at or above 0.1, so no asset ends with a zero weight after rescaling.
# The length follows the data instead of a hard-coded 5.
problema_maximizacao_ag = ml.ContinuousOpt(length = len(dataset_original.columns) - 1, fitness_fn = fitness,
                                               maximize = True, min_val = 0.1, max_val = 1)

In [54]:
# Genetic algorithm on its own maximization problem, then rescale the raw state
# so the weights sum to 1.
melhor_solucao, melhor_custo = ml.genetic_alg(problema_maximizacao_ag, random_state = 1)
melhor_solucao = melhor_solucao / melhor_solucao.sum()
melhor_solucao, melhor_custo

(array([0.05825069, 0.06106596, 0.06199781, 0.48631548, 0.33237005]),
 0.7974222281787174)

In [55]:
# Best genetic-algorithm allocation, as a percentage per asset.
visualiza_alocacao(melhor_solucao)

AZUL4.SA 5.825068649459877
COGN3.SA 6.106596270404071
IGTA3.SA 6.199781414327318
MRFG3.SA 48.631548270870624
VVAR3.SA 33.237005394938116


In [53]:
# Final portfolio value for the genetic-algorithm weights, computed on the same
# price table and capital that the fitness function uses.
_, _, _, soma_valor = alocacao_ativos(dataset_original, dinheiro_total, melhores_pesos = melhor_solucao)
soma_valor

14749.71776682472