In [1]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.pipeline import Pipeline

# import sys
# from pathlib import Path
# sys.path.append(str(Path().cwd().parent))

from utils import MinMaxScaler, get_ticket, MakeTimeSeries
from agent import Agent_rf, Agent_xgb

from indicators import (
    SimpleMovingAverage, RelativeStrengthIndex, AwesomeOscillator,
    MovingAverageConvergenceDivergence, AverageDirectionalIndex,
    OnBalanceVolume, BollingerBands, IchimokuClouds,
    VolumeWeightedAveragePrice, ParabolicSAR, Aleatory, Trend,
)

In [2]:
# Run-mode switches read by the loading cell:
#   download and transform -> fetch prices, run the indicator pipeline, write CSVs
#   transform only         -> re-run the pipeline on the raw CSVs already on disk
#   transform is False     -> reload the preprocessed CSVs (download is then ignored)
download = True  # False
transform = True  # False

# Ticker universe. Codes are listed without their common ".SA" suffix, which is
# appended below. The recorded run output reports CIEL3.SA as failing to process.
tickets = [
    f"{code}.SA"
    for code in """
        ABEV3 AZUL4 B3SA3 BBAS3 BBDC3 BBDC4 BBSE3 BEEF3 BPAC11 BRAP4
        BRFS3 BRKM5 CCRO3 CIEL3 CMIG4 COGN3 CPFE3 CPLE6 CRFB3 CSAN3
        CSNA3 CVCB3 CYRE3 ECOR3 EGIE3 ELET3 ELET6 EMBR3 ENEV3 ENGI11
        EQTL3 EZTC3 FLRY3 GGBR4 GOAU4 GOLL4 HAPV3 HYPE3 IRBR3 ITSA4
        ITUB4 JBSS3 JHSF3 KLBN11 LREN3 MGLU3 MRFG3 MRVE3 MULT3 NTCO3
        PCAR3 PETR3 PETR4 PRIO3 QUAL3 RADL3 RAIL3 RENT3 SANB11 SBSP3
        SUZB3 TAEE11 TIMS3 TOTS3 UGPA3 USIM5 VALE3 VIVT3 WEGE3 YDUQ3
    """.split()
]

In [None]:
# Build `data` (ticket -> preprocessed DataFrame) and `merge` (all tickets stacked).
#
# Behaviour per flag combination (flags come from the configuration cell):
#   download and transform -> get_ticket(), run the pipeline, write raw + preprocessed CSVs
#   transform only         -> read ../data/raw/<ticket>.csv, run the pipeline, write CSVs
#   transform is False     -> read ../data/preprocessed/<ticket>.csv (download is ignored)
#
# A ticket that cannot be loaded or processed is reported and left out of `data`,
# so `data` may hold fewer keys than `tickets`.


def _build_preprocessor():
    """Return a new, unfitted indicator pipeline.

    A separate pipeline is created for every ticket, so fitted state is never
    shared between tickets.
    """
    return Pipeline(steps=[
        # ("Scaler", MinMaxScaler(cols_list=["_Open", "_High", "_Low", "_Close"])),
        ("SMA", SimpleMovingAverage(short=21, middle=56, long=140, graphic=False)),
        ("RSI/IFR", RelativeStrengthIndex(window=14, graphic=False)),
        ("AO", AwesomeOscillator(short=5, long=34, graphic=False)),
        ("MACD", MovingAverageConvergenceDivergence(short=12, long=26, signal=9, graphic=False)),
        ("ADX/DMI", AverageDirectionalIndex(window=14, threshold=20, graphic=False)),
        # ("OBV", OnBalanceVolume()), # Turn into a signal!
        ("BB", BollingerBands(window=40, entry_threshold=0.5, out_tp_threshold=1.5, out_sl_threshold=0, graphic=False)),
        ("Ichimoku", IchimokuClouds(window1=9, window2=26, window3=52, graphic=False)),
        # ("VWAP", VolumeWeightedAveragePrice(graphic=False)),  # Turn into a signal!
        ("Par.SAR", ParabolicSAR(acceleration=0.02, maximum=0.2, graphic=False)),
        ("Aleatory", Aleatory(lack_trend=150, graphic=False)),
        ("Trend", Trend(short=21, long=140, graphic=False)),
        ("MakeTimeSeries", MakeTimeSeries(window=5, indicators=None))  # indicators=['SMA', ...]
    ])


def _read_dated_csv(path):
    """Read a CSV using its "Date" column as the index, parsed to datetimes."""
    frame = pd.read_csv(path, index_col="Date")
    frame.index = pd.to_datetime(frame.index)
    return frame


data = dict()

for ticket in tickets:

    if not transform:
        # Reload features written by an earlier transform run.
        try:
            data[ticket] = _read_dated_csv(f"../data/preprocessed/{ticket}.csv")
        except FileNotFoundError as e:
            # A ticket skipped by the transform run never got a preprocessed file.
            print(f"Erro ao processar {ticket}: {e}")
        continue

    if download:
        try:
            df = get_ticket(ticket)
        except Exception:
            # `Exception` instead of a bare `except` so Ctrl-C still stops the loop.
            print(f"Problemas para baixar dados de: {ticket}")
            continue
    else:
        try:
            df = _read_dated_csv(f"../data/raw/{ticket}.csv")
        except FileNotFoundError as e:
            print(f"Erro ao processar {ticket}: {e}")
            continue

    preprocessor = _build_preprocessor()

    try:
        preprocessor.fit(df)
        data[ticket] = preprocessor.transform(df)
        os.makedirs("../data/raw", exist_ok=True)
        os.makedirs("../data/preprocessed", exist_ok=True)
        df.to_csv(f"../data/raw/{ticket}.csv", index=True)
        data[ticket].to_csv(f"../data/preprocessed/{ticket}.csv", index=True)

    except Exception as e:
        print(f"Erro ao processar {ticket}: {e}")
        continue

if transform:
    # Stack only the tickets that were actually processed; indexing `data` with a
    # skipped ticket would raise KeyError. One concat call also avoids re-copying
    # the growing frame on every iteration.
    # NOTE(review): seeding the concat with an empty DataFrame, as the previous
    # loop did, may drop the index name; confirm merge.csv carries the "Date"
    # header that the reload branch below reads.
    frames = [data[ticket] for ticket in tickets if ticket in data]
    merge = pd.concat(frames) if frames else pd.DataFrame()
    merge = merge.sort_index(ascending=True)
    merge.to_csv("../data/merge.csv", index=True)

else:
    merge = _read_dated_csv("../data/merge.csv")

$CIEL3.SA: possibly delisted; no timezone found


Erro ao processar CIEL3.SA: index 0 is out of bounds for axis 0 with size 0


### Build Agent

In [None]:
# Column names handed to the agent through its `despise` argument: the five
# underscore-prefixed price/volume columns plus the 'Aleatory D-0' signal.
# NOTE(review): presumably these columns are excluded from the model features —
# confirm against the Agent implementation.
despise = [f"_{field}" for field in ("Open", "High", "Low", "Close", "Volume")] + ["Aleatory D-0"]

In [None]:
# Active model: Agent_rf, built from the frame of the first ticket in `tickets`.
# NOTE(review): drop_intersection_time_series=5 equals the MakeTimeSeries window
# used in the preprocessing pipeline — presumably the two must match; confirm.
agent = Agent_rf(
    data=data[tickets[0]],
    drop_intersection_time_series=5,
    despise=despise,
    info=False,
)
# Alternative models (swap in together with the matching agent.load cell below):
# agent = Agent_xgb(data = data[tickets[0]], drop_intersection_time_series = 5, despise = despise, info=False)
# agent = Agent_nn(data = data[tickets[0]], drop_intersection_time_series = 5, despise = despise, info=False)

In [None]:
# XG Boost
# agent.load("xgBoost 12-05-24 10h51min", path="../saved/")

In [None]:
# ## NN
# agent.load("neuralNetwork 12-05-24 03h16min", path="../saved/")

In [None]:
# Random Forest: call agent.load with the saved-model name below, looked up under ../saved/.
# NOTE(review): presumably this restores a previously trained model into `agent`;
# the name must correspond to the Agent class instantiated above — confirm.
agent.load("randomForest 17-05-24 17h12min", path="../saved/")

##### Companies

In [None]:
# One entry per ticket that was actually loaded. Each signal column gets its own
# pair of predicates ("entry" / "out") applied to the signal value, plus a "delay".
# Tickets missing from `data` (skipped while loading or processing) are left out;
# indexing `data` with them would raise KeyError.
# NOTE(review): the meaning of "delay" is defined by the Agent implementation — confirm.
companies_naive = [{
        "ticket": ticket,
        "data": data[ticket],
        "signals": {
            "SMA D-0": {"policy": {"entry": lambda signal: signal > 0.5, "out": lambda signal: signal <= 0.5}, "delay": 5},
            "RSI D-0": {"policy": {"entry": lambda signal: signal < 30, "out": lambda signal: signal > 70}, "delay": 5},
            "AO D-0": {"policy": {"entry": lambda signal: signal > 0, "out": lambda signal: signal <= 0}, "delay": 5},
            "MACD D-0": {"policy": {"entry": lambda signal: signal > 0, "out": lambda signal: signal <= 0}, "delay": 5},
            "ADX D-0": {"policy": {"entry": lambda signal: signal > 0.5, "out": lambda signal: signal <= 0.5}, "delay": 5},
            "BB D-0": {"policy": {"entry": lambda signal: signal > 0.5, "out": lambda signal: signal <= 0.5}, "delay": 5},
            # NOTE(review): Ichimoku and Aleatory enter above 0.5 but exit at <= 0,
            # unlike the other 0.5-threshold signals — confirm this gap is intended.
            "Ichimoku D-0": {"policy": {"entry": lambda signal: signal > 0.5, "out": lambda signal: signal <= 0}, "delay": 5},
            "pSAR_ind D-0": {"policy": {"entry": lambda signal: signal > 0, "out": lambda signal: signal <= 0}, "delay": 5},
            "Aleatory D-0": {"policy": {"entry": lambda signal: signal > 0.5, "out": lambda signal: signal <= 0}, "delay": 5}
        }
    } for ticket in tickets if ticket in data]

### Naive Policy

In [None]:
# Evaluate every single-signal policy in `companies_naive`. With return_df=True the
# result is used below as a long-format DataFrame with the columns "Ativo",
# "Signal" and "Return (% a.a.)".
result_naive = agent.naive_choice_market(companies_naive, return_df=True)

In [None]:
# Wide table: one row per asset ("Ativo"), one column per signal holding its
# "Return (% a.a.)", plus a row-wise "Mean" over all signal columns; rows are
# ordered from the highest mean to the lowest.
result_naive_pivot = (
    result_naive
    .pivot(index="Ativo", columns='Signal', values='Return (% a.a.)')
    .reset_index()
    # Column 0 is "Ativo" after reset_index, so the mean starts at column 1.
    .assign(Mean=lambda wide: wide.iloc[:, 1:].mean(axis=1))
    .sort_values(by="Mean", ascending=False)
)
result_naive_pivot

In [None]:
# Distribution over assets of "Return (% a.a.)", one KDE curve per naive signal.
fig, ax = plt.subplots(figsize=(12, 5), dpi=100)

# (column in result_naive_pivot, legend label, line style)
indicator_curves = [
    ("SMA D-0", "Simple Moving Average", "--"),
    ("RSI D-0", "Relative Strength Index", "-."),
    ("AO D-0", "Awesome Oscillator", "-."),
    ("MACD D-0", "Moving Average Convergence Divergence", "-"),
    ("ADX D-0", "Average Directional Index", "-"),
    ("BB D-0", "Bollinger Bands", ":"),
    ("Ichimoku D-0", "Ichimoku Clouds", "--"),
    ("pSAR_ind D-0", "Parabolic SAR", ":"),
]

# `palette` is not passed: without a `hue` variable seaborn ignores it and emits a
# warning, so each curve simply takes the next colour of the axes' colour cycle.
for column, label, linestyle in indicator_curves:
    sns.kdeplot(data=result_naive_pivot, x=column, fill=True, label=label,
                alpha=.3, linewidth=1, linestyle=linestyle, ax=ax)

# The "Aleatory D-0" signal is drawn as an unfilled red outline with a vertical
# line at its mean so the other curves can be compared against it.
# NOTE(review): presumably this is the random baseline — confirm.
sns.kdeplot(data=result_naive_pivot, x="Aleatory D-0", fill=False, label="Aleatory",
            color='red', alpha=1, linewidth=2, linestyle='--', ax=ax)
ax.axvline(x=result_naive_pivot["Aleatory D-0"].mean(), color='red', linestyle='--', linewidth=2, alpha=0.5)

plt.title("Retorno (% a.a.)")
plt.xlabel("Retorno (% a.a.)")
plt.legend()
plt.tight_layout()
# plt.show() renders the figure; the previous argument-less plt.plot() drew nothing
# and left an empty-list repr as the cell output.
plt.show()

In [None]:
# Mean return per signal across all assets. The slice 1:-1 drops the first column
# ("Ativo") and the last one ("Mean"), leaving only the signal columns.
result_naive_pivot.iloc[:, 1:-1].mean(axis=0)

In [None]:
# fig, ax = plt.subplots(figsize=(10, len(result_naive_pivot)), dpi=100)

# sns.barplot(result_naive, x='Return (% a.a.)', y='Ativo', hue='Signal', ax=ax)
# ax.axvline(x=-15, color='red', linestyle='--', linewidth=1.5, alpha=0.3)
# ax.axvline(x=-10, color='red', linestyle='--', linewidth=1.0, alpha=0.3)
# ax.axvline(x=-5, color='red', linestyle='--', linewidth=0.5, alpha=0.3)
# ax.axvline(x=0, color='gray', linestyle='--', linewidth=0.5)
# ax.axvline(x=5, color='green', linestyle='--', linewidth=0.5, alpha=0.3)
# ax.axvline(x=10, color='green', linestyle='--', linewidth=1, alpha=0.3)
# ax.axvline(x=15, color='green', linestyle='--', linewidth=1.5, alpha=0.3)

# plt.title("Retorno (% a.a.)")
# plt.ylabel("Ativo")
# plt.xlabel("Retorno (% a.a.)")
# plt.legend()
# plt.tight_layout()
# plt.plot()

In [None]:
# Pairwise correlation between the columns of `merge` from position 5 onwards.
# NOTE(review): the offset 5 presumably skips the five price/volume columns
# (_Open, _High, _Low, _Close, _Volume) — confirm against the column order.
correlation_matrix = merge.iloc[:, 5:].corr()

# Hide the upper triangle (diagonal included): the matrix is symmetric.
mask = np.triu(np.ones_like(correlation_matrix, dtype=bool))
plt.figure(figsize=(10, 6), dpi=100)
plt.title("Correlation Matrix", fontsize=20, fontweight='bold')
# Colour scale spans the full correlation range. vmin must be the lower bound:
# the previous vmin=1 / vmax=-1 had the two limits swapped.
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="coolwarm", vmin=-1, vmax=1, mask=mask, annot_kws={'size': 5})
plt.xticks(rotation=45, ha='right')
plt.show()

### Manual Policy

In [None]:
# One entry per ticket that was actually loaded. Unlike the naive setup, the
# signals are evaluated together: "delay", "min_entry" and "min_out" are set once
# per company instead of per signal.
# Tickets missing from `data` (skipped while loading or processing) are left out;
# indexing `data` with them would raise KeyError.
# NOTE(review): presumably min_entry / min_out are the number of signals that must
# agree before entering / leaving a position — confirm in the Agent implementation.
companies_manual = [{
        "ticket": ticket,
        "data": data[ticket],
        "signals": {
            "SMA D-0": {"policy": {"entry": lambda signal: signal > 0.5, "out": lambda signal: signal <= 0.5}},
            "RSI D-0": {"policy": {"entry": lambda signal: signal < 30, "out": lambda signal: signal > 70}},
            "AO D-0": {"policy": {"entry": lambda signal: signal > 0, "out": lambda signal: signal <= 0}},
            "MACD D-0": {"policy": {"entry": lambda signal: signal > 0, "out": lambda signal: signal <= 0}},
            "ADX D-0": {"policy": {"entry": lambda signal: signal > 0.5, "out": lambda signal: signal <= 0.5}},
            "BB D-0": {"policy": {"entry": lambda signal: signal > 0.5, "out": lambda signal: signal <= 0.5}},
            "Ichimoku D-0": {"policy": {"entry": lambda signal: signal > 0.5, "out": lambda signal: signal <= 0}},
            "pSAR_ind D-0": {"policy": {"entry": lambda signal: signal > 0, "out": lambda signal: signal <= 0}}
        },
        "delay": 2,
        "min_entry": 3,
        "min_out": 3
    } for ticket in tickets if ticket in data]

In [None]:
# Evaluate the combined (multi-signal) policy for every company in `companies_manual`.
# With return_df=True the result is used below as a DataFrame with the columns
# "Signal" and "Return (% a.a.)".
result_manual = agent.combined_choice_market(companies_manual, return_df=True)

In [None]:
# Distribution over assets of "Return (% a.a.)" for the manual combined policy.
fig, ax = plt.subplots(figsize=(12, 5), dpi=100)

# The legend label is the "Signal" value of the first row. `.iloc[0]` selects it by
# position, so it does not depend on the frame's index containing the label 0.
# `palette` is not passed: without a `hue` variable seaborn ignores it with a warning.
sns.kdeplot(data=result_manual, x="Return (% a.a.)", fill=True, label=result_manual['Signal'].iloc[0],
            alpha=.3, linewidth=1, ax=ax)
# Zero-return reference line.
ax.axvline(x=0, color='red', linestyle='--', linewidth=2, alpha=0.5)

plt.title("Retorno (% a.a.)")
plt.xlabel("Retorno (% a.a.)")
plt.legend()
plt.tight_layout()
# plt.show() renders the figure; the previous argument-less plt.plot() drew nothing.
plt.show()

### AI Policy

In [None]:
# One entry per ticket that was actually loaded; the model-driven policy needs only
# the frame and a "delay". Tickets missing from `data` (skipped while loading or
# processing) are left out; indexing `data` with them would raise KeyError.
companies_ai = [
    {"ticket": ticket, "data": data[ticket], "delay": 1}
    for ticket in tickets
    if ticket in data
]

In [None]:
# Evaluate the loaded model's policy for every company in `companies_ai`.
# With return_df=True the result is used below as a DataFrame with the columns
# "Signal" and "Return (% a.a.)".
result_ai = agent.ai_choice_market(companies_ai, return_df=True)

In [None]:
# Distribution over assets of "Return (% a.a.)" for the AI policy.
fig, ax = plt.subplots(figsize=(12, 5), dpi=100)

# The legend label is the "Signal" value of the first row. `.iloc[0]` selects it by
# position, so it does not depend on the frame's index containing the label 0.
# `palette` is not passed: without a `hue` variable seaborn ignores it with a warning.
sns.kdeplot(data=result_ai, x="Return (% a.a.)", fill=True, label=result_ai['Signal'].iloc[0],
            alpha=.3, linewidth=1, ax=ax)
# Zero-return reference line.
ax.axvline(x=0, color='red', linestyle='--', linewidth=2, alpha=0.5)

plt.title("Retorno (% a.a.)")
plt.xlabel("Retorno (% a.a.)")
plt.legend()
plt.tight_layout()
# plt.show() renders the figure; the previous argument-less plt.plot() drew nothing.
plt.show()

In [None]:
# Rows of the AI-policy results whose "Return (% a.a.)" is strictly above 19.5.
# NOTE(review): 19.5 is an unexplained threshold — consider naming it in the
# configuration cell.
result_ai.loc[result_ai['Return (% a.a.)'].gt(19.5)]