In [1]:
import os
import pathlib
import re
import datetime

from src.data import text_handlers
from src.data import web_scrappers
from src.data import make_dataset
from src.models.optmization_model import PortfolioOptimizer


from pypdf import PdfReader
from tabula import read_pdf
import dateparser

import pandas as pd


# Load the dotenv IPython extension, then read the variables declared in the .env file into the environment
%load_ext dotenv
%dotenv

# Folder where downloaded financial reports are stored; this lookup raises KeyError if the variable is not set
REPORTS_STORE_PATH = os.environ['REPORTS_STORE_PATH']

# Historical market data workflow

In [2]:
# Every function below accepts either one ticker or a list of tickers, provided they are compatible with the Yahoo Finance API
tickers_sample = ['FMTY14.MX', 'FNOVA17.MX', 'FIBRAMQ12.MX', 'FIBRAPL14.MX']
history_start = '2012-12-01'

# Register the new tickers in the local database; the returned pandas dataframe is meant for validation
new_data = make_dataset.add_new_tickers(
    tickers=tickers_sample,
    start_date=history_start,
    end_date='2024-06-01',
)

# Note: the call above only loaded dates up to 2024-06-01, so later dates are still not in the local DB
existing_data = make_dataset.get_market_dataset(
    ticker=tickers_sample,
    start_date=history_start,
    end_date='2024-07-01',
)
display(existing_data.tail(5))

# No ticker argument here: this call updates every ticker already stored in the local DB
updated_data = make_dataset.update_market_data()
display(updated_data.tail(5))

# Tickers can also be deleted from the local DB (left disabled for this demo)
# make_dataset.delete_tickers(tickers=tickers_sample)

[*********************100%%**********************]  4 of 4 completed


Loading 9013 entries into database...
Succesfully loaded new data in database, returning dataset for validations...


Unnamed: 0,date,ticker,avg_price,dividends
21253,2024-06-25,FNOVA17.MX,28.15,0.0
21254,2024-06-26,FNOVA17.MX,28.15,0.0
21255,2024-06-27,FNOVA17.MX,28.15,0.0
21256,2024-06-28,FNOVA17.MX,28.15,0.0
21257,2024-07-01,FNOVA17.MX,28.15,0.0


[*********************100%%**********************]  15 of 15 completed

Loading 30 entries into database...
Succesfully loaded new data in database, returning dataset for validations...





Price,ticker,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
25,FIBRAUP18.MX,2024-07-17,31.799999,31.799999,31.799999,31.799999,0,0.0,0.0
26,FIBRAPL14.MX,2024-07-16,65.0,65.540001,64.809998,65.019997,934638,0.0,0.0
27,FIBRAPL14.MX,2024-07-17,65.220001,65.220001,62.700001,63.279999,1839883,0.0,0.0
28,FMTY14.MX,2024-07-16,9.99,9.99,9.72,9.75,629055,0.0,0.0
29,FMTY14.MX,2024-07-17,9.81,9.85,9.7,9.72,531599,0.0,0.0


# Future dividends extraction from financial reports

In [3]:
# One scrapper class (and one text extractor class) exists per FIBRA. FIBRAMQ12 has no scrapper
# because its web site cannot be scraped with Beautiful Soup.
scrapper_fmty14 = web_scrappers.ScrapperFMTY14()
scrapper_fnova17 = web_scrappers.ScrapperFNOVA17()
scrapper_fibrapl14 = web_scrappers.ScrapperFIBRAPL14()

# Updating the reports automatically creates one folder per FIBRA under the REPORTS_STORE_PATH
# location taken from the .env file.
for scrapper in (scrapper_fmty14, scrapper_fnova17, scrapper_fibrapl14):
    scrapper.update_financial_reports()

Sucessfully saved 1 pdf reports.
Sucessfully saved 5 pdf reports.
Sucessfully saved 3 pdf reports.


In [4]:
# Map each FIBRA to the text handler that extracts dividend info from its reports.
# Report-based dividends are only used for future dates; historical dividends already come with the market data.
asset_map = dict(
    FMTY14=text_handlers.TextHandlerFMTY14,
    FIBRAPL14=text_handlers.TextHandlerFIBRAPL14,
    FNOVA17=text_handlers.TextHandlerFNOVA17,
    FIBRAMQ12=text_handlers.TextHandlerFIBRAMQ12,
)

# Upload the info extracted from the financial reports to the local DB
make_dataset.update_dividend_data(asset_map)

{'ticker': 'FMTY14.MX', 'announcement_date': '2024-02-21', 'dividend_date': '2024-02-29', 'dividend_amount': '0.0750133309617115'}
{'ticker': 'FMTY14.MX', 'announcement_date': '2024-02-23', 'dividend_date': '2024-03-04', 'dividend_amount': None}
{'ticker': 'FMTY14.MX', 'announcement_date': '2024-06-20', 'dividend_date': '2024-06-28', 'dividend_amount': '0.0729148160167858'}
{'ticker': 'FIBRAPL14.MX', 'announcement_date': '2024-04-17', 'dividend_date': '2024-05-02', 'dividend_amount': '0.598'}
{'ticker': 'FIBRAPL14.MX', 'announcement_date': '2024-02-22', 'dividend_date': '2024-03-06', 'dividend_amount': '0.3645'}


'pages' argument isn't specified.Will extract only from page 1 by default.


{'ticker': 'FIBRAPL14.MX', 'announcement_date': '2024-01-17', 'dividend_date': '2024-02-01', 'dividend_amount': '2.0269'}


'pages' argument isn't specified.Will extract only from page 1 by default.
'pages' argument isn't specified.Will extract only from page 1 by default.


{'ticker': 'FNOVA17.MX', 'announcement_date': '2024-04-15', 'dividend_date': '2024-04-23', 'dividend_amount': 0.575027304}


'pages' argument isn't specified.Will extract only from page 1 by default.


{'ticker': 'FNOVA17.MX', 'announcement_date': '2023-11-16', 'dividend_date': '2023-11-27', 'dividend_amount': 0.561609687}


'pages' argument isn't specified.Will extract only from page 1 by default.


{'ticker': 'FNOVA17.MX', 'announcement_date': '2024-02-19', 'dividend_date': '2024-02-28', 'dividend_amount': 0.530664143}
{'ticker': 'FIBRAMQ12.MX', 'announcement_date': '2024-04-25', 'dividend_date': '2024-06-17', 'dividend_amount': '0.5250'}
Skipping line. Details:
[{'type': 'float_type', 'loc': ('dividend_amount',), 'msg': 'Input should be a valid number', 'input': None, 'url': 'https://errors.pydantic.dev/2.8/v/float_type'}]


# Get a historical data sample along with a forecast for a specified time window

In [7]:
# Works on a single ticker and covers a time window after the end date
# (training the ML model behind this call produces a lot of log output)
sample_ticker = 'FMTY14.MX'
sample_window = 20  # presumably the length of the window after the end date — confirm against get_ticker_dataset
sample_df = make_dataset.get_ticker_dataset(sample_ticker, '2021-01-01', '2024-01-01', sample_window)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=100` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Unnamed: 0,date,ticker,avg_price,dividends,source
0,2021-01-04,FMTY14.MX,8.775499,0.0,actual
1,2021-01-05,FMTY14.MX,8.824704,0.0,actual
2,2021-01-06,FMTY14.MX,8.896615,0.0,actual
3,2021-01-07,FMTY14.MX,8.921222,0.0,actual
4,2021-01-08,FMTY14.MX,8.904188,0.0,actual
...,...,...,...,...,...
775,2024-01-22,FMTY14.MX,11.501997,0.0,prediction
776,2024-01-23,FMTY14.MX,11.553594,0.0,prediction
777,2024-01-24,FMTY14.MX,11.592491,0.0,prediction
778,2024-01-25,FMTY14.MX,11.766363,0.0,prediction


# Define a financial portfolio

In [15]:
# Build a portfolio optimizer over the sample tickers defined in the market data cell
optimizer = PortfolioOptimizer(tickers_sample)

In [16]:
# By default it assumes an even share distribution across the tickers
optimizer.get_weights()

[0.25, 0.25, 0.25, 0.25]

In [17]:
# To optimize portfolio shares we first need datasets with historical and predicted data
# (the historical data is used to train a model up to the end date).
# NOTE(review): the two arguments look like the start and end dates — confirm against make_tickers_dataset
optimizer.make_tickers_dataset('2022-01-01', '2024-01-01')

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=100` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=100` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=100` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Found an existing model for this. Use load_model method to use it or continue with training to replace it


`Trainer.fit` stopped: `max_epochs=100` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [19]:
# This method compares the current weights to the latest value in the forecast to compute optimal weights
optimizer.compute_optimal_weights()
# Display the weights after the optimization step
optimizer.get_weights()

[0.0, 0.0, 1.0, 0.0]

# Run a backtest on historical values to evaluate the strategy

In [1]:
# Heads-up: this process takes a LOT of time to complete (the Jupyter outputs were also deleted
# because the run generates a lot of prints)
backtest_start_date = datetime.datetime(2023, 1, 1)
backtest_end_date = datetime.datetime(2024, 7, 1)
prediction_window = 20
initial_investment = 50_000

price_holder = PortfolioOptimizer(tickers_sample)
price_holder.run_backtest(backtest_start_date, backtest_end_date, prediction_window, initial_investment)

In [24]:
# Inspect the summary table stored on the optimizer by the backtest run
price_holder.backtest_summary_df

Unnamed: 0,tickers,date,portfolio_share,portfolio_composition,paid_dividends,pnl
0,"[FMTY14, FNOVA17, FIBRAMQ12, FIBRAPL14]",2023-01-27,"[0.0, 1.0, 0.0, 0.0]","[0, 2107, 0, 0]",528.356374,5710.945652
1,"[FMTY14, FNOVA17, FIBRAMQ12, FIBRAPL14]",2023-02-24,"[0.0, 0.0, 0.7681, 0.2319]","[0, 0, 1266, 394]",0.0,8727.761655
2,"[FMTY14, FNOVA17, FIBRAMQ12, FIBRAPL14]",2023-03-24,"[1.0, 0.0, 0.0, 0.0]","[5460, 0, 0, 0]",802.035754,11667.243835
3,"[FMTY14, FNOVA17, FIBRAMQ12, FIBRAPL14]",2023-04-21,"[1.0, 0.0, 0.0, 0.0]","[5186, 0, 0, 0]",0.0,8899.002312
4,"[FMTY14, FNOVA17, FIBRAMQ12, FIBRAPL14]",2023-05-19,"[0.0, 0.2665, 0.7335, 0.0]","[0, 477, 1468, 0]",0.0,4062.336744
5,"[FMTY14, FNOVA17, FIBRAMQ12, FIBRAPL14]",2023-06-16,"[0.401, 0.2834, 0.3157, 0.0]","[2141, 615, 635, 0]",0.0,8515.645728
6,"[FMTY14, FNOVA17, FIBRAMQ12, FIBRAPL14]",2023-07-14,"[0.1368, 0.5014, 0.0, 0.3618]","[447, 685, 0, 570]",0.0,6990.308805
7,"[FMTY14, FNOVA17, FIBRAMQ12, FIBRAPL14]",2023-08-11,"[0.9581, 0.0419, 0.0, 0.0]","[5001, 87, 0, 0]",449.85525,6043.99411
8,"[FMTY14, FNOVA17, FIBRAMQ12, FIBRAPL14]",2023-09-08,"[0.0, 0.0, 1.0, 0.0]","[0, 0, 1849, 0]",0.0,5063.358239
9,"[FMTY14, FNOVA17, FIBRAMQ12, FIBRAPL14]",2023-10-06,"[0.0, 0.0, 0.271, 0.729]","[0, 0, 157, 891]",0.0,7807.514056
