In [1]:
from bs4 import BeautifulSoup
import requests
import json
import pandas as pd

In [2]:
# Location of the JSON file loaded below, relative to the notebook's working
# directory. A later cell reads the "fiis" key of the parsed result.
my_fiis_json = "../data/my_fiis.json"


# Pin the encoding: without it open() falls back to the platform locale
# (e.g. cp1252 on Windows), which can mis-decode non-ASCII text in the file.
with open(my_fiis_json, encoding="utf-8") as f:
    my_fiis_dict = json.load(f)

In [3]:
def get_fiis_url(full=True, my_fiis=None, timeout=30):
    """Collect fund page links from the fiis.com.br fund listing page.

    Parameters
    ----------
    full : bool
        When True, return the ``href`` of every anchor found inside the
        element with id ``items-wrapper``. When False, keep only anchors
        whose displayed name is contained in ``my_fiis``.
    my_fiis : container of str, optional
        Names to keep when ``full`` is False. Membership is tested with
        ``in``, so passing a plain string results in a substring test.
        ``None`` (the default) is treated as an empty collection.
    timeout : float
        Seconds to wait for the server before giving up. Without a timeout
        ``requests`` may block indefinitely on an unresponsive host.

    Returns
    -------
    list
        The ``href`` attribute values, in page order.

    Raises
    ------
    requests.HTTPError
        If the listing page answers with a 4xx/5xx status.
    requests.Timeout
        If the server does not answer within ``timeout`` seconds.
    AttributeError
        If the page has no element with id ``items-wrapper``.
    """
    # Avoid a shared mutable default argument.
    if my_fiis is None:
        my_fiis = []

    response = requests.get(
        'https://fiis.com.br/lista-de-fundos-imobiliarios/',
        timeout=timeout,
    )
    # Fail loudly on an error page instead of trying to parse it.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    anchors = soup.find(id='items-wrapper').find_all('a')

    if full:
        return [anchor.get('href') for anchor in anchors]

    # The name compared against my_fiis is the first child of the anchor's
    # second child node (anchor.contents[1].contents[0]).
    return [
        anchor.get('href')
        for anchor in anchors
        if anchor.contents[1].contents[0] in my_fiis
    ]

In [4]:
def get_fii_news(url, timeout=30):
    """Scrape the news list from a fund page.

    Parameters
    ----------
    url : str
        Address of the fund page to download.
    timeout : float
        Seconds to wait for the server before giving up. Without a timeout
        ``requests`` may block indefinitely on an unresponsive host.

    Returns
    -------
    dict
        ``{'news': [...]}`` where each entry is a dict with the keys
        ``'date'``, ``'title'`` and ``'link'``. ``'link'`` is the string
        ``'no_link'`` when the item has no anchor, the anchor has no
        ``href``, or the ``href`` is the ``'javascript:;'`` placeholder.

    Raises
    ------
    requests.HTTPError
        If the page answers with a 4xx/5xx status.
    requests.Timeout
        If the server does not answer within ``timeout`` seconds.
    AttributeError
        If the page has no element with id ``news--wrapper`` or an item
        has no element with class ``date``.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error page instead of trying to parse it.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    entries = []
    for item in soup.find(id='news--wrapper').find_all('li'):
        date = item.find(class_='date').contents[0]

        title_tag = item.find(class_='title')
        if len(title_tag) == 1:
            # Single child node: use it as-is.
            title = title_tag.contents[0]
        else:
            # Several child nodes: glue them with spaces, skipping the
            # <br/> separators, and drop trailing whitespace.
            title = ' '.join(
                str(piece) for piece in title_tag if str(piece) != '<br/>'
            ).rstrip()

        # Guard against items without an anchor instead of crashing on None.
        anchor = item.find('a')
        href = anchor.get('href') if anchor is not None else None
        link = href if href and href != 'javascript:;' else 'no_link'

        entries.append({'date': date, 'title': title, 'link': link})

    return {'news': entries}

In [5]:
def _parse_br_float(text):
    """Convert a number written with a decimal comma (e.g. '111,30') to float.

    Dots are removed as thousands separators only when a comma follows the
    last dot ('1.234,56' -> 1234.56); any other input is converted exactly
    as a plain comma-to-dot replacement would.
    """
    if ',' in text and text.rfind(',') > text.rfind('.'):
        text = text.replace('.', '')
    return float(text.replace(',', '.'))


def _decode_cf_email(encoded):
    """Decode a hex string whose first byte is the XOR key for the rest."""
    key = int(encoded[:2], 16)
    return ''.join(
        chr(int(encoded[pos:pos + 2], 16) ^ key)
        for pos in range(2, len(encoded), 2)
    )


def _currency_and_amount(kpi_item):
    """Return (currency node, amount node) from a KPI item's 'value' element."""
    value_node = kpi_item.find(class_='value')
    return value_node.find(class_='currency').contents[0], value_node.contents[1]


def get_fii_info(url, news=False, timeout=30):
    """Scrape the summary data shown on a fund page.

    Parameters
    ----------
    url : str
        Address of the fund page to download.
    news : bool
        When True, the result of ``get_fii_news(url)`` is stored under the
        ``'news'`` key. That call downloads the page again and returns a
        dict that itself has a ``'news'`` key.
    timeout : float
        Seconds to wait for the server before giving up. Without a timeout
        ``requests`` may block indefinitely on an unresponsive host.

    Returns
    -------
    dict
        Keys: ``fiiCode``, ``fiiFullName``, ``currentValue``, ``admin``
        (name, cnpj, phone, email, site), ``fiiBasicInfo`` (stockName,
        fiiType, typeANBIMA, CVM, numberOfQuotas, numberOfQuotaHolders,
        fiiCNPJ), ``DY``, ``lastPayment``, ``netPatrimony``,
        ``valuePerQuota`` (each with currency and value) and, optionally,
        ``news``. ``netPatrimony['value']`` is kept as the raw page text.

    Raises
    ------
    requests.HTTPError
        If the page answers with a 4xx/5xx status.
    requests.Timeout
        If the server does not answer within ``timeout`` seconds.
    AttributeError, IndexError, ValueError
        If the page does not have the expected elements or number formats.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error page instead of trying to parse it.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    fii_kpis = soup.find(id='informations--indexes').find_all(class_='item')
    admin_section = soup.find(id='informations--admin')
    fii_admin_top = admin_section.find(class_='top-content')
    fii_admin_bottom = admin_section.find(class_='bottom-content').find_all(class_='item')
    fii_basic = soup.find(id='informations--basic').find_all(class_='wrapper')
    fii_basic_1_items = fii_basic[0].find_all(class_='item')
    fii_basic_2_items = fii_basic[1].find_all(class_='item')

    fii_info = {}
    fii_info["fiiCode"] = soup.find(id='fund-ticker').contents[0]
    fii_info["fiiFullName"] = soup.find(id='fund-name').contents[0].strip()
    fii_info["currentValue"] = _parse_br_float(
        soup.find(class_='item quotation').find(class_='value').contents[0]
    )

    # NOTE(review): the displayed email text can be the placeholder
    # '[email protected]'. This presumably comes from Cloudflare email
    # obfuscation, with the real address XOR-encoded in a 'data-cfemail'
    # attribute - confirm against the live markup. If the attribute is
    # absent or not valid hex, the displayed text is kept.
    email_node = fii_admin_bottom[1].find(class_='value').find('a').contents[0]
    admin_email = email_node.contents[0]
    encoded_email = email_node.get('data-cfemail')
    if encoded_email:
        try:
            admin_email = _decode_cf_email(encoded_email)
        except ValueError:
            pass  # deliberate fallback to the displayed text

    fii_info["admin"] = {
        "name": fii_admin_top.find(class_='administrator-name').contents[0],
        "cnpj": fii_admin_top.find(class_='administrator-doc').contents[0],
        "phone": fii_admin_bottom[0].find(class_='value').contents[0],
        "email": admin_email,
        "site": fii_admin_bottom[2].find(class_='value').find('a').contents[0],
    }

    fii_info["fiiBasicInfo"] = {
        "stockName": fii_basic_1_items[0].find(class_='value').contents[0],
        "fiiType": fii_basic_1_items[1].find(class_='value').contents[0],
        "typeANBIMA": fii_basic_1_items[2].find(class_='value').contents[0],
        "CVM": fii_basic_1_items[3].find(class_='value').contents[0],
        # Counts use '.' as thousands separator, so it is simply removed.
        "numberOfQuotas": int(
            fii_basic_2_items[0].find(class_='value').contents[0].replace('.', '')
        ),
        "numberOfQuotaHolders": int(
            fii_basic_2_items[1].find(class_='value').contents[0].replace('.', '')
        ),
        "fiiCNPJ": fii_basic_2_items[2].find(class_='value').contents[0],
    }

    fii_info["DY"] = _parse_br_float(fii_kpis[0].find(class_='value').contents[0])

    currency, amount = _currency_and_amount(fii_kpis[1])
    fii_info["lastPayment"] = {"currency": currency, "value": _parse_br_float(amount)}

    # Stored unparsed: the stored notebook output shows text such as '3,60 B'.
    currency, amount = _currency_and_amount(fii_kpis[2])
    fii_info["netPatrimony"] = {"currency": currency, "value": amount}

    currency, amount = _currency_and_amount(fii_kpis[3])
    fii_info["valuePerQuota"] = {"currency": currency, "value": _parse_br_float(amount)}

    if news:
        fii_info['news'] = get_fii_news(url)

    return fii_info

In [7]:
# Take the first URL returned when the listing is filtered by the first
# entry stored under the "fiis" key of the loaded JSON.
fii_url = get_fiis_url(
    full=False,
    my_fiis=my_fiis_dict["fiis"][0],
)[0]

# Uncomment to download and print the scraped data for that page:
# print(get_fii_info(fii_url))
# print(get_fii_news(fii_url))

{'fiiCode': 'KNIP11', 'fiiFullName': 'Kinea Índice de Preços', 'currentValue': 111.3, 'admin': {'name': 'INTRAG DTVM', 'cnpj': '62.418.140/0001-31', 'phone': ' 55 (11) 30726012', 'email': '[email\xa0protected]', 'site': 'www.intrag.com.br'}, 'fiiBasicInfo': {'stockName': 'FII KINEA IP', 'fiiType': 'Papel: CRIs', 'typeANBIMA': 'Títulos e Valores Mobiliários Gestão Ativa', 'CVM': '30/11/-0001', 'numberOfQuotas': 32994469, 'numberOfQuotaHolders': 21660, 'fiiCNPJ': '24.960.430/0001-13'}, 'DY': 0.63, 'lastPayment': {'currency': 'R$', 'value': 0.7}, 'netPatrimony': {'currency': 'R$', 'value': '3,60 B'}, 'valuePerQuota': {'currency': 'R$', 'value': 109.08}}
