# Fundos de Investimento: Documentos: Informe Diário

O INFORME DIÁRIO é um demonstrativo que contém as seguintes informações do fundo, relativas à data de competência:

- Valor total da carteira do fundo;
- Patrimônio líquido;
- Valor da cota;
- Captações realizadas no dia;
- Resgates pagos no dia;
- Número de cotistas.

**Importante**: A partir de maio/2022, os arquivos de dados de Informe Diário de Fundos passarão a ser disponibilizados no formato csv compactado (zip).



In [1]:
import requests
import pandas as pd
from io import BytesIO
from zipfile import ZipFile
from dateutil.relativedelta import relativedelta
from pyportela.services.CachedDownload import CachedDownload
from datetime import datetime

def get_year_url(year: int) -> str:
    """Return the CVM download URL of the yearly historical archive.

    The archive is named ``inf_diario_fi_<year>.zip`` and lives under the
    ``HIST`` folder of the Informe Diário dataset.
    """
    base_url = "https://dados.cvm.gov.br/dados/FI/DOC/INF_DIARIO/DADOS/HIST/"
    return f"{base_url}inf_diario_fi_{year}.zip"

def get_year_month_url(year: int, month: int) -> str:
    """Return the CVM download URL of a monthly Informe Diário archive.

    The archive is named ``inf_diario_fi_<year><month>.zip`` where the month
    is zero-padded to two digits (e.g. ``202103`` for March 2021).
    """
    base_url = "https://dados.cvm.gov.br/dados/FI/DOC/INF_DIARIO/DADOS/"
    return f"{base_url}inf_diario_fi_{year}{month:02d}.zip"

In [2]:
# Shared download helper rooted at a local directory.
# NOTE(review): presumably CachedDownload stores fetched files under this
# path and reuses them on later calls — confirm against its implementation.
downloads = CachedDownload("/tmp/downloads/br_gov/cvm")
# Passed as the second argument of every downloads.download(...) call below.
# NOTE(review): presumably the maximum age of a cached file — confirm.
expiry = relativedelta(years=1)
# Accumulates, in call order, every archive URL requested by the download
# helpers in the following cells.
urls = []


def download_history(urls: list):
    """Request every yearly archive from 2004 through 2020.

    Each URL is appended to ``urls`` (mutated in place) right before it is
    handed to the module-level ``downloads`` helper together with ``expiry``.
    """
    yearly_urls = [get_year_url(year) for year in range(2004, 2021)]
    for archive_url in yearly_urls:
        # Record the URL first so it is kept even if the download raises.
        urls.append(archive_url)
        downloads.download(archive_url, expiry)


# Runs at cell execution: requests the yearly archives and fills `urls` in place.
download_history(urls)

In [3]:
def download_recent_history(urls: list):
    """Request every monthly archive from January 2021 up to the present.

    Walks month by month starting at 2021-01-01 while the month start is
    earlier than the current time. Each URL is appended to ``urls`` (mutated
    in place) and then handed to the module-level ``downloads`` helper
    together with ``expiry``.
    """
    one_month = relativedelta(months=1)
    month_start = datetime(2021, 1, 1)
    # The current time is re-read on every iteration, as in a long-running loop.
    while datetime.now() > month_start:
        monthly_url = get_year_month_url(month_start.year, month_start.month)
        urls.append(monthly_url)
        downloads.download(monthly_url, expiry)
        month_start = month_start + one_month


# Runs at cell execution: requests the monthly archives and extends `urls` in place.
download_recent_history(urls)

In [4]:
from pyportela.utils import unzip_to_df
from typing import Union, Optional
from datetime import datetime


def to_df(zip_file: Union[str, BytesIO]) -> pd.DataFrame:
    """Turn one Informe Diário zip archive into a typed DataFrame.

    The archive is read through ``unzip_to_df`` with ``sep=";"`` and
    ``dtype=str`` (NOTE(review): presumably this parses the CSV inside the
    zip with every column as text — confirm against the helper). Afterwards:

    * ``DT_COMPTC`` is parsed from ``YYYY-MM-DD`` text into ``datetime.date``;
    * the value/flow/count columns are cast to ``float``;
    * a ``TP_FUNDO`` column filled with ``None`` is added when absent;
    * every column name is lower-cased;
    * the frame gets a ``Name`` attribute set to ``"fi_doc_inf_diario"``.

    Returns the resulting DataFrame.
    """
    numeric_columns = (
        "VL_TOTAL",
        "VL_QUOTA",
        "VL_PATRIM_LIQ",
        "CAPTC_DIA",
        "RESG_DIA",
        "NR_COTST",
    )

    def parse_day(text):
        # Strict ISO date parsing; the time part is discarded.
        return datetime.strptime(text, "%Y-%m-%d").date()

    frame = unzip_to_df(zip_file, sep=";", dtype=str)
    frame["DT_COMPTC"] = frame["DT_COMPTC"].apply(parse_day)

    for name in numeric_columns:
        frame[name] = frame[name].astype(float)

    # Not every archive carries TP_FUNDO; add it so all frames share a layout.
    if "TP_FUNDO" not in frame.columns:
        frame["TP_FUNDO"] = None

    lowered = {name: name.lower() for name in frame.columns}
    frame.rename(columns=lowered, inplace=True)

    # Plain Python attribute on the frame, not a column.
    # NOTE(review): presumably read downstream as the target table name — confirm.
    frame.Name = "fi_doc_inf_diario"
    return frame


# df_2006 = to_df(df_2006_bytes)
# df_2006

In [5]:
import os

from pyportela.repositories.PostgresWarehouse import PostgresWarehouse

# Connection URL for the warehouse. Set the CVM_WAREHOUSE_URL environment
# variable to point at another server or to supply real credentials.
# NOTE(security): the fallback below embeds a plaintext password in source
# control; it is kept only so the notebook still runs unchanged on the
# original local setup. Prefer the environment variable and rotate this
# password if the file is ever shared.
warehouse = PostgresWarehouse(
    os.environ.get(
        "CVM_WAREHOUSE_URL",
        "postgresql://postgres:popo8160@localhost:5432/br_gov_cvm",
    )
)

In [None]:
def load_all_data(urls: list, start: int = 17):
    """Print every URL in ``urls`` from position ``start`` onwards.

    Args:
        urls: Archive URLs, in the order they were collected.
        start: Index of the first URL to process. The default of 17 keeps
            the previous hard-coded behaviour: ``download_history`` appends
            one URL per year of ``range(2004, 2021)``, i.e. 17 entries, so
            when ``urls`` was filled by it first the default skips exactly
            those yearly archives.

    Returns:
        None. The URLs are only printed; the actual load steps are disabled.
    """
    for url in urls[start:]:
        print(url)
        # Load pipeline, currently disabled on purpose:
        # cached = downloads.download(url, expiry)
        # df = to_df(cached)
        # warehouse.save(df, "dt_comptc")

# Runs at cell execution over the URLs collected by the download cells above.
load_all_data(urls)

https://dados.cvm.gov.br/dados/FI/DOC/INF_DIARIO/DADOS/HIST/inf_diario_fi_2018.zip
https://dados.cvm.gov.br/dados/FI/DOC/INF_DIARIO/DADOS/HIST/inf_diario_fi_2019.zip
https://dados.cvm.gov.br/dados/FI/DOC/INF_DIARIO/DADOS/HIST/inf_diario_fi_2020.zip


: 

In [None]:
import sqlite3
from typing import List, Optional


def save_df_to_sqlite(
    df: pd.DataFrame,
    table_name: str,
    db_path: str,
    replace_col: Optional[str] = None,
    overwrite: bool = False,
):
    """Write ``df`` into a table of the SQLite database at ``db_path``.

    Modes:
        * ``overwrite`` truthy: the table is dropped and recreated from ``df``.
        * ``replace_col`` given: existing rows whose ``replace_col`` value
          occurs in ``df`` are deleted first, then ``df`` is appended, so
          re-loading the same keys does not duplicate rows.
        * otherwise: ``df`` is simply appended.

    Args:
        df: Data to store (the index is not written).
        table_name: Target table. Interpolated into SQL as-is, so it must
            come from trusted code, never from user input.
        db_path: Path of the SQLite database file.
        replace_col: Column of ``df`` (and of the table) identifying the rows
            to replace. Interpolated into SQL as-is, same caveat as above.
        overwrite: Replace the whole table instead of appending.

    Returns:
        Whatever ``DataFrame.to_sql`` returns.

    Raises:
        KeyError: if ``replace_col`` is not a column of ``df``.
    """
    # SQLite caps the number of bound parameters per statement (999 in older
    # builds), so the DELETE is issued in batches well below that limit.
    batch_size = 500

    unique_values = df[replace_col].unique().tolist() if replace_col else []

    con = sqlite3.connect(db_path)
    try:
        if overwrite:
            written = df.to_sql(table_name, con, if_exists="replace", index=False)
        else:
            if unique_values:
                # On the very first load the table does not exist yet; a
                # DELETE would fail with "no such table", so skip it.
                table_exists = (
                    con.execute(
                        "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?",
                        (table_name,),
                    ).fetchone()
                    is not None
                )
                if table_exists:
                    for offset in range(0, len(unique_values), batch_size):
                        batch = unique_values[offset:offset + batch_size]
                        placeholders = ", ".join("?" * len(batch))
                        sql = (
                            f"DELETE FROM {table_name} "
                            f"WHERE {replace_col} IN ({placeholders})"
                        )
                        con.execute(sql, batch).close()
            written = df.to_sql(table_name, con, if_exists="append", index=False)
        # Make the delete + insert durable regardless of whether to_sql
        # committed on its own. If anything above raised, the connection is
        # closed without a commit and the pending changes are discarded.
        con.commit()
        return written
    finally:
        con.close()

In [None]:
# save_df_to_sqlite(df_2006, "fi_doc_inf_diario", "cvm.db", replace_col="DT_COMPTC")

In [None]:
# Sanity check of the loaded table: total row count and number of distinct
# funds. The cursor and connection are released even when a query fails
# (e.g. the table has not been created yet).
con = sqlite3.connect("cvm.db")
try:
    cur = con.cursor()
    try:
        cur.execute("SELECT COUNT(*) FROM fi_doc_inf_diario")
        print(cur.fetchone())

        cur.execute("SELECT DISTINCT CNPJ_FUNDO FROM fi_doc_inf_diario")
        print(len(cur.fetchall()))
    finally:
        cur.close()
finally:
    con.close()
    

(66951678,)
50905
