In [None]:
#| default_exp main
import sys
from pathlib import Path

In [None]:
# Put the parent of the current working directory first on sys.path so that
# imports resolve against it (presumably the project root — confirm where the notebook runs from).
sys.path.insert(0, str(Path().cwd().parent))
# Load IPython's autoreload extension in mode 2, so imported modules are
# reloaded before code is executed and edits to them are picked up.
%load_ext autoreload
%autoreload 2

# Principal
> Este módulo concentra funções auxiliares específicas que filtram os dados do banco com campos e formatação de interesse para determinadas aplicações, como, por exemplo, o [appAnalise](https://github.com/EricMagalhaesDelgado/appAnalise).

In [None]:
#| export
import os
from pathlib import Path
import json
from typing import Union
from datetime import datetime

import pandas as pd
from fastcore.test import *
from rich import print
import pyodbc
from pymongo import MongoClient
from dotenv import load_dotenv, find_dotenv

from extracao.reading import read_base, read_aero
from extracao.format import merge_on_frequency, _filter_matlab, _format_matlab

load_dotenv(find_dotenv())

True

In [None]:
#| export
def get_db(
    path: Union[str, Path],  # Folder where the files are saved
    connSQL: pyodbc.Connection = None,  # SQL Server connection object
    clientMongoDB: MongoClient = None,  # MongoDB client object
) -> pd.DataFrame:  # DataFrame with the merged Anatel and Aeronautics databases
    """Read (and optionally update) the Anatel databases, merge the Aeronautics data, save and return the result.

    Both connection objects are forwarded to `read_base`. The Aeronautics data
    is requested with `update=True` only when both `connSQL` and
    `clientMongoDB` are truthy.

    Side effects inside `path` (created if it does not exist):

    * `AnatelDB.parquet.gzip` is (over)written with the merged DataFrame.
    * `VersionFile.json` is read, its `"anateldb"` entry is updated with the
      `ANATEL`, `AERONAUTICA` and `ReleaseDate` timestamps, and it is written
      back. The timestamps are the wall-clock time of this run at each step.

    Raises:
        FileNotFoundError: if `VersionFile.json` does not already exist in `path`.
        KeyError: if `VersionFile.json` has no `"anateldb"` entry.
    """
    timestamp_format = "%d/%m/%Y %H:%M:%S"

    dest = Path(path)
    dest.mkdir(parents=True, exist_ok=True)

    print(":scroll:[green]Lendo as bases de dados da Anatel...")
    df = read_base(path, connSQL, clientMongoDB)
    df = _filter_matlab(df)
    mod_times = {"ANATEL": datetime.now().strftime(timestamp_format)}

    print(":airplane:[blue]Requisitando os dados da Aeronáutica.")
    # Only refresh the Aeronautics data when both database connections were supplied.
    update = all([connSQL, clientMongoDB])
    aero = read_aero(path, update=update)
    mod_times["AERONAUTICA"] = datetime.now().strftime(timestamp_format)

    print(":spoon:[yellow]Mesclando os dados da Aeronáutica.")
    df = _format_matlab(merge_on_frequency(df, aero))

    print(":card_file_box:[green]Salvando os arquivos...")
    df.to_parquet(dest / "AnatelDB.parquet.gzip", compression="gzip", index=False)

    version_file = dest / "VersionFile.json"
    # JSON is UTF-8 by specification; do not depend on the platform's locale encoding.
    versiondb = json.loads(version_file.read_text(encoding="utf-8"))
    mod_times["ReleaseDate"] = datetime.now().strftime(timestamp_format)
    versiondb["anateldb"].update(mod_times)
    # write_text opens, writes and closes the file, so the content is flushed
    # to disk before returning (the previous bare open("w") was never closed).
    version_file.write_text(json.dumps(versiondb), encoding="utf-8")

    print("Sucesso :zap:")
    return df

In [None]:
#|eval: false
import os
import warnings
from extracao.updates import connect_db
import numpy as np
from geopy.distance import geodesic


In [None]:
#|eval: false
# Silence every warning for the rest of the session (this also hides deprecation notices).
warnings.filterwarnings("ignore")

In [None]:
#|eval: false
# Data folder handed to get_db: the `dados` directory inside the parent of the current working directory.
folder = Path.cwd().parent / 'dados'

In [None]:
#|eval: false
# Open a connection through the project's connect_db helper
# (presumably the SQL Server connection expected by get_db — confirm in extracao.updates).
conn = connect_db()
# The MongoDB connection string comes from the MONGO_URI environment variable;
# a missing variable raises KeyError here.
uri = os.environ['MONGO_URI']
mongo_client = MongoClient(uri)
# Ask the server for its build information; as the cell's last expression the result is displayed.
mongo_client.server_info()

{'version': '4.0.5',
 'gitVersion': '3739429dd92b92d1b0ab120911a23d50bf03c412',
 'targetMinOS': 'Windows 7/Windows Server 2008 R2',
 'modules': [],
 'allocator': 'tcmalloc',
 'javascriptEngine': 'mozjs',
 'sysInfo': 'deprecated',
 'versionArray': [4, 0, 5, 0],
 'openssl': {'running': 'Windows SChannel'},
 'buildEnvironment': {'distmod': '2008plus-ssl',
  'distarch': 'x86_64',
  'cc': 'cl: Microsoft (R) C/C++ Optimizing Compiler Version 19.00.24223 for x64',
  'ccflags': '/nologo /EHsc /W3 /wd4355 /wd4800 /wd4267 /wd4244 /wd4290 /wd4068 /wd4351 /wd4373 /we4013 /we4099 /we4930 /WX /errorReport:none /MD /O2 /Oy- /bigobj /utf-8 /Zc:rvalueCast /Zc:strictStrings /volatile:iso /Gw /Gy /Zc:inline',
  'cxx': 'cl: Microsoft (R) C/C++ Optimizing Compiler Version 19.00.24223 for x64',
  'cxxflags': '/TP',
  'linkflags': '/nologo /DEBUG /INCREMENTAL:NO /LARGEADDRESSAWARE /OPT:REF',
  'target_arch': 'x86_64',
  'target_os': 'windows'},
 'bits': 64,
 'debug': False,
 'maxBsonObjectSize': 16777216,
 '

In [None]:
#|eval: false
# Build the database without passing connections (connSQL and clientMongoDB stay None).
df = get_db(folder)
# Alternative call that forwards the connections created earlier in this notebook:
# df = get_db(folder, conn, mongo_client)

In [None]:
#|eval: false
from pandas_profiling import ProfileReport
# Cast Frequency to a categorical dtype (overwrites the column on df) —
# presumably so the profiler treats it as a category rather than a number; confirm.
df['Frequency'] = df['Frequency'].astype('category')
# Build the profiling report with the settings in report_config.yaml (a relative path).
profile = ProfileReport(df, config_file='report_config.yaml')
# Display the report inside the notebook output.
profile.to_notebook_iframe()

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]