In [None]:
#default_exp filter
# Load IPython's autoreload extension and set mode 2, which re-imports
# modules before each cell executes so edits to the package are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Filtragem
> Este módulo concentra funções auxiliares específicas que filtram os dados do banco com campos e formatação de interesse para aplicações específicas, como o [appAnalise](https://github.com/EricMagalhaesDelgado/appAnalise), por exemplo.

In [None]:
#export
from pathlib import Path
import json
import pandas as pd
from datetime import datetime
from anateldb.query import *
from anateldb.constants import console, APP_ANALISE
from unidecode import unidecode
from fastcore.test import *
from fastcore.script import call_parse, Param, store_true, bool_arg
from pyarrow import ArrowInvalid
from rich import print

In [None]:
#export
def bump_version(version, part=2):
    """Increment one component of a dotted version string.

    The component at index `part` is increased by one and every component
    after it is reset to '0'. Works for any number of dot-separated
    components, not only three.

    Args:
        version: Dotted version string whose components are integers,
            e.g. '1.2.3'.
        part: Index of the component to bump (default 2, the third
            component). Negative indices count from the end.

    Returns:
        The new version string with the same number of components.

    Raises:
        IndexError: If `part` is outside the range of components.
        ValueError: If the selected component is not an integer.
    """
    pieces = version.split('.')
    # Indexing with the raw `part` keeps the IndexError for out-of-range values.
    pieces[part] = str(int(pieces[part]) + 1)
    # Normalise a negative index so that only the components *after* the
    # bumped one are reset (a raw negative start would reset from index 0).
    first_reset = part % len(pieces) + 1
    pieces[first_reset:] = ['0'] * (len(pieces) - first_reset)
    return '.'.join(pieces)

In [None]:
#export
@call_parse
def formatar_db(
    path: Param("Pasta onde salvar os arquivos", str),
    up_stel: Param("Atualizar a base do Stel", store_true) = False,
    up_radcom: Param("Atualizar a base do Radcom", store_true) = False,
    up_mosaico: Param("Atualizar a base do Mosaico", store_true) = False,
) -> None:
    """Export the merged database to the files consumed by appAnalise.

    Reads the base with `read_base`, builds a free-text description column,
    keeps only the export columns (renamed to `APP_ANALISE`) and compares the
    result with the cached `AnatelDB.fth` found in `path`. When they differ,
    the Feather and Excel files are rewritten and the third version component
    in `VersionFile.json` is bumped. The release date in that file is
    refreshed on every run.

    Args:
        path: Folder holding the database files; created if missing.
        up_stel: Forwarded to `read_base` to refresh the Stel base.
        up_radcom: Forwarded to `read_base` to refresh the Radcom base.
        up_mosaico: Forwarded to `read_base` to refresh the Mosaico base.

    Raises:
        FileNotFoundError: If `VersionFile.json` does not exist in `path`.
    """
    dest = Path(path)
    dest.mkdir(parents=True, exist_ok=True)
    feather_file = dest / 'AnatelDB.fth'
    excel_file = dest / 'AnatelDB.xlsx'
    version_file = dest / 'VersionFile.json'
    time = datetime.today().strftime("%d/%m/%Y %H:%M:%S")
    console.print(":scroll:[green]Lendo as bases de dados...")
    rd = read_base(path, up_stel, up_radcom, up_mosaico)
    rd['Validade_RF'] = rd.Validade_RF.astype('string').fillna('')
    rd['Data_Ato'] = rd.Data_Ato.astype('string').fillna('')
    rd['Status'] = rd.Status.astype('string')
    rd['Classe'] = rd.Classe.astype('string')

    # Append the class to every non-empty status. The mask is computed once,
    # before the assignment, so both sides select the same rows.
    has_status = rd['Status'] != ''
    rd.loc[has_status, 'Status'] = (
        rd.loc[has_status, 'Status'] + ", " + rd.loc[has_status, 'Classe']
    )
    # Rows whose status is missing at this point fall back to the service number.
    no_status = rd['Status'].isna()
    rd.loc[no_status, 'Status'] = rd.loc[no_status, 'Num_Servi√ßo'].astype('string')

    # Free-text description; missing pieces are rendered as '-'.
    rd["Descri√ß√£o"] = (
        rd.Status.astype('string').fillna('-')
        + ", "
        + rd.Entidade.astype('string').fillna('-').str.title()
        + " ("
        + rd.Fistel.astype('string').fillna('-')
        + ", "
        + rd["N√∫mero_da_Esta√ß√£o"].astype('string').fillna('-')
        + "), "
        + rd["Munic√≠pio"].astype('string').fillna('-')
        + "/"
        + rd.UF.astype('string').fillna('-')
    )

    export_columns = [
        "Frequ√™ncia",
        "Latitude",
        "Longitude",
        "Descri√ß√£o",
        "Num_Servi√ßo",
        "N√∫mero_da_Esta√ß√£o",
        "Classe_Emiss√£o",
        "Largura_Emiss√£o",
        "Num_Ato",
        "Data_Ato",
        "Validade_RF",
    ]
    rd = rd.loc[:, export_columns]
    df_optimize(rd, exclude=['Frequ√™ncia'])
    rd.columns = APP_ANALISE
    console.print(":card_file_box:[green]Salvando os arquivos...")
    d = json.loads(version_file.read_text())
    try:
        cache = pd.read_feather(feather_file)
    except (ArrowInvalid, FileNotFoundError):
        # No usable cache: compare against an empty frame so the export is written.
        cache = pd.DataFrame()

    if not rd.equals(cache):
        console.print(":new: [green] A base de dados mudou desde a √∫ltima atualiza√ß√£o! Salvando o novo arquivo e atualizando a vers√£o")
        # The extraction timestamp is stored as the only column header of an empty sheet.
        date = pd.DataFrame(columns=[time])
        try:
            # The handle is closed before any cleanup so the file can be
            # removed even on platforms that lock open files.
            with feather_file.open('bw') as handle:
                rd.to_feather(handle)
        except ArrowInvalid:
            feather_file.unlink()
        with pd.ExcelWriter(excel_file) as workbook:
            date.to_excel(workbook, sheet_name="ExtractDate", index=False)
            rd.to_excel(workbook, sheet_name="DataBase", index=False)
        d['anateldb']['Version'] = bump_version(d['anateldb']['Version'])
    else:
        console.print(":recycle: [green] A base de dados n√£o mudou desde a √∫ltima atualiza√ß√£o, a vers√£o n√£o ser√° atualizada, somente a data de verifica√ß√£o")

    console.print("Sucesso :zap:")
    d['anateldb']['ReleaseDate'] = datetime.today().strftime('%d/%m/%Y')
    # Close the file explicitly so the JSON is flushed to disk on return.
    with version_file.open('w') as handle:
        json.dump(d, handle)

In [None]:
# Machine-specific location of the AnatelDatabase folder. When it does not
# exist, the next cell clones the repository into the working directory.
pasta = Path(r'G:\Meu Drive\repos\Code\AnatelDatabase')

In [None]:
if not pasta.exists():
    # Fall back to a clone of the data repository in the working directory.
    import subprocess
    pasta = Path.cwd() / 'AnatelDatabase'
    # Skip the clone when a previous run already created the folder, and let
    # a failed clone raise instead of leaving `pasta` pointing at nothing.
    if not pasta.exists():
        result = subprocess.run(
            ['git', 'clone', 'https://github.com/ronaldokun/AnatelDatabase.git', str(pasta)],
            check=True,
        )

In [None]:
# Load the database from `pasta`; only the folder is passed, so read_base
# uses its defaults for the remaining arguments.
base = read_base(pasta)

In [None]:
# Column dtypes and non-null counts of the loaded base.
base.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 881393 entries, 0 to 881392
Data columns (total 16 columns):
 #   Column             Non-Null Count   Dtype   
---  ------             --------------   -----   
 0   Frequ√™ncia         881393 non-null  float64 
 1   Num_Servi√ßo        881393 non-null  int32   
 2   Status             881393 non-null  category
 3   Classe             881369 non-null  category
 4   Entidade           881376 non-null  category
 5   Fistel             881393 non-null  category
 6   N√∫mero_da_Esta√ß√£o  881393 non-null  int32   
 7   Munic√≠pio          881388 non-null  category
 8   UF                 881391 non-null  category
 9   Latitude           881393 non-null  float32 
 10  Longitude          881393 non-null  float32 
 11  Validade_RF        879054 non-null  category
 12  Num_Ato            871992 non-null  category
 13  Data_Ato           871002 non-null  category
 14  Classe_Emiss√£o     876748 non-null  category
 15  Largura_Emiss√£o    876748 

In [None]:
# Number of rows per service number.
base.Num_Servi√ßo.value_counts()

19     638599
53     134004
46      37912
11      13714
604     11970
800      9444
801      8262
125      5553
231      4645
35       4317
230      4013
507      1987
124      1355
205      1173
17        992
78        984
247       716
79        526
33        372
64        237
12        195
108       188
248       148
27         40
167        21
29         10
26         10
132         4
15          2
Name: Num_Servi√ßo, dtype: int64

In [None]:
# formatar_db returns None, so it is called for its side effects only: it
# writes the AnatelDB files and updates VersionFile.json inside `pasta`.
formatar_db(pasta)

In [None]:
# Read the AnatelDB.fth file from `pasta` to inspect the exported table.
d = pd.read_feather(pasta / 'AnatelDB.fth')

In [None]:
# Column dtypes and non-null counts of the exported table.
d.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 881393 entries, 0 to 881392
Data columns (total 11 columns):
 #   Column       Non-Null Count   Dtype   
---  ------       --------------   -----   
 0   Frequency    881393 non-null  float64 
 1   Latitude     881393 non-null  float32 
 2   Longitude    881393 non-null  float32 
 3   Description  881393 non-null  string  
 4   Service      881393 non-null  int32   
 5   Station      881393 non-null  int32   
 6   Class        876748 non-null  category
 7   BW           876748 non-null  category
 8   ActNumber    871992 non-null  category
 9   ActDate      881393 non-null  string  
 10  ValRF        881393 non-null  string  
dtypes: category(3), float32(2), float64(1), int32(2), string(3)
memory usage: 44.9 MB


In [None]:
# Index labels of rows that have no description although their station
# number is not -1.
n = d.index[d.Description.isna() & d.Station.ne(-1)]

In [None]:
# Display the rows whose index labels are in `n`.
d.loc[n]

Unnamed: 0,Frequency,Latitude,Longitude,Description,Service,Station,Class,BW,ActNumber,ActDate,ValRF


In [None]:
# Spot-check the rows of a single station, selected by its station number.
d.loc[d.Station == 322687896]

Unnamed: 0,Frequency,Latitude,Longitude,Description,Service,Station,Class,BW,ActNumber,ActDate,ValRF
2022,1.11,-7.119167,-34.901669,"AM-C4, B, - (07008001908, 322687896), Jo√£o Pes...",205,322687896,,,5538,,2026-02-19


In [None]:
# Run nbdev's notebook-to-module conversion for the project notebooks.
from nbdev.export import notebook2script
notebook2script()

Converted constants.ipynb.
Converted filter.ipynb.
Converted index.ipynb.
Converted queries.ipynb.
