In [1]:
#default_exp filter

In [1]:
#hide
import sys
from pathlib import Path

# Insert in Path Project Directory
sys.path.insert(0, str(Path().cwd().parent))

# Filtragem
> Este m√≥dulo concentra fun√ß√µes auxiliares espec√≠ficas que filtram os dados do banco com campos e formata√ß√£o de interesse para aplica√ß√µes espec√≠ficas como o [appAnalise](https://github.com/EricMagalhaesDelgado/appAnalise) por exemplo.

In [2]:
#export
from pathlib import Path
import json
from datetime import datetime

import pandas as pd
from fastcore.test import *
from fastcore.script import call_parse, Param, store_true
from pyarrow import ArrowInvalid
from geopy.distance import geodesic
from rich.console import Console

from anateldb.constants import APP_ANALISE
from anateldb.read import read_base, read_aero
from anateldb.merge import merge_aero
from anateldb.format import df_optimize


In [3]:
#export
def bump_version(version, part=2):
    version = version.split(".")
    version[part] = str(int(version[part]) + 1)
    for i in range(part + 1, 3):
        version[i] = "0"
    return ".".join(version)

In [4]:
#export
def get_modtimes(pasta):
    """
    Retorna a data de modifica√ß√£o dos arquivos de dados
    """
    # Pasta
    pasta = Path(pasta)
    if not pasta.is_dir():
        raise FileNotFoundError(f"Pasta {pasta} n√£o encontrada")
    # Arquivos
    if not (stel := pasta / 'stel.fth').is_file():
        raise FileNotFoundError(f"Arquivo {stel} n√£o encontrado")
    if not (radcom := pasta / 'radcom.fth').is_file():
        raise FileNotFoundError(f"Arquivo {radcom} n√£o encontrado")
    if not (mosaico := pasta / 'mosaico.fth').is_file():
        raise FileNotFoundError(f"Arquivo {mosaico} n√£o encontrado")
    if not (icao := pasta / 'icao.csv').is_file():  # ICAO
        raise FileNotFoundError(f"Arquivo {icao} n√£o encontrado")
    if not (pmec := pasta / 'aisw.csv').is_file():  # PMEC
        raise FileNotFoundError(f"Arquivo {pmec} n√£o encontrado")  
    if not (geo := pasta / 'aisg.csv').is_file():  # GEO
        raise FileNotFoundError(f"Arquivo {geo} n√£o encontrado")
    # Modifica√ß√£o
    mod_stel = datetime.fromtimestamp(stel.stat().st_mtime).strftime("%d/%m/%Y %H:%M:%S")
    mod_radcom = datetime.fromtimestamp(radcom.stat().st_mtime).strftime("%d/%m/%Y %H:%M:%S")
    mod_mosaico = datetime.fromtimestamp(mosaico.stat().st_mtime).strftime("%d/%m/%Y %H:%M:%S")
    mod_icao = pd.read_excel(icao, engine='openpyxl', sheet_name='ExtractDate').columns[0]
    mod_aisw = pd.read_excel(pmec, engine='openpyxl', sheet_name='ExtractDate').columns[0]
    mod_aisg = pd.read_excel(geo, engine='openpyxl', sheet_name='ExtractDate').columns[0]
    return {'STEL': mod_stel, 
            'SRD': mod_radcom,
            'MOSAICO': mod_mosaico,
            'ICAO': mod_icao, 
            'AISW': mod_aisw, 
            'AISG': mod_aisg}
    
@call_parse
def formatar_db(
    path: Param("Pasta onde salvar os arquivos", str),
    up_base: Param("Atualizar as bases da Anatel", store_true) = False,
    up_icao: Param("Atualizar a base do ICAO", store_true) = False,
    up_pmec: Param("Atualizar a base do PMEC", store_true) = False,
    up_geo: Param("Atualizar a base do Geo", store_true) = False,
) -> None:
    dest = Path(path)
    dest.mkdir(parents=True, exist_ok=True)
    console = Console()
    console.print(":scroll:[green]Lendo as bases de dados da Anatel...")
    rd = read_base(path, up_base)
    rd["Status"] = rd.Status.astype("string")
    rd["Classe"] = rd.Classe.astype("string")
    rd.loc[rd["Classe"].notna(), "Status"] = (
        rd.loc[rd["Classe"].notna(), "Status"]
        + ", "
        + rd.loc[rd["Classe"].notna(), "Classe"]
    )
    rd.loc[rd.Classe == '-1', 'Classe'] = pd.NA
    rd['Classe'] = rd['Classe'].fillna('')
    rd["Descri√ß√£o"] = (
        "["
        + rd.Fonte.astype("string")
        + "] "
        + rd.Status.astype("string").fillna("-")
        + ", "
        + rd.Entidade.astype("string").fillna("-").str.title()
        + " ("
        + rd.Fistel.astype("string").fillna("-")
        + ", "
        + rd["N√∫mero_da_Esta√ß√£o"].astype("string").fillna("-")
        + "), "
        + rd.Munic√≠pio.astype("string").fillna("-")
        + "/"
        + rd.UF.astype("string").fillna("-")
    )


    export_columns = [
        "Frequ√™ncia",
        "Latitude",
        "Longitude",
        "Descri√ß√£o",
        "Num_Servi√ßo",
        "N√∫mero_da_Esta√ß√£o",
        "Classe_Emiss√£o",
        "BW(kHz)",
    ]
    rd = rd.loc[:, export_columns]
    rd.columns = APP_ANALISE
    common, new = read_aero(path, up_icao, up_pmec, up_geo)
    rd = merge_aero(rd, common, new)
    rd = df_optimize(rd, exclude=["Frequency"])
    rd['Frequency'] = rd['Frequency'].astype('float')
    console.print(":card_file_box:[green]Salvando os arquivos...")
    d = json.loads((dest / "VersionFile.json").read_text())
    mod_times = get_modtimes(path)    
    mod_times['ReleaseDate'] = datetime.today().strftime("%d/%m/%Y %H:%M:%S") 
    with pd.ExcelWriter(f"{dest}/AnatelDB.xlsx", engine="xlsxwriter") as workbook:
        rd.to_excel(workbook, sheet_name="DataBase", index=False)
    d["anateldb"]["Version"] = bump_version(d["anateldb"]["Version"])
    d['anateldb'].update(mod_times)
    json.dump(d, (dest / "VersionFile.json").open("w"))
    Path(dest / ".version").write_text(f"v{d['anateldb']['Version']}")
    console.print("Sucesso :zap:")


In [5]:
pasta = Path.cwd().parent / 'dados'

In [7]:
db = pd.read_excel(pasta / 'AnatelDB.xlsx', engine='openpyxl', sheet_name='DataBase')

In [8]:
db.to_parquet(pasta / 'AnatelDB.parquet.gzip', compression='gzip')

In [12]:
b = pd.read_feather(pasta / 'base.fth')

In [20]:
sp = b[(b.Frequ√™ncia == 87.5) & (b.UF == 'SP')]

In [24]:
sp

Unnamed: 0,Frequ√™ncia,Num_Servi√ßo,Status,Classe,Entidade,Fistel,N√∫mero_da_Esta√ß√£o,Munic√≠pio,UF,Latitude,...,Num_Ato,Data_Ato,Classe_Emiss√£o,Largura_Emiss√£o,Fonte,Fase,Situa√ß√£o,CNPJ,Unidade,BW(kHz)
15814,87.5,231,RADCOM,,ASSOCIA√á√ÉO CULTURAL COMUNITARIA NOSSA SENHORA ...,50401292460,684667908,Guarulhos,SP,-23.462778,...,-1,,,256K,SRD,3,,01788648000137,MHz,256.0
15815,87.5,231,RADCOM,,ASSOCIACAO DE DIFUSAO COMUNITARIA LINDOIA-SP,50403983878,692713832,Lind√≥ia,SP,-22.528055,...,-1,,,256K,SRD,3,,06158761000152,MHz,256.0
15816,87.5,231,RADCOM,,ASSOCIACAO CULTURAL COMUNITARIA PONTE ALTA,50405748515,693052767,Guarulhos,SP,-23.407499,...,-1,,,256K,SRD,3,,03592434000116,MHz,256.0
15817,87.5,231,RADCOM,3-A,ASS. ASTRAL DE DIFUSAO CULT. COMUNIT. DE JANDIRA,50403982715,690851448,Jandira,SP,-23.537500,...,-1,,,256K,SRD,3,A,01558787000174,MHz,256.0
15818,87.5,231,RADCOM,3-B,ASSOCIACAO CULTURAL E ARTISTICA DO JARDIM ITAQUA,50403897360,689432356,Itaquaquecetuba,SP,-23.451389,...,-1,,,256K,SRD,3,B,02973795000140,MHz,256.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15986,87.5,231,RADCOM,,ASSOCIACAO CUTL. AMIGOS DO BROOKLIN,50405699395,692006435,S√£o Paulo,SP,-23.605278,...,-1,,,256K,SRD,3,,03865126000117,MHz,256.0
15987,87.5,231,RADCOM,,UNAS-UNIAO DE N. ASS. E SOCIED. DE HELIOPOLIS ...,50404976956,691411433,S√£o Paulo,SP,-23.616388,...,-1,,,256K,SRD,3,,38883732000140,MHz,256.0
15988,87.5,231,RADCOM,3-B,ASSOCIACAO CIDADA,50405698666,692006885,S√£o Paulo,SP,-23.576389,...,-1,,,256K,SRD,3,B,08668928000150,MHz,256.0
15989,87.5,231,RADCOM,,INSTITUTO MARIA JOSE AMORIM - IMJA,50406618275,692976515,S√£o Paulo,SP,-23.489166,...,-1,,,256K,SRD,3,,54604251000104,MHz,256.0


In [26]:
for row in sp.itertuples():
    sp.loc[row.Index, 'Dist√¢ncia_RFeye_Sorocaba'] = geodesic((row.Latitude, row.Longitude), (-23.487039, -47.449856)).km

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sp.loc[row.Index, 'Dist√¢ncia_RFeye_Sorocaba'] = geodesic((row.Latitude, row.Longitude), (-23.487039, -47.449856)).km


In [29]:
sp.sort_values(by='Dist√¢ncia_RFeye_Sorocaba', ascending=True).to_excel(pasta / '87.5_com_Dist√¢ncia.xlsx')

In [8]:
# if not pasta.exists():
#     import subprocess
#     result = subprocess.run(['git', 'clone', 'https://github.com/ronaldokun/AnatelDatabase.git'])
#     pasta = Path.cwd() / 'AnatelDatabase'

In [21]:
rd = formatar_db(pasta)

In [22]:
rd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 885407 entries, 0 to 885406
Data columns (total 8 columns):
 #   Column       Non-Null Count   Dtype   
---  ------       --------------   -----   
 0   Frequency    885407 non-null  float64 
 1   Latitude     885407 non-null  category
 2   Longitude    885407 non-null  category
 3   Description  885407 non-null  category
 4   Service      885407 non-null  category
 5   Station      885407 non-null  category
 6   Class        885407 non-null  category
 7   BW           885407 non-null  float32 
dtypes: category(6), float32(1), float64(1)
memory usage: 55.3 MB


In [12]:
base = read_base(pasta)

In [13]:
base.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 883452 entries, 0 to 883451
Data columns (total 22 columns):
 #   Column             Non-Null Count   Dtype   
---  ------             --------------   -----   
 0   Frequ√™ncia         883452 non-null  float64 
 1   Num_Servi√ßo        883452 non-null  int32   
 2   Status             883452 non-null  category
 3   Classe             880762 non-null  category
 4   Entidade           883434 non-null  category
 5   Fistel             883452 non-null  category
 6   N√∫mero_da_Esta√ß√£o  883452 non-null  int32   
 7   Munic√≠pio          883447 non-null  category
 8   UF                 883450 non-null  category
 9   Latitude           883452 non-null  float32 
 10  Longitude          883452 non-null  float32 
 11  Validade_RF        880753 non-null  category
 12  Num_Ato            869365 non-null  category
 13  Data_Ato           868406 non-null  category
 14  Classe_Emiss√£o     883452 non-null  category
 15  Largura_Emiss√£o    883452 

In [None]:
import hdf5storage as hdf

In [None]:
db = hdf.loadmat(f'{pasta}/AnatelDB.mat')

In [None]:
db = hdf.read(path='/c/Users/rsilva/db/', filename='AnatelDB.mat')

FileNotFoundError: [Errno 2] Unable to open file (unable to open file: name = 'AnatelDB.mat', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [None]:
# import seaborn as sns
# import matplotlib.pyplot as plt


# ax = sns.countplot(x="Num_Servi√ßo", data=base)

# plt.rcParams["figure.figsize"] = [30, 20]
# plt.rcParams["figure.autolayout"] = True

# for p in ax.patches:
#     ax.annotate('{:.1f}'.format(int(p.get_height())), (p.get_x()+0.05, p.get_height()+0.02))
    
# plt.title(f"Total de Entidades: {base.shape[0]}")
# plt.xlabel("C√≥digo Servi√ßo")

# plt.savefig("Stats.png")

In [None]:
base = read_base(pasta)

In [None]:
base.head()

Unnamed: 0,Frequ√™ncia,Num_Servi√ßo,Status,Classe,Entidade,Fistel,N√∫mero_da_Esta√ß√£o,Munic√≠pio,UF,Latitude,Longitude,Validade_RF,Num_Ato,Data_Ato,Fonte,Classe_Emiss√£o,Largura_Emiss√£o
0,0.028,19,L,OP,FURNAS CENTRAIS ELETRICAS S A,1030052263,1557670,Nova Igua√ßu,RJ,-22.662777,-43.476387,2033-08-17,-1,,STEL,J9E,8K00
1,0.0285,19,L,OP,COMPANHIA DE GERA√á√ÉO E TRANSMISS√ÉO DE ENERGIA ...,50420217282,1494686,Joinville,SC,-26.2925,-48.887222,2025-08-31,-1,,STEL,R3E,2K50
2,0.03,19,L,OP,FURNAS CENTRAIS ELETRICAS S A,1030052263,859966,Arapor√£,MG,-18.41,-49.099998,2033-08-17,-1,,STEL,J3E,1K00
3,0.03,19,L,OP,FURNAS CENTRAIS ELETRICAS S A,1030052263,859753,Campinas,SP,-22.774166,-47.004444,2033-08-17,-1,,STEL,J3E,1K00
4,0.03,19,L,OP,FURNAS CENTRAIS ELETRICAS S A,1030052263,1557823,S√£o Paulo,SP,-23.441668,-46.590832,2033-08-17,-1,,STEL,J3E,1K00


In [None]:
subset = ['Frequ√™ncia', 'Num_Servi√ßo', 'Status', 'Classe', 'Fistel', 'N√∫mero_da_Esta√ß√£o', 'Latitude', 'Longitude', 'Classe_Emiss√£o', 'Largura_Emiss√£o']
# double = base.duplicated(subset=subset, keep=False)
# base[double]

base.drop_duplicates(subset, keep='first').shape

(881692, 17)

In [None]:
subset = base.columns[:-2]
# double = base.duplicated(subset=subset, keep='first')
# base[double]

base.drop_duplicates(base.columns[:-2], keep='first').reset_index(drop=True)
# base.shape

Unnamed: 0,Frequ√™ncia,Num_Servi√ßo,Status,Classe,Entidade,Fistel,N√∫mero_da_Esta√ß√£o,Munic√≠pio,UF,Latitude,Longitude,Validade_RF,Num_Ato,Data_Ato,Fonte,Classe_Emiss√£o,Largura_Emiss√£o
26,0.038,19,L,OP,LIGHT SERVICOS DE ELETRICIDADE S A,01030063621,756830,Pira√≠,RJ,-22.629444,-43.895832,2028-12-12,-1,,STEL,N0N,1K00
33,0.040,19,L,OP,LIGHT SERVICOS DE ELETRICIDADE S A,01030063621,758124,Petr√≥polis,RJ,-22.433332,-43.166668,2028-12-12,-1,,STEL,A3E,5K00
37,0.040,19,L,OP,LIGHT SERVICOS DE ELETRICIDADE S A,01030063621,758051,Carmo,RJ,-21.840000,-42.570000,2028-12-12,-1,,STEL,N0N,1K00
58,0.044,19,L,OP,LIGHT SERVICOS DE ELETRICIDADE S A,01030063621,814180,Volta Redonda,RJ,-22.513611,-44.089722,2028-12-12,-1,,STEL,A3E,5K00
62,0.045,19,L,OP,LIGHT SERVICOS DE ELETRICIDADE S A,01030063621,749680,Rio de Janeiro,RJ,-22.966667,-43.233334,2028-12-12,-1,,STEL,A3E,5K00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
883461,82125.000,19,L,FX,TIM S A,50417425295,1007302558,Ribeir√£o das Neves,MG,-19.768250,-44.082527,2032-08-31,-1,,STEL,Q7W,62M5
883467,82125.000,19,L,FX,TIM S A,50417425295,1005319658,Uberl√¢ndia,MG,-18.942499,-48.300556,2032-08-31,-1,,STEL,Q7W,62M5
883481,82125.000,19,L,FX,TIM S A,50417425295,1005325984,Arax√°,MG,-19.582222,-46.953888,2032-08-31,-1,,STEL,Q7W,62M5
883514,82125.000,19,L,FX,TIM S A,50417425295,1008765390,Aparecida de Goi√¢nia,GO,-16.805166,-49.329556,2032-08-31,-1,,STEL,Q7W,62M5


In [None]:
base[base.N√∫mero_da_Esta√ß√£o == 1005304065]

Unnamed: 0,Frequ√™ncia,Num_Servi√ßo,Status,Classe,Entidade,Fistel,N√∫mero_da_Esta√ß√£o,Munic√≠pio,UF,Latitude,Longitude,Validade_RF,Num_Ato,Data_Ato,Fonte,Classe_Emiss√£o,Largura_Emiss√£o
744796,8118.32,19,L,FX,TIM S A,50417425295,1005304065,Sabar√°,MG,-19.897028,-43.807056,2032-08-31,-1,,STEL,D7W,29M7
825221,19343.0,19,L,FX,TIM S A,50417425295,1005304065,Sabar√°,MG,-19.897028,-43.807056,2032-08-31,-1,,STEL,D7W,55M0
831552,19453.0,19,L,FX,TIM S A,50417425295,1005304065,Sabar√°,MG,-19.897028,-43.807056,2032-08-31,-1,,STEL,D7W,55M0
865366,23275.0,19,L,FX,TIM S A,50417425295,1005304065,Sabar√°,MG,-19.897028,-43.807056,2032-08-31,-1,,STEL,D7W,56M0
881466,82125.0,19,L,FX,TIM S A,50417425295,1005304065,Sabar√°,MG,-19.897028,-43.807056,2032-08-31,-1,,STEL,G7W,2G00
881467,82125.0,19,L,FX,TIM S A,50417425295,1005304065,Sabar√°,MG,-19.897028,-43.807056,2032-08-31,-1,,STEL,Q7W,62M5


In [10]:
from nbdev.export import notebook2script; notebook2script()

Converted constants.ipynb.
Converted filter.ipynb.
Converted index.ipynb.
Converted queries.ipynb.
