In [1]:
#default_exp filter

In [1]:
#hide
import sys
from pathlib import Path

# Put the project root (the parent of the current working directory) first on the import path
sys.path.insert(0, str(Path.cwd().parent))

# Filtragem
> Este módulo concentra funções auxiliares específicas que filtram os dados do banco com campos e formatação de interesse para aplicações específicas, como o [appAnalise](https://github.com/EricMagalhaesDelgado/appAnalise), por exemplo.

In [2]:
#export
import os
from pathlib import Path
import json
import pandas as pd
from datetime import datetime
from openpyxl import load_workbook
from anateldb.query import *
from anateldb.constants import console, APP_ANALISE
from fastcore.test import *
from fastcore.script import call_parse, Param, store_true
from pyarrow import ArrowInvalid
from rich import print

In [3]:
#export
def bump_version(version, part=2):
    """Increment one component of a dot-separated version string.

    The component at index `part` is incremented by one and every component
    after it is reset to '0'.

    version: dot-separated string of integers, e.g. '1.2.3'
    part: index of the component to bump (default 2, i.e. the third one);
          negative indexes count from the end

    Returns the new version string. Raises IndexError when `part` is out of
    range and ValueError when the selected component is not an integer.
    """
    parts = version.split('.')
    # Index with the caller's value so an out-of-range `part` still raises IndexError
    parts[part] = str(int(parts[part]) + 1)
    # Reset every lower-order component, however many the version has (not
    # only up to a hard-coded third one). The modulo maps a negative `part`
    # to its real position so the component just bumped is never zeroed.
    for i in range(part % len(parts) + 1, len(parts)): parts[i] = '0'
    return '.'.join(parts)

In [4]:
#exporti
def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None,
                       truncate_sheet=False, 
                       **to_excel_kwargs):
    """
    Append a DataFrame [df] to existing Excel file [filename]
    into [sheet_name] Sheet.
    If [filename] doesn't exist, then this function will create it.

    @param filename: File path or existing ExcelWriter
                     (Example: '/path/to/file.xlsx')
    @param df: DataFrame to save to workbook
    @param sheet_name: Name of sheet which will contain DataFrame.
                       (default: 'Sheet1')
    @param startrow: upper left cell row to dump data frame.
                     Per default (startrow=None) calculate the last row
                     in the existing DF and write to the next row...
    @param truncate_sheet: truncate (remove and recreate) [sheet_name]
                           before writing DataFrame to Excel file
    @param to_excel_kwargs: arguments which will be passed to `DataFrame.to_excel()`
                            [can be a dictionary]; an [engine] entry is
                            ignored when the file already exists
    @return: None

    Usage examples:

    >>> append_df_to_excel('d:/temp/test.xlsx', df)

    >>> append_df_to_excel('d:/temp/test.xlsx', df, header=None, index=False)

    >>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
                           index=False)

    >>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2', 
                           index=False, startrow=25)

    (c) [MaxU](https://stackoverflow.com/users/5741205/maxu?tab=profile)
    """
    # Excel file doesn't exist - write it from scratch and exit
    if not Path(filename).is_file():
        df.to_excel(
            filename,
            sheet_name=sheet_name, 
            startrow=startrow if startrow is not None else 0, 
            **to_excel_kwargs)
        return
    
    # the append path always uses openpyxl, so ignore [engine] if it was passed
    to_excel_kwargs.pop('engine', None)

    # The context manager saves the workbook on exit and also releases the
    # writer when an error is raised, replacing the explicit (deprecated)
    # `writer.save()` call.
    # NOTE(review): if_sheet_exists='replace' presumably makes pandas recreate
    # an existing [sheet_name] before writing, which would discard the rows
    # above [startrow] - confirm whether 'overlay' is what is wanted here.
    with pd.ExcelWriter(filename, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
        # open the existing workbook
        # NOTE(review): assigning `book` / `sheets` on the writer may not be
        # supported by newer pandas releases - confirm against the pinned version.
        writer.book = load_workbook(filename)
        book = writer.book

        sheet_exists = sheet_name in book.sheetnames

        # get the last row in the existing Excel sheet if it was not specified
        # explicitly; this is read before any truncation below, so a truncated
        # sheet is still written starting at its former last row
        if startrow is None and sheet_exists:
            startrow = book[sheet_name].max_row

        # truncate sheet: remove [sheet_name] and recreate it empty at the same position
        if truncate_sheet and sheet_exists:
            idx = book.sheetnames.index(sheet_name)
            book.remove(book.worksheets[idx])
            book.create_sheet(sheet_name, idx)

        # register the existing sheets with the writer
        writer.sheets = {ws.title: ws for ws in book.worksheets}

        # sheet did not exist and no row was requested: start at the top
        if startrow is None:
            startrow = 0

        # write out the new sheet
        df.to_excel(writer, sheet_name=sheet_name, startrow=startrow, **to_excel_kwargs)

In [5]:
#export
@call_parse
def formatar_db(
    path: Param("Pasta onde salvar os arquivos", str),
    up_stel: Param("Atualizar a base do Stel", store_true) = False, 
    up_radcom: Param("Atualizar a base do Radcom", store_true) = False,
    up_mosaico: Param("Atualizar a base do Mosaico", store_true) = False,
    up_icao: Param("Atualizar a base do ICAO", store_true) = False,
    up_pmec: Param("Atualizar a base do PMEC", store_true) = False,
    up_geo: Param("Atualizar a base do Geo", store_true) = False,
) -> None:
    """Read the databases, format them with the APP_ANALISE columns and save the result in `path`.

    The frame returned by `read_base` gets a combined description column, is
    reduced to the export columns, renamed to `APP_ANALISE`, merged with the
    frame returned by `read_aero` and passed through `df_optimize`. The result
    is compared with the cached `AnatelDB.fth` in `path`; when it differs, the
    feather and xlsx files are rewritten and the version stored in
    `VersionFile.json` is bumped. The release date in `VersionFile.json` is
    updated on every run.

    Raises FileNotFoundError when `VersionFile.json` does not exist in `path`.
    """
    dest = Path(path)
    dest.mkdir(parents=True, exist_ok=True)
    extraction_time = datetime.today().strftime("%d/%m/%Y %H:%M:%S")
    console.print(":scroll:[green]Lendo as bases de dados...")
    # NOTE(review): `up_icao` is passed both here and to `read_aero` below -
    # confirm that `read_base` really expects it as its last argument.
    rd = read_base(path, up_stel, up_radcom, up_mosaico, up_icao)
    rd['Validade_RF'] = rd.Validade_RF.astype('string').fillna('')
    rd['Data_Ato'] = rd.Data_Ato.astype('string').fillna('')
    rd['Status'] = rd.Status.astype('string')
    rd['Classe'] = rd.Classe.astype('string')

    # Rows with a non-empty status become "<Status>, <Classe>"
    has_status = rd['Status'] != ''
    rd.loc[has_status, 'Status'] = rd.loc[has_status, 'Status'] + ", " \
        + rd.loc[has_status, 'Classe']
    # Rows whose status is still missing fall back to the service number
    missing_status = rd['Status'].isna()
    rd.loc[missing_status, 'Status'] = rd.loc[missing_status, 'Num_Servi√ßo'].astype('string')

    # Single text column: "[Fonte] Status, Entidade (Fistel, station), city/UF",
    # with '-' standing in for missing values
    rd["Descri√ß√£o"] = (
        '[' + rd.Fonte.astype('string') + '] ' + 
        rd.Status.astype('string').fillna('-')
        + ", "
        + rd.Entidade.astype('string').fillna('-').str.title()
        + " ("
        + rd.Fistel.astype('string').fillna('-')
        + ", "
        + rd["N√∫mero_da_Esta√ß√£o"].astype('string').fillna('-')
        + "), "
        + rd["Munic√≠pio"].astype('string').fillna('-')
        + "/"
        + rd.UF.astype('string').fillna('-')
    )

    export_columns = [
        "Frequ√™ncia",
        "Latitude",
        "Longitude",
        "Descri√ß√£o",
        "Num_Servi√ßo",
        "N√∫mero_da_Esta√ß√£o",
        "Classe_Emiss√£o",
        "Largura_Emiss√£o",
    ]
    rd = rd.loc[:, export_columns]
    rd.columns = APP_ANALISE
    rd  = merge_aero(rd, read_aero(path, up_icao, up_pmec, up_geo))
    rd = df_optimize(rd, exclude=['Frequency'])
    console.print(":card_file_box:[green]Salvando os arquivos...")

    version_file = dest / 'VersionFile.json'
    version_info = json.loads(version_file.read_text())

    feather_file = Path(f"{dest}/AnatelDB.fth")
    try:
        cache = pd.read_feather(feather_file)
    except (ArrowInvalid, FileNotFoundError):
        # No usable cache: compare against an empty frame so the data is saved
        cache = pd.DataFrame()
    
    if not rd.equals(cache):
        console.print(":new: [green] A base de dados mudou desde a √∫ltima atualiza√ß√£o! Salvando o novo arquivo e atualizando a vers√£o")
        # Empty frame whose only column header is the extraction timestamp
        date = pd.DataFrame(columns=[extraction_time])
        try:
            # Close the handle before any clean-up so the file can be removed
            with feather_file.open('bw') as handle:
                rd.to_feather(handle)
        except ArrowInvalid:
            # Do not leave a partially written cache behind
            feather_file.unlink()
        with pd.ExcelWriter(f"{dest}/AnatelDB.xlsx", engine='xlsxwriter') as workbook:
            date.to_excel(workbook, sheet_name="ExtractDate", index=False)
            rd.to_excel(workbook, sheet_name="DataBase", index=False)
        version_info['anateldb']['Version'] = bump_version(version_info['anateldb']['Version'])
    else:
        console.print(":recycle: [green] A base de dados n√£o mudou desde a √∫ltima atualiza√ß√£o, a vers√£o n√£o ser√° atualizada, somente a data de verifica√ß√£o")


    console.print("Sucesso :zap:")    
    version_info['anateldb']['ReleaseDate'] = datetime.today().strftime('%d/%m/%Y')
    # Write through a context manager so the JSON is flushed and the handle closed
    with version_file.open('w') as handle:
        json.dump(version_info, handle)

In [6]:
# Local checkout of the AnatelDatabase repository, looked up in the current
# user's home directory instead of one specific user's absolute path
pasta = Path.home() / 'AnatelDatabase'

In [7]:
if not pasta.exists():
    import subprocess
    # check=True makes a failed clone raise CalledProcessError here instead of
    # leaving `pasta` pointing at a directory that was never created
    result = subprocess.run(
        ['git', 'clone', 'https://github.com/ronaldokun/AnatelDatabase.git'],
        check=True,
    )
    # git clones into a folder named after the repository, inside the working directory
    pasta = Path.cwd() / 'AnatelDatabase'

In [12]:
# formatar_db(pasta)

In [44]:
df = pd.read_feather(f'{pasta}/AnatelDB.fth') #, engine='openpyxl', sheet_name='DataBase')

In [45]:
df.tail()

Unnamed: 0,Frequency,Latitude,Longitude,Description,Service,Station,Class,BW
884055,85469.0,-1.358931,-48.38567,"[STEL] L, FX, Tim S A (50417425295, 1009786951...",19,1009786951,Q7W,750M
884056,85469.0,-9.937445,-67.827751,"[STEL] L, FX, Tim S A (50417425295, 1005059940...",19,1005059940,Q7W,62M5
884057,85469.0,-23.624083,-46.623943,"[STEL] L, FX, Tim S A (50417425295, 1007183141...",19,1007183141,Q7W,62M5
884058,85469.0,-19.857389,-44.616112,"[STEL] L, FX, Tim S A (50417425295, 1009125734...",19,1009125734,Q7W,62M5
884059,85469.0,-16.592699,-49.267799,"[STEL] L, FX, Tim S A (50417425295, 1008754061...",19,1008754061,Q7W,62M5


In [46]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 884060 entries, 0 to 884059
Data columns (total 8 columns):
 #   Column       Non-Null Count   Dtype   
---  ------       --------------   -----   
 0   Frequency    884060 non-null  float64 
 1   Latitude     884060 non-null  float32 
 2   Longitude    884060 non-null  float32 
 3   Description  884060 non-null  string  
 4   Service      884060 non-null  int32   
 5   Station      884060 non-null  int32   
 6   Class        884060 non-null  category
 7   BW           884060 non-null  category
dtypes: category(2), float32(2), float64(1), int32(2), string(1)
memory usage: 29.5 MB


In [7]:
icao = pd.read_excel(f'{pasta}/IcaoDB.xlsx', engine='openpyxl', sheet_name='DataBase')
pmec = pd.read_excel(f'{pasta}/PmecDB.xlsx', engine='openpyxl', sheet_name='DataBase')
geo = pd.read_excel(f'{pasta}/GeoAiswebDB.xlsx', engine='openpyxl', sheet_name='DataBase')

In [38]:
icao[icao.Station != -1]

Unnamed: 0,Frequency,Latitude,Longitude,Description,Service,Station
0,109.1,-25.600000,-54.466667,"[FFD] ILS, FOZ DO IGUACU CATARATAS",-1,-1
2,109.3,-1.366667,-48.466667,"[FFD] ILS/DME, BELEM VAL DE CAES",-1,-1
3,109.3,-15.866667,-47.933334,"[FFD] ILS/DME, BRASILIA INTL.",-1,-1
4,109.3,-25.533333,-49.166668,"[FFD] ILS, CURITIBA AFONSO PE√ëA",-1,-1
5,109.3,-9.516666,-35.783333,"[FFD] ILS/DME, MACEIO",-1,-1
...,...,...,...,...,...,...
2576,1203.0,-23.616667,-46.650002,[ICA] Ground-based DME,-1,-1
2577,1204.0,-29.933332,-51.183334,[ICA] Ground-based DME,-1,-1
2578,1207.0,-1.383333,-48.483334,[ICA] Ground-based DME,-1,-1
2579,1209.0,-4.183333,-69.933334,[ICA] Ground-based DME,-1,-1


In [9]:
pmec.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 946 entries, 0 to 945
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Frequency    946 non-null    float64
 1   Latitude     946 non-null    float64
 2   Longitude    946 non-null    float64
 3   Description  946 non-null    object 
 4   Service      946 non-null    int64  
 5   Station      946 non-null    int64  
dtypes: float64(3), int64(2), object(1)
memory usage: 44.5+ KB


In [35]:
icao.shape

(2581, 6)

In [34]:
pmec.shape

(946, 6)

In [33]:
geo.shape

(286, 6)

In [21]:
cols = ['Frequency', 'Station']
df2 = pd.merge(icao, pmec, on=cols, how='outer').merge(geo, on=cols, how='outer')

In [36]:
x = df2.Description_x.notna()
y = df2.Description_y.notna()
z = df2.Description.notna()
df2[y & z  & (df2.Station != -1)]

Unnamed: 0,Frequency,Latitude_x,Longitude_x,Description_x,Service_x,Station,Latitude_y,Longitude_y,Description_y,Service_y,Latitude,Longitude,Description,Service
209,112.1,-9.366667,-40.566666,"[FFD] VOR/DME, PETROLINA",108.0,1000842573,-9.363334,-40.561501,"[AIS] SBPL-RDONAV, VOR/DME PTL, SENADOR NILO C...",108.0,-9.363261,-40.561577,[GEOAISWEB] VOR - PETROLINA OPR INFRAERO,108.0
212,112.2,-6.233333,-57.766666,"[FFD] VOR/DME, JACAREACANGA",108.0,1000842433,-6.235667,-57.770500,"[AIS] SBEK-RDONAV, VOR/DME JAC, Jacareacanga",108.0,-6.235744,-57.770527,[GEOAISWEB] VOR - JACAREACANGA VOR NO AVBL SEC...,108.0
215,112.3,-2.450000,-54.816666,"[FFD] VOR/DME, SANTAREM INTL.",108.0,1000823129,-2.426500,-54.817501,"[AIS] SBSN-RDONAV, VOR/DME STM, Maestro Wilson...",108.0,-2.426419,-54.817562,[GEOAISWEB] VOR - SANTAR√âM OPR INFRAERO,108.0
218,112.4,-23.333334,-51.116665,"[FFD] VOR/DME, LONDRINA",108.0,1000822874,-23.339500,-51.112499,"[AIS] SBLO-RDONAV, VOR/DME LON, Governador Jos...",108.0,-23.339516,-51.112526,[GEOAISWEB] VOR - LONDRINA CH 71XOPR INFRAERO,108.0
227,112.7,-5.516667,-47.450001,"[FFD] VOR/DME, IMPERATRIZ",108.0,1000842417,-5.523833,-47.449833,"[AIS] SBIZ-RDONAV, VOR/DME YTZ, PREFEITO RENAT...",108.0,-5.523889,-47.449844,[GEOAISWEB] VOR - IMPERATRIZ CH 74XOPR INFRAERO,108.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4078,1178.0,,,,,1000842506,-6.119667,-50.001999,[ICA] Ground-based DME,108.0,-6.119645,-50.001995,[GEOAISWEB] DME - CARAJ√ÅS 91X,108.0
4079,1185.0,,,,,1000822831,-26.222334,-48.798000,[ICA] Ground-based DME,108.0,-26.222261,-48.798046,[GEOAISWEB] DME - JOINVILLE 98X,108.0
4084,1193.0,,,,,1000822904,-22.344500,-41.769001,[ICA] Ground-based DME,108.0,-22.344448,-41.769001,[GEOAISWEB] DME - MACA√â 106X,108.0
4085,1195.0,,,,,1000823692,-18.876667,-48.221333,[ICA] Ground-based DME,108.0,-18.876610,-48.221298,[GEOAISWEB] DME - UBERL√ÇNDIA 108X,108.0


In [None]:
Para Station != -1, pegar só a descrição.

In [48]:
import hdf5storage as hdf

In [60]:
db = hdf.loadmat(f'{pasta}/AnatelDB.mat')

In [59]:
db = hdf.read(path='/c/Users/rsilva/db/', filename='AnatelDB.mat')

FileNotFoundError: [Errno 2] Unable to open file (unable to open file: name = 'AnatelDB.mat', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [16]:
# import seaborn as sns
# import matplotlib.pyplot as plt


# ax = sns.countplot(x="Num_Servi√ßo", data=base)

# plt.rcParams["figure.figsize"] = [30, 20]
# plt.rcParams["figure.autolayout"] = True

# for p in ax.patches:
#     ax.annotate('{:.1f}'.format(int(p.get_height())), (p.get_x()+0.05, p.get_height()+0.02))
    
# plt.title(f"Total de Entidades: {base.shape[0]}")
# plt.xlabel("C√≥digo Servi√ßo")

# plt.savefig("Stats.png")

In [11]:
base = read_base(pasta)

In [12]:
base.head()

Unnamed: 0,Frequ√™ncia,Num_Servi√ßo,Status,Classe,Entidade,Fistel,N√∫mero_da_Esta√ß√£o,Munic√≠pio,UF,Latitude,Longitude,Validade_RF,Num_Ato,Data_Ato,Fonte,Classe_Emiss√£o,Largura_Emiss√£o
0,0.028,19,L,OP,FURNAS CENTRAIS ELETRICAS S A,1030052263,1557670,Nova Igua√ßu,RJ,-22.662777,-43.476387,2033-08-17,-1,,STEL,J9E,8K00
1,0.0285,19,L,OP,COMPANHIA DE GERA√á√ÉO E TRANSMISS√ÉO DE ENERGIA ...,50420217282,1494686,Joinville,SC,-26.2925,-48.887222,2025-08-31,-1,,STEL,R3E,2K50
2,0.03,19,L,OP,FURNAS CENTRAIS ELETRICAS S A,1030052263,859966,Arapor√£,MG,-18.41,-49.099998,2033-08-17,-1,,STEL,J3E,1K00
3,0.03,19,L,OP,FURNAS CENTRAIS ELETRICAS S A,1030052263,859753,Campinas,SP,-22.774166,-47.004444,2033-08-17,-1,,STEL,J3E,1K00
4,0.03,19,L,OP,FURNAS CENTRAIS ELETRICAS S A,1030052263,1557823,S√£o Paulo,SP,-23.441668,-46.590832,2033-08-17,-1,,STEL,J3E,1K00


In [13]:
subset = ['Frequ√™ncia', 'Num_Servi√ßo', 'Status', 'Classe', 'Fistel', 'N√∫mero_da_Esta√ß√£o', 'Latitude', 'Longitude', 'Classe_Emiss√£o', 'Largura_Emiss√£o']
# double = base.duplicated(subset=subset, keep=False)
# base[double]

base.drop_duplicates(subset, keep='first').shape

(881692, 17)

In [105]:
subset = base.columns[:-2]
# double = base.duplicated(subset=subset, keep='first')
# base[double]

base.drop_duplicates(base.columns[:-2], keep='first').reset_index(drop=True)
# base.shape

Unnamed: 0,Frequ√™ncia,Num_Servi√ßo,Status,Classe,Entidade,Fistel,N√∫mero_da_Esta√ß√£o,Munic√≠pio,UF,Latitude,Longitude,Validade_RF,Num_Ato,Data_Ato,Fonte,Classe_Emiss√£o,Largura_Emiss√£o
26,0.038,19,L,OP,LIGHT SERVICOS DE ELETRICIDADE S A,01030063621,756830,Pira√≠,RJ,-22.629444,-43.895832,2028-12-12,-1,,STEL,N0N,1K00
33,0.040,19,L,OP,LIGHT SERVICOS DE ELETRICIDADE S A,01030063621,758124,Petr√≥polis,RJ,-22.433332,-43.166668,2028-12-12,-1,,STEL,A3E,5K00
37,0.040,19,L,OP,LIGHT SERVICOS DE ELETRICIDADE S A,01030063621,758051,Carmo,RJ,-21.840000,-42.570000,2028-12-12,-1,,STEL,N0N,1K00
58,0.044,19,L,OP,LIGHT SERVICOS DE ELETRICIDADE S A,01030063621,814180,Volta Redonda,RJ,-22.513611,-44.089722,2028-12-12,-1,,STEL,A3E,5K00
62,0.045,19,L,OP,LIGHT SERVICOS DE ELETRICIDADE S A,01030063621,749680,Rio de Janeiro,RJ,-22.966667,-43.233334,2028-12-12,-1,,STEL,A3E,5K00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
883461,82125.000,19,L,FX,TIM S A,50417425295,1007302558,Ribeir√£o das Neves,MG,-19.768250,-44.082527,2032-08-31,-1,,STEL,Q7W,62M5
883467,82125.000,19,L,FX,TIM S A,50417425295,1005319658,Uberl√¢ndia,MG,-18.942499,-48.300556,2032-08-31,-1,,STEL,Q7W,62M5
883481,82125.000,19,L,FX,TIM S A,50417425295,1005325984,Arax√°,MG,-19.582222,-46.953888,2032-08-31,-1,,STEL,Q7W,62M5
883514,82125.000,19,L,FX,TIM S A,50417425295,1008765390,Aparecida de Goi√¢nia,GO,-16.805166,-49.329556,2032-08-31,-1,,STEL,Q7W,62M5


In [14]:
base[base.N√∫mero_da_Esta√ß√£o == 1005304065]

Unnamed: 0,Frequ√™ncia,Num_Servi√ßo,Status,Classe,Entidade,Fistel,N√∫mero_da_Esta√ß√£o,Munic√≠pio,UF,Latitude,Longitude,Validade_RF,Num_Ato,Data_Ato,Fonte,Classe_Emiss√£o,Largura_Emiss√£o
744796,8118.32,19,L,FX,TIM S A,50417425295,1005304065,Sabar√°,MG,-19.897028,-43.807056,2032-08-31,-1,,STEL,D7W,29M7
825221,19343.0,19,L,FX,TIM S A,50417425295,1005304065,Sabar√°,MG,-19.897028,-43.807056,2032-08-31,-1,,STEL,D7W,55M0
831552,19453.0,19,L,FX,TIM S A,50417425295,1005304065,Sabar√°,MG,-19.897028,-43.807056,2032-08-31,-1,,STEL,D7W,55M0
865366,23275.0,19,L,FX,TIM S A,50417425295,1005304065,Sabar√°,MG,-19.897028,-43.807056,2032-08-31,-1,,STEL,D7W,56M0
881466,82125.0,19,L,FX,TIM S A,50417425295,1005304065,Sabar√°,MG,-19.897028,-43.807056,2032-08-31,-1,,STEL,G7W,2G00
881467,82125.0,19,L,FX,TIM S A,50417425295,1005304065,Sabar√°,MG,-19.897028,-43.807056,2032-08-31,-1,,STEL,Q7W,62M5


In [None]:
from nbdev.export import notebook2script; notebook2script()

Converted constants.ipynb.
Converted filter.ipynb.
Converted index.ipynb.
Converted queries.ipynb.
