In [31]:
#default_exp filter

In [32]:
#hide
import sys
from pathlib import Path

# Insert in Path Project Directory
sys.path.insert(0, str(Path().cwd().parent))

# Filtragem
> Este módulo concentra funções auxiliares específicas que filtram os dados do banco com campos e formatação de interesse para aplicações específicas como o [appAnalise](https://github.com/EricMagalhaesDelgado/appAnalise) por exemplo.

In [1]:
#export
import os
from pathlib import Path
import json
import pandas as pd
from datetime import datetime
from openpyxl import load_workbook
from anateldb.query import *
from anateldb.constants import console, APP_ANALISE
from fastcore.test import *
from fastcore.script import call_parse, Param, store_true
from pyarrow import ArrowInvalid
from rich import print

In [2]:
#export
def bump_version(version, part=2):
    version = version.split('.')
    version[part] = str(int(version[part]) + 1)
    for i in range(part+1, 3): version[i] = '0'
    return '.'.join(version)

In [3]:
#exporti
def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None,
                       truncate_sheet=False, 
                       **to_excel_kwargs):
    """
    Append a DataFrame [df] to existing Excel file [filename]
    into [sheet_name] Sheet.
    If [filename] doesn't exist, then this function will create it.

    @param filename: File path or existing ExcelWriter
                     (Example: '/path/to/file.xlsx')
    @param df: DataFrame to save to workbook
    @param sheet_name: Name of sheet which will contain DataFrame.
                       (default: 'Sheet1')
    @param startrow: upper left cell row to dump data frame.
                     Per default (startrow=None) calculate the last row
                     in the existing DF and write to the next row...
    @param truncate_sheet: truncate (remove and recreate) [sheet_name]
                           before writing DataFrame to Excel file
    @param to_excel_kwargs: arguments which will be passed to `DataFrame.to_excel()`
                            [can be a dictionary]
    @return: None

    Usage examples:

    >>> append_df_to_excel('d:/temp/test.xlsx', df)

    >>> append_df_to_excel('d:/temp/test.xlsx', df, header=None, index=False)

    >>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
                           index=False)

    >>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2', 
                           index=False, startrow=25)

    (c) [MaxU](https://stackoverflow.com/users/5741205/maxu?tab=profile)
    """
    # Excel file doesn't exist - saving and exiting
    if not Path(filename).is_file():
        df.to_excel(
            filename,
            sheet_name=sheet_name, 
            startrow=startrow if startrow is not None else 0, 
            **to_excel_kwargs)
        return
    
    # ignore [engine] parameter if it was passed
    if 'engine' in to_excel_kwargs:
        to_excel_kwargs.pop('engine')

    writer = pd.ExcelWriter(filename, engine='openpyxl', mode='a', if_sheet_exists='replace')

    # try to open an existing workbook
    writer.book = load_workbook(filename)
    
    # get the last row in the existing Excel sheet
    # if it was not specified explicitly
    if startrow is None and sheet_name in writer.book.sheetnames:
        startrow = writer.book[sheet_name].max_row

    # truncate sheet
    if truncate_sheet and sheet_name in writer.book.sheetnames:
        # index of [sheet_name] sheet
        idx = writer.book.sheetnames.index(sheet_name)
        # remove [sheet_name]
        writer.book.remove(writer.book.worksheets[idx])
        # create an empty sheet [sheet_name] using old index
        writer.book.create_sheet(sheet_name, idx)
    
    # copy existing sheets
    writer.sheets = {ws.title:ws for ws in writer.book.worksheets}

    if startrow is None:
        startrow = 0

    # write out the new sheet
    df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs)

    # save the workbook
    writer.save()

In [4]:
#export
@call_parse
def formatar_db(
    path: Param("Pasta onde salvar os arquivos", str),
    up_stel: Param("Atualizar a base do Stel", store_true) = False, 
    up_radcom: Param("Atualizar a base do Radcom", store_true) = False,
    up_mosaico: Param("Atualizar a base do Mosaico", store_true) = False,
    up_icao: Param("Atualizar a base do ICAO", store_true) = False,
    up_pmec: Param("Atualizar a base do PMEC", store_true) = False,
    up_geo: Param("Atualizar a base do Geo", store_true) = False,
) -> None:
    dest = Path(path)
    dest.mkdir(parents=True, exist_ok=True)
    time = datetime.today().strftime("%d/%m/%Y %H:%M:%S")
    console.print(":scroll:[green]Lendo as bases de dados...")
    rd = read_base(path, up_stel, up_radcom, up_mosaico, up_icao)
    rd['Validade_RF'] = rd.Validade_RF.astype('string').fillna('')
    rd['Data_Ato'] = rd.Data_Ato.astype('string').fillna('')
    rd['Status'] = rd.Status.astype('string')
    rd['Classe'] = rd.Classe.astype('string')
    rd.loc[rd['Status'] != '', 'Status'] = rd.loc[rd['Status'] != '', 'Status'] + ", " \
        + rd.loc[rd['Status'] != '', 'Classe']
    rd.loc[rd['Status'].isna(), 'Status'] = rd.loc[rd['Status'].isna(), 'Num_Serviço'].astype('string')

    rd["Descrição"] = (
        '[' + rd.Fonte.astype('string') + '] ' + 
        rd.Status.astype('string').fillna('-')
        + ", "
        + rd.Entidade.astype('string').fillna('-').str.title()
        + " ("
        + rd.Fistel.astype('string').fillna('-')
        + ", "
        + rd["Número_da_Estação"].astype('string').fillna('-')
        + "), "
        + rd.Município.astype('string').fillna('-')
        + "/"
        + rd.UF.astype('string').fillna('-')
    )

    export_columns = [
        "Frequência",
        "Latitude",
        "Longitude",
        "Descrição",
        "Num_Serviço",
        "Número_da_Estação",
        "Classe_Emissão",
        "Largura_Emissão",
    ]
    rd = rd.loc[:, export_columns]
    rd.columns = APP_ANALISE
    rd  = merge_aero(rd, read_aero(path, up_icao, up_pmec, up_geo))
    rd = df_optimize(rd, exclude=['Frequency'])
    console.print(":card_file_box:[green]Salvando os arquivos...")
    d = json.loads((dest / 'VersionFile.json').read_text())
    try:
        cache = pd.read_feather(f"{dest}/AnatelDB.fth")
    except (ArrowInvalid, FileNotFoundError):
        cache = pd.DataFrame()
    
    if not rd.equals(cache):
        console.print(":new: [green] A base de dados mudou desde a última atualização! Salvando o novo arquivo e atualizando a versão")
        date = pd.DataFrame(columns=[time])
        try:
            rd.to_feather(Path(f"{dest}/AnatelDB.fth").open('bw'))
        except ArrowInvalid:
            Path(f"{dest}/AnatelDB.fth").unlink()
        with pd.ExcelWriter(f"{dest}/AnatelDB.xlsx", engine='xlsxwriter') as workbook:
            date.to_excel(workbook, sheet_name="ExtractDate", index=False)
            rd.to_excel(workbook, sheet_name="DataBase", index=False)
        d['anateldb']['Version'] = bump_version(d['anateldb']['Version'])
    else:
        console.print(":recycle: [green] A base de dados não mudou desde a última atualização, a versão não será atualizada, somente a data de verificação")


    console.print("Sucesso :zap:")    
    d['anateldb']['ReleaseDate'] = datetime.today().strftime('%d/%m/%Y')
    json.dump(d, (dest / 'VersionFile.json').open('w'))

In [5]:
pasta = Path('c:/Users/rsilva/AnatelDatabase')

In [38]:
if not pasta.exists():
    import subprocess
    result = subprocess.run(['git', 'clone', 'https://github.com/ronaldokun/AnatelDatabase.git'])
    pasta = Path.cwd() / 'AnatelDatabase'

In [39]:
# formatar_db(pasta)

In [40]:
#df = pd.read_feather(f'{pasta}/AnatelDB.fth') #, engine='openpyxl', sheet_name='DataBase')

In [41]:
#df.tail()

In [42]:
#df.info()

In [6]:
icao = read_icao(pasta)
pmec = read_pmec(pasta)
geo = read_geo(pasta)
icao['Description'] = icao.Description.astype('string')
pmec['Description'] = pmec.Description.astype('string')
geo['Description'] = geo.Description.astype('string')

In [48]:
print(icao.shape, pmec.shape, geo.shape)

In [7]:
cols = ['Frequency', 'Station']
a = icao[icao.Station != -1].reset_index(drop=True)
b = pmec[pmec.Station != -1].reset_index(drop=True)
c = geo[geo.Station != -1].reset_index(drop=True)

In [8]:
df = merge_df(a, b, cols, how='outer')
df = merge_df(df, c, cols, how='outer')
df

Unnamed: 0,Frequency,Station,Description,Latitude,Longitude,Service
0,109.100,696597080,"[ICAO] ILS/DME, JUIZ DE FORA",-21.783333,-43.383335,108.0
1,109.300,1008242052,"[ICAO] ILS, RIO DE JANEIRO INTL.",-22.799999,-43.216667,108.0
2,110.300,1000822769,"[ICAO] ILS, CAMPINAS VIRACOPOS, SP | [AISW] SB...",-23.017500,-47.117500,108.0
3,110.700,1000823269,"[ICAO] ILS, SAO PAULO GUARULHOS INTL. | [AISW]...",-23.425468,-46.464542,108.0
4,111.100,1000823242,"[ICAO] ILS, SAO PAULO GUARULHOS INTL. | [AISW]...",-23.436501,-46.486748,108.0
...,...,...,...,...,...,...
1606,1193.000,1000822904,"[DOC] SBME-RDONAV, VOR/DME MCA, MACAE (Ground-...",-22.344475,-41.769001,108.0
1607,1195.000,1000823692,"[DOC] SBUL-RDONAV, VOR/DME ULD, TEN CEL AV CÉS...",-18.876638,-48.221313,108.0
1608,1198.000,1000823781,"[DOC] SBVT-RDONAV, VOR/DME VRI, Eurico de Agui...",-20.260166,-40.285000,108.0
1609,0.355,690266120,[AISG] NDB - URUCU OPR PETROBRAS,-4.886500,-65.349831,108.0


In [9]:
def merge_diff(df):
    """Merge two dataframes"""
    x = df.Description_x.notna()
    y = df.Description_y.notna()
    df.loc[x & ~y, 'Description'] = df.loc[x & ~y, 'Description_x']
    df.loc[~x & y, 'Description'] = df.loc[~x & y, 'Description_y']
    df.loc[x & ~y, 'Latitude'] = df.loc[x & ~y, 'Latitude_x']
    df.loc[x & ~y, 'Longitude'] = df.loc[x & ~y, 'Longitude_x']
    df.loc[~x & y, 'Latitude'] = df.loc[~x & y, 'Latitude_y']
    df.loc[~x & y, 'Longitude'] = df.loc[~x & y, 'Longitude_y']
    df.loc[x, 'Service'] = df.loc[x, 'Service_x']
    df.loc[~x & y, 'Service'] = df.loc[~x & y, 'Service_y']
    return df.loc[:, [c for c in df.columns if '_' not in c]]


In [10]:
cols = ['Frequency']
a = icao[icao.Station == -1].reset_index(drop=True)
b = pmec[pmec.Station == -1].reset_index(drop=True)
c = geo[geo.Station == -1].reset_index(drop=True)
#freqs = set(a.Frequency.tolist()).union(set(b.Frequency.tolist())).union(set(c.Frequency.tolist()))

In [11]:
ab = pd.merge(a, b, on=cols, how='outer').reset_index(drop=True)
ab

Unnamed: 0,Frequency,Latitude_x,Longitude_x,Description_x,Service_x,Station_x,Latitude_y,Longitude_y,Description_y,Service_y,Station_y
0,109.1,-25.600000,-54.466667,"[ICAO] ILS, FOZ DO IGUACU CATARATAS",-1.0,-1.0,-25.602833,-54.478500,"[AISW] SBFI-RDONAV, ILS/DME 14 IFI, Cataratas",-1.0,-1.0
1,109.3,-1.366667,-48.466667,"[ICAO] ILS/DME, BELEM VAL DE CAES",-1.0,-1.0,-2.578333,-44.228333,"[AISW] SBSL-RDONAV, ILS/DME 06 ISL, Marechal C...",-1.0,-1.0
2,109.3,-1.366667,-48.466667,"[ICAO] ILS/DME, BELEM VAL DE CAES",-1.0,-1.0,-23.637667,-46.648834,"[AISW] SBSP-RDONAV, ILS/DME 17R ISP, Congonhas",-1.0,-1.0
3,109.3,-1.366667,-48.466667,"[ICAO] ILS/DME, BELEM VAL DE CAES",-1.0,-1.0,-20.267000,-40.292835,"[AISW] SBVT-RDONAV, ILS/DME 24 IVI, Eurico de ...",-1.0,-1.0
4,109.3,-1.366667,-48.466667,"[ICAO] ILS/DME, BELEM VAL DE CAES",-1.0,-1.0,-9.861167,-67.881668,"[AISW] SBRB-RDONAV, ILS/DME 06 IRB, Plácido de...",-1.0,-1.0
...,...,...,...,...,...,...,...,...,...,...,...
2285,2800.0,,,,,,-3.377222,-64.720558,[RMET] Radar - Tefé/AM,-1.0,-1.0
2286,2800.0,,,,,,-18.207258,-45.460533,[RMET] Radar - Três Marias/MG,-1.0,-1.0
2287,2800.0,,,,,,-4.248333,69.934998,[RMET] Radar - Tabatinga/AM,-1.0,-1.0
2288,2800.0,,,,,,-0.143611,-67.056946,[RMET] Radar - São Gabriel da Cachoeira/AM,-1.0,-1.0


In [12]:
x = ab.Description_x.notna()
y = ab.Description_y.notna()
ab_new = ab[(~x & y) | (x & ~y)].reset_index(drop=True)
ab_common = ab[(x & y)].reset_index(drop=True)
ab_new = merge_diff(ab_new)
ab_new

Unnamed: 0,Frequency,Description,Latitude,Longitude,Service
0,112.2,"[ICAO] VOR/DME, BAURU",-22.350000,-49.049999,-1.0
1,112.2,"[ICAO] VOR/DME, ILHEUS",-14.800000,-39.016666,-1.0
2,112.3,"[ICAO] VOR, ITAPARICA",-12.900000,-38.400002,-1.0
3,112.3,"[ICAO] VOR/DME, TERESINA",-5.066667,-42.816666,-1.0
4,112.5,"[ICAO] VOR, MANICORE",-5.800000,-61.283333,-1.0
...,...,...,...,...,...
1119,2800.0,[RMET] Radar - Tefé/AM,-3.377222,-64.720558,-1.0
1120,2800.0,[RMET] Radar - Três Marias/MG,-18.207258,-45.460533,-1.0
1121,2800.0,[RMET] Radar - Tabatinga/AM,-4.248333,69.934998,-1.0
1122,2800.0,[RMET] Radar - São Gabriel da Cachoeira/AM,-0.143611,-67.056946,-1.0


In [13]:
ab_common

Unnamed: 0,Frequency,Latitude_x,Longitude_x,Description_x,Service_x,Station_x,Latitude_y,Longitude_y,Description_y,Service_y,Station_y
0,109.1,-25.600000,-54.466667,"[ICAO] ILS, FOZ DO IGUACU CATARATAS",-1.0,-1.0,-25.602833,-54.478500,"[AISW] SBFI-RDONAV, ILS/DME 14 IFI, Cataratas",-1.0,-1.0
1,109.3,-1.366667,-48.466667,"[ICAO] ILS/DME, BELEM VAL DE CAES",-1.0,-1.0,-2.578333,-44.228333,"[AISW] SBSL-RDONAV, ILS/DME 06 ISL, Marechal C...",-1.0,-1.0
2,109.3,-1.366667,-48.466667,"[ICAO] ILS/DME, BELEM VAL DE CAES",-1.0,-1.0,-23.637667,-46.648834,"[AISW] SBSP-RDONAV, ILS/DME 17R ISP, Congonhas",-1.0,-1.0
3,109.3,-1.366667,-48.466667,"[ICAO] ILS/DME, BELEM VAL DE CAES",-1.0,-1.0,-20.267000,-40.292835,"[AISW] SBVT-RDONAV, ILS/DME 24 IVI, Eurico de ...",-1.0,-1.0
4,109.3,-1.366667,-48.466667,"[ICAO] ILS/DME, BELEM VAL DE CAES",-1.0,-1.0,-9.861167,-67.881668,"[AISW] SBRB-RDONAV, ILS/DME 06 IRB, Plácido de...",-1.0,-1.0
...,...,...,...,...,...,...,...,...,...,...,...
1161,1203.0,-23.616667,-46.650002,"[DOC] VOR/DME, SAO PAULO CONGONHAS (Ground-bas...",-1.0,-1.0,-8.136500,-34.927334,"[DOC] SBRF-RDONAV, VOR/DME REC, Guararapes - G...",-1.0,-1.0
1162,1204.0,-29.933332,-51.183334,"[DOC] VOR/DME, PORTO ALEGRE CANOAS (Ground-bas...",-1.0,-1.0,-19.688999,-47.060501,"[DOC] SBAX-RDONAV, VOR/DME ARX, Romeu Zema (Gr...",-1.0,-1.0
1163,1207.0,-1.383333,-48.483334,"[DOC] VOR/DME, BELEM VAL DE CAES (Ground-based...",-1.0,-1.0,-1.384333,-48.478500,"[DOC] SBBE-RDONAV, VOR/DME BEL, Val de Cans - ...",-1.0,-1.0
1164,1209.0,-4.183333,-69.933334,"[DOC] VOR/DME, AMAZONICA (Ground-based DME)",-1.0,-1.0,-15.865014,-47.900188,"[DOC] SBBR-RDONAV, VOR/DME VJK, Presidente Jus...",-1.0,-1.0


In [16]:
ab_newc = pd.merge(ab_new, c, on=cols, how='outer').reset_index(drop=True)
ab_newc

Unnamed: 0,Frequency,Description_x,Latitude_x,Longitude_x,Service_x,Latitude_y,Longitude_y,Description_y,Service_y,Station
0,112.2,"[ICAO] VOR/DME, BAURU",-22.350000,-49.049999,-1.0,,,,,
1,112.2,"[ICAO] VOR/DME, ILHEUS",-14.800000,-39.016666,-1.0,,,,,
2,112.3,"[ICAO] VOR, ITAPARICA",-12.900000,-38.400002,-1.0,,,,,
3,112.3,"[ICAO] VOR/DME, TERESINA",-5.066667,-42.816666,-1.0,,,,,
4,112.5,"[ICAO] VOR, MANICORE",-5.800000,-61.283333,-1.0,,,,,
...,...,...,...,...,...,...,...,...,...,...
1304,1209.0,,,,,-4.195000,-69.940552,[AISG] DME - LETÍCIA 122X,-1.0,-1.0
1305,1209.0,,,,,-15.865013,-47.900188,[AISG] DME - KUBITSCHEK 122X,-1.0,-1.0
1306,1211.0,,,,,4.693000,-61.028831,[AISG] DME - LA DIVINA PASTORA 124X,-1.0,-1.0
1307,1211.0,,,,,-25.778656,-49.763241,[AISG] DME - LAPA-PR 124X,-1.0,-1.0


In [17]:
x = ab_newc.Description_x.notna()
y = ab_newc.Description_y.notna()
abc_new = ab_newc[(~x & y) | (x & ~y)].reset_index(drop=True)
ab_newc_common = ab_newc[(x & y)].reset_index(drop=True)
abc_new = merge_diff(abc_new)
abc_new

Unnamed: 0,Frequency,Station,Description,Latitude,Longitude,Service
0,112.2,,"[ICAO] VOR/DME, BAURU",-22.350000,-49.049999,-1.0
1,112.2,,"[ICAO] VOR/DME, ILHEUS",-14.800000,-39.016666,-1.0
2,112.3,,"[ICAO] VOR, ITAPARICA",-12.900000,-38.400002,-1.0
3,112.3,,"[ICAO] VOR/DME, TERESINA",-5.066667,-42.816666,-1.0
4,112.5,,"[ICAO] VOR, MANICORE",-5.800000,-61.283333,-1.0
...,...,...,...,...,...,...
1216,1209.0,-1.0,[AISG] DME - LETÍCIA 122X,-4.195000,-69.940552,-1.0
1217,1209.0,-1.0,[AISG] DME - KUBITSCHEK 122X,-15.865013,-47.900188,-1.0
1218,1211.0,-1.0,[AISG] DME - LA DIVINA PASTORA 124X,4.693000,-61.028831,-1.0
1219,1211.0,-1.0,[AISG] DME - LAPA-PR 124X,-25.778656,-49.763241,-1.0


In [28]:
abc = pd.merge(ab, c, on=cols, how='outer').reset_index(drop=True)
x = abc.Description_x.notna()
y = abc.Description_y.notna()
z = abc.Description.notna()
abc_new2 = abc[(x & ~y & ~z) | (~x & y & ~z) | (~x & ~y & z)].reset_index(drop=True)
abc_new2


Unnamed: 0,Frequency,Latitude_x,Longitude_x,Description_x,Service_x,Station_x,Latitude_y,Longitude_y,Description_y,Service_y,Station_y,Latitude,Longitude,Description,Service,Station
0,112.2,-22.350000,-49.049999,"[ICAO] VOR/DME, BAURU",-1.0,-1.0,,,,,,,,,,
1,112.2,-14.800000,-39.016666,"[ICAO] VOR/DME, ILHEUS",-1.0,-1.0,,,,,,,,,,
2,112.3,-12.900000,-38.400002,"[ICAO] VOR, ITAPARICA",-1.0,-1.0,,,,,,,,,,
3,112.3,-5.066667,-42.816666,"[ICAO] VOR/DME, TERESINA",-1.0,-1.0,,,,,,,,,,
4,112.5,-5.800000,-61.283333,"[ICAO] VOR, MANICORE",-1.0,-1.0,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1075,116.1,,,,,,,,,,,-27.135693,-52.662968,[AISG] VOR - CHAPECÓ OPR CITY HALL CH 108X,-1.0,-1.0
1076,967.0,,,,,,,,,,,-2.578333,-44.228333,[AISG] DME - SÃO LUIZ 6X,-1.0,-1.0
1077,985.0,,,,,,,,,,,-21.700470,-41.307671,[AISG] DME - CAMPOS 24X,-1.0,-1.0
1078,1182.0,,,,,,,,,,,-22.967175,-42.890945,[AISG] DME - MARICÁ 95X,-1.0,-1.0


In [29]:
abc[(x & y) | (y & z) | (x & z)].reset_index(drop=True)

Unnamed: 0,Frequency,Latitude_x,Longitude_x,Description_x,Service_x,Station_x,Latitude_y,Longitude_y,Description_y,Service_y,Station_y,Latitude,Longitude,Description,Service,Station
0,109.1,-25.600000,-54.466667,"[ICAO] ILS, FOZ DO IGUACU CATARATAS",-1.0,-1.0,-25.602833,-54.478500,"[AISW] SBFI-RDONAV, ILS/DME 14 IFI, Cataratas",-1.0,-1.0,,,,,
1,109.3,-1.366667,-48.466667,"[ICAO] ILS/DME, BELEM VAL DE CAES",-1.0,-1.0,-2.578333,-44.228333,"[AISW] SBSL-RDONAV, ILS/DME 06 ISL, Marechal C...",-1.0,-1.0,,,,,
2,109.3,-1.366667,-48.466667,"[ICAO] ILS/DME, BELEM VAL DE CAES",-1.0,-1.0,-23.637667,-46.648834,"[AISW] SBSP-RDONAV, ILS/DME 17R ISP, Congonhas",-1.0,-1.0,,,,,
3,109.3,-1.366667,-48.466667,"[ICAO] ILS/DME, BELEM VAL DE CAES",-1.0,-1.0,-20.267000,-40.292835,"[AISW] SBVT-RDONAV, ILS/DME 24 IVI, Eurico de ...",-1.0,-1.0,,,,,
4,109.3,-1.366667,-48.466667,"[ICAO] ILS/DME, BELEM VAL DE CAES",-1.0,-1.0,-9.861167,-67.881668,"[AISW] SBRB-RDONAV, ILS/DME 06 IRB, Plácido de...",-1.0,-1.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3289,1192.0,,,,,,-21.984501,-47.344501,"[DOC] SBYS-RDONAV, VOR/DME PIR, CAMPO FONTENEL...",-1.0,-1.0,-21.984562,-47.344505,[AISG] DME - PIRASSUNUNGA 105X,-1.0,-1.0
3290,1192.0,,,,,,-21.984501,-47.344501,"[DOC] SBYS-RDONAV, VOR/DME PIR, CAMPO FONTENEL...",-1.0,-1.0,-3.040030,-60.054737,[AISG] DME - MANAUS 105X,-1.0,-1.0
3291,1198.0,,,,,,-9.876000,-67.905334,"[DOC] SBRB-RDONAV, VOR/DME RCO, Plácido de Cas...",-1.0,-1.0,-9.875980,-67.905380,[AISG] DME - RIO BRANCO 111X,-1.0,-1.0
3292,1200.0,,,,,,-0.150333,-66.990501,"[DOC] SBUA-RDONAV, VOR/DME GBR, São Gabriel da...",-1.0,-1.0,-0.150278,-66.990570,[AISG] DME - SÃO GABRIEL DA CACHOEIRA 113X,-1.0,-1.0


In [32]:
abc[(x & y & z)].reset_index(drop=True)

Unnamed: 0,Frequency,Latitude_x,Longitude_x,Description_x,Service_x,Station_x,Latitude_y,Longitude_y,Description_y,Service_y,Station_y,Latitude,Longitude,Description,Service,Station
0,112.0,-10.983334,-37.066666,"[ICAO] VOR/DME, ARACAJU STA. MARIA",-1.0,-1.0,-29.709833,-53.712666,"[AISW] SBSM-RDONAV, VOR/DME SMA, Santa Maria",-1.0,-1.0,-29.709761,-53.712742,[AISG] VOR - SANTA MARIA COMAER CH 57X,-1.0,-1.0
1,112.0,-0.050000,-51.066666,"[ICAO] VOR/DME, MACAPA INTL.",-1.0,-1.0,-29.709833,-53.712666,"[AISW] SBSM-RDONAV, VOR/DME SMA, Santa Maria",-1.0,-1.0,-29.709761,-53.712742,[AISG] VOR - SANTA MARIA COMAER CH 57X,-1.0,-1.0
2,112.0,-29.716667,-53.716667,"[ICAO] VOR/DME, SANTA MARIA",-1.0,-1.0,-29.709833,-53.712666,"[AISW] SBSM-RDONAV, VOR/DME SMA, Santa Maria",-1.0,-1.0,-29.709761,-53.712742,[AISG] VOR - SANTA MARIA COMAER CH 57X,-1.0,-1.0
3,112.1,-25.583334,-54.500000,"[ICAO] VOR/DME, FOZ DO IGUACU CATARATAS",-1.0,-1.0,-25.583166,-54.503502,"[AISW] SBFI-RDONAV, VOR/DME FOZ, Cataratas",-1.0,-1.0,-12.693556,-60.095242,[AISG] VOR - VILHENA CH 58XOPR INFRAERO,-1.0,-1.0
4,112.1,-25.583334,-54.500000,"[ICAO] VOR/DME, FOZ DO IGUACU CATARATAS",-1.0,-1.0,-25.583166,-54.503502,"[AISW] SBFI-RDONAV, VOR/DME FOZ, Cataratas",-1.0,-1.0,-22.812775,-42.095341,[AISG] VOR - ALDEIA OPR COMANDO DA MARINHA; CO...,-1.0,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2379,1209.0,-4.183333,-69.933334,"[DOC] VOR/DME, AMAZONICA (Ground-based DME)",-1.0,-1.0,-15.865014,-47.900188,"[DOC] SBBR-RDONAV, VOR/DME VJK, Presidente Jus...",-1.0,-1.0,-4.195000,-69.940552,[AISG] DME - LETÍCIA 122X,-1.0,-1.0
2380,1209.0,-4.183333,-69.933334,"[DOC] VOR/DME, AMAZONICA (Ground-based DME)",-1.0,-1.0,-15.865014,-47.900188,"[DOC] SBBR-RDONAV, VOR/DME VJK, Presidente Jus...",-1.0,-1.0,-15.865013,-47.900188,[AISG] DME - KUBITSCHEK 122X,-1.0,-1.0
2381,1211.0,-19.833334,-44.000000,"[DOC] VOR/DME, BELO HORIZONTE PAMPULHA (Ground...",-1.0,-1.0,-19.835833,-44.003502,"[DOC] SBBH-RDONAV, VOR/DME BHZ a. SECT 100/11...",-1.0,-1.0,4.693000,-61.028831,[AISG] DME - LA DIVINA PASTORA 124X,-1.0,-1.0
2382,1211.0,-19.833334,-44.000000,"[DOC] VOR/DME, BELO HORIZONTE PAMPULHA (Ground...",-1.0,-1.0,-19.835833,-44.003502,"[DOC] SBBH-RDONAV, VOR/DME BHZ a. SECT 100/11...",-1.0,-1.0,-25.778656,-49.763241,[AISG] DME - LAPA-PR 124X,-1.0,-1.0


In [33]:
abc[abc.Latitude_x == -29.709833]

Unnamed: 0,Frequency,Latitude_x,Longitude_x,Description_x,Service_x,Station_x,Latitude_y,Longitude_y,Description_y,Service_y,Station_y,Latitude,Longitude,Description,Service,Station


In [78]:
s = '_x'
new = df2[x & ~y].reset_index(drop=True).loc[:, ['Frequency', f'Latitude{s}', f'Longitude{s}', f'Description{s}', f'Service{s}', f'Station{s}']]
new.columns = ['Frequency', 'Latitude', 'Longitude', 'Description', 'Service', 'Station']
df = pd.concat([df, new])
df

Unnamed: 0,Frequency,Station,Description,Latitude,Longitude,Service
0,109.1,6.965971e+08,"[ICAO] ILS/DME, JUIZ DE FORA",-21.783333,-43.383335,108.0
1,109.3,1.008242e+09,"[ICAO] ILS, RIO DE JANEIRO INTL.",-22.799999,-43.216667,108.0
2,110.3,1.000823e+09,"[ICAO] ILS, CAMPINAS VIRACOPOS, SP | [AISW] SB...",-23.017500,-47.117500,
3,110.7,1.000823e+09,"[ICAO] ILS, SAO PAULO GUARULHOS INTL. | [AISW]...",-23.425468,-46.464542,
4,111.1,1.000823e+09,"[ICAO] ILS, SAO PAULO GUARULHOS INTL. | [AISW]...",-23.436501,-46.486748,
...,...,...,...,...,...,...
820,1186.0,-1.000000e+00,"[DOC] VOR/DME, MANAUS EDUARDO GOMES INTL. (Gro...",-3.033333,-60.116665,-1.0
821,1186.0,-1.000000e+00,"[DOC] VOR/DME, SOROCABA (Ground-based DME)",-23.500000,-47.383335,-1.0
822,1189.0,-1.000000e+00,"[DOC] VOR/DME, FLORIANOPOLIS HERCILIO LUZ (Gro...",-27.683332,-48.500000,-1.0
823,1189.0,-1.000000e+00,"[DOC] VOR/DME, VITORIA GOIABEIRAS (Ground-base...",-20.250000,-40.283333,-1.0


In [79]:
s = '_y'
new = df2[~x & y].reset_index(drop=True).loc[:, ['Frequency', f'Latitude{s}', f'Longitude{s}', f'Description{s}', f'Service{s}', f'Station{s}']]
new.columns = ['Frequency', 'Latitude', 'Longitude', 'Description', 'Service', 'Station']
df = pd.concat([df, new])
df

Unnamed: 0,Frequency,Station,Description,Latitude,Longitude,Service
0,109.1,6.965971e+08,"[ICAO] ILS/DME, JUIZ DE FORA",-21.783333,-43.383335,108.0
1,109.3,1.008242e+09,"[ICAO] ILS, RIO DE JANEIRO INTL.",-22.799999,-43.216667,108.0
2,110.3,1.000823e+09,"[ICAO] ILS, CAMPINAS VIRACOPOS, SP | [AISW] SB...",-23.017500,-47.117500,
3,110.7,1.000823e+09,"[ICAO] ILS, SAO PAULO GUARULHOS INTL. | [AISW]...",-23.425468,-46.464542,
4,111.1,1.000823e+09,"[ICAO] ILS, SAO PAULO GUARULHOS INTL. | [AISW]...",-23.436501,-46.486748,
...,...,...,...,...,...,...
294,2800.0,-1.000000e+00,[RMET] Radar - Tefé/AM,-3.377222,-64.720558,-1.0
295,2800.0,-1.000000e+00,[RMET] Radar - Três Marias/MG,-18.207258,-45.460533,-1.0
296,2800.0,-1.000000e+00,[RMET] Radar - Tabatinga/AM,-4.248333,69.934998,-1.0
297,2800.0,-1.000000e+00,[RMET] Radar - São Gabriel da Cachoeira/AM,-0.143611,-67.056946,-1.0


In [None]:
df2 = pd.merge(df2, c, on=cols, how='outer').reset_index(drop=True)
x = df2.Description_x.notna()
y = df2.Description_y.notna()
s = '_x'
new = df2[x & ~y].reset_index(drop=True).loc[:, ['Frequency', f'Latitude{s}', f'Longitude{s}', f'Description{s}', f'Service{s}', f'Station{s}']]
new.columns = ['Frequency', 'Latitude', 'Longitude', 'Description', 'Service', 'Station']
df = pd.concat([df, new])
s = '_y'
new = df2[~x & y].reset_index(drop=True).loc[:, ['Frequency', f'Latitude{s}', f'Longitude{s}', f'Description{s}', f'Service{s}', f'Station{s}']]
new.columns = ['Frequency', 'Latitude', 'Longitude', 'Description', 'Service', 'Station']
df = pd.concat([df, new])
df

In [48]:
import hdf5storage as hdf

In [60]:
db = hdf.loadmat(f'{pasta}/AnatelDB.mat')

In [59]:
db = hdf.read(path='/c/Users/rsilva/db/', filename='AnatelDB.mat')

FileNotFoundError: [Errno 2] Unable to open file (unable to open file: name = 'AnatelDB.mat', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [16]:
# import seaborn as sns
# import matplotlib.pyplot as plt


# ax = sns.countplot(x="Num_Serviço", data=base)

# plt.rcParams["figure.figsize"] = [30, 20]
# plt.rcParams["figure.autolayout"] = True

# for p in ax.patches:
#     ax.annotate('{:.1f}'.format(int(p.get_height())), (p.get_x()+0.05, p.get_height()+0.02))
    
# plt.title(f"Total de Entidades: {base.shape[0]}")
# plt.xlabel("Código Serviço")

# plt.savefig("Stats.png")

In [11]:
base = read_base(pasta)

In [12]:
base.head()

Unnamed: 0,Frequência,Num_Serviço,Status,Classe,Entidade,Fistel,Número_da_Estação,Município,UF,Latitude,Longitude,Validade_RF,Num_Ato,Data_Ato,Fonte,Classe_Emissão,Largura_Emissão
0,0.028,19,L,OP,FURNAS CENTRAIS ELETRICAS S A,1030052263,1557670,Nova Iguaçu,RJ,-22.662777,-43.476387,2033-08-17,-1,,STEL,J9E,8K00
1,0.0285,19,L,OP,COMPANHIA DE GERAÇÃO E TRANSMISSÃO DE ENERGIA ...,50420217282,1494686,Joinville,SC,-26.2925,-48.887222,2025-08-31,-1,,STEL,R3E,2K50
2,0.03,19,L,OP,FURNAS CENTRAIS ELETRICAS S A,1030052263,859966,Araporã,MG,-18.41,-49.099998,2033-08-17,-1,,STEL,J3E,1K00
3,0.03,19,L,OP,FURNAS CENTRAIS ELETRICAS S A,1030052263,859753,Campinas,SP,-22.774166,-47.004444,2033-08-17,-1,,STEL,J3E,1K00
4,0.03,19,L,OP,FURNAS CENTRAIS ELETRICAS S A,1030052263,1557823,São Paulo,SP,-23.441668,-46.590832,2033-08-17,-1,,STEL,J3E,1K00


In [13]:
subset = ['Frequência', 'Num_Serviço', 'Status', 'Classe', 'Fistel', 'Número_da_Estação', 'Latitude', 'Longitude', 'Classe_Emissão', 'Largura_Emissão']
# double = base.duplicated(subset=subset, keep=False)
# base[double]

base.drop_duplicates(subset, keep='first').shape

(881692, 17)

In [105]:
subset = base.columns[:-2]
# double = base.duplicated(subset=subset, keep='first')
# base[double]

base.drop_duplicates(base.columns[:-2], keep='first').reset_index(drop=True)
# base.shape

Unnamed: 0,Frequência,Num_Serviço,Status,Classe,Entidade,Fistel,Número_da_Estação,Município,UF,Latitude,Longitude,Validade_RF,Num_Ato,Data_Ato,Fonte,Classe_Emissão,Largura_Emissão
26,0.038,19,L,OP,LIGHT SERVICOS DE ELETRICIDADE S A,01030063621,756830,Piraí,RJ,-22.629444,-43.895832,2028-12-12,-1,,STEL,N0N,1K00
33,0.040,19,L,OP,LIGHT SERVICOS DE ELETRICIDADE S A,01030063621,758124,Petrópolis,RJ,-22.433332,-43.166668,2028-12-12,-1,,STEL,A3E,5K00
37,0.040,19,L,OP,LIGHT SERVICOS DE ELETRICIDADE S A,01030063621,758051,Carmo,RJ,-21.840000,-42.570000,2028-12-12,-1,,STEL,N0N,1K00
58,0.044,19,L,OP,LIGHT SERVICOS DE ELETRICIDADE S A,01030063621,814180,Volta Redonda,RJ,-22.513611,-44.089722,2028-12-12,-1,,STEL,A3E,5K00
62,0.045,19,L,OP,LIGHT SERVICOS DE ELETRICIDADE S A,01030063621,749680,Rio de Janeiro,RJ,-22.966667,-43.233334,2028-12-12,-1,,STEL,A3E,5K00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
883461,82125.000,19,L,FX,TIM S A,50417425295,1007302558,Ribeirão das Neves,MG,-19.768250,-44.082527,2032-08-31,-1,,STEL,Q7W,62M5
883467,82125.000,19,L,FX,TIM S A,50417425295,1005319658,Uberlândia,MG,-18.942499,-48.300556,2032-08-31,-1,,STEL,Q7W,62M5
883481,82125.000,19,L,FX,TIM S A,50417425295,1005325984,Araxá,MG,-19.582222,-46.953888,2032-08-31,-1,,STEL,Q7W,62M5
883514,82125.000,19,L,FX,TIM S A,50417425295,1008765390,Aparecida de Goiânia,GO,-16.805166,-49.329556,2032-08-31,-1,,STEL,Q7W,62M5


In [14]:
base[base.Número_da_Estação == 1005304065]

Unnamed: 0,Frequência,Num_Serviço,Status,Classe,Entidade,Fistel,Número_da_Estação,Município,UF,Latitude,Longitude,Validade_RF,Num_Ato,Data_Ato,Fonte,Classe_Emissão,Largura_Emissão
744796,8118.32,19,L,FX,TIM S A,50417425295,1005304065,Sabará,MG,-19.897028,-43.807056,2032-08-31,-1,,STEL,D7W,29M7
825221,19343.0,19,L,FX,TIM S A,50417425295,1005304065,Sabará,MG,-19.897028,-43.807056,2032-08-31,-1,,STEL,D7W,55M0
831552,19453.0,19,L,FX,TIM S A,50417425295,1005304065,Sabará,MG,-19.897028,-43.807056,2032-08-31,-1,,STEL,D7W,55M0
865366,23275.0,19,L,FX,TIM S A,50417425295,1005304065,Sabará,MG,-19.897028,-43.807056,2032-08-31,-1,,STEL,D7W,56M0
881466,82125.0,19,L,FX,TIM S A,50417425295,1005304065,Sabará,MG,-19.897028,-43.807056,2032-08-31,-1,,STEL,G7W,2G00
881467,82125.0,19,L,FX,TIM S A,50417425295,1005304065,Sabará,MG,-19.897028,-43.807056,2032-08-31,-1,,STEL,Q7W,62M5


In [None]:
from nbdev.export import notebook2script; notebook2script()

Converted constants.ipynb.
Converted filter.ipynb.
Converted index.ipynb.
Converted queries.ipynb.
