In [1]:
#| default_exp read
%load_ext autoreload
%autoreload 2
import sys
from fastcore.xtras import Path

# Insert in Path Project Directory
sys.path.insert(0, str(Path().cwd().parent))

In [2]:
from itertools import product
from tqdm.auto import tqdm
import pandas as pd
from geopy.distance import geodesic
from anateldb.read import *
from anateldb.merge import *
P = Path.cwd().parent / 'dados'

In [3]:
# Read the ICAO table and discard the Service and Station columns.
icao = read_icao(P).drop(['Service', 'Station'], axis=1)
icao.head()

Unnamed: 0,Frequency,Latitude,Longitude,Description
0,109.1,-25.6,-54.466667,"[ICAO] ILS, FOZ DO IGUACU CATARATAS"
1,109.1,-21.783333,-43.383335,"[ICAO] ILS/DME, JUIZ DE FORA"
2,109.3,-1.366667,-48.466667,"[ICAO] ILS/DME, BELEM VAL DE CAES"
3,109.3,-15.866667,-47.933334,"[ICAO] ILS/DME, BRASILIA INTL."
4,109.3,-25.533333,-49.166668,"[ICAO] ILS, CURITIBA AFONSO PEÑA"


In [4]:
# Read the AISW table and discard the Service and Station columns.
aisw = read_aisw(P).drop(['Service', 'Station'], axis=1)
aisw.head()

Unnamed: 0,Frequency,Latitude,Longitude,Description
0,0.2,-22.923334,-42.071499,"[AISW] SBCB-RDONAV, NDB BFR, Cabo Frio"
1,0.205,-1.486333,-56.397835,"[AISW] SBTB-RDONAV, NDB PTT, Trombetas"
2,0.205,-5.386167,-35.530998,"[AISW] SNXX-RDONAV, NDB MXN, Maxaranguape"
3,0.205,-5.386167,-35.530998,"[AISW] SBNT-RDONAV, NDB MXN, CAMPO AUGUSTO SEVERO"
4,0.21,-19.561001,-46.964668,"[AISW] SBAX-RDONAV, NDB ARX, Romeu Zema"


In [5]:
# Read the AISG table and discard the Service and Station columns.
aisg = read_aisg(P).drop(['Service', 'Station'], axis=1)
aisg.head()

Unnamed: 0,Frequency,Latitude,Longitude,Description
0,0.114,-32.340057,-54.223888,[AISG] NDB - MELO
1,0.2,-22.923334,-42.071499,[AISG] NDB - CABO FRIO OPR COSTA DO SOL OPERAD...
2,0.205,-1.486333,-56.397835,[AISG] NDB - TROMBETAS COVERAGE 100NM; OPR MIN...
3,0.205,-5.386167,-35.530998,[AISG] NDB - MAXARANGUAPE
4,0.21,-19.561001,-46.964668,[AISG] NDB - ARAXÁ COVERAGE 50NM


In [6]:
# Distinct frequencies listed by each source: f1 <- icao, f2 <- aisw, f3 <- aisg.
f1, f2, f3 = (set(source.Frequency.tolist()) for source in (icao, aisw, aisg))

In [7]:
# Split the shared frequencies by which sources list them.
A = f1 & f2 & f3    # listed by icao, aisw and aisg
B = (f1 & f2) - A   # listed by icao and aisw only
C = (f2 & f3) - A   # listed by aisw and aisg only
D = (f1 & f3) - A   # listed by icao and aisg only
# NOTE(review): a frequency listed by a single source belongs to none of these
# sets; confirm whether such records are expected to reach the merged table.

In [8]:
# Report how many frequencies fall in each group, on a single line.
print(*(f'len({label}) = {len(group)}' for label, group in zip('ABCD', (A, B, C, D))))

len(A) = 74 len(B) = 174 len(C) = 46 len(D) = 8


In [9]:
# Columns read from each source record when comparing stations.
COLS = ['Latitude', 'Longitude', 'Description']
# Accumulator for the merged records; starts empty with the output columns.
df = pd.DataFrame(columns=['Frequency', *COLS])
# Threshold compared against geodesic(...).km, i.e. a distance in kilometres.
DIST = 0.5

def check_add_row(df, f, f1, f2, sa, sb):
    """Merge two records into one row of ``df`` if neither was used before.

    Args:
        df: DataFrame accumulating the merged rows.
        f: Frequency value written to the new row.
        f1, f2: Records exposing ``Index``, ``Latitude``, ``Longitude`` and
            ``Description`` attributes.
        sa, sb: Mappings of still-unmatched records, keyed by ``Index``;
            ``f1`` is looked up in ``sa`` and ``f2`` in ``sb``.

    Returns:
        ``df`` itself when either record is no longer pending; otherwise a new
        DataFrame with one extra row holding the averaged coordinates and both
        descriptions joined by ``' | '``. In the latter case the two records
        are removed from ``sa`` and ``sb``.
    """
    if f1.Index not in sa or f2.Index not in sb:
        return df
    merged = {
        'Frequency': f,
        'Latitude': (f1.Latitude + f2.Latitude) / 2,
        'Longitude': (f1.Longitude + f2.Longitude) / 2,
        'Description': f'{f1.Description} | {f2.Description}',
    }
    # Mark both records as consumed so they cannot be merged a second time.
    del sa[f1.Index]
    del sb[f2.Index]
    return pd.concat([df, pd.DataFrame(merged, index=[0])], ignore_index=True)

def get_subsets(f, *args, cols=None):
    """Collect, per DataFrame, the records registered on frequency ``f``.

    Args:
        f: Frequency value compared against each frame's ``Frequency`` column.
        *args: DataFrames to filter.
        cols: Columns to keep in the returned records. Defaults to the
            notebook-level ``COLS`` list.

    Returns:
        A list with one dict per input frame, in the same order, mapping the
        row index label to the ``itertuples()`` record of that row.
    """
    if cols is None:
        cols = COLS
    # Row mask and column list must go through .loc; plain frame[mask, cols]
    # is treated as a single tuple key and raises.
    return [
        {rec.Index: rec for rec in frame.loc[frame.Frequency == f, cols].itertuples()}
        for frame in args
    ]

# Merge the frequencies listed by all three sources (set A).
# `total` counts every source record visited; the sanity check uses it later.
total = 0
for f in tqdm(A):
    sa, sb, sc = get_subsets(f, icao, aisw, aisg)
    total += len(sa) + len(sb) + len(sc)
    if all([sa, sb, sc]):
        # The candidate lists are snapshots, so removing entries from sa/sb/sc
        # while iterating is safe; consumed records are rejected by the
        # membership checks.
        # NOTE(review): matching is greedy and therefore depends on iteration
        # order: a pair may be merged before a later triple containing it is seen.
        for fa, fb, fc in product(list(sa.values()), list(sb.values()), list(sc.values())):
            ca = (fa.Latitude, fa.Longitude)
            cb = (fb.Latitude, fb.Longitude)
            cc = (fc.Latitude, fc.Longitude)
            near_ab = geodesic(ca, cb).km <= DIST
            near_ac = geodesic(ca, cc).km <= DIST
            near_bc = geodesic(cb, cc).km <= DIST
            if near_ab and near_ac and near_bc:
                # All three records are within DIST of each other: one merged row.
                if (fa.Index in sa) and (fb.Index in sb) and (fc.Index in sc):
                    lat = (ca[0] + cb[0] + cc[0]) / 3
                    long = (ca[1] + cb[1] + cc[1]) / 3
                    desc = ' | '.join([fa.Description, fb.Description, fc.Description])
                    d = {'Frequency': f, 'Latitude': lat, 'Longitude': long, 'Description': desc}
                    df = pd.concat([df, pd.DataFrame(d, index=[0])], ignore_index=True)
                    sa.pop(fa.Index)
                    sb.pop(fb.Index)
                    sc.pop(fc.Index)
            # A pair is merged only when it is itself within DIST and the third
            # record is farther than DIST from both members of the pair.
            elif near_ab and not (near_ac or near_bc):
                df = check_add_row(df, f, fa, fb, sa, sb)
            elif near_bc and not (near_ab or near_ac):
                df = check_add_row(df, f, fb, fc, sb, sc)
            elif near_ac and not (near_ab or near_bc):
                df = check_add_row(df, f, fa, fc, sa, sc)
    # Records that found no partner are kept as they are; append them in one go.
    leftovers = [{'Frequency': f,
                  'Latitude': r.Latitude,
                  'Longitude': r.Longitude,
                  'Description': r.Description}
                 for reg in (sa, sb, sc) for r in reg.values()]
    if leftovers:
        df = pd.concat([df, pd.DataFrame(leftovers)], ignore_index=True)
                    


  0%|          | 0/74 [00:00<?, ?it/s]

In [10]:
# Order the rows merged so far by frequency and show the result.
df = df.sort_values(by='Frequency')
df

Unnamed: 0,Frequency,Latitude,Longitude,Description
279,112.0,-29.709797,-53.712704,"[AISW] SBSM-RDONAV, VOR/DME SMA, Santa Maria |..."
282,112.0,-0.05,-51.066666,"[ICAO] VOR/DME, MACAPA INTL."
281,112.0,-7.6,-72.76667,"[ICAO] VOR/DME, CRUZEIRO DO SUL INTL."
280,112.0,-23.0,-47.133335,"[ICAO] VOR/DME, CAMPINAS VIRACOPOS, SP"
285,112.0,0.052246,-51.073112,[AISG] VOR - MACAPÁ OPR INFRAERO
...,...,...,...,...
162,1209.0,-15.865013,-47.900188,[AISG] DME - KUBITSCHEK 122X
180,1211.0,-19.834583,-44.001751,"[DOC] VOR/DME, BELO HORIZONTE PAMPULHA (Ground..."
181,1211.0,4.693,-61.028831,[AISG] DME - LA DIVINA PASTORA 124X
182,1211.0,-25.778656,-49.763241,[AISG] DME - LAPA-PR 124X


In [11]:
# Merge the frequencies listed by icao and aisw only (set B).
for f in tqdm(B):
    sa, sb = get_subsets(f, icao, aisw)
    total += len(sa) + len(sb)
    if all([sa, sb]):
        for fa, fb in product(list(sa.values()), list(sb.values())):
            # Skip records already consumed by an earlier match.
            if fa.Index not in sa or fb.Index not in sb:
                continue
            ca = (fa.Latitude, fa.Longitude)
            cb = (fb.Latitude, fb.Longitude)
            if geodesic(ca, cb).km <= DIST:
                # check_add_row removes both records from sa/sb itself;
                # popping them again here raised KeyError.
                df = check_add_row(df, f, fa, fb, sa, sb)
    # Records that found no partner are kept as they are; append them in one go.
    leftovers = [{'Frequency': f,
                  'Latitude': r.Latitude,
                  'Longitude': r.Longitude,
                  'Description': r.Description}
                 for reg in (sa, sb) for r in reg.values()]
    if leftovers:
        df = pd.concat([df, pd.DataFrame(leftovers)], ignore_index=True)


  0%|          | 0/174 [00:00<?, ?it/s]

In [12]:
# Re-sort by frequency after adding the set B records and show the result.
df = df.sort_values(by='Frequency')
df

Unnamed: 0,Frequency,Latitude,Longitude,Description
386,109.1,-25.6,-54.466667,"[ICAO] ILS, FOZ DO IGUACU CATARATAS"
389,109.1,-25.602833,-54.4785,"[AISW] SBFI-RDONAV, ILS/DME 14 IFI, Cataratas"
387,109.1,-21.783333,-43.383335,"[ICAO] ILS/DME, JUIZ DE FORA"
388,109.1,-7.2745,-35.889,"[AISW] SBKG-RDONAV, LOC/DME 15 IKG, Presidente..."
630,109.3,-8.722333,-63.901669,"[AISW] SBPV-RDONAV, ILS/DME 19 IPV, Governador..."
...,...,...,...,...
335,1209.0,-10.024174,-58.916761,"[DOC] VOR/DME, AMAZONICA (Ground-based DME) | ..."
338,1211.0,-19.834583,-44.001751,"[DOC] VOR/DME, BELO HORIZONTE PAMPULHA (Ground..."
339,1211.0,4.693,-61.028831,[AISG] DME - LA DIVINA PASTORA 124X
340,1211.0,-25.778656,-49.763241,[AISG] DME - LAPA-PR 124X


In [13]:
# Merge the frequencies listed by aisw and aisg only (set C).
for f in tqdm(C):
    # COLS is the column list defined alongside check_add_row; the lowercase
    # `cols` used previously is not defined in any visible cell.
    sb = {s.Index: s for s in aisw.loc[aisw.Frequency == f, COLS].itertuples()}
    sc = {s.Index: s for s in aisg.loc[aisg.Frequency == f, COLS].itertuples()}
    total += len(sb) + len(sc)
    if all([sb, sc]):
        for fb, fc in product(list(sb.values()), list(sc.values())):
            # Skip records already consumed by an earlier match.
            if fb.Index not in sb or fc.Index not in sc:
                continue
            cb = (fb.Latitude, fb.Longitude)
            cc = (fc.Latitude, fc.Longitude)
            if geodesic(cb, cc).km <= DIST:
                # check_add_row appends the merged row and removes both
                # records from sb/sc, replacing the undefined add_row + pops.
                df = check_add_row(df, f, fb, fc, sb, sc)
    # Records that found no partner are kept as they are; append them in one go.
    leftovers = [{'Frequency': f,
                  'Latitude': r.Latitude,
                  'Longitude': r.Longitude,
                  'Description': r.Description}
                 for reg in (sb, sc) for r in reg.values()]
    if leftovers:
        df = pd.concat([df, pd.DataFrame(leftovers)], ignore_index=True)


  0%|          | 0/46 [00:00<?, ?it/s]

In [14]:
# Re-sort by frequency after adding the set C records and show the result.
df = df.sort_values(by='Frequency')
df

Unnamed: 0,Frequency,Latitude,Longitude,Description
1477,0.2,-22.923334,-42.071499,"[AISW] SBCB-RDONAV, NDB BFR, Cabo Frio | [AISG..."
1508,0.205,-5.386167,-35.530998,"[AISW] SNXX-RDONAV, NDB MXN, Maxaranguape | [A..."
1509,0.205,-5.386167,-35.530998,"[AISW] SBNT-RDONAV, NDB MXN, CAMPO AUGUSTO SEVERO"
1507,0.205,-1.486333,-56.397835,"[AISW] SBTB-RDONAV, NDB PTT, Trombetas | [AISG..."
1504,0.21,-19.561001,-46.964668,"[AISW] SBAX-RDONAV, NDB ARX, Romeu Zema | [AIS..."
...,...,...,...,...
1464,1209.0,-10.024174,-58.916761,"[DOC] VOR/DME, AMAZONICA (Ground-based DME) | ..."
1468,1211.0,-19.83577,-44.003563,[AISG] DME - BELO HORIZONTE 124X
1467,1211.0,-25.778656,-49.763241,[AISG] DME - LAPA-PR 124X
1466,1211.0,4.693,-61.028831,[AISG] DME - LA DIVINA PASTORA 124X


In [15]:
# Merge the frequencies listed by icao and aisg only (set D).
for f in tqdm(D):
    # COLS is the column list defined alongside check_add_row; the lowercase
    # `cols` used previously is not defined in any visible cell.
    sa = {s.Index: s for s in icao.loc[icao.Frequency == f, COLS].itertuples()}
    sc = {s.Index: s for s in aisg.loc[aisg.Frequency == f, COLS].itertuples()}
    total += len(sa) + len(sc)
    if all([sa, sc]):
        for fa, fc in product(list(sa.values()), list(sc.values())):
            # Skip records already consumed by an earlier match.
            if fa.Index not in sa or fc.Index not in sc:
                continue
            ca = (fa.Latitude, fa.Longitude)
            cc = (fc.Latitude, fc.Longitude)
            if geodesic(ca, cc).km <= DIST:
                # check_add_row appends the merged row and removes both
                # records from sa/sc, replacing the undefined add_row + pops.
                df = check_add_row(df, f, fa, fc, sa, sc)
    # Records that found no partner are kept as they are; append them in one go.
    leftovers = [{'Frequency': f,
                  'Latitude': r.Latitude,
                  'Longitude': r.Longitude,
                  'Description': r.Description}
                 for reg in (sa, sc) for r in reg.values()]
    if leftovers:
        df = pd.concat([df, pd.DataFrame(leftovers)], ignore_index=True)


  0%|          | 0/8 [00:00<?, ?it/s]

In [16]:
# Final ordering by frequency once all four groups have been processed.
df = df.sort_values(by='Frequency')
df

Unnamed: 0,Frequency,Latitude,Longitude,Description
0,0.2,-22.923334,-42.071499,"[AISW] SBCB-RDONAV, NDB BFR, Cabo Frio | [AISG..."
1,0.205,-5.386167,-35.530998,"[AISW] SNXX-RDONAV, NDB MXN, Maxaranguape | [A..."
2,0.205,-5.386167,-35.530998,"[AISW] SBNT-RDONAV, NDB MXN, CAMPO AUGUSTO SEVERO"
3,0.205,-1.486333,-56.397835,"[AISW] SBTB-RDONAV, NDB PTT, Trombetas | [AISG..."
4,0.21,-19.561001,-46.964668,"[AISW] SBAX-RDONAV, NDB ARX, Romeu Zema | [AIS..."
...,...,...,...,...
1539,1209.0,-4.195,-69.940552,[AISG] DME - LETÍCIA 122X
1544,1211.0,-19.834583,-44.001751,"[DOC] VOR/DME, BELO HORIZONTE PAMPULHA (Ground..."
1543,1211.0,4.693,-61.028831,[AISG] DME - LA DIVINA PASTORA 124X
1542,1211.0,-25.778656,-49.763241,[AISG] DME - LAPA-PR 124X


In [17]:
# Number of source records visited by the merge loops above.
total

2000

In [18]:
# Rows whose description holds at least two '|' separators, i.e. rows built
# from three source records. Raw string: '\|' is not a valid escape sequence
# in a normal string literal.
# NOTE(review): assumes no source description contains '|' on its own.
three_merges = df[df.Description.str.contains(r'\|.*\|')]
three_merges

Unnamed: 0,Frequency,Latitude,Longitude,Description
159,113.2,-3.249568,-52.248437,"[ICAO] VOR/DME, ALTAMIRA | [AISW] SBHT-RDONAV,..."
233,116.2,-22.950984,-46.568768,"[ICAO] VOR/DME, BRAGANCA | [AISW] SBBP-RDONAV,..."
1517,1196.0,-22.950953,-46.568799,"[DOC] VOR/DME, BRAGANCA (Ground-based DME) | [..."


In [19]:
# Rows with a '|' separator that were not already classified as three-way
# merges, i.e. rows built from exactly two source records. Raw string: '\|' is
# not a valid escape sequence in a normal string literal.
two_merges = df[(df.Description.str.contains(r'[\|]{1}')) & (~df.index.isin(three_merges.index))]
two_merges


Unnamed: 0,Frequency,Latitude,Longitude,Description
0,0.2,-22.923334,-42.071499,"[AISW] SBCB-RDONAV, NDB BFR, Cabo Frio | [AISG..."
1,0.205,-5.386167,-35.530998,"[AISW] SNXX-RDONAV, NDB MXN, Maxaranguape | [A..."
3,0.205,-1.486333,-56.397835,"[AISW] SBTB-RDONAV, NDB PTT, Trombetas | [AISG..."
4,0.21,-19.561001,-46.964668,"[AISW] SBAX-RDONAV, NDB ARX, Romeu Zema | [AIS..."
5,0.23,-7.266,-35.892666,"[AISW] SBKG-RDONAV, NDB CPG, Presidente João S..."
...,...,...,...,...
1533,1203.0,-8.134917,-34.930334,"[DOC] VOR/DME, RECIFE GUARARAPES (Ground-based..."
1535,1204.0,-19.689023,-47.060537,"[DOC] SBAX-RDONAV, VOR/DME ARX, Romeu Zema (Gr..."
1536,1207.0,-1.384344,-48.478533,"[DOC] SBBE-RDONAV, VOR/DME BEL, Val de Cans - ..."
1540,1209.0,-10.024174,-58.916761,"[DOC] VOR/DME, AMAZONICA (Ground-based DME) | ..."


In [20]:
# Rows without any '|' separator, i.e. records kept from a single source.
# Raw string: '\|' is not a valid escape sequence in a normal string literal.
no_merge = df[~df.Description.str.contains(r'[\|]{1}')]
no_merge

Unnamed: 0,Frequency,Latitude,Longitude,Description
2,0.205,-5.386167,-35.530998,"[AISW] SBNT-RDONAV, NDB MXN, CAMPO AUGUSTO SEVERO"
9,0.25,-29.694723,-57.148056,[AISG] NDB - PASO DE LOS LIBRES OPR ARGENTINA
14,0.275,2.068833,-50.8605,[AISG] NDB - AMAPÁ COVERAGE 100NM.
23,0.3,-29.996307,-50.145004,[AISG] NDB - TRAMANDAÍ OPR MAR
29,0.33,-16.335333,-58.387165,[AISG] NDB - SAN MATIAS
...,...,...,...,...
1538,1209.0,-15.865013,-47.900188,[AISG] DME - KUBITSCHEK 122X
1539,1209.0,-4.195,-69.940552,[AISG] DME - LETÍCIA 122X
1543,1211.0,4.693,-61.028831,[AISG] DME - LA DIVINA PASTORA 124X
1542,1211.0,-25.778656,-49.763241,[AISG] DME - LAPA-PR 124X


## Sanity Check

In [21]:
# Each unmerged row stands for one source record, each two-way merge for two
# and each three-way merge for three; together they must equal the number of
# records visited.
assert len(no_merge) + 2 * len(two_merges) + 3 * len(three_merges) == total, "Verifique a mesclagem das bases da aeronáutica"

In [22]:
# Display the merged table that is written to disk in the next step.
df

Unnamed: 0,Frequency,Latitude,Longitude,Description
0,0.2,-22.923334,-42.071499,"[AISW] SBCB-RDONAV, NDB BFR, Cabo Frio | [AISG..."
1,0.205,-5.386167,-35.530998,"[AISW] SNXX-RDONAV, NDB MXN, Maxaranguape | [A..."
2,0.205,-5.386167,-35.530998,"[AISW] SBNT-RDONAV, NDB MXN, CAMPO AUGUSTO SEVERO"
3,0.205,-1.486333,-56.397835,"[AISW] SBTB-RDONAV, NDB PTT, Trombetas | [AISG..."
4,0.21,-19.561001,-46.964668,"[AISW] SBAX-RDONAV, NDB ARX, Romeu Zema | [AIS..."
...,...,...,...,...
1539,1209.0,-4.195,-69.940552,[AISG] DME - LETÍCIA 122X
1544,1211.0,-19.834583,-44.001751,"[DOC] VOR/DME, BELO HORIZONTE PAMPULHA (Ground..."
1543,1211.0,4.693,-61.028831,[AISG] DME - LA DIVINA PASTORA 124X
1542,1211.0,-25.778656,-49.763241,[AISG] DME - LAPA-PR 124X


In [29]:
# Write the merged table to the data directory P as a gzip-compressed Parquet file.
df.to_parquet(P / 'aero.parquet.gzip', compression = 'gzip')

In [23]:
# Load the base table from the data directory P via read_base.
base = read_base(P)

In [26]:
# (rows, columns) of the base table.
base.shape

(914754, 17)

In [25]:
# Frequencies that appear both in the base table and in the merged table.
F = set(base.Frequência) & set(df.Frequency)
len(F)

190

In [27]:
# Match base-table records against the merged aeronautical table, per frequency.
# NOTE(review): the recorded run of this cell stopped with
# KeyError "['Description'] not in index" on the `base.loc[...]` selection, so
# `base` has no 'Description' column. The column of `base` that should play
# that role cannot be determined from this notebook; until it is mapped, this
# cell still fails at the first selection.
# NOTE(review): leftover rows are appended to `base` under the key 'Frequency'
# while `base` is filtered through 'Frequência', and unmatched `sa` records
# already come from `base`; confirm the intended target and schema.
for f in tqdm(F):
    # COLS is the column list defined alongside check_add_row; the lowercase
    # `cols` used previously is not defined in any visible cell.
    sa = list(base.loc[base.Frequência == f, COLS].itertuples())
    sa = {s.Index: s for s in sa}
    sb = list(df.loc[df.Frequency == f, COLS].itertuples())
    sb = {s.Index: s for s in sb}
    # Count the two subsets built in this iteration; `sc` was a stale
    # variable left over from an earlier loop.
    total += len(sa) + len(sb)
    if all([sa, sb]):
        for fa, fb in list(product(sa.copy().values(), sb.copy().values())):
            ca = (fa.Latitude, fa.Longitude)
            cb = (fb.Latitude, fb.Longitude)
            dab = geodesic(ca, cb).km
            if (fa.Index in sa) and (fb.Index in sb) and (dab <= DIST):
                # check_add_row appends the merged row and removes both
                # records from sa/sb, replacing the undefined add_row + pops.
                df = check_add_row(df, f, fa, fb, sa, sb)
    for reg in [sa, sb]:
        for r in reg.values():
            r = {'Frequency': f, 
                    'Latitude': r.Latitude, 
                    'Longitude': r.Longitude, 
                    'Description': r.Description}
            base = pd.concat([base, pd.DataFrame(r, index=[0])], ignore_index=True)


  0%|          | 0/190 [00:00<?, ?it/s]

KeyError: "['Description'] not in index"