In [1]:
import pandas as pd
import duckdb as db

In [3]:
vra_azu_airports = pd.read_csv(r"C:\Users\USER\Desktop\anac_reg_flights\data_dictionary\vra_azu_airports.csv")
countries_csv = pd.read_csv(r"C:\Users\USER\Desktop\anac_reg_flights\misc_data\sources\countries.csv")
airports = pd.read_csv(r"C:\Users\USER\Desktop\anac_reg_flights\misc_data\sources\airports.csv")

In [4]:
vra_azu_airports = db.sql(
    """
    select
        a.airport_icao,
        coalesce(trim(b.code),'NO IATA')  as airport_iata,
        b.name as airport_name,
        b.latitude,
        b.longitude,
        b.country as country_iso,
        c.name as country
    from vra_azu_airports as a
    left join airports as b on a.airport_icao = b.icao
    left join countries_csv as c on b.country = c.code
    order by airport_icao asc
    """
).df()

In [5]:
vra_azu_airports.head()

Unnamed: 0,airport_icao,airport_iata,airport_name,latitude,longitude,country_iso,country
0,EBBR,BRU,Brussels Airport,50.901034,4.478627,BE,Belgium
1,EHAM,AMS,Amsterdam Airport Schiphol,52.326979,4.741505,NL,Netherlands
2,KFLL,FLL,Fort Lauderdale-Hollywood International Airport,26.072017,-80.150997,US,United States
3,KMCO,MCO,Orlando International Airport,28.412904,-81.309443,US,United States
4,KORD,ORD,O'Hare International Airport,41.977957,-87.909176,US,United States


Get records where airport_iata is missing.

The following missing information will be manually searched through the internet because one site or source can't provide all information for the missing information.

In [6]:
na_vra_azu_airports = db.sql(
    """
    select *
    from vra_azu_airports
    where airport_iata = 'NO IATA'
    """
).df()

In [7]:
na_vra_azu_airports

Unnamed: 0,airport_icao,airport_iata,airport_name,latitude,longitude,country_iso,country
0,KPVG,NO IATA,,,,,
1,SBFE,NO IATA,,,,,
2,SBQV,NO IATA,,,,,
3,SNGV,NO IATA,,,,,
4,SSPB,NO IATA,,,,,
5,SSZW,NO IATA,,,,,
6,SWJI,NO IATA,,,,,
7,SWKN,NO IATA,,,,,
8,SWKQ,NO IATA,,,,,
9,SWSI,NO IATA,,,,,


In [9]:
missing_airports = {
    'SSZW':['PGZ','Ponta Grossa Airport',25.1947,50.1441,'BR','Brazil'],
    'SWKN':['CLV','Caldas Novas Airport',17.725,48.606389,'BR','Brazil'],
    'SWJI':['JPR','Ji-Paraná Airport',10.870556,-61.846667,'BR','Brazil'],
    'SWKQ':['NO IATA','Serra da Capivara Airport',9.083333,-42.644722,'BR','Brazil'],
    'SWSI':['OPS','Presidente João Batista Figueiredo Airport',-11.885,-55.586111,'BR','Brazil'],
    'SBQV':['NO IATA','Pedro Otacílio Figueiredo Airport',14.863611, -40.863056,'BR','Brazil'],
    'SNGV':['GVR','Aeroporto Coronel Altino Machado',-18.8968, -41.9861,'BR','Brazil'],
    'KPVG':['NO IATA','Hampton Roads Executive Airport',36.780278, -76.448889,'US','United States'],
    'SBFE':['FEC','Feira de Santana Airport',-12.200556, -38.906389,'BR','Brazil'],
    'SSPB':['PTO','Pato Branco Airport',-26.217222, -52.694444,'BR','Brazil']
}

Missing airports information can be searched here: https://airportcodes.aero/icao

Other information not available on this site can be searched through wikipedia

In [8]:
airport_icao_list = na_vra_azu_airports['airport_icao'].to_list()

In [10]:
for i in airport_icao_list:
    na_vra_azu_airports.loc[
        na_vra_azu_airports['airport_icao'] == i,
        ['airport_iata','airport_name','latitude','longitude','country_iso','country']
    ] = [missing_airports[i][0],
         missing_airports[i][1],
         missing_airports[i][2],
         missing_airports[i][3],
         missing_airports[i][4],
         missing_airports[i][5],
         ]

In [11]:
na_vra_azu_airports.head()

Unnamed: 0,airport_icao,airport_iata,airport_name,latitude,longitude,country_iso,country
0,KPVG,NO IATA,Hampton Roads Executive Airport,36.780278,-76.448889,US,United States
1,SBFE,FEC,Feira de Santana Airport,-12.200556,-38.906389,BR,Brazil
2,SBQV,NO IATA,Pedro Otacílio Figueiredo Airport,14.863611,-40.863056,BR,Brazil
3,SNGV,GVR,Aeroporto Coronel Altino Machado,-18.8968,-41.9861,BR,Brazil
4,SSPB,PTO,Pato Branco Airport,-26.217222,-52.694444,BR,Brazil


Merge `na_vra_azu_airports` to `vra_azu_airports_non_na`. Extract rows where airport_iata != 'NO IATA'

In [12]:
vra_azu_airports_non_na = db.sql(
    """
    select *
    from vra_azu_airports
    where airport_iata != 'NO IATA'
    
    """
).df()

In [13]:
vra_azu_airports_final = pd.concat([vra_azu_airports_non_na,na_vra_azu_airports], ignore_index=True)

In [14]:
vra_azu_airports_final.head()

Unnamed: 0,airport_icao,airport_iata,airport_name,latitude,longitude,country_iso,country
0,EBBR,BRU,Brussels Airport,50.901034,4.478627,BE,Belgium
1,EHAM,AMS,Amsterdam Airport Schiphol,52.326979,4.741505,NL,Netherlands
2,KFLL,FLL,Fort Lauderdale-Hollywood International Airport,26.072017,-80.150997,US,United States
3,KMCO,MCO,Orlando International Airport,28.412904,-81.309443,US,United States
4,KORD,ORD,O'Hare International Airport,41.977957,-87.909176,US,United States


In [15]:
vra_azu_airports_final['latitude'] = vra_azu_airports_final['latitude'].round(6)
vra_azu_airports_final['longitude'] = vra_azu_airports_final['longitude'].round(6)


In [16]:
vra_azu_airports_final.head()

Unnamed: 0,airport_icao,airport_iata,airport_name,latitude,longitude,country_iso,country
0,EBBR,BRU,Brussels Airport,50.901034,4.478627,BE,Belgium
1,EHAM,AMS,Amsterdam Airport Schiphol,52.326979,4.741505,NL,Netherlands
2,KFLL,FLL,Fort Lauderdale-Hollywood International Airport,26.072017,-80.150997,US,United States
3,KMCO,MCO,Orlando International Airport,28.412904,-81.309443,US,United States
4,KORD,ORD,O'Hare International Airport,41.977957,-87.909176,US,United States


In [70]:
vra_azu_airports_final.to_csv(r"C:\Users\USER\Desktop\anac_reg_flights\data_dictionary\vra_azu_airports_final.csv",index=False)