In [1]:
import pandas as pd
import duckdb as db

In [2]:
vra_2017_2023 = pd.read_parquet(r"C:\Users\USER\Desktop\vra_bfd_dataset\VRA_PARQUET\vra_2017_2023.parquet")

In [3]:
vra_2017_2023.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6057059 entries, 0 to 83564
Data columns (total 20 columns):
 #   Column                        Dtype 
---  ------                        ----- 
 0   Sigla ICAO Empresa Aérea      object
 1   Empresa Aérea                 object
 2   Número Voo                    object
 3   Código DI                     object
 4   Código Tipo Linha             object
 5   Modelo Equipamento            object
 6   Número de Assentos            object
 7   Sigla ICAO Aeroporto Origem   object
 8   Descrição Aeroporto Origem    object
 9   Partida Prevista              object
 10  Partida Real                  object
 11  Sigla ICAO Aeroporto Destino  object
 12  Descrição Aeroporto Destino   object
 13  Chegada Prevista              object
 14  Chegada Real                  object
 15  Situação Voo                  object
 16  Justificativa                 object
 17  Referência                    object
 18  Situação Partida              object
 19  Situaçã

In [None]:
db.sql(
    """
    select
        trim("Sigla ICAO Empresa Aérea") airline,
        count(*)
    from vra_2017_2023
    where airline in ('TAM','AZU','GLO')
    and "Código DI" = '0'
    group by airline
    """
)

┌─────────┬──────────────┐
│ airline │ count_star() │
│ varchar │    int64     │
├─────────┼──────────────┤
│ AZU     │      1808659 │
│ TAM     │      1430487 │
│ GLO     │      1440529 │
└─────────┴──────────────┘

Get Origin and Destination airports for regular flights

In [13]:
vra_azu_airports = db.sql(
    """
    with origin_airports as (
        select
            trim("Sigla ICAO Aeroporto Origem") as airport_icao
        from vra_2017_2023
        where trim("Código DI") = '0'
        and trim("Sigla ICAO Empresa Aérea") = 'AZU'
    ),
    destination_airports as (
        select
            trim("Sigla ICAO Aeroporto Destino") as airport_icao
        from vra_2017_2023
        where trim("Código DI") = '0'
        and trim("Sigla ICAO Empresa Aérea") = 'AZU'
    ),
    all_airports as (
        select
            airport_icao
        from origin_airports
        union
        select
            airport_icao
        from destination_airports
    )
    select *
    from all_airports
    where airport_icao is not null and airport_icao != ''
    """
).df()

In [14]:
vra_azu_airports.head()

Unnamed: 0,airport_icao
0,SBFL
1,SBUL
2,SBPA
3,LPPT
4,SBGR


Save as csv for future use

In [15]:
vra_azu_airports.to_csv(r"C:\Users\USER\Desktop\anac_reg_flights\data_dictionary\vra_azu_airports.csv",index=False)

Create vra airports data dictionary

In [49]:
vra_airports_csv = pd.read_csv(r"C:\Users\USER\Desktop\anac_reg_flights\data_dictionary\vra_airports.csv")

In [8]:
countries_csv = pd.read_csv(r"C:\Users\USER\Desktop\anac_reg_flights\misc_data\countries.csv")

In [59]:
ourairports = pd.read_csv(r"C:\Users\USER\Desktop\anac_reg_flights\data_dictionary\ourairports.csv")

In [55]:
countries_csv.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 248 entries, 0 to 247
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   id              248 non-null    int64 
 1   code            247 non-null    object
 2   name            248 non-null    object
 3   continent       207 non-null    object
 4   wikipedia_link  248 non-null    object
 5   keywords        232 non-null    object
dtypes: int64(1), object(5)
memory usage: 11.8+ KB


In [38]:
countries_csv.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 248 entries, 0 to 247
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   id              248 non-null    int64 
 1   code            247 non-null    object
 2   name            248 non-null    object
 3   continent       207 non-null    object
 4   wikipedia_link  248 non-null    object
 5   keywords        232 non-null    object
dtypes: int64(1), object(5)
memory usage: 11.8+ KB


In [60]:
ourairports.head()

Unnamed: 0,id,ident,type,name,latitude_deg,longitude_deg,elevation_ft,continent,iso_country,iso_region,municipality,scheduled_service,gps_code,iata_code,local_code,home_link,wikipedia_link,keywords
0,6523,00A,heliport,Total RF Heliport,40.070985,-74.933689,11.0,,US,US-PA,Bensalem,no,K00A,,00A,https://www.penndot.pa.gov/TravelInPA/airports...,,
1,323361,00AA,small_airport,Aero B Ranch Airport,38.704022,-101.473911,3435.0,,US,US-KS,Leoti,no,00AA,,00AA,,,
2,6524,00AK,small_airport,Lowell Field,59.947733,-151.692524,450.0,,US,US-AK,Anchor Point,no,00AK,,00AK,,,
3,6525,00AL,small_airport,Epps Airpark,34.864799,-86.770302,820.0,,US,US-AL,Harvest,no,00AL,,00AL,,,
4,506791,00AN,small_airport,Katmai Lodge Airport,59.093287,-156.456699,80.0,,US,US-AK,King Salmon,no,00AN,,00AN,,,


In [68]:
vra_airports_final_2 = db.sql(
    """
    select
        a.airport_icao as icao,
        trim(b.iata_code) as iata,
        trim(b.name) as airport_name,
        b.latitude_deg as latitude,
        b.longitude_deg as longitude,
        trim(b.iso_country) as country_iso,
        trim(c.name) as country
    from vra_airports_csv as a
    left join ourairports as b on a.airport_icao = b.ident
    left join countries_csv as c on b.iso_country = c.code
    """
).df()

In [69]:
vra_airports_final_2.head()

Unnamed: 0,icao,iata,airport_name,latitude,longitude,country_iso,country
0,SAAR,ROS,Rosario Islas Malvinas International Airport,-32.9036,-60.785,AR,Argentina
1,SABE,AEP,Jorge Newbery Airpark,-34.5592,-58.4156,AR,Argentina
2,SACE,,Escuela de Aviación Militar (Military Aviation...,-31.444223,-64.283377,AR,Argentina
3,SACO,COR,Ingeniero Ambrosio Taravella Airport,-31.323601,-64.208,AR,Argentina
4,SAEZ,EZE,Minister Pistarini International Airport,-34.8222,-58.5358,AR,Argentina


In [71]:
iata_missing = db.sql(
    """
    select *
    from vra_airports_final_2
    where iata is null or iata =''
    """
).df()

In [72]:
iata_missing.head()

Unnamed: 0,icao,iata,airport_name,latitude,longitude,country_iso,country
0,SACE,,Escuela de Aviación Militar (Military Aviation...,-31.444223,-64.283377,AR,Argentina
1,SBJC,,Belém/Brigadeiro Protásio de Oliveira Airport,-1.415051,-48.459805,BR,Brazil
2,SBNT,,Natal Air Force Base,-5.91142,-35.2477,BR,Brazil
3,SBQV,,Pedro Otacílio Figueiredo Airport,-14.862567,-40.863186,BR,Brazil
4,SCRG,,La Independencia Airport,-34.173698,-70.775703,CL,Chile
