In [2]:
# Libraries
import pandas as pd
import pycountry_convert as pc
import numpy as np
import re

In [3]:
# The file is read without a header row, so the column labels are supplied here in file order.
airport_columns = [
    "airport_id", "name", "city", "country", "IATA", "ICAO", "latitude", "longitude",
    "altitude", "time_zone", "dst", "tz_database_time_zone", "type", "source",
]
# Columns discarded straight after loading.
unused_airport_columns = [
    "airport_id", "ICAO", "altitude", "time_zone", "dst", "tz_database_time_zone", "type", "source",
]

airports = (
    pd.read_csv(r"data/airports.csv", header=None)
    .set_axis(airport_columns, axis=1)
    .drop(columns=unused_airport_columns)
)
airports

Unnamed: 0,name,city,country,IATA,latitude,longitude
0,Goroka Airport,Goroka,Papua New Guinea,GKA,-6.081690,145.391998
1,Madang Airport,Madang,Papua New Guinea,MAG,-5.207080,145.789001
2,Mount Hagen Kagamuga Airport,Mount Hagen,Papua New Guinea,HGU,-5.826790,144.296005
3,Nadzab Airport,Nadzab,Papua New Guinea,LAE,-6.569803,146.725977
4,Port Moresby Jacksons International Airport,Port Moresby,Papua New Guinea,POM,-9.443380,147.220001
...,...,...,...,...,...,...
7693,Rogachyovo Air Base,Belaya,Russia,\N,71.616699,52.478298
7694,Ulan-Ude East Airport,Ulan Ude,Russia,\N,51.849998,107.737999
7695,Krechevitsy Air Base,Novgorod,Russia,\N,58.625000,31.385000
7696,Desierto de Atacama Airport,Copiapo,Chile,CPO,-27.261200,-70.779198


In [4]:
def get_continent_code(countries=None):
    """Translate country names into two-letter continent codes.

    Each name is first converted to its ISO alpha-2 country code and then to
    a continent code using ``pycountry_convert``.

    Parameters
    ----------
    countries : iterable of str, optional
        Country names to translate. When omitted, the ``country`` column of
        the module-level ``airports`` frame is used.

    Returns
    -------
    list
        One entry per input name, in input order: the continent code, or
        ``np.nan`` when the name (or its alpha-2 code) is not known to
        ``pycountry_convert``.
    """
    if countries is None:
        countries = airports["country"]

    # Many rows share the same country, so each distinct name is resolved once.
    resolved = {}
    continent_codes = []
    for country in countries:
        if country not in resolved:
            try:
                alpha2 = pc.country_name_to_country_alpha2(country, cn_name_format="default")
                resolved[country] = pc.country_alpha2_to_continent_code(alpha2)
            except KeyError:
                # pycountry_convert signals an unknown name / code with KeyError.
                # Anything else (e.g. a NameError from a missing import) is a real
                # bug and is allowed to propagate instead of becoming NaN.
                resolved[country] = np.nan
        continent_codes.append(resolved[country])
    return continent_codes

In [5]:
# Note: despite its label, the "country_code" column holds the continent codes
# returned by get_continent_code() (one value per airport row).
airports = airports.assign(country_code=get_continent_code())
airports

Unnamed: 0,name,city,country,IATA,latitude,longitude,country_code
0,Goroka Airport,Goroka,Papua New Guinea,GKA,-6.081690,145.391998,OC
1,Madang Airport,Madang,Papua New Guinea,MAG,-5.207080,145.789001,OC
2,Mount Hagen Kagamuga Airport,Mount Hagen,Papua New Guinea,HGU,-5.826790,144.296005,OC
3,Nadzab Airport,Nadzab,Papua New Guinea,LAE,-6.569803,146.725977,OC
4,Port Moresby Jacksons International Airport,Port Moresby,Papua New Guinea,POM,-9.443380,147.220001,OC
...,...,...,...,...,...,...,...
7693,Rogachyovo Air Base,Belaya,Russia,\N,71.616699,52.478298,EU
7694,Ulan-Ude East Airport,Ulan Ude,Russia,\N,51.849998,107.737999,EU
7695,Krechevitsy Air Base,Novgorod,Russia,\N,58.625000,31.385000,EU
7696,Desierto de Atacama Airport,Copiapo,Chile,CPO,-27.261200,-70.779198,SA


In [6]:
# Distinct country names for which no continent code could be resolved.
airports.loc[airports["country_code"].isna(), "country"].unique()

array(["Cote d'Ivoire", 'Congo (Brazzaville)', 'Congo (Kinshasa)',
       'Saint Helena', 'Reunion', 'Antarctica', 'West Bank',
       'Midway Islands', 'Virgin Islands', 'Netherlands Antilles',
       'Burma', 'East Timor', 'Johnston Atoll', 'Western Sahara',
       'Wake Island'], dtype=object)

In [7]:
# Rows whose IATA field is the literal placeholder \N get a proper missing value instead.
airports.loc[airports["IATA"] == r"\N", "IATA"] = np.nan


In [8]:
# Show the frame again after the IATA clean-up in the previous cell.
airports


Unnamed: 0,name,city,country,IATA,latitude,longitude,country_code
0,Goroka Airport,Goroka,Papua New Guinea,GKA,-6.081690,145.391998,OC
1,Madang Airport,Madang,Papua New Guinea,MAG,-5.207080,145.789001,OC
2,Mount Hagen Kagamuga Airport,Mount Hagen,Papua New Guinea,HGU,-5.826790,144.296005,OC
3,Nadzab Airport,Nadzab,Papua New Guinea,LAE,-6.569803,146.725977,OC
4,Port Moresby Jacksons International Airport,Port Moresby,Papua New Guinea,POM,-9.443380,147.220001,OC
...,...,...,...,...,...,...,...
7693,Rogachyovo Air Base,Belaya,Russia,,71.616699,52.478298,EU
7694,Ulan-Ude East Airport,Ulan Ude,Russia,,51.849998,107.737999,EU
7695,Krechevitsy Air Base,Novgorod,Russia,,58.625000,31.385000,EU
7696,Desierto de Atacama Airport,Copiapo,Chile,CPO,-27.261200,-70.779198,SA


In [None]:
# The file is read without a header row, so the column labels are supplied here in file order.
route_columns = [
    "airline", "airline_id", "source_airport", "source_airport_id", "destination_airport",
    "destination_airport_id", "codeshare", "stops", "equipment",
]

routes = pd.read_csv("data/routes.csv", header=None).set_axis(route_columns, axis=1)
routes