In [198]:
import pandas as pd
from pandasql import sqldf
import numpy as np
from geopy.geocoders import Nominatim
from geopy.distance import geodesic
import warnings

In [199]:
# Suppress every warning for the rest of the session (no category or module filter is given).
warnings.filterwarnings("ignore")

In [200]:
# Raw flight table: city pair, airport codes and the provided coordinates.
data = pd.read_csv(filepath_or_buffer="FlightDistanceTest.csv")

Checking the data information, i.e. the data types, column names, and the number of null values.

In [201]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3000 entries, 0 to 2999
Data columns (total 7 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Normalised City Pair  3000 non-null   object 
 1   Departure Code        3000 non-null   object 
 2   Arrival Code          3000 non-null   object 
 3   Departure_lat         3000 non-null   float64
 4   Departure_lon         3000 non-null   float64
 5   Arrival_lat           3000 non-null   float64
 6   Arrival_lon           3000 non-null   float64
dtypes: float64(4), object(3)
memory usage: 164.2+ KB


In [202]:
data.isnull().sum()

Normalised City Pair    0
Departure Code          0
Arrival Code            0
Departure_lat           0
Departure_lon           0
Arrival_lat             0
Arrival_lon             0
dtype: int64

In [203]:
data.head(10)

Unnamed: 0,Normalised City Pair,Departure Code,Arrival Code,Departure_lat,Departure_lon,Arrival_lat,Arrival_lon
0,"London, United Kingdom - New York, United States Of America",LHR,JFK,51.5,-0.45,40.64,-73.79
1,"Johannesburg, South Africa - London, United Kingdom",JNB,LHR,-26.1,28.23,51.47,-0.45
2,"London, United Kingdom - New York, United States Of America",LHR,JFK,51.5,-0.45,40.64,-73.79
3,"Johannesburg, South Africa - London, United Kingdom",JNB,LHR,-26.1,28.23,51.47,-0.45
4,"London, United Kingdom - Singapore, Singapore",SIN,LHR,1.3,103.98,51.47,-0.45
5,"London, United Kingdom - New York, United States Of America",JFK,LCY,40.6,-73.79,51.5,0.05
6,"London, United Kingdom - New York, United States Of America",LCY,JFK,51.5,0.05,40.64,-73.79
7,"London, United Kingdom - Newark, United States Of America",EWR,LHR,40.7,-74.18,51.47,-0.45
8,"Bombay, India - London, United Kingdom",LHR,BOM,51.5,-0.45,19.09,72.86
9,"Dubai, United Arab Emirates - London, United Kingdom",DXB,LHR,25.3,55.35,51.47,-0.45


Calculating route distances in miles based on latitude and longitude.

In [204]:
# Widen text columns so long city-pair strings and addresses are not truncated.
pd.options.display.max_colwidth = 300

At this point, my intention was to split the strings in the "Normalised City Pair" column into new columns for the city of departure, city of arrival, country of departure, and country of arrival. The goal was to look up the exact address of each coordinate pair and check whether any of the words from "Normalised City Pair" appeared in that address. After using the *geopy* library to find the exact addresses of the coordinates, I quickly realized that this approach was impractical: some addresses use a different script (e.g. Arabic letters), and others do not include the city name at all. For example, the exact address for the coordinates "-26.1, 28.23" is "Kosmos Road, Ekurhuleni Ward 16, Kempton Park, City of Ekurhuleni Metropolitan Municipality, Gauteng, 1627, South Africa." The corresponding value in the "Normalised City Pair" column is "Johannesburg", which my method would not have detected, since "Johannesburg" is not mentioned in the address *geopy* returned.

In [205]:
def location(latitude_and_longitude: str) -> str:
    """Reverse-geocode a coordinate string into a street address.

    Parameters
    ----------
    latitude_and_longitude : str
        Coordinates as text, e.g. ``"-26.1, 28.23"``.

    Returns
    -------
    str
        The address reported by Nominatim, or an empty string when the
        lookup yields nothing.
    """
    # A fresh Nominatim client is created on every call.
    match = Nominatim(user_agent="locator").reverse(latitude_and_longitude)
    if not match:
        return ""
    return match.address
print(location("-26.1, 28.23")) # example

Kosmos Road, Ekurhuleni Ward 16, Kempton Park, City of Ekurhuleni Metropolitan Municipality, Gauteng, 1627, South Africa


Since the "Normalised City Pair" column is no longer part of my analysis, another potential anchor point is to verify whether airport codes are consistent for each set of geographic coordinates. First, I will create a temporary table that stores the concatenated latitude and longitude values (for easier interpretation), along with the airport codes for both departure and arrival. This will enable me to search for data inconsistencies using SQL.

In [206]:
# Build one "lat, lon" text key per end of the flight (latitude first, then longitude).
data["departure_coords"] = data["Departure_lat"].astype(str).str.cat(data["Departure_lon"].astype(str), sep=", ")
data["arrival_coords"] = data["Arrival_lat"].astype(str).str.cat(data["Arrival_lon"].astype(str), sep=", ")

In [207]:
# Table queried by the SQL cells: one row per flight, holding the airport code
# and the "lat, lon" key for each end. The two code columns are renamed by name
# (not by position) to snake_case so they are easy to reference in SQL.
temp_df = data[
    ["Departure Code", "departure_coords", "Arrival Code", "arrival_coords"]
].rename(
    columns={
        "Departure Code": "departure_code",
        "Arrival Code": "arrival_code",
    }
)

In [208]:
temp_df

Unnamed: 0,departure_code,departure_coords,arrival_code,arrival_coords
0,LHR,"51.5, -0.45",JFK,"40.64, -73.79"
1,JNB,"-26.1, 28.23",LHR,"51.47, -0.45"
2,LHR,"51.5, -0.45",JFK,"40.64, -73.79"
3,JNB,"-26.1, 28.23",LHR,"51.47, -0.45"
4,SIN,"1.3, 103.98",LHR,"51.47, -0.45"
...,...,...,...,...
2995,LGA,"40.8, -73.88",YYZ,"43.68, -79.61"
2996,EGE,"41.3, -85.28",EWR,"40.69, -74.18"
2997,SOU,"51.0, -1.36",IOM,"54.09, -4.63"
2998,YYZ,"43.7, -79.61",YOW,"45.32, -75.67"


Now, I will proceed with querying the above table to determine if there are any coordinates that are associated with more than one airport code (which of course, shouldn't occur).

In [209]:
# For every departure coordinate key, count the distinct departure codes attached
# to it and keep only keys with more than one code, largest count first.
sqldf(""" 
    SELECT departure_coords, COUNT(DISTINCT departure_code) AS count_of_distinct_departure_codes
    FROM temp_df
    GROUP BY departure_coords
    HAVING COUNT(DISTINCT departure_code) > 1
    ORDER BY count_of_distinct_departure_codes DESC
""")

Unnamed: 0,departure_coords,count_of_distinct_departure_codes
0,"51.5, -0.45",39
1,"50.1, 8.56",26
2,"25.3, 55.35",18
3,"49.0, 2.54",14
4,"41.6, -98.93",14
5,"34.1, -118.24",11
6,"47.5, 8.57",10
7,"1.3, 103.98",10
8,"52.3, 4.75",9
9,"48.3, 11.77",9


In [210]:
# Two summary rows over the departure coordinates that carry more than one code:
#   asssuming_1_correct    -> SUM of distinct codes minus one per coordinate (SUM - COUNT)
#   asssuming_none_correct -> SUM of distinct codes
# Both halves of the UNION ALL use the same grouped subquery.
sqldf("""
    SELECT 'asssuming_1_correct' AS type, SUM(count_of_distinct_departure_codes) -  COUNT(departure_coords) as unique_incorrect_airport_codes
    FROM (
        SELECT departure_coords, COUNT(DISTINCT departure_code) AS count_of_distinct_departure_codes
        FROM temp_df
        GROUP BY departure_coords
        HAVING COUNT(DISTINCT departure_code) > 1
    ) temp
UNION ALL
    SELECT 'asssuming_none_correct' AS type, SUM(count_of_distinct_departure_codes) as unique_incorrect_airport_codes
    FROM (
        SELECT departure_coords, COUNT(DISTINCT departure_code) AS count_of_distinct_departure_codes
        FROM temp_df
        GROUP BY departure_coords
        HAVING COUNT(DISTINCT departure_code) > 1
    ) temp
""")

Unnamed: 0,type,unique_incorrect_airport_codes
0,asssuming_1_correct,308
1,asssuming_none_correct,366


As we can see, some coordinates are associated with multiple airport codes. The number of incorrect coordinate–code pairings on the departure side therefore lies between 308 and 366, depending on whether we assume that exactly one of the codes attached to each set of coordinates is correct (308) or that none of them is (366). The coordinates '51.5, -0.45' have the largest number of airport codes assigned to them. Let's proceed and check the specific codes matched with these coordinates.

In [211]:
# List, alphabetically, every distinct departure code recorded against the
# coordinate key '51.5, -0.45'.
sqldf("""
    SELECT DISTINCT departure_code
    FROM temp_df
    WHERE departure_coords = '51.5, -0.45'
    ORDER BY departure_code  
""")

Unnamed: 0,departure_code
0,AMS
1,ARN
2,BOM
3,BRU
4,CDG
5,CPT
6,DEL
7,DFW
8,DOH
9,DXB


There are **39 unique** airport codes associated with the coordinates "51.5, -0.45". We can use the location function defined earlier to decode these coordinates and determine the correct code that should be assigned to them. To verify the airport code, we can search for "airport codes list" (on Google, for example) and use any of the resulting websites.

In [212]:
# Same "lat, lon" text form as the departure_coords keys used in the queries above.
location("51.5, -0.45")

'Stockley Road, West Drayton, London Borough of Hillingdon, London, Greater London, England, UB7 9BN, United Kingdom'

The coordinates "51.5, -0.45" correspond to London. Therefore, any airport codes other than "LHR" associated with these specific coordinates are, in fact, errors in the dataset.

We can also display all of the airport codes assigned to the previously mentioned coordinates.

In [213]:
# For each departure coordinate key with more than one distinct code, show the
# codes themselves as one comma-separated string (GROUP_CONCAT over the
# de-duplicated coordinate/code pairs).
sqldf("""
    SELECT departure_coords, GROUP_CONCAT(DISTINCT departure_code) AS departure_codes
    FROM (
        SELECT DISTINCT departure_coords, departure_code
        FROM temp_df
    ) unique_codes
    GROUP BY departure_coords
    HAVING COUNT(DISTINCT departure_code) > 1
""")

Unnamed: 0,departure_coords,departure_codes
0,"-1.3, 36.81","NBO,LHR,EBB,JNB,ACC"
1,"-12.0, -77.11","LIM,EZE"
2,"-20.0, 23.43","MUB,GBE"
3,"-20.4, 57.68","MRU,SEZ"
4,"-23.4, -46.48","GRU,LHR,JFK"
5,"-26.1, 28.23","JNB,CAI,GBE,DXB,HKG,DUR,GRU,LHR,DOH"
6,"-33.9, 151.23","SYD,NRT"
7,"-37.7, 144.85","MEL,LHR"
8,"1.3, 103.98","SIN,LHR,MNL,EWR,CGK,AKL,NRT,JFK,KUL,FRA"
9,"13.7, 100.77","BKK,HKG,FRA"


Now, I will query the columns containing information about arrival coordinates and arrival airport codes.

In [214]:
# Arrival-side counterpart: arrival coordinate keys that carry more than one
# distinct arrival code, with the number of codes for each.
sqldf(""" 
    SELECT arrival_coords, COUNT(DISTINCT arrival_code) AS count_distinct_arrival_codes
    FROM temp_df
    GROUP BY arrival_coords
    HAVING COUNT(DISTINCT arrival_code) > 1;
""")

Unnamed: 0,arrival_coords,count_distinct_arrival_codes
0,"40.07, 116.58",2
1,"45.47, 9.18",2
2,"49.01, 2.54",2
3,"51.47, -0.45",2


In [215]:
# For each arrival coordinate key with more than one distinct code, list the
# codes as one comma-separated string.
sqldf("""
    SELECT arrival_coords,
    GROUP_CONCAT(DISTINCT arrival_code) as arrival_codes
    FROM (
        SELECT DISTINCT arrival_coords, arrival_code
        FROM temp_df
    ) unique_codes
    GROUP BY arrival_coords
    HAVING COUNT(DISTINCT arrival_code) > 1
""")

Unnamed: 0,arrival_coords,arrival_codes
0,"40.07, 116.58","PEK,BJS"
1,"45.47, 9.18","LIN,MXP"
2,"49.01, 2.54","CDG,PAR"
3,"51.47, -0.45","LHR,LON"


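There are significantly fewer inconsistencies in the arrival coordinates and codes, but some still exist. Note that BJS, PAR and LON are IATA metropolitan-area (city) codes rather than individual airports, so those rows use a city code where an airport code was expected, whereas LIN and MXP are two distinct Milan airports that share a single set of coordinates.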

Now, I will calculate the distances between provided coordinates and compare them with distances calculated based on airport codes and coordinates using this [database](https://raw.githubusercontent.com/jpatokal/openflights/master/data/airports.dat).

In [216]:
# OpenFlights airport table; the file has no header row, so columns are numbered 0..13.
AIRPORTS_URL = "https://raw.githubusercontent.com/jpatokal/openflights/master/data/airports.dat"
airports = pd.read_csv(AIRPORTS_URL, header=None)

In [217]:
airports

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1,Goroka Airport,Goroka,Papua New Guinea,GKA,AYGA,-6.081690,145.391998,5282,10,U,Pacific/Port_Moresby,airport,OurAirports
1,2,Madang Airport,Madang,Papua New Guinea,MAG,AYMD,-5.207080,145.789001,20,10,U,Pacific/Port_Moresby,airport,OurAirports
2,3,Mount Hagen Kagamuga Airport,Mount Hagen,Papua New Guinea,HGU,AYMH,-5.826790,144.296005,5388,10,U,Pacific/Port_Moresby,airport,OurAirports
3,4,Nadzab Airport,Nadzab,Papua New Guinea,LAE,AYNZ,-6.569803,146.725977,239,10,U,Pacific/Port_Moresby,airport,OurAirports
4,5,Port Moresby Jacksons International Airport,Port Moresby,Papua New Guinea,POM,AYPY,-9.443380,147.220001,146,10,U,Pacific/Port_Moresby,airport,OurAirports
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7693,14106,Rogachyovo Air Base,Belaya,Russia,\N,ULDA,71.616699,52.478298,272,\N,\N,\N,airport,OurAirports
7694,14107,Ulan-Ude East Airport,Ulan Ude,Russia,\N,XIUW,51.849998,107.737999,1670,\N,\N,\N,airport,OurAirports
7695,14108,Krechevitsy Air Base,Novgorod,Russia,\N,ULLK,58.625000,31.385000,85,\N,\N,\N,airport,OurAirports
7696,14109,Desierto de Atacama Airport,Copiapo,Chile,CPO,SCAT,-27.261200,-70.779198,670,\N,\N,\N,airport,OurAirports


In [218]:
# Discard every column except 2, 4, 6 and 7 (named city, airport_code, latitude, longitude in the next cell).
airports = airports.drop(columns=[0, 1, 3, 5, 8, 9, 10, 11, 12, 13])

In [219]:
# Give the four remaining positional columns descriptive names.
airports = airports.set_axis(["city", "airport_code", "latitude", "longitude"], axis="columns")

In [220]:
# Round both coordinate columns to two decimal places in one assignment.
airports[["latitude", "longitude"]] = airports[["latitude", "longitude"]].round(2)

In [221]:
# "lat, lon" text key, the same form as the flight table's coordinate columns (geopy handles strings).
airports["coordinates"] = airports["latitude"].astype(str).str.cat(airports["longitude"].astype(str), sep=", ")

In [222]:
# The numeric columns are now redundant with the combined "coordinates" key.
airports = airports.drop(columns=["latitude", "longitude"])
airports

Unnamed: 0,city,airport_code,coordinates
0,Goroka,GKA,"-6.08, 145.39"
1,Madang,MAG,"-5.21, 145.79"
2,Mount Hagen,HGU,"-5.83, 144.3"
3,Nadzab,LAE,"-6.57, 146.73"
4,Port Moresby,POM,"-9.44, 147.22"
...,...,...,...
7693,Belaya,\N,"71.62, 52.48"
7694,Ulan Ude,\N,"51.85, 107.74"
7695,Novgorod,\N,"58.62, 31.39"
7696,Copiapo,CPO,"-27.26, -70.78"


In [223]:
data

Unnamed: 0,Normalised City Pair,Departure Code,Arrival Code,Departure_lat,Departure_lon,Arrival_lat,Arrival_lon,departure_coords,arrival_coords
0,"London, United Kingdom - New York, United States Of America",LHR,JFK,51.5,-0.45,40.64,-73.79,"51.5, -0.45","40.64, -73.79"
1,"Johannesburg, South Africa - London, United Kingdom",JNB,LHR,-26.1,28.23,51.47,-0.45,"-26.1, 28.23","51.47, -0.45"
2,"London, United Kingdom - New York, United States Of America",LHR,JFK,51.5,-0.45,40.64,-73.79,"51.5, -0.45","40.64, -73.79"
3,"Johannesburg, South Africa - London, United Kingdom",JNB,LHR,-26.1,28.23,51.47,-0.45,"-26.1, 28.23","51.47, -0.45"
4,"London, United Kingdom - Singapore, Singapore",SIN,LHR,1.3,103.98,51.47,-0.45,"1.3, 103.98","51.47, -0.45"
...,...,...,...,...,...,...,...,...,...
2995,"New York, United States Of America - Toronto, Canada",LGA,YYZ,40.8,-73.88,43.68,-79.61,"40.8, -73.88","43.68, -79.61"
2996,"Newark, United States Of America - Vail/eagle, United States Of America",EGE,EWR,41.3,-85.28,40.69,-74.18,"41.3, -85.28","40.69, -74.18"
2997,"Isle Of Man, United Kingdom - Southampton, United Kingdom",SOU,IOM,51.0,-1.36,54.09,-4.63,"51.0, -1.36","54.09, -4.63"
2998,"Ottawa, Canada - Toronto, Canada",YYZ,YOW,43.7,-79.61,45.32,-75.67,"43.7, -79.61","45.32, -75.67"


In [224]:
# The four numeric columns are already captured in departure_coords / arrival_coords.
data = data.drop(columns=["Departure_lat", "Departure_lon", "Arrival_lat", "Arrival_lon"])
data

Unnamed: 0,Normalised City Pair,Departure Code,Arrival Code,departure_coords,arrival_coords
0,"London, United Kingdom - New York, United States Of America",LHR,JFK,"51.5, -0.45","40.64, -73.79"
1,"Johannesburg, South Africa - London, United Kingdom",JNB,LHR,"-26.1, 28.23","51.47, -0.45"
2,"London, United Kingdom - New York, United States Of America",LHR,JFK,"51.5, -0.45","40.64, -73.79"
3,"Johannesburg, South Africa - London, United Kingdom",JNB,LHR,"-26.1, 28.23","51.47, -0.45"
4,"London, United Kingdom - Singapore, Singapore",SIN,LHR,"1.3, 103.98","51.47, -0.45"
...,...,...,...,...,...
2995,"New York, United States Of America - Toronto, Canada",LGA,YYZ,"40.8, -73.88","43.68, -79.61"
2996,"Newark, United States Of America - Vail/eagle, United States Of America",EGE,EWR,"41.3, -85.28","40.69, -74.18"
2997,"Isle Of Man, United Kingdom - Southampton, United Kingdom",SOU,IOM,"51.0, -1.36","54.09, -4.63"
2998,"Ottawa, Canada - Toronto, Canada",YYZ,YOW,"43.7, -79.61","45.32, -75.67"


In [225]:
# Mark the coordinates that came with the dataset as the ORIGINAL ones.
original_names = {
    "departure_coords": "dep_coords_ORIGINAL",
    "arrival_coords": "arr_coords_ORIGINAL",
}
data = data.rename(original_names, axis="columns")

Checking unique airport codes in the dataset

In [226]:
# Every airport code that appears on either end of a flight.
codes = set(pd.concat([data["Departure Code"], data["Arrival Code"]]).unique())

Dropping airport codes that are not present in original dataset

In [227]:
# Keep only reference rows whose code occurs in the flight data.
in_dataset = airports["airport_code"].isin(codes)
airports = airports.loc[in_dataset]

In [228]:
# reset_index returns a new frame; assign it so `airports` really carries the
# contiguous 0..n-1 index that this cell displays.
airports = airports.reset_index(drop = True)
airports

Unnamed: 0,city,airport_code,coordinates
0,Keflavik,KEF,"63.99, -22.61"
1,Edmonton,YEG,"53.31, -113.58"
2,Halifax,YHZ,"44.88, -63.51"
3,Ottawa,YOW,"45.32, -75.67"
4,Quebec,YQB,"46.79, -71.39"
...,...,...,...
391,Hilton Head Island,HHH,"32.22, -80.7"
392,Doha,DOH,"25.27, 51.61"
393,Hyderabad,HYD,"17.23, 78.43"
394,Istanbul,IST,"41.28, 28.75"


In [229]:
data

Unnamed: 0,Normalised City Pair,Departure Code,Arrival Code,dep_coords_ORIGINAL,arr_coords_ORIGINAL
0,"London, United Kingdom - New York, United States Of America",LHR,JFK,"51.5, -0.45","40.64, -73.79"
1,"Johannesburg, South Africa - London, United Kingdom",JNB,LHR,"-26.1, 28.23","51.47, -0.45"
2,"London, United Kingdom - New York, United States Of America",LHR,JFK,"51.5, -0.45","40.64, -73.79"
3,"Johannesburg, South Africa - London, United Kingdom",JNB,LHR,"-26.1, 28.23","51.47, -0.45"
4,"London, United Kingdom - Singapore, Singapore",SIN,LHR,"1.3, 103.98","51.47, -0.45"
...,...,...,...,...,...
2995,"New York, United States Of America - Toronto, Canada",LGA,YYZ,"40.8, -73.88","43.68, -79.61"
2996,"Newark, United States Of America - Vail/eagle, United States Of America",EGE,EWR,"41.3, -85.28","40.69, -74.18"
2997,"Isle Of Man, United Kingdom - Southampton, United Kingdom",SOU,IOM,"51.0, -1.36","54.09, -4.63"
2998,"Ottawa, Canada - Toronto, Canada",YYZ,YOW,"43.7, -79.61","45.32, -75.67"


In [230]:
def _with_airport_coords(flights, code_column, coords_column):
    """Left-join reference coordinates for ``code_column`` onto ``flights``.

    Parameters
    ----------
    flights : pd.DataFrame
        Flight table containing ``code_column``.
    code_column : str
        Column of ``flights`` holding the airport code to look up.
    coords_column : str
        Name given to the joined coordinates column.

    Returns
    -------
    pd.DataFrame
        ``flights`` plus ``coords_column``; codes missing from ``airports``
        yield NaN in that column.
    """
    # One row per code: a code listed twice in the reference table would
    # otherwise duplicate every flight that uses it.
    lookup = (
        airports[["airport_code", "coordinates"]]
        .drop_duplicates(subset="airport_code")
        .rename(columns={"coordinates": coords_column})
    )
    return pd.merge(
        flights,
        lookup,
        left_on=code_column,
        right_on="airport_code",
        how="left",
        validate="many_to_one",
    ).drop("airport_code", axis="columns")


departure_merged = _with_airport_coords(data, "Departure Code", "dep_coords")
merged = _with_airport_coords(departure_merged, "Arrival Code", "arr_coords")

# A left join against a unique key keeps exactly one row per flight.
assert len(merged) == len(data)
merged

Unnamed: 0,Normalised City Pair,Departure Code,Arrival Code,dep_coords_ORIGINAL,arr_coords_ORIGINAL,dep_coords,arr_coords
0,"London, United Kingdom - New York, United States Of America",LHR,JFK,"51.5, -0.45","40.64, -73.79","51.47, -0.46","40.64, -73.78"
1,"Johannesburg, South Africa - London, United Kingdom",JNB,LHR,"-26.1, 28.23","51.47, -0.45","-26.14, 28.25","51.47, -0.46"
2,"London, United Kingdom - New York, United States Of America",LHR,JFK,"51.5, -0.45","40.64, -73.79","51.47, -0.46","40.64, -73.78"
3,"Johannesburg, South Africa - London, United Kingdom",JNB,LHR,"-26.1, 28.23","51.47, -0.45","-26.14, 28.25","51.47, -0.46"
4,"London, United Kingdom - Singapore, Singapore",SIN,LHR,"1.3, 103.98","51.47, -0.45","1.35, 103.99","51.47, -0.46"
...,...,...,...,...,...,...,...
2995,"New York, United States Of America - Toronto, Canada",LGA,YYZ,"40.8, -73.88","43.68, -79.61","40.78, -73.87","43.68, -79.63"
2996,"Newark, United States Of America - Vail/eagle, United States Of America",EGE,EWR,"41.3, -85.28","40.69, -74.18","39.64, -106.92","40.69, -74.17"
2997,"Isle Of Man, United Kingdom - Southampton, United Kingdom",SOU,IOM,"51.0, -1.36","54.09, -4.63","50.95, -1.36","54.08, -4.62"
2998,"Ottawa, Canada - Toronto, Canada",YYZ,YOW,"43.7, -79.61","45.32, -75.67","43.68, -79.63","45.32, -75.67"


Changing order of columns

In [231]:
new_order = ["Normalised City Pair", "Departure Code", "dep_coords",
             "dep_coords_ORIGINAL", "Arrival Code", "arr_coords", "arr_coords_ORIGINAL"]
# .copy() makes `data` an independent frame, so columns assigned to it afterwards
# are not writes to a slice of `merged` (the SettingWithCopyWarning case).
data = merged[new_order].copy()

In [232]:
data.isnull().sum()

Normalised City Pair     0
Departure Code           0
dep_coords               8
dep_coords_ORIGINAL      0
Arrival Code             0
arr_coords              13
arr_coords_ORIGINAL      0
dtype: int64

The imported airport database has no entry for some of the airport codes in the dataset, which leaves a few null values after joining the tables. I will ignore these for now, as they can be handled manually.

In [233]:
def distance(row, departure, arrival):
    """Geodesic distance in miles between two coordinate fields of ``row``.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Record holding the two coordinate values.
    departure, arrival : str
        Keys of ``row`` under which the start and end coordinates are stored.

    Returns
    -------
    float or None
        Distance rounded to two decimals, or ``None`` when either
        coordinate is missing.
    """
    start, end = row[departure], row[arrival]

    # Nothing to measure when one end has no coordinates.
    if pd.isna(start) or pd.isna(end):
        return None

    return round(geodesic(start, end).miles, 2)

In [234]:
# Row-wise distance, once from the coordinates supplied with the data and once
# from the coordinates looked up by airport code.
data["distance_in_miles_calculated_with_original_coords"] = data.apply(
    lambda flight: distance(flight, "dep_coords_ORIGINAL", "arr_coords_ORIGINAL"),
    axis=1,
)
data["distance_in_miles_calculated__between_airport_codes"] = data.apply(
    lambda flight: distance(flight, "dep_coords", "arr_coords"),
    axis=1,
)

In [235]:
# Absolute gap between the two distance estimates for each flight.
gap = data["distance_in_miles_calculated_with_original_coords"] - data["distance_in_miles_calculated__between_airport_codes"]
data["distance_diff"] = gap.abs()

In [236]:
# Largest discrepancies first.
data = data.sort_values("distance_diff", ascending=False)

In [237]:
# Write the sorted comparison table to a spreadsheet, without the index column.
data.to_excel(excel_writer="result.xlsx", index=False)

### Useful Links and References:
* [Getting distance between two points based on latitude/longitude](https://stackoverflow.com/questions/19412462/getting-distance-between-two-points-based-on-latitude-longitude)
* [Airport codes](https://www.ccra.com/airport-codes/)
* [Get address from given coordinate using python](https://stackoverflow.com/questions/60928516/get-address-from-given-coordinate-using-python)
* [How to Use SQL in pandas Using pandasql Queries](https://www.datacamp.com/tutorial/how-to-use-sql-in-pandas-using-pandasql-queries)
* [Using an API to calculate distance between two airports](https://stackoverflow.com/questions/37572731/using-an-api-to-calculate-distance-between-two-airports-two-columns-within-r)
* [Database of airports](https://raw.githubusercontent.com/jpatokal/openflights/master/data/airports.dat)