# Data exploration - Airport codes

In [1]:
import pandas as pd

## Airport codes
This dataset contains a worldwide collection of airports, with their different identification codes, among other data.

Origin: https://datahub.io/core/airport-codes#data

## Data info

In [2]:
df = pd.read_csv("../data/airport-codes_csv.csv")
df.info()
df.head(5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 56364 entries, 0 to 56363
Data columns (total 12 columns):
ident           56364 non-null object
type            56364 non-null object
name            56364 non-null object
elevation_ft    49077 non-null float64
continent       28227 non-null object
iso_country     56118 non-null object
iso_region      56364 non-null object
municipality    50523 non-null object
gps_code        41110 non-null object
iata_code       9231 non-null object
local_code      29275 non-null object
coordinates     56364 non-null object
dtypes: float64(1), object(11)
memory usage: 2.8+ MB


Unnamed: 0,ident,type,name,elevation_ft,continent,iso_country,iso_region,municipality,gps_code,iata_code,local_code,coordinates
0,00A,heliport,Total Rf Heliport,11.0,,US,US-PA,Bensalem,00A,,00A,"-74.93360137939453, 40.07080078125"
1,00AA,small_airport,Aero B Ranch Airport,3435.0,,US,US-KS,Leoti,00AA,,00AA,"-101.473911, 38.704022"
2,00AK,small_airport,Lowell Field,450.0,,US,US-AK,Anchor Point,00AK,,00AK,"-151.695999146, 59.94919968"
3,00AL,small_airport,Epps Airpark,820.0,,US,US-AL,Harvest,00AL,,00AL,"-86.77030181884766, 34.86479949951172"
4,00AR,closed,Newport Hospital & Clinic Heliport,237.0,,US,US-AR,Newport,,,,"-91.254898, 35.6087"


In [3]:
df['type'].unique()

array(['heliport', 'small_airport', 'closed', 'seaplane_base',
       'balloonport', 'medium_airport', 'large_airport'], dtype=object)

## Extract US airports

Since the main data (visitor arrivals) is for arrivals to the US, and there is not enough data to find out the origin airport.

In [4]:
us_airports = df[df['iso_country'] == 'US']
us_airports[['latitude', 'longitude']] = us_airports['coordinates'].str.split(',', expand=True).apply(pd.Series, 1)
us_airports['state'] = us_airports['iso_region'].str.split('-', expand=True).apply(pd.Series, 1)[1]
print(us_airports.shape[0])
us_airports.head(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


23001


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  us_airports['state'] = us_airports['iso_region'].str.split('-', expand=True).apply(pd.Series, 1)[1]


Unnamed: 0,ident,type,name,elevation_ft,continent,iso_country,iso_region,municipality,gps_code,iata_code,local_code,coordinates,latitude,longitude,state
0,00A,heliport,Total Rf Heliport,11.0,,US,US-PA,Bensalem,00A,,00A,"-74.93360137939453, 40.07080078125",-74.93360137939453,40.07080078125,PA
1,00AA,small_airport,Aero B Ranch Airport,3435.0,,US,US-KS,Leoti,00AA,,00AA,"-101.473911, 38.704022",-101.473911,38.704022,KS
2,00AK,small_airport,Lowell Field,450.0,,US,US-AK,Anchor Point,00AK,,00AK,"-151.695999146, 59.94919968",-151.695999146,59.94919968,AK
3,00AL,small_airport,Epps Airpark,820.0,,US,US-AL,Harvest,00AL,,00AL,"-86.77030181884766, 34.86479949951172",-86.77030181884766,34.86479949951172,AL
4,00AR,closed,Newport Hospital & Clinic Heliport,237.0,,US,US-AR,Newport,,,,"-91.254898, 35.6087",-91.254898,35.6087,AR


## Extract all different IATA codes and Local codes as Airport codes

In [5]:
us_airports_by_ident = us_airports[['state', 'municipality', 'ident', 'latitude', 'longitude']]
us_airports_by_ident.rename(columns={'ident': 'airport_code', 'municipality': 'city'}, inplace=True)

us_airports_by_iata = us_airports[['state', 'municipality', 'iata_code', 'latitude', 'longitude']]
us_airports_by_iata.rename(columns={'iata_code': 'airport_code', 'municipality': 'city'}, inplace=True)

us_airports_by_local_code = us_airports[['state', 'municipality', 'local_code', 'latitude', 'longitude']]
us_airports_by_local_code.rename(columns={'local_code': 'airport_code', 'municipality': 'city'}, inplace=True)

us_airports_all = us_airports_by_ident.append(us_airports_by_iata)
us_airports_all = us_airports_all.append(us_airports_by_local_code)

print(f"All: {us_airports_all.shape[0]}")
us_airports_all = us_airports_all[us_airports_all['airport_code'].notnull()]
us_airports_all = us_airports_all.drop_duplicates().sort_values(['state', 'airport_code'])
print(f"Non null: {us_airports_all.shape[0]}")

us_airports_all.head(20)

All: 69003
Non null: 28380


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(**kwargs)


Unnamed: 0,state,city,airport_code,latitude,longitude
2,AK,Anchor Point,00AK,-151.695999146,59.94919968
63,AK,Purkeypile,01A,-152.27000427246094,62.94359970092773
39407,AK,Crooked Creek,01AA,-158.234251,62.031757
64,AK,Seward,01AK,-149.446249008,60.1058739754
128,AK,Willow,02AK,-150.09800720214844,61.87689971923828
191,AK,Palmer,03AA,-149.284527,61.556055
192,AK,Kenai,03AK,-151.13278198242188,60.72722244262695
252,AK,Soldotna,04AA,-150.811387,60.535833
16751,AK,Chatham,05AA,-134.945999,57.5149
313,AK,Palmer,05AK,-149.1880035,61.66830063


In [6]:
us_airports_all[us_airports_all['city'] == 'New York']

Unnamed: 0,state,city,airport_code,latitude,longitude
7459,NY,New York,6N5,-73.97209930419922,40.74259948730469
7460,NY,New York,6N6,-73.81620025634766,40.84590148925781
7461,NY,New York,6N7,-73.9729,40.734001
28423,NY,New York,JFK,-73.7789,40.639801
25817,NY,New York,JPB,-73.9765,40.7533
25819,NY,New York,JRA,-74.007103,40.754501
25820,NY,New York,JRB,-74.00900269,40.70119858
28423,NY,New York,KJFK,-73.7789,40.639801
28563,NY,New York,KLGA,-73.87259674,40.77719879
28991,NY,New York,KNOP,-73.890999,40.591
