# Airport Detection
Detect departure and arrival airport from Flight Data Monitoring dataset.
Using KNN model to match longitude and latitude of airport.

## Import

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math

from sklearn.neighbors import KNeighborsClassifier

### Constant

In [3]:
# fdm_database_path = '../dataset/database/01_fdm_files.csv'
# fdm_locations_path = '../dataset/database/02_fdm_location.csv'
# flights_airport_output = '../dataset/database/03_flights_airport.csv'

In [3]:
fdm_database_path = '../dataset/database_local/01_fdm_files.csv'
fdm_locations_path = '../dataset/database_local/02_fdm_locations.csv'
flights_airport_output = '../dataset/database_local/03_flights_airport.csv'

## Read Data

#### 1. Read Airport Data

In [4]:
airports_filepath = '../dataset/database/airports (radians).xlsx'
airports = pd.read_excel(airports_filepath, index_col=0)
airports.head()

Unnamed: 0_level_0,icao_code,iata_code,type,name,latitude_rad,longitude_rad,elevation_ft,scheduled_service
no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,5A8,WKK,medium_airport,Aleknagik / New Airport,1.034677,-2.768406,66.0,yes
2,AF-0005,,medium_airport,Khost International Airport (U.C.),0.580926,1.218368,4204.0,no
3,AGGH,HIR,medium_airport,Honiara International Airport,-0.16455,2.793487,28.0,yes
4,AGGM,MUA,medium_airport,Munda Airport,-0.14535,2.744757,10.0,yes
5,AHJ,AHJ,medium_airport,Hongyuan Airport,0.567782,1.786384,11600.0,no


#### 2. Read FDM File Data

In [5]:
fdm_files = pd.read_csv(fdm_database_path)
fdm_files.head()

Unnamed: 0,fname,tail_id,fsize_mb,fullpath
0,686200104121532.csv,tail_686_1,68.39,../dataset/fdm/tail_686_1/686200104121532.csv
1,686200104121245.csv,tail_686_1,61.52,../dataset/fdm/tail_686_1/686200104121245.csv
2,686200104121825.csv,tail_686_2,40.71,../dataset/fdm/tail_686_2/686200104121825.csv
3,686200104130429.csv,tail_686_2,42.63,../dataset/fdm/tail_686_2/686200104130429.csv
4,686200104130652.csv,tail_687_1,95.92,../dataset/fdm/tail_687_1/686200104130652.csv


#### 3. Read FDM Location

In [6]:
fdm_locations = pd.read_csv(fdm_locations_path)
fdm_locations.head()

Unnamed: 0,fname,dep_lat,dep_lon,arr_lat,arr_lon
0,686200104121532.csv,0.585762,-1.514118,0.736623,-1.454892
1,686200104121245.csv,0.736719,-1.454814,0.585786,-1.514111
2,686200104121825.csv,0.736698,-1.454781,0.663881,-1.47655
3,686200104130429.csv,0.663908,-1.476544,0.736575,-1.454951
4,686200104130652.csv,0.736596,-1.454951,0.617748,-1.7034


In [7]:
fdm_locations_cleaned = fdm_locations[~fdm_locations.isnull().any(axis=1)]
fdm_locations_cleaned.shape

(5, 5)

## Exploratory Data Analysis

#### 1. Create Airport Locations

In [8]:
airport_locations = airports[['iata_code', 'latitude_rad', 'longitude_rad']]
airport_locations.head()

Unnamed: 0_level_0,iata_code,latitude_rad,longitude_rad
no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,WKK,1.034677,-2.768406
2,,0.580926,1.218368
3,HIR,-0.16455,2.793487
4,MUA,-0.14535,2.744757
5,AHJ,0.567782,1.786384


In [9]:
airport_locations = airport_locations.dropna()

In [10]:
airport_locations.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4470 entries, 1 to 5157
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   iata_code      4470 non-null   object 
 1   latitude_rad   4470 non-null   float64
 2   longitude_rad  4470 non-null   float64
dtypes: float64(2), object(1)
memory usage: 139.7+ KB


No null value detected

## Model Training

In [11]:
knn = KNeighborsClassifier(n_neighbors=1)

X_train = airport_locations[['latitude_rad', 'longitude_rad']]
y_train = airport_locations['iata_code']

knn.fit(X_train, y_train)

KNeighborsClassifier(n_neighbors=1)

### Predict Airport

#### 1. Departure Airport

In [12]:
departure_list = knn.predict(fdm_locations_cleaned[['dep_lat', 'dep_lon']])
departure_list

array(['BHM', 'DTW', 'DTW', 'LEX', 'DTW'], dtype=object)

#### 2. Arrival Airport

In [13]:
arrival_list = knn.predict(fdm_locations_cleaned[['arr_lat', 'arr_lon']])
arrival_list

array(['DTW', 'BHM', 'LEX', 'DTW', 'OKC'], dtype=object)

#### Join Prediction to Table

In [14]:
fdm_airport = fdm_locations_cleaned[['fname']].copy()
fdm_airport['dep_airport'] = departure_list
fdm_airport['arr_airport'] = arrival_list

In [15]:
fdm_airport.head()

Unnamed: 0,fname,dep_airport,arr_airport
0,686200104121532.csv,BHM,DTW
1,686200104121245.csv,DTW,BHM
2,686200104121825.csv,DTW,LEX
3,686200104130429.csv,LEX,DTW
4,686200104130652.csv,DTW,OKC


#### Merge with FDM Files

In [16]:
fdm_files

Unnamed: 0,fname,tail_id,fsize_mb,fullpath
0,686200104121532.csv,tail_686_1,68.39,../dataset/fdm/tail_686_1/686200104121532.csv
1,686200104121245.csv,tail_686_1,61.52,../dataset/fdm/tail_686_1/686200104121245.csv
2,686200104121825.csv,tail_686_2,40.71,../dataset/fdm/tail_686_2/686200104121825.csv
3,686200104130429.csv,tail_686_2,42.63,../dataset/fdm/tail_686_2/686200104130429.csv
4,686200104130652.csv,tail_687_1,95.92,../dataset/fdm/tail_687_1/686200104130652.csv


In [17]:
fdm_files_merged = fdm_files.copy()
fdm_files_merged = fdm_files_merged.join(fdm_airport.set_index('fname'),
                                         on='fname',
                                         how='left')

In [18]:
fdm_files_merged.head()

Unnamed: 0,fname,tail_id,fsize_mb,fullpath,dep_airport,arr_airport
0,686200104121532.csv,tail_686_1,68.39,../dataset/fdm/tail_686_1/686200104121532.csv,BHM,DTW
1,686200104121245.csv,tail_686_1,61.52,../dataset/fdm/tail_686_1/686200104121245.csv,DTW,BHM
2,686200104121825.csv,tail_686_2,40.71,../dataset/fdm/tail_686_2/686200104121825.csv,DTW,LEX
3,686200104130429.csv,tail_686_2,42.63,../dataset/fdm/tail_686_2/686200104130429.csv,LEX,DTW
4,686200104130652.csv,tail_687_1,95.92,../dataset/fdm/tail_687_1/686200104130652.csv,DTW,OKC


#### Reorder column

In [19]:
col_order = ['fname', 'tail_id', 'dep_airport', 'arr_airport', 'fsize_mb', 'fullpath']
fdm_files_merged = fdm_files_merged[col_order]

In [20]:
fdm_files_merged.head()

Unnamed: 0,fname,tail_id,dep_airport,arr_airport,fsize_mb,fullpath
0,686200104121532.csv,tail_686_1,BHM,DTW,68.39,../dataset/fdm/tail_686_1/686200104121532.csv
1,686200104121245.csv,tail_686_1,DTW,BHM,61.52,../dataset/fdm/tail_686_1/686200104121245.csv
2,686200104121825.csv,tail_686_2,DTW,LEX,40.71,../dataset/fdm/tail_686_2/686200104121825.csv
3,686200104130429.csv,tail_686_2,LEX,DTW,42.63,../dataset/fdm/tail_686_2/686200104130429.csv
4,686200104130652.csv,tail_687_1,DTW,OKC,95.92,../dataset/fdm/tail_687_1/686200104130652.csv


In [21]:
fdm_files_merged.to_csv(flights_airport_output, index=False)