In [1]:
import numpy as np
import pandas as pd
import re
import openpyxl

In [2]:
# Open the FAA data-dictionary workbook for reading only. The notebook never
# writes to this file, so read-only binary mode ('rb') is sufficient and does
# not require write permission on the raw data. The handle is closed by the
# later `infile.close()` cell.
infile = open('../Raw-FAA-Data/airport-data-dictionaries.xlsx', mode='rb')

In [3]:
# Load the third sheet (zero-based position 2) of the workbook, using the
# 'Field' column as the row index.
df = pd.read_excel(infile, sheet_name=2, index_col='Field')

# Notebook display settings: at most 5 columns and 140 rows, and no
# truncation of long cell text.
for option_name, option_value in (
    ('display.max_columns', 5),
    ('display.max_rows', 140),
    ('display.max_colwidth', None),
):
    pd.set_option(option_name, option_value)

df.head()

Unnamed: 0_level_0,Number,Description
Field,Unnamed: 1_level_1,Unnamed: 2_level_1
SiteNumber,DLID,"Landing facility site number. The unique identifying number of the airport whose runway is being described. Together with the runway id field, this provides the unique key to a runway record."
State,DLID,Runway state post office code. The state where the landing facility is located. This was used in sorting the entire file by state and site number.
RunwayID,DLID,Runway identification. Ex. 01/19; 18L/36R (parallel runways); H1 (helipad); N/S (north/south); ALL/WAY (sealane); B1 (balloonport)
RunwayLength,A31,Physical runway length (nearest foot) (ex. 3500)
RunwayWidth,A32,Physical runway width (nearest foot) (ex. 100)


In [4]:
# Give the two data columns snake_case names.
column_renames = {'Number': 'category_id', 'Description': 'description'}
df.rename(mapper=column_renames, axis='columns', inplace=True)
df.head()

Unnamed: 0_level_0,category_id,description
Field,Unnamed: 1_level_1,Unnamed: 2_level_1
SiteNumber,DLID,"Landing facility site number. The unique identifying number of the airport whose runway is being described. Together with the runway id field, this provides the unique key to a runway record."
State,DLID,Runway state post office code. The state where the landing facility is located. This was used in sorting the entire file by state and site number.
RunwayID,DLID,Runway identification. Ex. 01/19; 18L/36R (parallel runways); H1 (helipad); N/S (north/south); ALL/WAY (sealane); B1 (balloonport)
RunwayLength,A31,Physical runway length (nearest foot) (ex. 3500)
RunwayWidth,A32,Physical runway width (nearest foot) (ex. 100)


In [5]:
# NOTE: 'ActivationDate' was modified to 'ActiviationDate' to match typo in source file.
# NOTE(review): neither spelling appears in `labels_to_keep` below -- this note
# may be left over from another notebook; confirm before relying on it.

# Field names (row labels of the data dictionary) that are retained.
labels_to_keep = [
    # Runway-level fields
    'SiteNumber', 'State', 'RunwayID', 'RunwayLength', 'RunwayWidth',
    'RunwaySurfaceTypeCondition', 'RunwaySurfaceTreatment', 'EdgeLightsIntensity',
    # Base end
    'BaseEndID', 'BaseEndTrueAlignment', 'BaseEndILSType',
    'BaseEndRightTrafficPattern', 'BaseEndMarkingsType', 'BaseEndMarkingsCondition',
    'BaseEndPhysicalLatitude', 'BaseEndPhysicalLatitudeS',
    'BaseEndPhysicalLongitude', 'BaseEndPhysicalLongitudeS',
    'BaseEndPhysicalElevation', 'BaseEndCrossingHeight', 'BaseEndGlidePathAngle',
    'BaseEndDisplacedLength', 'BaseEndVASI', 'BaseEndCenterlineLights',
    'BaseEndTouchdownLights', 'BaseEndObjectDescription', 'BaseEndObjectHeight',
    # Reciprocal end
    'ReciprocalEndID', 'ReciprocalEndTrueAlignment', 'ReciprocalEndILSType',
    'ReciprocalEndRightTrafficPattern', 'ReciprocalEndMarkingsType',
    'ReciprocalEndMarkingsCondition',
    'ReciprocalEndPhysicalLatitude', 'ReciprocalEndPhysicalLatitudeS',
    'ReciprocalEndPhysicalLongitude', 'ReciprocalEndPhysicalLongitudeS',
    'ReciprocalEndPhysicalElevation', 'ReciprocalEndCrossingHeight',
    'ReciprocalEndGlidePathAngle', 'ReciprocalEndDisplacedLength',
    'ReciprocalEndVASI', 'ReciprocalEndCenterlineLights',
    'ReciprocalEndTouchdownLights', 'ReciprocalEndObjectDescription',
    'ReciprocalEndObjectHeight',
    # Weight-bearing capacity
    'RunwayWeightBearingCapacitySW', 'RunwayWeightBearingCapacityDW',
    'RunwayWeightBearingCapacityDT', 'RunwayWeightBearingCapacityDDT',
    # Declared distances, base end
    'BaseEndTakeOffRunAvailableTORA', 'BaseEndTakeOffDistanceAvailableTODA',
    'BaseEndAcltStopDistanceAvailableASDA', 'BaseEndLandingDistanceAvailableLDA',
    # Declared distances, reciprocal end
    'ReciprocalEndTakeOffRunAvailableTORA', 'ReciprocalEndTakeOffDistanceAvailableTODA',
    'ReciprocalEndAcltStopDistanceAvailableASDA', 'ReciprocalEndLandingDistanceAvailableLDA',
]

# Every current row label that is not in the keep-list, in index order.
is_kept = df.index.isin(labels_to_keep)
labels_to_drop = df.index[~is_kept].tolist()

In [6]:
# Remove every dictionary row whose label was not selected for keeping.
df.drop(labels=labels_to_drop, axis='index', inplace=True)
df.head()

Unnamed: 0_level_0,category_id,description
Field,Unnamed: 1_level_1,Unnamed: 2_level_1
SiteNumber,DLID,"Landing facility site number. The unique identifying number of the airport whose runway is being described. Together with the runway id field, this provides the unique key to a runway record."
State,DLID,Runway state post office code. The state where the landing facility is located. This was used in sorting the entire file by state and site number.
RunwayID,DLID,Runway identification. Ex. 01/19; 18L/36R (parallel runways); H1 (helipad); N/S (north/south); ALL/WAY (sealane); B1 (balloonport)
RunwayLength,A31,Physical runway length (nearest foot) (ex. 3500)
RunwayWidth,A32,Physical runway width (nearest foot) (ex. 100)


In [7]:

# Zero-width positions where a CamelCase label gets an underscore inserted:
# lower->Upper, the last capital of an acronym before a capitalised word,
# and a capital immediately before "ID".
_CAMEL_BOUNDARY = re.compile(
    r'(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|(?<=[A-Z])(?=ID)'
)

# Ordered (pattern, replacement) rewrites applied to the lower-cased label.
# Order matters: '_end' is stripped everywhere first, then restored only for
# the two runway-end id labels.
_LABEL_REWRITES = (
    (r'_end', ''),
    (r'reciprocal', 'rcpr'),
    (r'rcpr_id', 'reciprocal_end_id'),
    (r'base_id', 'base_end_id'),
    (r'condition', 'cond'),
    # The original pattern ended with a stray '|' (an empty alternative that
    # matches at every position); it is removed here. Output is unchanged.
    (r'_physical|_description|_traffic', ''),
    (r'weight_bearing_capacity', 'capacity_lbs'),
    (r'_take_off_run_available|_take_off_distance_available'
     r'|_aclt_stop_distance_available|_landing_distance_available', ''),
)


def clean_label(label):
    """Convert one CamelCase field name into its shortened snake_case form.

    The label is split at CamelCase boundaries with underscores, stripped of
    surrounding whitespace, lower-cased, and then passed through each rewrite
    in ``_LABEL_REWRITES`` in order.

    Examples:
        'BaseEndPhysicalLatitude'                  -> 'base_latitude'
        'ReciprocalEndID'                          -> 'reciprocal_end_id'
        'RunwayWeightBearingCapacitySW'            -> 'runway_capacity_lbs_sw'
        'ReciprocalEndLandingDistanceAvailableLDA' -> 'rcpr_lda'

    Args:
        label: A single row label (string) from the data dictionary index.

    Returns:
        The cleaned label as a string.
    """
    cleaned = _CAMEL_BOUNDARY.sub('_', label).strip().lower()
    for pattern, replacement in _LABEL_REWRITES:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned


# Apply the cleanup to every row label in a single pass.
df.index = [clean_label(label) for label in df.index]


In [8]:
# Label the row index so it appears as the 'category' column on export.
df.index.name = 'category'

In [9]:
# Preview the first five rows with the cleaned labels and the named index.
df.head(n=5)

Unnamed: 0_level_0,category_id,description
category,Unnamed: 1_level_1,Unnamed: 2_level_1
site_number,DLID,"Landing facility site number. The unique identifying number of the airport whose runway is being described. Together with the runway id field, this provides the unique key to a runway record."
state,DLID,Runway state post office code. The state where the landing facility is located. This was used in sorting the entire file by state and site number.
runway_id,DLID,Runway identification. Ex. 01/19; 18L/36R (parallel runways); H1 (helipad); N/S (north/south); ALL/WAY (sealane); B1 (balloonport)
runway_length,A31,Physical runway length (nearest foot) (ex. 3500)
runway_width,A32,Physical runway width (nearest foot) (ex. 100)


In [10]:
# Release the workbook file handle opened at the top of the notebook.
# NOTE(review): in the visible cells the handle is only used by pd.read_excel,
# yet it stays open until this cell runs; a `with open(...)` block around the
# read would close it even if an intermediate cell fails.
infile.close()

In [11]:
# Export the cleaned runway data dictionary as a tab-separated file; the
# 'category' index is written as the first column.
outfile = '../Clean-Data/runways_dict.tsv'
df.to_csv(path_or_buf=outfile, sep='\t')

In [12]:
# outfile = open('../Clean-Data/data_dictionary.xlsx', mode='r+b')
# df_facs = pd.read_excel(outfile, sheet_name='Facilities', index_col='category')
# df_facs.head()
# outfile.close()

In [13]:
# with pd.ExcelWriter('../Clean-Data/data_dictionary.xlsx') as writer:
#     df_facs.to_excel(writer, sheet_name='Facilities', index_label='category')
#     df.to_excel(writer, sheet_name='Runways', index_label='category')
#     