# FAA Aircraft Registration Database

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import requests
import matplotlib as mpl
import geopandas as gpd
import json
import numpy as np
from skimpy import clean_columns

In [3]:
# Let wide and long frames render in full: up to 1000 columns and 1000 rows,
# with no truncation of individual cell contents.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

In [4]:
# Download page: https://www.faa.gov/licenses_certificates/aircraft_certification/aircraft_registry/releasable_aircraft_download/

In [5]:
# Direct link to the zipped database: https://registry.faa.gov/database/ReleasableAircraft.zip

### Read master and reference files

In [12]:
# Load the registration master table from the local, unzipped download.
# low_memory=False parses the file in one pass so each column gets a single
# inferred dtype instead of chunk-by-chunk mixed types.
master = pd.read_csv(
    filepath_or_buffer="/Users/stiles/data/aircraft/ReleasableAircraft/MASTER.txt",
    low_memory=False,
)

In [13]:
# Load the aircraft reference table from the local, unzipped download.
# low_memory=False parses the file in one pass so each column gets a single
# inferred dtype instead of chunk-by-chunk mixed types.
reference = pd.read_csv(
    filepath_or_buffer="/Users/stiles/data/aircraft/ReleasableAircraft/ACFTREF.txt",
    low_memory=False,
)

### Clean up columns

In [None]:
# Normalize the master headers to snake_case: trim padding, lowercase, turn
# spaces and hyphens into underscores, and drop parentheses. All four
# substitutions are single characters, so one translate pass covers them.
master.columns = (
    master.columns.str.strip()
    .str.lower()
    .str.translate(str.maketrans(" -", "__", "()"))
)

In [None]:
# Normalize the reference-table headers to snake_case: trim padding,
# lowercase, turn spaces and hyphens into underscores, and drop parentheses.
# The names must be derived from reference's OWN columns; taking them from
# master either fails on a length mismatch or mislabels every column.
reference.columns = (
    reference.columns.str.strip()
    .str.lower()
    .str.replace(" ", "_", regex=False)
    .str.replace("(", "", regex=False)
    .str.replace(")", "", regex=False)
    .str.replace("-", "_", regex=False)
)

In [None]:
# Attach a readable label in "type" for the aircraft-type codes of interest.
# Rows carrying any other code are not touched by these assignments.
type_labels = {
    "4": "Fixed wing single engine",
    "5": "Fixed wing multi engine",
    "6": "Rotorcraft",
}
for acft_code, label in type_labels.items():
    reference.loc[reference["type_acft"] == acft_code, "type"] = label

In [None]:
# Trim surrounding whitespace from the text columns that are matched against
# exact strings later on.
for text_column in ("name", "certification", "type_registrant"):
    master[text_column] = master[text_column].str.strip()

### Owners

In [None]:
# Count registrations for every (registrant type, certification) pair; the
# count lands in a "planes" column.
owners = (
    master.groupby(by=["type_registrant", "certification"])
    .size()
    .reset_index(name="planes")
)

In [None]:
# Ten largest groups by aircraft count.
owners.sort_values(by="planes", ascending=False).iloc[:10]

In [None]:
# Groups with more than 20 aircraft whose certification is exactly "1T".
corptrans = owners.loc[owners["planes"].gt(20) & owners["certification"].eq("1T")]

In [None]:
# Airline registrant names, spelled exactly as they are compared against the
# `name` column further down (membership tests via `isin`, so matching is
# exact and case-sensitive).
airlines = [
    "DELTA AIR LINES INC",
    "AMERICAN AIRLINES INC",
    "SKYWEST AIRLINES INC",
    "SWIFT AIR LLC",
    "UNITED AIRLINES INC",
    "SOUTHWEST AIRLINES CO",
    "JETBLUE AIRWAYS CORP",
    "HORIZON AIR INDUSTRIES",
    "ALASKA AIRLINES INC",
    "REPUBLIC AIRWAYS INC",
    "SPIRIT AIRLINES INC",
    "MESA AIRLINES INC",
]

In [None]:
# Echo the list as the cell output for a quick visual check.
airlines

In [None]:
# Cargo-operator registrant names, compared against the `name` column with
# exact `isin` matching further down. Federal Express is listed under both
# spellings ("CORP" and "CORPORATION").
cargo = [
    "UNITED PARCEL SERVICE CO",
    "FEDERAL EXPRESS CORP",
    "FEDERAL EXPRESS CORPORATION",
    "CARGO AIRCRAFT MANAGEMENT INC",
    "ATLAS AIR INC",
]

In [None]:
# Echo the list as the cell output for a quick visual check.
cargo

In [None]:
# Registrant names of private-jet operators.
# NOTE(review): not referenced by any other cell visible here — confirm
# whether it is still needed.
private = ["NETJETS SALES INC", "FLEXJET LLC"]

In [None]:
# Ten largest of the filtered groups by aircraft count.
corptrans.sort_values("planes", ascending=False).iloc[:10]

In [None]:
# Keep only registrations whose registrant name exactly matches an entry in
# `airlines`. This used to be a plain alias of `master`, so the
# "airline_planes" export contained every registration in the database.
airline_planes = master[master["name"].isin(airlines)]

In [None]:
# Narrow the frame to the registrant name and registration number only.
airline_planes_slim = airline_planes.loc[:, ["name", "n_number"]]

In [None]:
# Write the two-column table to disk without the index column.
airline_planes_slim.to_csv(path_or_buf="output/airline_planes.csv", index=False)

### Airline planes

In [None]:
# Registrations whose name contains the literal text "AMERICAN AIRLINES".
# na=False treats a missing name as "no match"; without it the mask holds NaN
# and boolean indexing raises. regex=False because the pattern is plain text.
american = master[
    master["name"].str.contains("AMERICAN AIRLINES", na=False, regex=False)
]

In [None]:
# Peek at a single matching registration, selected by position.
# NOTE(review): the position is hard-coded; this raises IndexError whenever
# fewer than 584 rows match.
american.iloc[583]

In [None]:
# Prefix every registration number with "N". Any "N" already at the front is
# removed first, so re-running this cell does not produce "NN..." values.
master["n_number"] = "N" + master["n_number"].str.lstrip("N")

In [None]:
# Attach the reference attributes to each registration. This is an inner
# join: registrations whose model code has no match in the reference table
# are dropped.
src = pd.merge(
    master,
    reference,
    how="inner",
    left_on="mfr_mdl_code",
    right_on="code",
)

In [None]:
# Spot-check the merge with one known registration number.
src.loc[src["n_number"].eq("N302FD")]

In [None]:
# Work on an independent copy so later edits do not touch the merged frame.
df = src.copy(deep=True)

In [None]:
# Count rows whose registrant is neither an airline nor a cargo operator.
# Both exclusions must hold at once (AND). With OR the condition is true for
# any name that is not in both lists, i.e. for practically every row.
len(df[(~df["name"].isin(airlines)) & (~df["name"].isin(cargo))])

In [None]:
# Aircraft that are not registered to a listed airline or cargo operator,
# whose aircraft-type code contains 4 or 5, and whose certification contains
# "1T". na=False makes missing values an explicit "no match" instead of
# letting NaN leak into the combined boolean mask.
planes = df[
    (~df["name"].isin(airlines))
    & (~df["name"].isin(cargo))
    & (df["type_aircraft"].str.contains("4|5", na=False))
    & (df["certification"].str.contains("1T", na=False))
]

In [None]:
# Number of rows that survived the filter.
planes.shape[0]

### Export

In [None]:
# Write the full merged table (not the filtered `planes` subset) to disk
# without the index column.
df.to_csv(
    path_or_buf="/Users/stiles/data/aircraft/master_faa_owners_database.csv",
    index=False,
)