# Query and Join Data
This notebook demonstrates how to query the ORM models and join them into a single flat table that resembles an imported CSV file. Missing data will be represented as NaN.

In [1]:
from pathlib import Path

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, aliased

from models import Airline, Airport, Airplane, Flight, FlightDetails

# Set up the database connection.
# "sqlite:///flights.db" is a relative SQLite URL, so the file is looked up in the
# kernel's current working directory.
engine = create_engine("sqlite:///flights.db")
# Session is the factory bound to that engine; `session` is the instance the
# query cell below runs against.
Session = sessionmaker(bind=engine)
session = Session()

In [2]:
# A flight is joined to Airport twice (once through dep_airport_id, once through
# arr_airport_id), so each role gets its own alias of the Airport model.
DepartureAirport = aliased(Airport)
ArrivalAirport = aliased(Airport)

# Airport attributes selected for both the departure and the arrival side,
# in output order.
_AIRPORT_FIELDS = (
    "airport_name",
    "airport_iata",
    "airport_icao",
    "timezone",
    "latitude",
    "longitude",
)


def _airport_columns(airport_alias, prefix):
    """Return the _AIRPORT_FIELDS columns of `airport_alias`, each labelled `<prefix>_<field>`."""
    return [
        getattr(airport_alias, field).label(f"{prefix}_{field}")
        for field in _AIRPORT_FIELDS
    ]


# Selected columns, listed in the order they appear in every result row.
_selected_columns = [
    Flight.id.label("flight_id"),
    Flight.status,
    FlightDetails.flight_number,
    FlightDetails.call_sign,
    Airline.airline_name,
    Airline.airline_iata,
    Airline.airline_icao,
    Airplane.aircraft_model,
    Airplane.aircraft_reg,
    *_airport_columns(DepartureAirport, "departure"),
    *_airport_columns(ArrivalAirport, "arrival"),
    Flight.dep_date_time_UTC.label("departure_datetime"),
    Flight.dep_rev_date_time_UTC.label("departure_datetime_revised"),
    Flight.arr_date_time_UTC.label("arrival_datetime"),
    Flight.arr_rev_date_time_UTC.label("arrival_datetime_revised"),
]

# Every relation is attached with an outer join, so a flight row is still
# returned when a related record is missing (those columns come back empty).
query = session.query(*_selected_columns)
query = query.outerjoin(FlightDetails, Flight.flight_details_id == FlightDetails.id)
# NOTE(review): the attribute really is spelled `arline_id` here (sic) and the
# notebook's recorded output shows the join works with it — presumably the
# misspelling lives in models.Flight; rename it there before changing it here.
query = query.outerjoin(Airline, Flight.arline_id == Airline.id)
query = query.outerjoin(Airplane, Flight.airplane_id == Airplane.id)
query = query.outerjoin(DepartureAirport, Flight.dep_airport_id == DepartureAirport.id)
query = query.outerjoin(ArrivalAirport, Flight.arr_airport_id == ArrivalAirport.id)

# Convert the query result to a Pandas DataFrame.
# The column names are read from the query itself (the label, or the attribute
# name for unlabelled columns) rather than from a second hand-maintained list,
# so the header cannot drift out of step with the order of the selected columns.
result_columns = [description["name"] for description in query.column_descriptions]
joined_data = pd.DataFrame(query.all(), columns=result_columns)

In [3]:
# Preview the first rows of the joined table (head() defaults to five rows)
joined_data.head()

Unnamed: 0,flight_id,status,flight_number,call_sign,airline_name,airline_iata,airline_icao,aircraft_model,aircraft_reg,departure_airport_name,...,arrival_airport_name,arrival_airport_iata,arrival_airport_icao,arrival_timezone,arrival_latitude,arrival_longitude,departure_datetime,departure_datetime_revised,arrival_datetime,arrival_datetime_revised
0,1,Arrived,KL 1309,KLM15H,KLM,KL,KLM,Embraer 190,PH-EZH,Amsterdam,...,Gdańsk,GDN,EPGD,Europe/Warsaw,54.380978,18.468655,2025-05-15 18:55:00,2025-05-15 19:11:00,2025-05-15 20:35:00,2025-05-15 20:36:00
1,2,Arrived,FR 4094,RYR37VT,Ryanair,FR,RYR,Boeing 737-800,EI-EVN,Manchester,...,Gdańsk,GDN,EPGD,Europe/Warsaw,54.380978,18.468655,2025-05-15 18:20:00,2025-05-15 18:31:00,2025-05-15 20:35:00,2025-05-15 20:42:00
2,3,Arrived,FR 4943,RYR4943,Ryanair,FR,RYR,Boeing 737-800,SP-RSU,Torp,...,Gdańsk,GDN,EPGD,Europe/Warsaw,54.380978,18.468655,2025-05-15 20:05:00,2025-05-15 20:05:00,2025-05-15 21:10:00,2025-05-15 20:48:00
3,4,Arrived,W6 1742,,Wizz Air,W6,WZZ,Airbus A321,,Copenhagen,...,Gdańsk,GDN,EPGD,Europe/Warsaw,54.380978,18.468655,2025-05-15 20:05:00,2025-05-15 20:05:00,2025-05-15 21:10:00,2025-05-15 21:01:00
4,5,Arrived,FR 4678,RYR1TP,Ryanair,FR,RYR,Boeing 737-800,9H-QDC,Stockholm,...,Gdańsk,GDN,EPGD,Europe/Warsaw,54.380978,18.468655,2025-05-15 18:45:00,2025-05-15 19:03:00,2025-05-15 21:30:00,2025-05-15 21:46:00


In [4]:
# Columns written to the export, in output order. The surrogate flight_id is
# left out and the remaining fields are regrouped by entity.
columns_to_keep = [
    # flight
    "flight_number", "call_sign", "status",
    # airline
    "airline_name", "airline_iata", "airline_icao",
    # aircraft
    "aircraft_reg", "aircraft_model",
    # departure
    "departure_airport_name", "departure_airport_iata", "departure_airport_icao",
    "departure_timezone", "departure_datetime", "departure_datetime_revised",
    "departure_latitude", "departure_longitude",
    # arrival
    "arrival_airport_name", "arrival_airport_iata", "arrival_airport_icao",
    "arrival_timezone", "arrival_datetime", "arrival_datetime_revised",
    "arrival_latitude", "arrival_longitude",
]

# Label-based selection of all rows and exactly the columns listed above
cleaned_data = joined_data.loc[:, columns_to_keep]

In [5]:
# Preview the first rows of the trimmed table (head() defaults to five rows)
cleaned_data.head()

Unnamed: 0,flight_number,call_sign,status,airline_name,airline_iata,airline_icao,aircraft_reg,aircraft_model,departure_airport_name,departure_airport_iata,...,departure_latitude,departure_longitude,arrival_airport_name,arrival_airport_iata,arrival_airport_icao,arrival_timezone,arrival_datetime,arrival_datetime_revised,arrival_latitude,arrival_longitude
0,KL 1309,KLM15H,Arrived,KLM,KL,KLM,PH-EZH,Embraer 190,Amsterdam,AMS,...,52.30907,4.763385,Gdańsk,GDN,EPGD,Europe/Warsaw,2025-05-15 20:35:00,2025-05-15 20:36:00,54.380978,18.468655
1,FR 4094,RYR37VT,Arrived,Ryanair,FR,RYR,EI-EVN,Boeing 737-800,Manchester,MAN,...,53.362907,-2.273354,Gdańsk,GDN,EPGD,Europe/Warsaw,2025-05-15 20:35:00,2025-05-15 20:42:00,54.380978,18.468655
2,FR 4943,RYR4943,Arrived,Ryanair,FR,RYR,SP-RSU,Boeing 737-800,Torp,TRF,...,59.178085,10.251807,Gdańsk,GDN,EPGD,Europe/Warsaw,2025-05-15 21:10:00,2025-05-15 20:48:00,54.380978,18.468655
3,W6 1742,,Arrived,Wizz Air,W6,WZZ,,Airbus A321,Copenhagen,CPH,...,55.62905,12.647601,Gdańsk,GDN,EPGD,Europe/Warsaw,2025-05-15 21:10:00,2025-05-15 21:01:00,54.380978,18.468655
4,FR 4678,RYR1TP,Arrived,Ryanair,FR,RYR,9H-QDC,Boeing 737-800,Stockholm,ARN,...,59.64982,17.930365,Gdańsk,GDN,EPGD,Europe/Warsaw,2025-05-15 21:30:00,2025-05-15 21:46:00,54.380978,18.468655


In [6]:
# Count the missing values in each exported column
cleaned_data.isna().sum()

flight_number                    0
call_sign                     2101
status                           0
airline_name                     0
airline_iata                   274
airline_icao                   248
aircraft_reg                  2084
aircraft_model                 290
departure_airport_name           0
departure_airport_iata           0
departure_airport_icao           0
departure_timezone               0
departure_datetime             568
departure_datetime_revised     568
departure_latitude               0
departure_longitude              0
arrival_airport_name             0
arrival_airport_iata             0
arrival_airport_icao             0
arrival_timezone                 0
arrival_datetime              1115
arrival_datetime_revised      1115
arrival_latitude                 0
arrival_longitude                0
dtype: int64

In [7]:
# Write the cleaned table to CSV without the pandas index column.
# to_csv does not create missing parent directories, so create the output
# folder first; otherwise a fresh checkout fails on this cell.
output_path = Path("csv_retrieved/cleaned_flight_data.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
cleaned_data.to_csv(output_path, index=False)