# Query and Join Data
This notebook demonstrates how to query the SQLAlchemy ORM models and join them into a single flat table, so that the result resembles a DataFrame loaded from an imported CSV file.

In [24]:
# Import Required Libraries
from models import Airline, Airport, Airplane, Flight, FlightDetails
from sqlalchemy.orm import sessionmaker, aliased
from sqlalchemy import create_engine
import pandas as pd

# Database connection: build an engine for the SQLite file named in the URL,
# then open the session that the query cell below reads from.
DATABASE_URL = "sqlite:///flights.db"  # Replace with your database URL
engine = create_engine(DATABASE_URL)
Session = sessionmaker(bind=engine)
session = Session()

In [25]:
# Build the flight query; every join is executed inside the database.
# Airport is joined twice (departure side and arrival side), so each role
# needs its own alias to keep the two joins distinguishable.
DepartureAirport = aliased(Airport)
ArrivalAirport = aliased(Airport)

# One entry per output column. `.label(...)` assigns an explicit output name,
# which is also what tells the two airport aliases' columns apart.
selected_columns = [
    # Flight identity and status
    Flight.id.label("flight_id"),
    Flight.status,
    FlightDetails.flight_number,
    FlightDetails.call_sign,
    # Operating airline
    Airline.airline_name,
    Airline.airline_iata,
    Airline.airline_icao,
    # Aircraft
    Airplane.aircraft_model,
    Airplane.aircraft_reg,
    # Departure airport
    DepartureAirport.airport_name.label("departure_airport_name"),
    DepartureAirport.airport_iata.label("departure_airport_iata"),
    DepartureAirport.airport_icao.label("departure_airport_icao"),
    DepartureAirport.timezone.label("departure_timezone"),
    # Arrival airport
    ArrivalAirport.airport_name.label("arrival_airport_name"),
    ArrivalAirport.airport_iata.label("arrival_airport_iata"),
    ArrivalAirport.airport_icao.label("arrival_airport_icao"),
    ArrivalAirport.timezone.label("arrival_timezone"),
    # Schedule, taken from the *_UTC columns of Flight
    Flight.dep_date_time_UTC.label("departure_datetime"),
    Flight.arr_date_time_UTC.label("arrival_datetime"),
]

# `.join()` defaults to an INNER JOIN, so a flight only appears in the result
# when all five related rows exist.
query = (
    session.query(*selected_columns)
    .join(FlightDetails, Flight.flight_details_id == FlightDetails.id)
    # NOTE(review): the attribute is spelled `arline_id` here — confirm this
    # matches the Flight model before "fixing" the spelling.
    .join(Airline, Flight.arline_id == Airline.id)
    .join(Airplane, Flight.airplane_id == Airplane.id)
    .join(DepartureAirport, Flight.dep_airport_id == DepartureAirport.id)
    .join(ArrivalAirport, Flight.arr_airport_id == ArrivalAirport.id)
)

# Convert the query result to a Pandas DataFrame.
# Column names are read from the query itself (the label, or the attribute
# name for unlabeled columns) instead of a hand-copied list, so the DataFrame
# header cannot drift out of sync with the select list.
column_names = [description["name"] for description in query.column_descriptions]

# Passing `columns=` explicitly keeps the header even when no rows come back.
joined_data = pd.DataFrame(query.all(), columns=column_names)

In [26]:
# Preview the top five rows of the joined result
joined_data.head(n=5)

Unnamed: 0,flight_id,status,flight_number,call_sign,airline_name,airline_iata,airline_icao,aircraft_model,aircraft_reg,departure_airport_name,departure_airport_iata,departure_airport_icao,departure_timezone,arrival_airport_name,arrival_airport_iata,arrival_airport_icao,arrival_timezone,departure_datetime,arrival_datetime
0,1,Arrived,FR 2228,RYR8447,Ryanair,FR,RYR,Boeing 737,SP-RZE,Valencia,VLC,LEVC,Europe/Madrid,Kraków,KRK,EPKK,Europe/Warsaw,2025-04-29 18:20:00,2025-04-29 21:20:00
1,2,Arrived,W6 2068,WZZ976,Wizz Air,W6,WZZ,Airbus A321 NEO,9H-WNJ,Milan,MXP,LIMC,Europe/Rome,Kraków,KRK,EPKK,Europe/Warsaw,2025-04-29 20:00:00,2025-04-29 21:55:00
2,3,Arrived,W6 2092,WZZ677,Wizz Air,W6,WZZ,Airbus A320,HA-LXV,Lyon,LYS,LFLL,Europe/Paris,Kraków,KRK,EPKK,Europe/Warsaw,2025-04-29 19:45:00,2025-04-29 21:55:00
3,4,Arrived,FR 6783,RYR4LX,Ryanair,FR,RYR,Boeing 737-800,SP-RKO,Trieste,TRS,LIPQ,Europe/Rome,Kraków,KRK,EPKK,Europe/Warsaw,2025-04-29 21:00:00,2025-04-29 22:30:00
4,5,Arrived,FR 2778,,Ryanair,FR,RYR,Boeing 737-800,,Thessaloniki,SKG,LGTS,Europe/Athens,Kraków,KRK,EPKK,Europe/Warsaw,2025-04-29 20:55:00,2025-04-29 22:55:00


In [27]:
# Columns retained for the cleaned dataset, listed in output order.
# Compared with joined_data this omits flight_id and both timezone columns,
# and moves flight_number / call_sign ahead of status.
columns_to_keep = [
    "flight_number", "call_sign", "status", "airline_name",
    "airline_iata", "airline_icao", "aircraft_model",
    "aircraft_reg", "departure_airport_name", "departure_airport_iata",
    "departure_airport_icao", "arrival_airport_name",
    "arrival_airport_iata", "arrival_airport_icao",
    "departure_datetime", "arrival_datetime"
]

# `.copy()` makes cleaned_data an independent DataFrame, so later in-place
# cleaning does not raise SettingWithCopyWarning or depend on joined_data.
cleaned_data = joined_data[columns_to_keep].copy()