In [82]:
## IDEAS
# allow user to choose to display arrivals or departures, domestic or international
# display some graphics
# image path https://www.sydneyairport.com.au/flights/logo/VA
# combine all (arrivals, departures, international, domestic) into one data frame
# add two columns - flight_type, terminal_type

In [83]:
from datetime import datetime
from selenium import webdriver
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd

In [84]:
# Launch a Chrome browser session through Selenium; the scraping cell below
# uses this `driver` to load each flights page and read its page source.
driver = webdriver.Chrome()

In [85]:
# Query-string values for the flights URL.
# NOTE: `today` comes from the clock of the machine running the notebook.
today = datetime.today().strftime('%Y-%m-%d')
flight_types  = ["arrival", "departure"]
terminal_types = ["international", "domestic"]

# Column order of the resulting `flights` dataframe
flight_columns = ['type', 'terminal', 'journey', 'stopover', 'airline', 'airline_logo',
                  'flight_number', 'other_flight_number', 'status', 'scheduled_time',
                  'estimated_time']

# One results page per (flight type, terminal type, date) combination
url_template = ("https://www.sydneyairport.com.au/flights/?query=&flightType={flight_type}"
                "&terminalType={terminal_type}&date={date}"
                "&sortColumn=scheduled_time&ascending=true&showAll=true")


def _text_or_none(container, tag, css_class):
    """Return the text of the first `tag` with class `css_class` inside `container`.

    Returns None when no such element exists, so a card with a missing field
    yields a missing value instead of raising AttributeError on `None.text`.
    """
    element = container.find(tag, attrs={"class": css_class})
    return element.text if element is not None else None


def _parse_flight_card(container, flight_type, terminal_type):
    """Extract one flight record (a dict keyed by `flight_columns`) from a flight card.

    Parameters
    ----------
    container : parsed "flight-card" element for a single flight
    flight_type : "arrival" or "departure" (the page the card was found on)
    terminal_type : "international" or "domestic" (the page the card was found on)

    Any field whose element is absent from the card is stored as None.
    """
    logo = container.img
    return {
        'type': flight_type,
        'terminal': terminal_type,
        # Origin (arrivals) / destination (departures)
        'journey': _text_or_none(container, "div", "destination-name"),
        # Stopover, only present on some flights
        'stopover': _text_or_none(container, "div", "city-via"),
        'airline': _text_or_none(container, "span", "with-image"),
        # `src` of the first <img> in the card; None if there is no image or no src
        'airline_logo': logo.get('src') if logo is not None else None,
        'flight_number': _text_or_none(container, "div", "heading-medium"),
        # Alternative (codeshare-style) flight numbers, only present on some flights
        'other_flight_number': _text_or_none(container, "div", "body-xsmall"),
        'status': _text_or_none(container, "div", "status"),
        'scheduled_time': _text_or_none(container, "div", "large-scheduled-time"),
        'estimated_time': _text_or_none(container, "div", "estimated-time"),
    }


# Loop through each combination of (arrival/departure) x (international/domestic)
records = []
for flight_type in flight_types:
    for terminal_type in terminal_types:
        url = url_template.format(flight_type=flight_type,
                                  terminal_type=terminal_type,
                                  date=today)
        driver.get(url)

        html_soup = BeautifulSoup(driver.page_source, "html.parser")
        # The first two "flight-card" matches are skipped, as in the original scrape
        # (presumably they are not real flight rows -- TODO confirm against the page).
        flight_containers = html_soup.find_all("div", attrs={"class": "flight-card"})[2:]

        # One record per container (one container per flight)
        records.extend(_parse_flight_card(container, flight_type, terminal_type)
                       for container in flight_containers)

# Build the dataframe once from the records; `columns=` fixes the column order and
# keeps the expected columns even when no flights were scraped.
flights = pd.DataFrame(records, columns=flight_columns)

flights

Unnamed: 0,type,terminal,journey,stopover,airline,airline_logo,flight_number,other_flight_number,status,scheduled_time,estimated_time
0,arrival,international,London,Via Singapore,Qantas,/flights/logo/QF,QF2,EK5002,Arrived,05:10,05:01
1,arrival,international,London,Via Singapore,British Airways,/flights/logo/BA,BA15,"AY5915, IB4745",Arrived,05:10,05:05
2,arrival,international,Singapore,,Singapore Airlines,/flights/logo/SQ,SQ221,"A31213, AF7232, ET1317, LH9780, LX4172, TK9312...",Arrived,05:55,05:38
3,arrival,international,Dubai,,Emirates,/flights/logo/EK,EK412,QF8412,Arrived,06:05,06:10
4,arrival,international,Jakarta,,Qantas,/flights/logo/QF,QF42,,Arrived,06:05,06:04
5,arrival,international,San Francisco,,Qantas,/flights/logo/QF,QF74,AA7396,Arrived,06:05,06:02
6,arrival,international,Denpasar,,Virgin Australia,/flights/logo/VA,VA36,,Arrived,06:10,06:01
7,arrival,international,Tokyo,,Japan Airlines,/flights/logo/JL,JL771,LA7467,Arrived,06:10,06:24
8,arrival,international,Delhi,,Air India,/flights/logo/AI,AI302,,Arrived,06:10,06:40
9,arrival,international,Manila,,Qantas,/flights/logo/QF,QF20,,Arrived,06:10,06:10


In [89]:
# Show only the flights whose status is 'Cancelled'
flights.loc[flights['status'].eq('Cancelled')]

Unnamed: 0,type,terminal,journey,stopover,airline,airline_logo,flight_number,other_flight_number,status,scheduled_time,estimated_time
118,arrival,domestic,Melbourne,,Jetstar,/flights/logo/JQ,JQ500,,Cancelled,07:25,-
165,arrival,domestic,Cooma,,Rex,/flights/logo/ZL,ZL722,,Cancelled,10:40,-
177,arrival,domestic,Melbourne,,Qantas,/flights/logo/QF,QF422,,Cancelled,11:25,-
181,arrival,domestic,Adelaide,,Jetstar,/flights/logo/JQ,JQ763,,Cancelled,11:30,-
244,arrival,domestic,Gold Coast,,Jetstar,/flights/logo/JQ,JQ411,,Cancelled,14:50,-
288,arrival,domestic,Melbourne,,Jetstar,/flights/logo/JQ,JQ514,,Cancelled,16:45,-
302,arrival,domestic,Melbourne,,Jetstar,/flights/logo/JQ,JQ518,,Cancelled,17:30,-
343,arrival,domestic,Melbourne,,Virgin Australia,/flights/logo/VA,VA869,EY6707,Cancelled,18:40,-
358,arrival,domestic,Brisbane,,Qantas,/flights/logo/QF,QF547,,Cancelled,19:10,-
373,arrival,domestic,Melbourne,,Virgin Australia,/flights/logo/VA,VA879,EY6949,Cancelled,19:55,-
