In [20]:
import os
import time
from datetime import datetime

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


In [21]:
# Location of the ChromeDriver binary. Set the CHROMEDRIVER_PATH environment variable
# to run the notebook on another machine; the fallback is the original local path.
CHROMEDRIVER_PATH = os.environ.get(
    "CHROMEDRIVER_PATH",
    "C:\\Users\\Hanna.Ta\\Projects\\plugshare-web-scraping\\chromedriver.exe",
)
DEPARTURES_URL = "https://www.changiairport.com/en/flights/departures.html"

# Start Chrome through the configured driver binary and open the departures page.
service = Service(executable_path=CHROMEDRIVER_PATH)
driver = webdriver.Chrome(service=service)
driver.maximize_window()

driver.get(DEPARTURES_URL)

# Day labels in the exact form pick_a_date() interpolates into the calendar's
# aria-label lookup ("Choose <label>").
dates = ['Thursday, April 4th, 2024',
         'Friday, April 5th, 2024',
         'Saturday, April 6th, 2024',
         'Sunday, April 7th, 2024',
         'Monday, April 8th, 2024',
         'Tuesday, April 9th, 2024',
         'Wednesday, April 10th, 2024']

# Snapshot of the page as first loaded. The scraping cell re-parses
# driver.page_source itself after selecting a date.
page_source = driver.page_source

soup = BeautifulSoup(page_source, features='html.parser')

In [22]:
def pick_a_date(date):
    """Open the page's date picker and click the calendar cell for ``date``.

    Parameters
    ----------
    date : str
        Day label exactly as it follows "Choose " in the calendar cell's
        ``aria-label`` attribute, e.g. ``'Thursday, April 4th, 2024'``.

    Raises
    ------
    selenium.common.exceptions.TimeoutException
        If the picker or the requested day does not become clickable within
        10 seconds.
    """
    wait = WebDriverWait(driver, 10)

    # ``until`` returns the element it waited for, so it is used directly instead
    # of being looked up a second time.
    date_picker = wait.until(
        EC.element_to_be_clickable((By.CLASS_NAME, "react-datepicker__container"))
    )
    # Scroll and click through JavaScript rather than WebElement.click().
    driver.execute_script("arguments[0].scrollIntoView();", date_picker)
    driver.execute_script("arguments[0].click();", date_picker)

    date_element = wait.until(
        EC.element_to_be_clickable((By.XPATH, f"//div[@aria-label='Choose {date}']"))
    )
    driver.execute_script("arguments[0].scrollIntoView();", date_element)
    driver.execute_script("arguments[0].click();", date_element)
    
def latest_flight_time():
    """Return the time shown for the last flight row currently on the page.

    The live page source is parsed, the last ``<a>`` element whose class is
    ``'flightlist__item display-lg'`` is selected, and its time cell is decoded
    with ``get_flight_time`` so the slicing rules are defined in one place only.

    Returns
    -------
    str
        The text preceding the last five characters of the time cell (up to any
        "(") when that text is non-empty, otherwise the last five characters.

    Raises
    ------
    IndexError
        If no matching flight rows are present in the page.
    """
    soup = BeautifulSoup(driver.page_source, features='html')
    last_flight = soup.find_all('a', class_='flightlist__item display-lg')[-1]

    _, _, final_flight_time = get_flight_time(
        last_flight.find('div', class_='flightlist__item-time')
    )
    return final_flight_time

def click_next_flights():
    """Click the ``a.next-flights`` control once it becomes clickable.

    Waits up to 10 seconds for the link inside
    ``div.flightFilters.flightFilters-btn``, scrolls it into view and clicks it
    via JavaScript. Presumably this loads further rows into the flight list;
    confirm against the page.
    """
    next_link_locator = (By.CSS_SELECTOR, "div.flightFilters.flightFilters-btn a.next-flights")
    next_link = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(next_link_locator))

    # Bring the link into view first, then trigger the click.
    for script in ("arguments[0].scrollIntoView();", "arguments[0].click();"):
        driver.execute_script(script, next_link)

def get_flight_time(flight_item):
    """Split the text of a flight's time cell into its components.

    Only the text before the first "(" is considered. Its last five characters
    (e.g. ``'00:05'``) form the trailing time; anything before them is the
    leading part.

    Parameters
    ----------
    flight_item
        Object exposing ``get_text()`` (the time ``<div>`` of a flight row).

    Returns
    -------
    tuple of (str, str, str)
        ``(leading_part, trailing_time, chosen_time)`` where ``chosen_time`` is
        the leading part when it is non-empty, otherwise the trailing time.
    """
    visible_text = flight_item.get_text().partition("(")[0]
    leading_part, trailing_time = visible_text[:-5], visible_text[-5:]
    chosen_time = leading_part or trailing_time
    return (leading_part, trailing_time, chosen_time)

def stop_click_next_flights():
    """Report whether the loaded flight list already contains an hour rollback.

    The current page is parsed and every flight row except the first is examined
    in order. Hours are compared as the first two characters of each row's chosen
    time (third item returned by ``get_flight_time``).

    Returns
    -------
    bool
        ``True`` as soon as a row's hour is lower than the preceding row's hour
        (presumably the list has run into the next day), ``False`` if the hours
        never decrease.
    """
    page = BeautifulSoup(driver.page_source, features='html')
    rows = page.find_all('a', class_='flightlist__item display-lg')[1:]

    def chosen_time(row):
        # Third element of the tuple is the time used for ordering.
        return get_flight_time(row.find('div', class_='flightlist__item-time'))[2]

    flight_time = chosen_time(rows[0])

    for row in rows[1:]:
        new_flight_time = chosen_time(row)
        if new_flight_time[:2] < flight_time[:2]:
            print(f"\n>>>>> '{flight_time}' '{new_flight_time}' - STOP!")
            return True
        flight_time = new_flight_time

    print(f"{flight_time} -> next")
    return False

In [11]:
# Empty table that the scraping cell fills with one row per departing flight.
departure_columns = [
    'Date',
    'Time',
    'Flight Number',
    'Airlines Name',
    'Airport Name',
    'Terminal',
    'Boarding info',
    'Status',
]
departure_df = pd.DataFrame(columns=departure_columns)

In [27]:
# Load date
date = dates[6]

# Build the DD/MM/YYYY value stored in the 'Date' column. Only the ordinal suffix
# attached to the day number is removed: blanket str.replace("st", "") and friends
# would also mangle month names containing those letters ("August" -> "Augu") and
# make strptime fail.
weekday, month_day, year = (part.strip() for part in date.split(','))
month_name, day_token = month_day.split()
day_number = day_token.rstrip('stndrh')  # '10th' -> '10', '1st' -> '1', '2nd' -> '2', '3rd' -> '3'
date_string = datetime.strptime(
    f"{weekday}, {month_name} {day_number}, {year}", '%A, %B %d, %Y'
).strftime('%d/%m/%Y')
print(date_string)

pick_a_date(date)
time.sleep(3)  # fixed pause after selecting the date before the list is inspected

# Load next flight for whole day data
while not stop_click_next_flights():
    click_next_flights()

soup = BeautifulSoup(driver.page_source, features='html')
flights = soup.find_all('a', class_='flightlist__item display-lg')[1:]
_, _, flight_time = get_flight_time(flights[0].find('div', class_='flightlist__item-time'))

# Iterate through all flights, collecting plain dicts so the DataFrame is built once
# instead of being re-concatenated for every row.
records = []
for flight in flights:
    new_flight_time_prev, new_flight_time, new_final_flight_time = get_flight_time(
        flight.find('div', class_='flightlist__item-time')
    )

    # A lower hour than the previous row ends the selected day's listing.
    if new_final_flight_time[:2] < flight_time[:2]:
        print(f"\n>>>>> {flight_time} - STOP!")
        break

    flight_time = new_final_flight_time
    print(f"{new_flight_time_prev} {new_flight_time}")

    flight_details = flight.find('div', class_='flightlist__item-flight').find('div', class_='airlines-details')
    flight_number = flight_details.find('span', class_='airport__flight-number').get_text()
    airlines_name = flight_details.find('span', class_='airport__name').get_text()
    airport_name = flight_details.find('div', class_='airport-name').get_text()
    print(f"{flight_number} {airlines_name} {airport_name}")

    flight_terminal = flight.find('div', class_='flightlist__item-terminal').get_text()
    boarding_info = flight.find('div', class_='flightlist__item-boarding').get_text()
    flight_status = flight.find('div', class_='flightlist__item-status').get_text()

    records.append({'Date': date_string,
                    'Time': new_final_flight_time,
                    'Flight Number': flight_number,
                    'Airlines Name': airlines_name,
                    'Airport Name': airport_name,
                    'Terminal': flight_terminal,
                    'Boarding info': boarding_info,
                    'Status': flight_status})
    print()

# Append this date's rows in a single concat. Rows already stored for the same date
# (from an earlier run of this cell) are dropped first so re-running the cell does
# not duplicate them.
if records:
    departure_df = pd.concat(
        [departure_df[departure_df['Date'] != date_string], pd.DataFrame(records)],
        ignore_index=True,
    )

# Save to file
departure_df.to_csv('Departures.csv', index=False)

10/04/2024
01:05 -> next
01:05 -> next
02:00 -> next
02:00 -> next
06:15 -> next
07:25 -> next
08:00 -> next
08:45 -> next
09:30 -> next
10:15 -> next
11:05 -> next
12:00 -> next
12:55 -> next
13:50 -> next
14:45 -> next
15:50 -> next
16:40 -> next
17:30 -> next
18:45 -> next
20:25 -> next
20:25 -> next
21:15 -> next
22:20 -> next

>>>>> '23:55' '00:05' - STOP!
 00:05
SQ225 Singapore Airlines Perth (PER)

 00:05
SQ352 Singapore Airlines Copenhagen (CPH)

 00:10
SQ608 Singapore Airlines Seoul (ICN)

 00:15
SQ336 Singapore Airlines Paris (CDG)

 00:15
TR812 Scoot Jeju (CJU)

 00:25
SQ237 Singapore Airlines Melbourne (MEL)

 00:25
TR172 Scoot Jinan (TNA)

 00:30
PR510 Philippine Airlines Manila (MNL)

 00:30
SQ328 Singapore Airlines Munich (MUC)

 00:30
TR720 Scoot Berlin (BER)

 00:35
7C4056 Jeju Air Busan (PUS)

 00:40
IX681 Air India Express Tiruchirappalli (TRZ)

 00:40
KL836 KLM Royal Dutch Airlines Amsterdam (AMS)

 00:40
ZG54 ZIPAIR Tokyo (Narita) (NRT)

 00:45
SQ231 Singapore Airl

In [28]:
# Export the accumulated departures table to an Excel workbook, without the index column.
departure_df.to_excel(excel_writer='Departures.xlsx', index=False)