### 匯入套件

In [3]:
import requests
from bs4 import BeautifulSoup
from datetime import date, datetime, timedelta
import json
import pandas as pd
import time
import os
from pathlib import Path
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import FlightMod.FlightInfo as fi

### 設定航班資訊（table）檔案

In [4]:
# Airline codes; each code is used as the file-name prefix of that airline's CSV files.
flight_corp = ["EVA", "CAL", "SJX", "TTW"]

for corp in flight_corp:
    # Location of this airline's flight-detail table.
    table_file = f'C:/Users/add41/Documents/Data_Engineer/Project/Flights-Data-Crawler/FlightData/{corp}_FlightsTable.csv'
    table_path = Path(table_file)

    if not table_path.exists():
        # No table on disk yet: start from an empty frame that carries the
        # expected schema. Nothing is written to disk at this point.
        columns = [
            'flight_NO', 'flight_type', 'flight_company', 'fly_distance',
            'departure_airport_code', 'departure_city',
            'arrival_airport_code', 'arrival_city',
            'departure_date',
            'leave_gate_estimate', 'leave_gate_actual',
            'departure_time_estimate', 'departure_time_actual',
            'departure_timezone',
            'arrival_date',
            'landing_time_estimate', 'landing_time_actual',
            'arrive_gate_estimate', 'arrive_gate_actual',
            'arrive_timezone',
            'link',
        ]
        df_table = pd.DataFrame(columns=columns)
    else:
        # Table already exists: continue from what was saved previously.
        df_table = pd.read_csv(table_file)

# NOTE: once the loop ends, `corp`, `table_file`, `table_path` and `df_table`
# all refer to the last airline in `flight_corp`.


### 爬蟲使用自訂函式庫

In [5]:
## 函式庫

# def trans_date_from_chinese(chinese_date):
#     clean_date = chinese_date.split("(")[0].strip()
#     fmt = "%Y年 %m月 %d日"
#     trans = datetime.strptime(clean_date, fmt)

#     return trans


# def find_tag(div_list, target_str:str):
#     """用於尋找特定字串標籤的index"""
#     target=0
#     for i in div_list:
#         if i.get_text() == target_str:
#             break
#         else:
#             target += 1
#     return target


# def safe_extract(func):
#     """判斷一個soup物件是否存在/有值，若沒有則回傳None"""
#     try:
#         return func()
#     except (IndexError, AttributeError):
#         return None


# def gate_exist(soup):
#     """判斷一個航班頁面中是否有到/離閘口資料"""
#     gate = 0
#     x = soup
#     for i in x('div'):
#         if '閘口' in i.text:
#             gate = 1
#             break
#     return gate


# def split_tz(time_str:str):
#     time_, tz = time_str.split(' ')
#     return time_, tz


# def crawl_flight_data(soup, url, gate_exist:bool):
#     """當該班機有到/離閘門資料時使用的爬蟲"""
#     flight_data = []

#     # 班機基本資料。較容易在各網頁中出現差異，故先使用函式取得定位，再去取得資訊
#     div_list = soup('div', class_='flightPageDataLabel')

#     # 航班編號、機型、航空公司、飛行距離
#     flight_data.append(safe_extract(lambda: soup('div', class_='flightPageIdent')[0].h1.text.strip()))
#     flight_data.append(safe_extract(lambda: soup('div', class_='flightPageDataRow')[find_tag(div_list, '機型')]('div', class_='flightPageData')[0].text.strip().replace('\xa0', ' ')))
#     flight_data.append(safe_extract(lambda: soup('div', class_='flightPageDataRow')[find_tag(div_list, '航空公司')]('div', class_='flightPageData')[0].text.strip().split('\n')[0]))
#     flight_data.append(safe_extract(lambda: soup('div', class_='flightPageDataRow')[find_tag(div_list, '距離')].span.text.strip().replace(',', '').split(' ')[1]))

#     # 起飛機場、起飛城市
#     flight_data.append(safe_extract(lambda: soup('div', class_='flightPageSummaryOrigin')[0]('span', class_='displayFlexElementContainer')[0].text.strip()))
#     flight_data.append(safe_extract(lambda: soup('div', class_='flightPageSummaryOrigin')[0]('span', class_='flightPageSummaryCity')[0].text.strip()))

#     # 降落機場、降落城市
#     flight_data.append(safe_extract(lambda: soup('div', class_='flightPageSummaryDestination')[0]('span', class_='displayFlexElementContainer')[0].text.strip()))
#     flight_data.append(safe_extract(lambda: soup('div', class_='flightPageSummaryDestination')[0]('span', class_='destinationCity')[0].text.strip()))

#     # 起飛日期
#     flight_data.append(safe_extract(lambda: soup('span', class_='flightPageSummaryDepartureDay')[0].text))
    
#     if gate_exist == 1:
#         # 預計/實際離開閘門時間
#         lg_e_time, lg_e_tz = split_tz(safe_extract(lambda: soup('div', class_='flightPageDataTimesChild')[0].span.text.strip().replace('\xa0', ' ').replace('\n', '').replace('\t', '')))
#         lg_a_time, lg_a_tz = split_tz(safe_extract(lambda: soup('div', class_='flightPageDataTimesChild')[0]('div', class_="flightPageDataActualTimeText")[0].text.strip().replace('\xa0', ' ').replace('\\n', '').replace('\\t', '')))

#         flight_data.append(lg_e_time)
#         flight_data.append(lg_a_time)
        
#         # 預計/實際起飛時間
#         d_e_time, d_e_tz = split_tz(safe_extract(lambda: soup('div', class_='flightPageDataTimesChild')[1]('span')[1].text.strip().replace('\xa0', ' ').replace('\n', '').replace('\t', '')))
#         d_a_time, d_a_tz = split_tz(safe_extract(lambda: soup('div', class_='flightPageDataTimesChild')[1]('span')[0].text.strip().replace('\xa0', ' ').replace('\n', '').replace('\t', '')))
        
#         flight_data.append(d_e_time)
#         flight_data.append(d_a_time)
    
#     else:
#         # 預計/實際離開閘門時間
#         flight_data.append(None)
#         flight_data.append(None)
        
#         # 預計/實際起飛時間
#         d_e_time, d_e_tz = split_tz(safe_extract(lambda: soup('div', class_='flightPageDataTimesChild')[0]('span')[1].text.strip().replace('\xa0', ' ').replace('\n', '').replace('\t', '')))
#         d_a_time, d_a_tz = split_tz(safe_extract(lambda: soup('div', class_='flightPageDataTimesChild')[0]('span')[0].text.strip().replace('\xa0', ' ').replace('\n', '').replace('\t', '')))
        
#         flight_data.append(d_e_time)
#         flight_data.append(d_a_time)

#     # 起飛時區
#     flight_data.append(d_a_tz)

#     # 抵達日期
#     flight_data.append(safe_extract(lambda: soup('span', class_='flightPageSummaryArrivalDay')[0].text))

#     if gate_exist == 1:
#         # 預計/實際降落時間
#         a_e_time, a_e_tz = split_tz(safe_extract(lambda: soup('div', class_='flightPageDataTimesChild')[2]('span')[1].text.strip().replace('\xa0', ' ').replace('\n', '').replace('\t', '')))
#         a_a_time, a_a_tz = split_tz(safe_extract(lambda: soup('div', class_='flightPageDataTimesChild')[2]('span')[0].text.strip().replace('\xa0', ' ').replace('\n', '').replace('\t', '')))

#         flight_data.append(a_e_time)
#         flight_data.append(a_a_time)

#         # 預計/實際抵達閘門時間
#         ag_e_time, ag_e_tz = split_tz(safe_extract(lambda: soup('div', class_='flightPageDataTimesChild')[3]('span')[1].text.strip().replace('\xa0', ' ').replace('\n', '').replace('\t', '')))
#         ag_a_time, ag_a_tz = split_tz(safe_extract(lambda: soup('div', class_='flightPageDataTimesChild')[3]('div', class_='flightPageDataActualTimeText')[0].span.text.strip().replace('\xa0', ' ').replace('\n', '').replace('\t', '')))

#         flight_data.append(ag_e_time)
#         flight_data.append(ag_a_time)
    
#     else:
#         # 預計/實際降落時間
#         a_e_time, a_e_tz = split_tz(safe_extract(lambda: soup('div', class_='flightPageDataTimesChild')[1]('span')[1].text.strip().replace('\xa0', ' ').replace('\n', '').replace('\t', '')))
#         a_a_time, a_a_tz = split_tz(safe_extract(lambda: soup('div', class_='flightPageDataTimesChild')[1]('span')[0].text.strip().replace('\xa0', ' ').replace('\n', '').replace('\t', '')))

#         flight_data.append(a_e_time)
#         flight_data.append(a_a_time)

#         # 預計/實際抵達閘門時間
#         flight_data.append(None)
#         flight_data.append(None)

#     # 降落時區
#     flight_data.append(a_a_tz)

#     # 紀錄該航班網址，若有需要可再重新訪問
#     flight_data.append(url)

#     return flight_data


### 設定航班列表檔案

In [None]:
# Path of the flight-list CSV for the current airline (`corp` comes from an
# earlier cell); read the whole list into memory.
list_file = f'C:/Users/add41/Documents/Data_Engineer/Project/Flights-Data-Crawler/FlightData/{corp}_FlightList.csv'
df_list = pd.read_csv(list_file)

# Parse query_date so it can be subtracted from today's date below.
df_list['query_date'] = pd.to_datetime(df_list['query_date'])

# Today's date as a pandas Timestamp (time component is 00:00).
today = pd.Timestamp(date.today())

# Two conditions pick the rows that still need crawling:
#   mask_1 - sync is 0, i.e. the row has not been crawled yet;
#   mask_2 - query_date is at least two days old, so the flight should be over.
mask_1 = df_list['sync'].eq(0)
mask_2 = (today - df_list['query_date']).dt.days.ge(2)

source = df_list.loc[mask_1 & mask_2]

### 主要爬蟲程式

In [None]:
# Crawl every flight page listed in `source['link']` and collect one record per
# successfully parsed flight in `data` (later turned into a DataFrame).
data = []

# Start a headless Chrome session through Selenium.
driver_path = './chromedriver.exe'
service = Service(driver_path)
chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(service=service, options=chrome_options)
print('建立連線')

# try/finally guarantees the browser process is shut down even when the loop is
# interrupted (Ctrl-C) or hits an unexpected error.
try:
    # One waiter is enough; it is bound to the driver, not to a page.
    wait = WebDriverWait(driver, 15)

    for url in source['link']:
        # --- Step 1: load the page and grab its rendered HTML -----------------
        try:
            driver.get(url)

            # The page builds its content with JavaScript, so wait until the
            # departure-day element is present before reading the HTML.
            wait.until(
                EC.presence_of_all_elements_located((By.CLASS_NAME, "flightPageSummaryDepartureDay"))
            )

            # Best effort: accept the cookie banner when it is shown. Any
            # Selenium failure here (banner absent, not clickable, ...) is
            # ignored on purpose. `Exception` is used instead of a bare
            # `except` so that KeyboardInterrupt is not swallowed.
            try:
                driver.find_element(By.ID, 'onetrust-accept-btn-handler').click()
            except Exception:
                pass
            else:
                # Give the page a moment to settle after dismissing the banner.
                time.sleep(4)

            page_source = driver.page_source

        except Exception as e:
            print(f"無法存取 {url}: {e}")
            continue

        # --- Step 2: parse the HTML into one flight record --------------------
        try:
            soup = BeautifulSoup(page_source, 'html.parser')
            # Raises IndexError/AttributeError when the ident header is missing;
            # handled below so one malformed page cannot abort the whole run.
            flight_no = soup('div', class_='flightPageIdent')[0].h1.text.strip()

            print(f'開始查詢{flight_no}班機資訊資訊...')

            gate_exist = fi.gate_exist(soup)
            print(f'{flight_no}航班有無閘門資訊：{gate_exist}')
            flight_data = fi.crawl_flight_data(soup, url, gate_exist=gate_exist)

        except Exception as e:
            # Do NOT append anything: `flight_data` would either be undefined or
            # still hold the previous flight's record.
            print(f'發生錯誤：{e}')

        else:
            data.append(flight_data)
            print(f'完成存取{flight_no}航班資料')

        # Pause between requests regardless of the parsing outcome.
        time.sleep(7)

finally:
    driver.quit()

建立連線
開始查詢TTW201班機資訊資訊...
TTW201航班有無閘門資訊：1
完成存取TTW201航班資料


In [None]:
# Column names for one crawled flight record. Values in each row of `data`
# are matched to these names by position.
columns = [
    'flight_NO', 'flight_type', 'flight_company', 'fly_distance',
    'departure_airport_code', 'departure_city',
    'arrival_airport_code', 'arrival_city',
    'departure_date',
    'leave_gate_estimate', 'leave_gate_actual',
    'departure_time_estimate', 'departure_time_actual',
    'departure_timezone',
    'arrival_date',
    'landing_time_estimate', 'landing_time_actual',
    'arrive_gate_estimate', 'arrive_gate_actual',
    'arrive_timezone',
    'link',
]

# Build the frame for this run and drop flights that have no actual landing
# time yet (flight not finished); those are left to be crawled again next time.
df2 = pd.DataFrame(data, columns=columns).dropna(subset=['landing_time_actual'])

## 爬取完成，進行資料清洗

### 將中文日期轉換成Datetime格式

In [None]:
# locale.setlocale(locale.LC_TIME, 'zh_TW.UTF-8')
# df2 = df2.dropna(subset='arrival_date')

# df2['departure_date'] = df2['departure_date'].apply(trans_date_from_chinese)
# df2['arrival_date'] = df2['arrival_date'].apply(trans_date_from_chinese)

### 處理時區問題

In [None]:
# import pytz

# def find_tz(tz_str:str):
#     for i in pytz.all_timezones:
#         tz = pytz.timezone(i)
#         now = datetime.now(tz)
#         if now.tzname() == tz_str:
#             return i


# def to_utc_time(tz_str, time):
#     tz = find_tz(tz_str)
#     tz = pytz.timezone(tz)

#     fmt = "%Y-%m-%d %H:%M"
#     time = datetime.strptime(time, fmt)

#     local_time = tz.localize(time)
#     utc = local_time.astimezone(pytz.utc).strftime(fmt)

#     return utc

In [None]:
# ## 起飛

# df2['departure_time_actual(UTC)'] = df2['departure_date'] + ' ' + df2['departure_time_actual']
# df2['departure_time_actual(UTC)'] = df2.apply(lambda row: to_utc_time(df2['departure_time_actual(UTC)'], df2['departure_timezone']), axis=1)

# df2['departure_time_estimate(UTC)'] = df2['departure_date'] + ' ' + df2['departure_time_estimate']
# df2['departure_time_estimate(UTC)'] = df2.apply(lambda row: to_utc_time(df2['departure_time_estimate(UTC)'], df2['departure_timezone']), axis=1)


# ## 降落

# df2['arrive_time_actual(UTC)'] = df2['arrive_date'] + ' ' + df2['landing_time_actual']
# df2['arrive_time_actual(UTC)'] = df2.apply(lambda row: to_utc_time(df2['arrive_time_actual(UTC)'], df2['arrive_timezone']), axis=1)

# df2['arrive_time_estimate(UTC)'] = df2['arrive_date'] + ' ' + df2['landing_time_estimate']
# df2['arrive_time_estimate(UTC)'] = df2.apply(lambda row: to_utc_time(df2['arrive_time_estimate(UTC)'], df2['arrive_timezone']), axis=1)



## 整合檔案並存檔

In [None]:
# Stack the existing table and this run's rows, renumbering the index.
frames = [df_table, df2]
df_combine = pd.concat(frames, ignore_index=True)

# A link identifies a flight page; keep only the first row seen for each link,
# then write the de-duplicated table back to its CSV.
df_table = df_combine.drop_duplicates(subset='link', keep='first')
df_table.to_csv(table_file, index=False)

# Flag every list entry whose link was stored in this run (sync = 1) so it is
# not crawled again, and persist the updated list to FlightList.csv.
mask_done = df_list['link'].isin(df2['link'])
df_list.loc[mask_done, 'sync'] = 1
df_list.to_csv(list_file, index=False)

In [None]:
# Display the merged table (before de-duplication) as this cell's output.
df_combine

Unnamed: 0,flight_NO,flight_type,flight_company,fly_distance,departure_airport_code,departure_city,arrival_airport_code,arrival_city,departure_date,leave_gate_estimate,...,departure_time_estimate,departure_time_actual,departure_timezone,arrival_date,landing_time_estimate,landing_time_actual,arrive_gate_estimate,arrive_gate_actual,arrive_timezone,link
0,EVA5,BOEING 777-300ER (雙發) (B77W),EVA Air,11819,LAX,"Los Angeles, CA",TPE,台灣臺北 TW,2025年 10月 07日 (星期二),12:00,...,12:10,12:11,PDT,2025年 10月 08日 (星期三),16:19,16:20,16:55,16:25,CST,https://www.flightaware.com/live/flight/id/EVA...
1,EVA621,BOEING 777-200LR (雙發) (B77L),EVA Air,11957,LAX,"Los Angeles, CA",TPE,台灣臺北 TW,2025年 10月 07日 (星期二),07:00,...,07:10,07:38,PDT,2025年 10月 08日 (星期三),11:14,12:01,11:45,12:09,CST,https://www.flightaware.com/live/flight/id/EVA...
2,EVA645,BOEING 777-200LR (雙發) (B77L),EVA Air,12425,ORD,"Chicago, IL",TPE,台灣臺北 TW,2025年 10月 07日 (星期二),12:40,...,12:50,15:24,CDT,2025年 10月 08日 (星期三),16:44,20:08,17:15,20:18,CST,https://www.flightaware.com/live/flight/id/EVA...
3,EVA653,BOEING 777-200LR (雙發) (B77L),EVA Air,12455,ORD,"Chicago, IL",TPE,台灣臺北 TW,2025年 10月 07日 (星期二),17:00,...,17:10,17:02,CDT,2025年 10月 08日 (星期三),21:32,21:52,21:35,22:01,CST,https://www.flightaware.com/live/flight/id/EVA...
4,EVA68,BOEING 777-300ER (雙發) (B77W),EVA Air,10180,LHR,英国 伦敦 GB,BKK,泰国 曼谷(首都) TH,2025年 10月 07日 (星期二),21:35,...,21:45,21:44,BST,2025年 10月 08日 (星期三),14:35,14:48,15:30,14:58,+07,https://www.flightaware.com/live/flight/id/EVA...
5,EVA7,BOEING 777-300ER (雙發) (B77W),EVA Air,11329,SFO,"San Francisco, CA",TPE,台灣臺北 TW,2025年 10月 07日 (星期二),12:30,...,12:40,12:44,PDT,2025年 10月 08日 (星期三),16:25,16:25,17:10,16:29,CST,https://www.flightaware.com/live/flight/id/EVA...
6,EVA76,BOEING 777-300ER (雙發) (B77W),EVA Air,9799,AMS,荷兰 阿姆斯特丹(首都) NL,BKK,泰国 曼谷(首都) TH,2025年 10月 07日 (星期二),21:40,...,21:50,22:01,CEST,2025年 10月 08日 (星期三),13:18,13:44,13:55,13:54,+07,https://www.flightaware.com/live/flight/id/EVA...
7,EVA87,BOEING 777-300ER (雙發) (B77W),EVA Air,11860,TPE,台灣臺北 TW,CDG,法国 巴黎 FR,2025年 10月 07日 (星期二),23:30,...,23:40,23:50,CST,2025年 10月 08日 (星期三),07:45,08:09,08:30,08:17,CEST,https://www.flightaware.com/live/flight/id/EVA...
8,EVA95,Boeing 787-9 Dreamliner (雙發) (B789),EVA Air,11236,TPE,台灣臺北 TW,MXP,意大利 米兰 IT,2025年 10月 07日 (星期二),23:15,...,23:25,23:52,CST,2025年 10月 08日 (星期三),06:56,07:08,07:35,07:15,CEST,https://www.flightaware.com/live/flight/id/EVA...
9,EVA697,BOEING 777-200LR (雙發) (B77L),EVA Air,7821,ANC,"Anchorage, AK",TPE,台灣臺北 TW,2025年 10月 07日 (星期二),,...,19:50,20:13,AKDT,2025年 10月 08日 (星期三),21:33,22:31,,,CST,https://www.flightaware.com/live/flight/id/EVA...


In [None]:
# url = source['link']
# url = url.iloc[0]

# # 建立dataframe需要的data list
# data = []

# # 建立selenium連線
# driver_path = './chromedriver.exe'
# service = Service(driver_path)
# chrome_options = Options()
# chrome_options.add_argument("--headless")
# driver = webdriver.Chrome(service=service, options=chrome_options)
# print('建立連線')

# driver.get(url)

# # 網頁內有JavaScript動態生成內容，故設定等待網頁讀取完畢後再動作
# wait = WebDriverWait(driver, 15)
# element = wait.until(
#     EC.presence_of_all_elements_located((By.CLASS_NAME, "flightPageSummaryDepartureDay"))
# )

# # 如果有cookie選項的話選取同意，若沒有就跳過
# try:
#     driver.find_element(By.ID, 'onetrust-accept-btn-handler').click()
#     time.sleep(4)

# except:
#     pass

# # 取得網頁html碼，並轉換成soup物件
# page_source = driver.page_source


# soup = BeautifulSoup(page_source, 'html.parser')
# flight_no = soup('div', class_='flightPageIdent')[0].h1.text.strip()

# ## 根據取得的soup物件，開始抓取各項資訊
# print(f'開始查詢{flight_no}班機資訊資訊...')


# gate_exist = fi.gate_exist(soup)
# print(f'{flight_no}航班有無閘門資訊：{gate_exist}')
# flight_data = fi.crawl_flight_data(soup, url, gate_exist=gate_exist)


# data.append(flight_data)
# print(f'完成存取{flight_no}航班資料')

# df2 = pd.DataFrame(columns=columns, data=data)

# df2


In [1]:
import requests
from bs4 import BeautifulSoup
from datetime import date, datetime, timedelta
import json
import pandas as pd
import time
import os
from pathlib import Path
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import FlightMod.FlightInfo as fi


# End-to-end update: for every airline, load its flight table and flight list,
# crawl the pages that are due, then save the merged table and the updated list.

# Airline codes; each code is used as the file-name prefix of that airline's CSV files.
flight_corp = ["EVA", "CAL", "SJX", "TTW"]

# Column names of one flight record. Defined once so the empty-table schema and
# the crawled-data schema cannot drift apart. Values in each crawled row are
# matched to these names by position.
columns = [
    'flight_NO',
    'flight_type',
    'flight_company',
    'fly_distance',
    'departure_airport_code',
    'departure_city',
    'arrival_airport_code',
    'arrival_city',
    'departure_date',
    'leave_gate_estimate',
    'leave_gate_actual',
    'departure_time_estimate',
    'departure_time_actual',
    'departure_timezone',
    'arrival_date',
    'landing_time_estimate',
    'landing_time_actual',
    'arrive_gate_estimate',
    'arrive_gate_actual',
    'arrive_timezone',
    'link',
]

for corp in flight_corp:
    # ---- Load (or initialise) this airline's flight table ---------------------
    table_file = f'C:/Users/add41/Documents/Data_Engineer/Project/Flights-Data-Crawler/FlightData/{corp}_FlightsTable.csv'
    table_path = Path(table_file)

    if table_path.exists():
        df_table = pd.read_csv(table_file)
    else:
        # No table yet: start empty; the file is created when saving below.
        df_table = pd.DataFrame(columns=columns)

    # ---- Load the flight list and select the rows that are due ----------------
    list_file = f'C:/Users/add41/Documents/Data_Engineer/Project/Flights-Data-Crawler/FlightData/{corp}_FlightList.csv'
    df_list = pd.read_csv(list_file)

    today = pd.Timestamp(date.today())
    df_list['query_date'] = pd.to_datetime(df_list['query_date'])

    # mask_1: not crawled yet (sync == 0).
    # mask_2: query_date is at least two days old, so the flight should be over.
    mask_1 = (df_list['sync'] == 0)
    mask_2 = ((today - df_list['query_date']).dt.days >= 2)

    source = df_list[mask_1 & mask_2]

    # ---- Crawl ----------------------------------------------------------------
    # One record per successfully parsed flight.
    data = []

    # Start a headless Chrome session through Selenium.
    driver_path = './chromedriver.exe'
    service = Service(driver_path)
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(service=service, options=chrome_options)
    print('建立連線')

    # try/finally guarantees the browser process is shut down even when the
    # loop is interrupted (Ctrl-C) or hits an unexpected error.
    try:
        # One waiter is enough; it is bound to the driver, not to a page.
        wait = WebDriverWait(driver, 15)

        for url in source['link']:
            # Step 1: load the page and grab its rendered HTML.
            try:
                driver.get(url)

                # The page builds its content with JavaScript, so wait until
                # the departure-day element is present before reading the HTML.
                wait.until(
                    EC.presence_of_all_elements_located((By.CLASS_NAME, "flightPageSummaryDepartureDay"))
                )

                # Best effort: accept the cookie banner when it is shown. Any
                # Selenium failure here is ignored on purpose. `Exception` is
                # used instead of a bare `except` so that KeyboardInterrupt is
                # not swallowed.
                try:
                    driver.find_element(By.ID, 'onetrust-accept-btn-handler').click()
                except Exception:
                    pass
                else:
                    # Give the page a moment to settle after dismissing the banner.
                    time.sleep(4)

                page_source = driver.page_source

            except Exception as e:
                print(f"無法存取 {url}: {e}")
                continue

            # Step 2: parse the HTML into one flight record.
            try:
                soup = BeautifulSoup(page_source, 'html.parser')
                # Raises IndexError/AttributeError when the ident header is
                # missing; handled below so one bad page cannot abort the run.
                flight_no = soup('div', class_='flightPageIdent')[0].h1.text.strip()

                print(f'開始查詢{flight_no}班機資訊資訊...')

                gate_exist = fi.gate_exist(soup)
                print(f'{flight_no}航班有無閘門資訊：{gate_exist}')
                flight_data = fi.crawl_flight_data(soup, url, gate_exist=gate_exist)

            except Exception as e:
                # Do NOT append anything: `flight_data` would either be
                # undefined or still hold the previous flight's record.
                print(f'發生錯誤：{e}')

            else:
                data.append(flight_data)
                print(f'完成存取{flight_no}航班資料')

            # Pause between requests regardless of the parsing outcome.
            time.sleep(7)

    finally:
        driver.quit()

    # ---- Merge and save -------------------------------------------------------
    df2 = pd.DataFrame(columns=columns, data=data)

    # Drop flights without an actual landing time (flight not finished yet);
    # they keep sync == 0 and are crawled again on a later run.
    df2 = df2.dropna(subset=['landing_time_actual'])

    # Append the new rows to the existing table.
    df_combine = pd.concat([df_table, df2], ignore_index=True)

    # A link identifies a flight page; keep the first row seen for each link.
    df_table = df_combine.drop_duplicates(subset='link', keep='first')

    df_table.to_csv(table_file, index=False)

    # Flag the list entries stored in this run so they are not crawled again.
    mask_done = df_list['link'].isin(df2['link'])
    df_list.loc[mask_done, 'sync'] = 1

    # Persist the updated list back to FlightList.csv.
    df_list.to_csv(list_file, index=False)


print('已更新所有資料！')

建立連線
開始查詢EVA10班機資訊資訊...
EVA10航班有無閘門資訊：1
完成存取EVA10航班資料


KeyboardInterrupt: 