In [21]:
import requests
from bs4 import BeautifulSoup
import json
import tqdm

# Raw itinerary: comma-separated routes, each route a chain of airport codes joined by '-'
flight_str = "SZX-PEK-SZX,PVG-LAX,LAX-YYZ,YYZ-LHR,LHR-HEL,HEL-FRA,FRA-DXB,DXB-SYD,SYD-SIN,PEK-SZX"

# Step 1: parse the itinerary into the set of distinct airport codes
airport_codes = {
    stop
    for route in flight_str.split(',')
    for stop in route.split('-')
}

# Step 2: look up each airport's location
def get_airport_location(code, timeout=10):
    """Scrape an airport's coordinates and name from its gcmap.com page.

    Args:
        code: Airport code substituted into
            ``http://www.gcmap.com/airport/{code}``.
        timeout: Seconds to wait for the HTTP response. Without a timeout
            ``requests`` waits indefinitely, so one stalled connection would
            hang the whole notebook cell.

    Returns:
        A ``(coordinates, name)`` pair, or ``None`` when the response status
        is not 200 or the page has no ``geo.position`` meta tag.

        ``coordinates`` is the meta tag's ``content`` parsed into floats and
        reversed, e.g. ``"22.638;113.812"`` becomes ``[113.812, 22.638]``.
        ``name`` is the text of the ``<td class="fn org">`` cell; when the
        page has no such cell, ``code`` itself is used as the name.

    Raises:
        requests.RequestException: If the request fails or times out.
        ValueError: If the meta tag content is not ``;``-separated numbers.
    """
    url = f"http://www.gcmap.com/airport/{code}"
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    # e.g. <meta name="geo.position" content="22.638;113.812">
    meta_tag = soup.find('meta', {'name': 'geo.position'})
    if meta_tag is None:
        return None

    # e.g. <tr valign=top><td>Name:</td>
    #      <td colspan=2 class="fn org">Shenzhen Bao'an International Airport</td></tr>
    name_cell = soup.find('td', {'class': 'fn org'})
    # The name cell is optional: fall back to the code instead of crashing
    # on `None.text` when only the coordinates are present.
    airport_name = name_cell.text if name_cell is not None else code

    # Parse "22.638;113.812" into floats, then reverse to [113.812, 22.638].
    coor_array = [float(x) for x in meta_tag['content'].split(';')]
    coor_array.reverse()

    print(coor_array, airport_name)
    return coor_array, airport_name

# Look up every distinct airport code once; a value is None when its lookup failed.
airport_locations = dict(
    (airport_code, get_airport_location(airport_code))
    for airport_code in airport_codes
)

# Step 3: build the JSON document
def create_json(flight_str, airport_locations):
    """Serialize every flight leg of an itinerary as a JSON array.

    Args:
        flight_str: Comma-separated routes; each route is a chain of airport
            codes joined by ``-``. A route with N codes yields N-1 legs, and
            a route with a single code yields none.
        airport_locations: Mapping from airport code to a
            ``(location, name)`` pair. A ``None`` value (a failed lookup) or
            a missing key marks the airport as unresolved.

    Returns:
        A JSON string (4-space indent) holding a list of objects with the
        keys ``start``, ``end``, ``start_location``, ``end_location``,
        ``start_text`` and ``end_text``, in itinerary order.

    Raises:
        ValueError: If any airport that starts or ends a leg is unresolved.
            The message lists every such code, so all failed lookups can be
            fixed in one pass instead of surfacing as an opaque unpacking
            ``TypeError`` on the first one.
    """
    # Flatten the itinerary into consecutive (start, end) pairs.
    legs = []
    for route in flight_str.split(','):
        stops = route.split('-')
        legs.extend(zip(stops, stops[1:]))

    # Validate up front: only airports that actually appear in a leg matter.
    unresolved = sorted({
        code
        for leg in legs
        for code in leg
        if airport_locations.get(code) is None
    })
    if unresolved:
        raise ValueError(
            "No location data for airport(s): " + ", ".join(unresolved)
        )

    route_info = []
    for start, end in legs:
        start_location, start_text = airport_locations[start]
        end_location, end_text = airport_locations[end]
        route_info.append({
            "start": start,
            "end": end,
            "start_location": start_location,
            "end_location": end_location,
            "start_text": start_text,
            "end_text": end_text,
        })
    return json.dumps(route_info, indent=4)

# Result: the itinerary's legs serialized as a JSON string
flight_json = create_json(
    flight_str=flight_str,
    airport_locations=airport_locations,
)




[24.963333, 60.317222] Vantaa
[103.989333, 1.359211] Changi International Airport
[-118.408048, 33.942496] Los Angeles International Airport
[151.177222, -33.94611] Kingsford Smith
[-0.461388, 51.4775] Heathrow Airport
[-79.630555, 43.676667] Lester B. Pearson International Airport
[116.598, 40.073] Beijing Capital Airport
[8.570456, 50.033306] Rhein-Main
[55.364444, 25.252778] Dubai International Airport
[121.793, 31.145] Pudong International Airport
[113.812, 22.638] Shenzhen Bao'an International Airport


In [22]:
# Save the generated JSON to disk. The encoding is pinned to UTF-8 so the file
# does not depend on the platform's default text encoding.
with open('flight.json', 'w', encoding='utf-8') as f:
    f.write(flight_json)