## checked list

In [13]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from concurrent.futures import ProcessPoolExecutor
import multiprocessing
from selenium.webdriver.common.keys import Keys
import time


def init_driver():
    """Create and return a configured Chrome WebDriver.

    The browser is started maximized, with extensions, GPU and the sandbox
    disabled, with image loading turned off, and with the 'eager' page-load
    strategy. The chromedriver binary is expected at ``./chromedriver``.

    Returns:
        selenium.webdriver.Chrome: a started browser session; the caller is
        responsible for calling ``quit()`` on it.
    """
    options = Options()
    options.add_argument("--start-maximized")
    # NOTE(review): "/path/to/cache" looks like an unreplaced placeholder — confirm.
    options.add_argument("--disk-cache-dir=/path/to/cache")
    options.add_argument("--enable-application-cache")
    options.add_argument("--disable-extensions")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.page_load_strategy = 'eager'

    prefs = {
        "profile.managed_default_content_settings.images": 2,  # disable image loading
    }
    options.add_experimental_option("prefs", prefs)

    try:
        # Selenium 4 receives the chromedriver location through a Service
        # object; the `executable_path=` keyword of webdriver.Chrome was
        # deprecated in 4.0 and removed in 4.10.
        driver = webdriver.Chrome(service=Service("./chromedriver"), options=options)
    except TypeError:
        # Selenium 3.x does not know the `service=` keyword.
        driver = webdriver.Chrome(executable_path="./chromedriver", options=options)
    return driver


def fetch_address(row):
    """Scrape the address text shown for one spot on the platinumaps page.

    A fresh WebDriver is created for every call, so the function can be run
    from several workers at once. Every failure is reported with ``print``
    and results in an empty address rather than an exception.

    Args:
        row: mapping with at least the keys ``'Spot'`` and ``'Title'``.

    Returns:
        dict: ``{"Spot": ..., "Title": ..., "Address": ...}`` where
        ``Address`` is ``""`` when nothing could be scraped.
    """
    spot = row['Spot']
    title = row['Title']
    base_url = "https://platinumaps.jp/d/seventeen-thecity2024?culture=ja&spot="
    url = base_url + str(spot)
    address = ""
    # Stays None when init_driver() itself fails, so the cleanup in `finally`
    # does not raise UnboundLocalError and hide the real error.
    driver = None

    poi_selector = "body > div > div > div.platinumap > div.poiselector"
    properties_body = (
        poi_selector
        + " > div > div > div > div > div.detail__inner"
        + " > div.scrollview__wrapper.detail__scrollview.-v"
        + " > div > div > div > div.poiproperties > table > tbody"
    )
    # (label, CSS selector, message printed when the selector yields nothing)
    address_attempts = (
        (
            "first",
            properties_body + " > tr:nth-child(1) > td > div > address",
            "First address not found, trying second selector...",
        ),
        (
            "second",
            properties_body + " > tr > td > div > address",
            "Address not found using second selector either.",
        ),
    )

    try:
        driver = init_driver()  # each worker owns its own WebDriver
        driver.get(url)
        print(f"Accessing {url}")

        # The map is rendered inside an iframe: wait for it and switch into it.
        iframe_element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.TAG_NAME, "iframe")))
        driver.switch_to.frame(iframe_element)

        # Only look for the address once the POI panel is present.
        try:
            WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, poi_selector))
            )
        except Exception:
            print("Main element does not exist, skipping address search.")
        else:
            print("Main element exists, proceeding to find address...")
            for label, selector, not_found_message in address_attempts:
                try:
                    address_element = WebDriverWait(driver, 10).until(
                        EC.presence_of_element_located((By.CSS_SELECTOR, selector))
                    )
                    address = address_element.text.strip()
                except Exception:
                    print(not_found_message)
                    continue
                print(f"Address found using {label} selector: {address}")
                break

    except Exception as e:
        print(f"Failed to fetch data for Spot {spot}: {e}")
    finally:
        if driver is not None:
            driver.quit()  # always release the browser that was started

    return {"Spot": spot, "Title": title, "Address": address}


from concurrent.futures import ThreadPoolExecutor

def getAddress(csv_file, checked_situation, save_date):
    """Scrape addresses for all rows with a given ``checkSituation`` code.

    Rows of ``csv_file`` whose ``checkSituation`` column equals
    ``checked_situation`` are passed to ``fetch_address`` on a pool of eight
    threads. The results are written to
    ``./fullDate_Data/{save_date}/updated_spots_results_{checked_situation}.csv``.

    Args:
        csv_file: path of the input CSV (needs a ``checkSituation`` column
            plus whatever ``fetch_address`` reads from each row).
        checked_situation: value to filter the ``checkSituation`` column by.
        save_date: name of the sub-folder under ``./fullDate_Data/``.

    Returns:
        None. When no row matches, a message is printed and nothing is written.
    """
    import os  # local import: the import block of this cell does not provide `os`

    df = pd.read_csv(csv_file)

    # Keep only the rows that carry the requested code.
    filtered_df = df[df['checkSituation'] == checked_situation]

    if filtered_df.empty:
        print(f"No rows found with checkSituation = {checked_situation}. Exiting.")
        return

    # Create the output folder before the slow scraping starts: to_csv does
    # not create missing directories, and failing afterwards would throw the
    # scraped results away.
    output_csv_file = f"./fullDate_Data/{save_date}/updated_spots_results_{checked_situation}.csv"
    os.makedirs(os.path.dirname(output_csv_file), exist_ok=True)

    # Scrape the filtered rows on a thread pool.
    print("Starting parallel scraping for filtered rows...")
    with ThreadPoolExecutor(max_workers=8) as executor:
        results = list(executor.map(fetch_address, filtered_df.to_dict(orient="records")))

    updated_df = pd.DataFrame(results)
    updated_df.to_csv(output_csv_file, index=False, encoding="utf-8-sig")
    print(f"Updated results saved to {output_csv_file}")

    
def load_input_csv(file_path):
    """Read the CSV at ``file_path`` and return it as a DataFrame."""
    address_table = pd.read_csv(file_path)
    return address_table


def open_website(driver, url):
    """Navigate ``driver`` to ``url`` and then pause for two seconds."""
    settle_seconds = 2  # fixed pause that gives the page time to load
    driver.get(url)
    time.sleep(settle_seconds)


def process_address(driver, address):
    """Type ``address`` into the page's textarea and press ENTER.

    Args:
        driver: WebDriver showing the conversion page.
        address: text to enter.

    If the textarea cannot be located, the error is printed and nothing is
    typed. If typing the address fails, the error is printed and a single
    space followed by ENTER is sent instead.
    """
    try:
        textarea = driver.find_element(By.TAG_NAME, "textarea")
    except Exception as e:
        # Without the element there is nothing to type into; the previous
        # code fell through to `textarea.send_keys` here and raised
        # UnboundLocalError from inside the handler.
        print(f"Error inputting address '{address}': {e}")
        return

    try:
        textarea.send_keys(address)
        textarea.send_keys(Keys.ENTER)
    except Exception as e:
        print(f"Error inputting address '{address}': {e}")
        # Fallback kept from the original code: submit a blank entry.
        # NOTE(review): presumably this keeps one input line per row — confirm.
        textarea.send_keys(" ")
        textarea.send_keys(Keys.ENTER)


def select_platform_and_convert(driver):
    """Click the second platform radio input, then the convert button.

    Any exception raised while locating or clicking either element is
    printed and swallowed.
    """
    platform_radio_css = "#platform > div:nth-child(2) > label > span.ant-radio > input"
    convert_button_css = "#root > div > section > main > form > div > div.ant-col.ant-col-xs-24.ant-col-md-8 > div > div:nth-child(3) > div > div > div > span > button"

    try:
        driver.find_element(By.CSS_SELECTOR, platform_radio_css).click()
        driver.find_element(By.CSS_SELECTOR, convert_button_css).click()
    except Exception as err:
        print(f"Error selecting platform or clicking convert button: {err}")


def _coordinate_record(spot, title, address, longitude="", latitude="", available="not yet"):
    """Build one output record; the defaults describe an address with no match."""
    return {
        "Spot": spot,
        "Title": title,
        "Address": address,
        "Longitude": longitude,
        "Latitude": latitude,
        "Available": available,
    }


def fetch_results(driver, row, results):
    """Read the result table and append one record for ``row`` to ``results``.

    The first ``//table/tbody`` on the page is scanned for a row whose second
    cell equals ``row['Address']``. On a match, the third and fourth cells are
    stored as longitude and latitude and the record is marked ``"start"``.
    Otherwise, or when anything fails, a record with empty coordinates marked
    ``"not yet"`` is appended.

    Args:
        driver: WebDriver showing the conversion results.
        row: mapping with the keys ``'Address'``, ``'Spot'`` and ``'Title'``.
        results: list that receives exactly one dict per call.
    """
    address = row['Address']
    spot = row['Spot']
    title = row['Title']

    try:
        result_table = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, "//table/tbody"))
        )

        rows = result_table.find_elements(By.TAG_NAME, "tr")
        print(f"Found {len(rows)} rows in the table.")

        for table_row in rows:
            cols = table_row.find_elements(By.TAG_NAME, "td")
            if len(cols) < 4:
                # Rows without the expected cells used to raise IndexError,
                # which aborted the scan before later rows were checked.
                continue

            if cols[1].text.strip() != address:
                continue

            # NOTE(review): column order (address, longitude, latitude) is
            # taken from the original indexing — confirm against the page.
            longitude = cols[2].text.strip()
            latitude = cols[3].text.strip()
            results.append(_coordinate_record(spot, title, address, longitude, latitude, "start"))
            print(f"Match found for Spot {spot}: {longitude}, {latitude}")
            return

    except Exception as e:
        print(f"Error processing address '{address}': {e}")

    # No matching row, or the lookup failed.
    results.append(_coordinate_record(spot, title, address))


def save_results_to_csv(results, output_file):
    """Write ``results`` to ``output_file`` as a UTF-8 (with BOM) CSV without the index."""
    pd.DataFrame(results).to_csv(output_file, index=False, encoding='utf-8-sig')
    print(f"Results saved to {output_file}")


def getCoordinate(input_csv, output_csv, wait_seconds=180):
    """Look up coordinates for every address in ``input_csv``.

    For each row the address is typed into https://maplocation.sjfkai.com,
    the conversion is started, the function sleeps ``wait_seconds`` and the
    result table is read. All records are then written to ``output_csv``.

    Args:
        input_csv: CSV with the columns read by ``process_address`` and
            ``fetch_results`` (``Address``, ``Spot``, ``Title``).
        output_csv: path of the CSV to write.
        wait_seconds: pause between starting a conversion and reading its
            result; tune to the network speed. Defaults to the previously
            hard-coded 180 seconds.
    """
    # Read the input first so that a bad path fails before a browser exists.
    df = load_input_csv(input_csv)
    driver = init_driver()

    results = []
    try:
        open_website(driver, "https://maplocation.sjfkai.com")

        for _, row in df.iterrows():
            process_address(driver, row['Address'])
            select_platform_and_convert(driver)
            time.sleep(wait_seconds)
            fetch_results(driver, row, results)
    finally:
        # Close the browser even when one of the steps above raises.
        driver.quit()

    save_results_to_csv(results, output_csv)

In [14]:
import os
import shutil
import qrcode
from PIL import Image
import piexif
import pandas as pd
import simplekml

def clear_folder(folder_path):
    """Delete ``folder_path`` if it exists, then create it again empty."""
    had_previous_content = os.path.exists(folder_path)
    if had_previous_content:
        shutil.rmtree(folder_path)
        print(f"Cleared folder: {folder_path}")

    os.makedirs(folder_path, exist_ok=True)
    print(f"Recreated folder: {folder_path}")


def decimal_to_dms(decimal):
    """Convert a decimal coordinate to EXIF-style degrees/minutes/seconds.

    The sign is discarded; the hemisphere has to be stored separately.

    Args:
        decimal: coordinate in decimal degrees.

    Returns:
        tuple: ``((degrees, 1), (minutes, 1), (seconds_x10000, 10000))``,
        i.e. three rationals with seconds kept to four decimal places.
    """
    seconds_scale = 10000
    # Work in whole units of 1/10000 arc-second. Rounding once, before the
    # split, lets a value such as 34.99999999 carry into 35 deg 0' 0" instead
    # of producing an out-of-range 34 deg 59' 60.0000".
    total_units = round(abs(decimal) * 3600 * seconds_scale)
    degrees, remainder = divmod(total_units, 3600 * seconds_scale)
    minutes, seconds = divmod(remainder, 60 * seconds_scale)
    return (degrees, 1), (minutes, 1), (seconds, seconds_scale)


def generate_qr_code(url, output_path, exif_bytes=None):
    """Render ``url`` as a QR code image and save it to ``output_path``.

    Args:
        url: text to encode.
        output_path: destination file path.
        exif_bytes: optional raw EXIF payload to embed; when ``None`` the
            image is saved without EXIF data.
    """
    qr = qrcode.QRCode(
        version=1,
        error_correction=qrcode.constants.ERROR_CORRECT_L,
        box_size=10,
        border=4,
    )
    qr.add_data(url)
    qr.make(fit=True)  # fit=True lets the symbol grow beyond version 1 when needed
    img = qr.make_image(fill_color="black", back_color="white")

    # Forward `exif` only when there is a payload: Pillow's JPEG encoder
    # expects bytes and does not accept exif=None.
    # NOTE(review): qrcode's PIL wrapper may pick the encoder from its own
    # default rather than from the file extension — confirm that the ".jpg"
    # files written by callers are real JPEGs.
    save_options = {} if exif_bytes is None else {"exif": exif_bytes}
    img.save(output_path, **save_options)
    print(f"QR code saved as {output_path}")


def prepare_exif_data(latitude, longitude, spot):
    """Build the EXIF byte payload that geotags a QR code image.

    The GPS IFD receives the coordinate as degrees/minutes/seconds with its
    hemisphere references and a fixed altitude of (500, 1). The spot code is
    stored as the image description and the decimal coordinate as the user
    comment.

    Args:
        latitude: latitude in decimal degrees.
        longitude: longitude in decimal degrees.
        spot: spot code; stored via ``str(spot)``.

    Returns:
        The value returned by ``piexif.dump`` for the assembled dictionary.
    """
    gps_ifd = {
        piexif.GPSIFD.GPSLatitude: decimal_to_dms(latitude),
        piexif.GPSIFD.GPSLongitude: decimal_to_dms(longitude),
        piexif.GPSIFD.GPSAltitude: (500, 1),
        piexif.GPSIFD.GPSLatitudeRef: 'N' if latitude >= 0 else 'S',
        piexif.GPSIFD.GPSLongitudeRef: 'E' if longitude >= 0 else 'W',
    }
    comment_bytes = f"Latitude: {latitude}, Longitude: {longitude}".encode("utf-8")

    exif_payload = {
        "0th": {piexif.ImageIFD.ImageDescription: str(spot)},
        "Exif": {piexif.ExifIFD.UserComment: comment_bytes},
        "GPS": gps_ifd,
        "1st": {},
        "thumbnail": None,
    }
    return piexif.dump(exif_payload)


def process_rows_for_qr(df, folder_path, base_url, key_url):
    """Generate one geotagged QR code image per available row of ``df``.

    Only rows whose ``Available`` column equals ``"start"`` are processed.
    The encoded URL is ``base_url + spot + key_url`` and the file is written
    to ``folder_path`` as ``{spot}-{available}-{title}.jpg``. Rows that are
    not available, or whose coordinates are missing or not numeric, are
    skipped with a printed message.

    Args:
        df: DataFrame with the columns ``Spot``, ``Title``, ``Available``,
            ``Latitude`` and ``Longitude``.
        folder_path: existing folder that receives the images.
        base_url: URL prefix placed before the spot code.
        key_url: URL suffix placed after the spot code.
    """
    import math  # local import: only needed for the finiteness check below

    for _, row in df.iterrows():
        spot = row['Spot']
        title = row['Title']
        available = row['Available']

        if available != "start":
            print(f"Skipping Spot {spot}: Not available.")
            continue

        # A single row with blank/NaN coordinates used to abort the whole
        # batch; skip just that row instead.
        try:
            latitude = float(row['Latitude'])
            longitude = float(row['Longitude'])
        except (TypeError, ValueError):
            latitude = longitude = float("nan")
        if not (math.isfinite(latitude) and math.isfinite(longitude)):
            print(f"Skipping Spot {spot}: missing or invalid coordinates.")
            continue

        url = f"{base_url}{spot}{key_url}"
        exif_bytes = prepare_exif_data(latitude, longitude, spot)
        output_path = os.path.join(folder_path, f"{spot}-{available}-{title}.jpg")
        generate_qr_code(url, output_path, exif_bytes)


def get_qrcode(input_csv, output_folder):
    """Regenerate the QR code folder from the rows of ``input_csv``.

    ``output_folder`` is emptied (or created) first; then every row of the
    CSV is handed to ``process_rows_for_qr``.
    """
    spot_url_prefix = "https://platinumaps.jp/d/seventeen-thecity2024?culture=ja&spot="
    spot_url_suffix = "&sr=1047&srcheckin=1"

    # Start from an empty folder so stale images never survive a re-run.
    clear_folder(output_folder)

    spots = pd.read_csv(input_csv)
    process_rows_for_qr(spots, output_folder, spot_url_prefix, spot_url_suffix)

In [15]:
import os
import pandas as pd
import simplekml
from zipfile import ZipFile


def load_csv(file_path):
    """Read the CSV at ``file_path`` into a DataFrame and return it."""
    table = pd.read_csv(file_path)
    return table


def create_kml_from_df(df, qrcode_folder, kml_file):
    """Write a KML file with one placemark per row that has coordinates.

    Each placemark is named after the row title, carries an HTML description
    (title, coordinates, spot code, address and the QR code image) and uses
    the QR code image as its icon when that image exists in
    ``qrcode_folder``. Images are referenced as ``files/<name>``.

    Args:
        df: DataFrame with the columns ``Title``, ``Latitude``,
            ``Longitude``, ``Spot``, ``Address`` and ``Available``.
        qrcode_folder: folder searched for ``{spot}-{available}-{title}.jpg``.
        kml_file: path of the KML file to write.

    Returns:
        ``kml_file``, unchanged.
    """
    from html import escape  # local import: `html` is not imported by this cell

    def _is_blank(value):
        """True for NaN/None and for empty or whitespace-only strings."""
        return pd.isna(value) or (isinstance(value, str) and not value.strip())

    kml = simplekml.Kml()

    for _, row in df.iterrows():
        title = row['Title']
        latitude = row['Latitude']
        longitude = row['Longitude']
        spot = row['Spot']
        address = row['Address']
        available = row['Available']

        # Rows without usable coordinates get no placemark.
        if _is_blank(latitude) or _is_blank(longitude):
            continue

        # File name and in-archive path of the QR code image.
        qrcode_file = f"{spot}-{available}-{title}.jpg"
        qrcode_path = os.path.join(qrcode_folder, qrcode_file)
        qrcode_relative_path = f"files/{qrcode_file}"

        pnt = kml.newpoint(name=title)
        pnt.coords = [(float(longitude), float(latitude))]
        # Title and address come from scraped pages: escape them so that
        # characters such as & < > ' cannot break the description markup.
        pnt.description = (
            f"<b>Title:</b> {escape(str(title))}<br>"
            f"<b>Lat:</b> {escape(str(latitude))}, <b>Lon:</b> {escape(str(longitude))}<br>"
            f"<b>SpotCode:</b> {escape(str(spot))}<br>"
            f"<b>Address:</b> {escape(str(address))}<br>"
            f"<b>QR Code:</b><br>"
            f"<img src='{escape(qrcode_relative_path)}' width='256' height='256'>"
        )

        # Use the QR code image as the placemark icon when it is available.
        if os.path.exists(qrcode_path):
            pnt.style.iconstyle.icon.href = qrcode_relative_path
        else:
            print(f"QR code image not found for {title}: {qrcode_file}")

    kml.save(kml_file)
    print(f"KML file saved as {kml_file}")
    return kml_file


def package_kmz(kml_file, qrcode_folder, kmz_file):
    """Bundle the KML file and every ``.jpg`` of ``qrcode_folder`` into a KMZ.

    The KML is stored under its base name at the archive root; the images
    are stored under ``files/``.
    """
    with ZipFile(kmz_file, 'w') as archive:
        archive.write(kml_file, os.path.basename(kml_file))

        image_names = [name for name in os.listdir(qrcode_folder) if name.endswith(".jpg")]
        for image_name in image_names:
            archive.write(os.path.join(qrcode_folder, image_name), f"files/{image_name}")

    print(f"KMZ file saved as {kmz_file}")


def get_kml_kmz(input_csv, qrcode_folder, kml_file, kmz_file):
    """Build the KML from ``input_csv`` and then package it as a KMZ.

    Args:
        input_csv: CSV holding the rows to map.
        qrcode_folder: folder containing the QR code images.
        kml_file: path of the KML file to write.
        kmz_file: path of the KMZ archive to write.
    """
    spots = load_csv(input_csv)
    create_kml_from_df(spots, qrcode_folder, kml_file)

    # The KMZ is the KML plus the images it references.
    package_kmz(kml_file, qrcode_folder, kmz_file)

In [16]:
import pandas as pd
import os

def clear_folder(folder_path):
    """
    Empty the given folder by deleting and recreating it.

    NOTE(review): a function with the same name and behavior is defined in an
    earlier cell; this definition silently replaces it.

    :param folder_path: path of the folder to clear
    """
    # Local import: the import block of this cell only provides `pd` and
    # `os`, so the function used to rely on `shutil` leaking from another cell.
    import shutil

    if os.path.exists(folder_path):
        # Remove the folder together with everything inside it.
        shutil.rmtree(folder_path)
        print(f"Cleared folder: {folder_path}")
    # Create the (now empty) folder again.
    os.makedirs(folder_path, exist_ok=True)
    print(f"Recreated folder: {folder_path}")

# 创建 GPX 文件的函数
def create_gpx_file(title, latitude, longitude, output_folder="./gpsPosition"):
    """
    Write a GPX file containing a single waypoint.

    :param title: waypoint name; also used as the file name
    :param latitude: waypoint latitude
    :param longitude: waypoint longitude
    :param output_folder: folder that receives the GPX file (created when missing)
    :return: None; nothing is written when a coordinate is missing or blank
    """
    from xml.sax.saxutils import escape  # local import: not provided by this cell

    def _is_blank(value):
        """True for NaN/None and for empty or whitespace-only strings."""
        return pd.isna(value) or (isinstance(value, str) and not value.strip())

    # Skip rows without usable coordinates.
    if _is_blank(latitude) or _is_blank(longitude):
        print(f"Skipping invalid coordinates for {title}")
        return

    # Escape the name so that characters such as & or < keep the file well-formed XML.
    gpx_content = f"""<?xml version="1.0"?>
<gpx version="1.1" creator="gpxgenerator.com">
  <wpt lat="{latitude}" lon="{longitude}">
    <name>{escape(str(title))}</name>
  </wpt>
</gpx>"""

    # A path separator inside the title would point into a sub-folder that does not exist.
    safe_name = str(title).replace("/", "_").replace(os.sep, "_")
    output_file = f"{output_folder}/{safe_name}.gpx"

    # open() does not create missing folders.
    os.makedirs(output_folder, exist_ok=True)
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(gpx_content)
    print(f"GPX file saved: {output_file}")

`checkSituation` 欄位代碼說明：

0：已找到 spot 編號、但尚未開放的點

1：本次要完成資料的點

2：已完成mapping的點

3：QRcode失效 / 地點有問題拉掉的點

In [11]:
# Input table of spots; getAddress filters it on its 'checkSituation' column.
csv_file = "./checked_spot.csv"
# checkSituation code to process in this run (see the legend above for the codes).
checked_situation = 1
# Name of the sub-folder under ./fullDate_Data/ that receives this run's output.
save_date = "1202"

# Scrapes the address of every matching spot and writes
# ./fullDate_Data/{save_date}/updated_spots_results_{checked_situation}.csv
getAddress(csv_file, checked_situation, save_date)

Starting parallel scraping for filtered rows...
Accessing https://platinumaps.jp/d/seventeen-thecity2024?culture=ja&spot=401346
Accessing https://platinumaps.jp/d/seventeen-thecity2024?culture=ja&spot=401343
Accessing https://platinumaps.jp/d/seventeen-thecity2024?culture=ja&spot=401347
Accessing https://platinumaps.jp/d/seventeen-thecity2024?culture=ja&spot=401341
Accessing https://platinumaps.jp/d/seventeen-thecity2024?culture=ja&spot=401340
Accessing https://platinumaps.jp/d/seventeen-thecity2024?culture=ja&spot=401345
Accessing https://platinumaps.jp/d/seventeen-thecity2024?culture=ja&spot=403904
Accessing https://platinumaps.jp/d/seventeen-thecity2024?culture=ja&spot=401348
Main element exists, proceeding to find address...
Address found using first selector: ハービスPLAZAのどこか
Main element exists, proceeding to find address...
Address found using first selector: 大阪府大阪市中央区道頓堀1-6-15 ﾄﾞｳﾄﾝﾋﾞﾙ3階
Accessing https://platinumaps.jp/d/seventeen-thecity2024?culture=ja&spot=403905
Accessing http

In [18]:


# NOTE: `save_date` and `checked_situation` are not defined in this cell; they
# come from the cell that calls getAddress, which therefore has to run first.
coordinate_input_csv = f"./fullDate_Data/{save_date}/updated_spots_results_{checked_situation}.csv" 
coordinate_output_csv = f"./fullDate_Data/{save_date}/updated_spots_results_with_coordinates_{checked_situation}.csv" 
# The coordinate lookup is disabled here; every step below reads
# coordinate_output_csv, so that file must already exist.
#getCoordinate(coordinate_input_csv, coordinate_output_csv)

# Regenerate the QR code images (the folder is cleared first by get_qrcode).
qr_output_folder = f"./fullDate_Data/{save_date}/qrcode"
get_qrcode(coordinate_output_csv, qr_output_folder)

kml_file = f"./fullDate_Data/{save_date}/map_with_qrcodes.kml"  # name of the KML file to write
kmz_file = f"./fullDate_Data/{save_date}/map_with_qrcodes.kmz"  # name of the KMZ file to write

# Build the KML and bundle it with the QR code images into the KMZ.
get_kml_kmz(coordinate_output_csv, qr_output_folder, kml_file, kmz_file)


# Write one GPX waypoint file per row; create_gpx_file is called without an
# output folder, so it uses its default "./gpsPosition".
df = pd.read_csv(coordinate_output_csv)
for index, row in df.iterrows():
    title = row['Title']  # title taken from the CSV
    latitude = row['Latitude']  # latitude taken from the CSV
    longitude = row['Longitude']  # longitude taken from the CSV
    # Create the GPX file for this row.
    create_gpx_file(title, latitude, longitude)


Recreated folder: ./fullDate_Data/1202/qrcode
QR code saved as ./fullDate_Data/1202/qrcode/401340-start-阪急三番街 北館B1Fニトリ デコホーム前.jpg
QR code saved as ./fullDate_Data/1202/qrcode/401341-start-Hidden Bonbony（位於 Herbis Plaza）.jpg
QR code saved as ./fullDate_Data/1202/qrcode/401343-start-ディアモール大阪 ファッショナブルストリートの中央.jpg
QR code saved as ./fullDate_Data/1202/qrcode/401345-start-グランフロント大阪北館6F ウメキタフロア ギャラリースペース.jpg
QR code saved as ./fullDate_Data/1202/qrcode/401346-start-グランフロント大阪北館5F HMV.jpg
QR code saved as ./fullDate_Data/1202/qrcode/401347-start-NU茶屋町 6F 『TOWER RECORDS』店内.jpg
QR code saved as ./fullDate_Data/1202/qrcode/401348-start-NU茶屋町プラス2F 『WOCCA ROCCA』前.jpg
QR code saved as ./fullDate_Data/1202/qrcode/403904-start-鳥貴族 道頓堀店.jpg
QR code saved as ./fullDate_Data/1202/qrcode/403905-start-鳥貴族 お初天神店.jpg
QR code saved as ./fullDate_Data/1202/qrcode/403906-start-鳥貴族 南船場店.jpg
QR code saved as ./fullDate_Data/1202/qrcode/403907-start-鳥貴族 堺筋本町店.jpg
QR code saved as ./fullDate_Data/1202/qrcode/403908