# 宿泊施設のデータ集め用ノートブック

## 神奈川県

In [10]:
import requests, sqlite3, time, re
from bs4 import BeautifulSoup

# Root of the Kanagawa area listing; page N (N > 1) lives at "<BASE_URL>N.html".
BASE_URL = "https://www.his-vacation.com/area/kanagawa/"
# Query string appended to every listing URL.
# NOTE(review): presumably selects the accommodation type and popularity sort — confirm.
QUERY = "?kd=h_3&sr=pop"
# SQLite file that receives the scraped rows.
DB_NAME = "accommodations.db"
# HTTP headers sent with every request (a desktop Chrome User-Agent).
HDR = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
}
# Maximum number of hotels collected in one run.
LIMIT = 100

def _parse_hotel(item):
    """Extract one hotel's DB row from a result-list ``<li>`` element.

    Args:
        item: BeautifulSoup tag for one entry of ``#result-list``.

    Returns:
        A tuple ``(id, name, lat, lon, rating, review_count, price)`` in the
        column order of the ``hotels`` table, with ``lat``/``lon`` left as
        ``None``; or ``None`` when the entry carries no hotel id.

    Raises:
        KeyError: The id link has no ``data-value`` attribute.
        ValueError: The id, rating, review count or price text is not numeric.
    """
    # 1. Hotel id, read from the data-value attribute of the link inside
    #    p.ut_btn_clip.
    id_tag = item.select_one('p.ut_btn_clip a')
    hid = id_tag['data-value'] if id_tag is not None else None
    if not hid:
        # Without an id there is nothing to insert; skip the other fields.
        return None

    # 2. Hotel name ("不明" when the heading is missing).
    name_tag = item.select_one('h2.ut_name')
    name = name_tag.get_text(strip=True) if name_tag is not None else "不明"

    # 3. Average rating (0.0 when the tag is absent).
    rate_tag = item.select_one('span.ut_average')
    rate = float(rate_tag.text) if rate_tag is not None else 0.0

    # 4. Review count: drop every non-digit character before converting.
    rev_tag = item.select_one('[itemprop="reviewCount"]')
    rev = int(re.sub(r'\D', '', rev_tag.text)) if rev_tag is not None else 0

    # 5. Price per adult, taken from p.ut_person (0 when the tag is absent).
    price_tag = item.select_one('p.ut_person')
    if price_tag is not None:
        # Keep only the text after the last full-width colon so the "1" of
        # "大人1名" does not leak into the number.
        price_text = price_tag.get_text().split('：')[-1]
        price_val = int(re.sub(r'\D', '', price_text))
    else:
        price_val = 0

    return (int(hid), name, None, None, rate, rev, price_val)


def scrape(timeout=30):
    """Scrape up to ``LIMIT`` hotels from the listing pages into SQLite.

    Walks the listing under ``BASE_URL`` page by page (``2.html``, ``3.html``,
    ... from the second page on), parses every ``#result-list > li`` entry and
    appends the rows to the ``hotels`` table of ``DB_NAME``, committing once
    per page. The loop stops when ``LIMIT`` rows were collected, when a page
    yields no entries or no usable rows, or when a request/processing error
    occurs. Progress and errors are reported with ``print``.

    Args:
        timeout: Seconds to wait for each HTTP request, so that a stalled
            connection cannot hang the run indefinitely.
    """
    conn = sqlite3.connect(DB_NAME)
    total_count = 0
    try:
        cur = conn.cursor()
        # The table is created only if missing and never dropped here, so this
        # run appends to whatever rows are already stored.
        cur.execute('CREATE TABLE IF NOT EXISTS hotels (id INT, name TEXT, lat REAL, lon REAL, rating REAL, review_count INT, price INT)')

        page = 1
        while total_count < LIMIT:
            # The first page has no file name; later pages are "<n>.html".
            page_part = f'{page}.html' if page > 1 else ''
            url = f"{BASE_URL}{page_part}{QUERY}"
            print(f"Accessing: {url}...")

            try:
                res = requests.get(url, headers=HDR, timeout=timeout)
                res.raise_for_status()
                soup = BeautifulSoup(res.content, 'html.parser')

                # Every <li> that is a direct child of #result-list.
                items = soup.select('#result-list > li')
                if not items:
                    print("宿のリストが見つかりませんでした。")
                    break

                rows = []
                for it in items:
                    if total_count >= LIMIT:
                        break
                    try:
                        row = _parse_hotel(it)
                    except Exception as e:
                        # Best effort: one malformed entry must not abort the
                        # page, but report it instead of hiding it.
                        print(f"スキップ: 宿情報の解析に失敗しました ({e!r})")
                        continue
                    if row is not None:
                        rows.append(row)
                        total_count += 1

                if not rows:
                    # A page without any usable entry ends the crawl.
                    break

                cur.executemany('INSERT INTO hotels VALUES (?,?,?,?,?,?,?)', rows)
                conn.commit()
                print(f"Page {page}: {len(rows)}件保存 (累計 {total_count}件)")

                page += 1
                # Pause between page requests.
                time.sleep(1.5)

            except Exception as e:
                print(f"エラーが発生しました: {e}")
                break
    finally:
        # Close the connection even if something unexpected escapes the loop
        # (e.g. an interrupt while sleeping).
        conn.close()

    print(f"全工程完了。合計 {total_count}件取得しました。")

# Run the scraper when this cell/script is executed directly.
if __name__ == '__main__':
    scrape()

Accessing: https://www.his-vacation.com/area/kanagawa/?kd=h_3&sr=pop...
Page 1: 19件保存 (累計 19件)
Accessing: https://www.his-vacation.com/area/kanagawa/2.html?kd=h_3&sr=pop...
Page 2: 19件保存 (累計 38件)
Accessing: https://www.his-vacation.com/area/kanagawa/3.html?kd=h_3&sr=pop...
Page 3: 18件保存 (累計 56件)
Accessing: https://www.his-vacation.com/area/kanagawa/4.html?kd=h_3&sr=pop...
Page 4: 17件保存 (累計 73件)
Accessing: https://www.his-vacation.com/area/kanagawa/5.html?kd=h_3&sr=pop...
Page 5: 18件保存 (累計 91件)
Accessing: https://www.his-vacation.com/area/kanagawa/6.html?kd=h_3&sr=pop...
Page 6: 9件保存 (累計 100件)
全工程完了。合計 100件取得しました。


## 静岡県

In [8]:
import requests, sqlite3, time, re
from bs4 import BeautifulSoup

# Root of the Shizuoka area listing; page N (N > 1) lives at "<BASE_URL>N.html".
BASE_URL = "https://www.his-vacation.com/area/shizuoka/"
# Query string appended to every listing URL.
# NOTE(review): presumably selects the accommodation type and popularity sort — confirm.
QUERY = "?kd=h_3&sr=pop"
# SQLite file that receives the scraped rows.
DB_NAME = "accommodations.db"
# HTTP headers sent with every request (a desktop Chrome User-Agent).
HDR = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
}
# Maximum number of hotels collected in one run.
LIMIT = 100

def _parse_hotel(item):
    """Extract one hotel's DB row from a result-list ``<li>`` element.

    Args:
        item: BeautifulSoup tag for one entry of ``#result-list``.

    Returns:
        A tuple ``(id, name, lat, lon, rating, review_count, price)`` in the
        column order of the ``hotels`` table, with ``lat``/``lon`` left as
        ``None``; or ``None`` when the entry carries no hotel id.

    Raises:
        KeyError: The id link has no ``data-value`` attribute.
        ValueError: The id, rating, review count or price text is not numeric.
    """
    # 1. Hotel id, read from the data-value attribute of the link inside
    #    p.ut_btn_clip.
    id_tag = item.select_one('p.ut_btn_clip a')
    hid = id_tag['data-value'] if id_tag is not None else None
    if not hid:
        # Without an id there is nothing to insert; skip the other fields.
        return None

    # 2. Hotel name ("不明" when the heading is missing).
    name_tag = item.select_one('h2.ut_name')
    name = name_tag.get_text(strip=True) if name_tag is not None else "不明"

    # 3. Average rating (0.0 when the tag is absent).
    rate_tag = item.select_one('span.ut_average')
    rate = float(rate_tag.text) if rate_tag is not None else 0.0

    # 4. Review count: drop every non-digit character before converting.
    rev_tag = item.select_one('[itemprop="reviewCount"]')
    rev = int(re.sub(r'\D', '', rev_tag.text)) if rev_tag is not None else 0

    # 5. Price per adult, taken from p.ut_person (0 when the tag is absent).
    price_tag = item.select_one('p.ut_person')
    if price_tag is not None:
        # Keep only the text after the last full-width colon so the "1" of
        # "大人1名" does not leak into the number.
        price_text = price_tag.get_text().split('：')[-1]
        price_val = int(re.sub(r'\D', '', price_text))
    else:
        price_val = 0

    return (int(hid), name, None, None, rate, rev, price_val)


def scrape(timeout=30):
    """Reset the ``hotels`` table and scrape up to ``LIMIT`` hotels into it.

    Drops and recreates the ``hotels`` table of ``DB_NAME``, then walks the
    listing under ``BASE_URL`` page by page (``2.html``, ``3.html``, ... from
    the second page on), parses every ``#result-list > li`` entry and inserts
    the rows, committing once per page. The loop stops when ``LIMIT`` rows were
    collected, when a page yields no entries or no usable rows, or when a
    request/processing error occurs. Progress and errors are reported with
    ``print``.

    Args:
        timeout: Seconds to wait for each HTTP request, so that a stalled
            connection cannot hang the run indefinitely.
    """
    conn = sqlite3.connect(DB_NAME)
    total_count = 0
    try:
        cur = conn.cursor()
        # NOTE: this variant resets the table. DROP TABLE removes every row
        # already stored in hotels in this DB file, including rows written by
        # other runs, so it has to be executed before any run whose rows
        # should be kept.
        cur.execute('DROP TABLE IF EXISTS hotels')
        cur.execute('CREATE TABLE hotels (id INT, name TEXT, lat REAL, lon REAL, rating REAL, review_count INT, price INT)')

        page = 1
        while total_count < LIMIT:
            # The first page has no file name; later pages are "<n>.html".
            page_part = f'{page}.html' if page > 1 else ''
            url = f"{BASE_URL}{page_part}{QUERY}"
            print(f"Accessing: {url}...")

            try:
                res = requests.get(url, headers=HDR, timeout=timeout)
                res.raise_for_status()
                soup = BeautifulSoup(res.content, 'html.parser')

                # Every <li> that is a direct child of #result-list.
                items = soup.select('#result-list > li')
                if not items:
                    print("宿のリストが見つかりませんでした。")
                    break

                rows = []
                for it in items:
                    if total_count >= LIMIT:
                        break
                    try:
                        row = _parse_hotel(it)
                    except Exception as e:
                        # Best effort: one malformed entry must not abort the
                        # page, but report it instead of hiding it.
                        print(f"スキップ: 宿情報の解析に失敗しました ({e!r})")
                        continue
                    if row is not None:
                        rows.append(row)
                        total_count += 1

                if not rows:
                    # A page without any usable entry ends the crawl.
                    break

                cur.executemany('INSERT INTO hotels VALUES (?,?,?,?,?,?,?)', rows)
                conn.commit()
                print(f"Page {page}: {len(rows)}件保存 (累計 {total_count}件)")

                page += 1
                # Pause between page requests.
                time.sleep(1.5)

            except Exception as e:
                print(f"エラーが発生しました: {e}")
                break
    finally:
        # Close the connection even if something unexpected escapes the loop
        # (e.g. an interrupt while sleeping).
        conn.close()

    print(f"全工程完了。合計 {total_count}件取得しました。")

# Run the scraper when this cell/script is executed directly.
if __name__ == '__main__':
    scrape()

Accessing: https://www.his-vacation.com/area/shizuoka/?kd=h_3&sr=pop...
Page 1: 19件保存 (累計 19件)
Accessing: https://www.his-vacation.com/area/shizuoka/2.html?kd=h_3&sr=pop...
Page 2: 20件保存 (累計 39件)
Accessing: https://www.his-vacation.com/area/shizuoka/3.html?kd=h_3&sr=pop...
Page 3: 17件保存 (累計 56件)
Accessing: https://www.his-vacation.com/area/shizuoka/4.html?kd=h_3&sr=pop...
Page 4: 19件保存 (累計 75件)
Accessing: https://www.his-vacation.com/area/shizuoka/5.html?kd=h_3&sr=pop...
Page 5: 18件保存 (累計 93件)
Accessing: https://www.his-vacation.com/area/shizuoka/6.html?kd=h_3&sr=pop...
Page 6: 7件保存 (累計 100件)
全工程完了。合計 100件取得しました。


## 山梨県

In [11]:
import requests, sqlite3, time, re
from bs4 import BeautifulSoup

# Root of the Yamanashi area listing; page N (N > 1) lives at "<BASE_URL>N.html".
BASE_URL = "https://www.his-vacation.com/area/yamanashi/"
# Query string appended to every listing URL.
# NOTE(review): presumably selects the accommodation type and popularity sort — confirm.
QUERY = "?kd=h_3&sr=pop"
# SQLite file that receives the scraped rows.
DB_NAME = "accommodations.db"
# HTTP headers sent with every request (a desktop Chrome User-Agent).
HDR = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
}
# Maximum number of hotels collected in one run.
LIMIT = 100

def _parse_hotel(item):
    """Extract one hotel's DB row from a result-list ``<li>`` element.

    Args:
        item: BeautifulSoup tag for one entry of ``#result-list``.

    Returns:
        A tuple ``(id, name, lat, lon, rating, review_count, price)`` in the
        column order of the ``hotels`` table, with ``lat``/``lon`` left as
        ``None``; or ``None`` when the entry carries no hotel id.

    Raises:
        KeyError: The id link has no ``data-value`` attribute.
        ValueError: The id, rating, review count or price text is not numeric.
    """
    # 1. Hotel id, read from the data-value attribute of the link inside
    #    p.ut_btn_clip.
    id_tag = item.select_one('p.ut_btn_clip a')
    hid = id_tag['data-value'] if id_tag is not None else None
    if not hid:
        # Without an id there is nothing to insert; skip the other fields.
        return None

    # 2. Hotel name ("不明" when the heading is missing).
    name_tag = item.select_one('h2.ut_name')
    name = name_tag.get_text(strip=True) if name_tag is not None else "不明"

    # 3. Average rating (0.0 when the tag is absent).
    rate_tag = item.select_one('span.ut_average')
    rate = float(rate_tag.text) if rate_tag is not None else 0.0

    # 4. Review count: drop every non-digit character before converting.
    rev_tag = item.select_one('[itemprop="reviewCount"]')
    rev = int(re.sub(r'\D', '', rev_tag.text)) if rev_tag is not None else 0

    # 5. Price per adult, taken from p.ut_person (0 when the tag is absent).
    price_tag = item.select_one('p.ut_person')
    if price_tag is not None:
        # Keep only the text after the last full-width colon so the "1" of
        # "大人1名" does not leak into the number.
        price_text = price_tag.get_text().split('：')[-1]
        price_val = int(re.sub(r'\D', '', price_text))
    else:
        price_val = 0

    return (int(hid), name, None, None, rate, rev, price_val)


def scrape(timeout=30):
    """Scrape up to ``LIMIT`` hotels from the listing pages into SQLite.

    Walks the listing under ``BASE_URL`` page by page (``2.html``, ``3.html``,
    ... from the second page on), parses every ``#result-list > li`` entry and
    appends the rows to the ``hotels`` table of ``DB_NAME``, committing once
    per page. The loop stops when ``LIMIT`` rows were collected, when a page
    yields no entries or no usable rows, or when a request/processing error
    occurs. Progress and errors are reported with ``print``.

    Args:
        timeout: Seconds to wait for each HTTP request, so that a stalled
            connection cannot hang the run indefinitely.
    """
    conn = sqlite3.connect(DB_NAME)
    total_count = 0
    try:
        cur = conn.cursor()
        # The table is created only if missing and never dropped here, so this
        # run appends to whatever rows are already stored.
        cur.execute('CREATE TABLE IF NOT EXISTS hotels (id INT, name TEXT, lat REAL, lon REAL, rating REAL, review_count INT, price INT)')

        page = 1
        while total_count < LIMIT:
            # The first page has no file name; later pages are "<n>.html".
            page_part = f'{page}.html' if page > 1 else ''
            url = f"{BASE_URL}{page_part}{QUERY}"
            print(f"Accessing: {url}...")

            try:
                res = requests.get(url, headers=HDR, timeout=timeout)
                res.raise_for_status()
                soup = BeautifulSoup(res.content, 'html.parser')

                # Every <li> that is a direct child of #result-list.
                items = soup.select('#result-list > li')
                if not items:
                    print("宿のリストが見つかりませんでした。")
                    break

                rows = []
                for it in items:
                    if total_count >= LIMIT:
                        break
                    try:
                        row = _parse_hotel(it)
                    except Exception as e:
                        # Best effort: one malformed entry must not abort the
                        # page, but report it instead of hiding it.
                        print(f"スキップ: 宿情報の解析に失敗しました ({e!r})")
                        continue
                    if row is not None:
                        rows.append(row)
                        total_count += 1

                if not rows:
                    # A page without any usable entry ends the crawl.
                    break

                cur.executemany('INSERT INTO hotels VALUES (?,?,?,?,?,?,?)', rows)
                conn.commit()
                print(f"Page {page}: {len(rows)}件保存 (累計 {total_count}件)")

                page += 1
                # Pause between page requests.
                time.sleep(1.5)

            except Exception as e:
                print(f"エラーが発生しました: {e}")
                break
    finally:
        # Close the connection even if something unexpected escapes the loop
        # (e.g. an interrupt while sleeping).
        conn.close()

    print(f"全工程完了。合計 {total_count}件取得しました。")

# Run the scraper when this cell/script is executed directly.
if __name__ == '__main__':
    scrape()

Accessing: https://www.his-vacation.com/area/yamanashi/?kd=h_3&sr=pop...
Page 1: 19件保存 (累計 19件)
Accessing: https://www.his-vacation.com/area/yamanashi/2.html?kd=h_3&sr=pop...
Page 2: 18件保存 (累計 37件)
Accessing: https://www.his-vacation.com/area/yamanashi/3.html?kd=h_3&sr=pop...
Page 3: 20件保存 (累計 57件)
Accessing: https://www.his-vacation.com/area/yamanashi/4.html?kd=h_3&sr=pop...
Page 4: 17件保存 (累計 74件)
Accessing: https://www.his-vacation.com/area/yamanashi/5.html?kd=h_3&sr=pop...
Page 5: 16件保存 (累計 90件)
Accessing: https://www.his-vacation.com/area/yamanashi/6.html?kd=h_3&sr=pop...
Page 6: 10件保存 (累計 100件)
全工程完了。合計 100件取得しました。


# 各宿に対して座標をつける

In [2]:
import sqlite3
import csv

def export_names_to_csv(db_name='accommodations.db', output_file='hotel_names.csv'):
    """Write every hotel name stored in the DB to a one-column CSV file.

    Reads the ``name`` column of the ``hotels`` table and writes it under a
    ``宿名`` header row. Any failure (missing DB/table, unwritable file, ...)
    is reported with ``print`` instead of being raised.

    Args:
        db_name: Path of the SQLite database to read.
        output_file: Path of the CSV file to create or overwrite.
    """
    # Bound before the try so the finally clause can tell whether connecting
    # succeeded; otherwise a failed connect would raise UnboundLocalError there.
    conn = None
    try:
        conn = sqlite3.connect(db_name)
        cur = conn.cursor()

        # Only the names are needed.
        cur.execute("SELECT name FROM hotels")
        rows = cur.fetchall()

        # utf-8-sig prepends a BOM to the file.
        # NOTE(review): presumably so spreadsheet apps detect UTF-8 — confirm.
        with open(output_file, 'w', newline='', encoding='utf-8-sig') as f:
            writer = csv.writer(f)
            writer.writerow(['宿名'])  # header row
            writer.writerows(rows)

        print(f"成功: {len(rows)}件の宿名を '{output_file}' に保存しました。")

    except Exception as e:
        print(f"エラーが発生しました: {e}")
    finally:
        if conn is not None:
            conn.close()

# Run the export when this cell/script is executed directly.
if __name__ == '__main__':
    export_names_to_csv()

成功: 300件の宿名を 'hotel_names.csv' に保存しました。


## 宿名をCSVに書き出し、スプレッドシートにインポートしてから拡張機能を使って座標を付ける。完成したCSVファイルの座標を元のDBに反映する

In [6]:
import sqlite3
import pandas as pd
import numpy as np

def safe_import_coords(db_name='accommodations.db', csv_name='hotel_coords.csv'):
    """Copy latitude/longitude from a CSV file into the ``hotels`` table.

    The CSV is read by column position, not by header text: column 0 is the
    hotel name, column 6 the latitude and column 7 the longitude (spreadsheet
    columns A, G and H); the first line is treated as a header and skipped.
    Each CSV row updates the DB rows whose ``name`` equals the CSV name.
    Progress, the number of CSV names that matched nothing, and up to five
    updated rows are reported with ``print``; any failure is printed instead
    of raised.

    Args:
        db_name: Path of the SQLite database to update.
        csv_name: Path of the CSV file downloaded from the spreadsheet.
    """
    # Bound before the try so the finally clause can check it directly.
    conn = None
    try:
        df = pd.read_csv(csv_name, usecols=[0, 6, 7], names=['name', 'lat', 'lon'], header=0)

        # Missing cells become None so they are stored as SQL NULL.
        df = df.replace({np.nan: None})

        conn = sqlite3.connect(db_name)
        cur = conn.cursor()

        print("流し込み開始...")
        success_count = 0
        unmatched_count = 0
        update_sql = "UPDATE hotels SET lat = ?, lon = ? WHERE name = ?"

        for name, lat, lon in df[['name', 'lat', 'lon']].itertuples(index=False, name=None):
            if not name:
                continue
            cur.execute(update_sql, (lat, lon, name))
            success_count += 1
            # rowcount is the number of rows this UPDATE modified; zero means
            # the CSV name does not exist in the DB.
            if cur.rowcount == 0:
                unmatched_count += 1

        conn.commit()
        print(f"完了！ {success_count}件の照合を行い、DBを更新しました。")
        if unmatched_count:
            # Surface names that silently matched nothing.
            print(f"注意: {unmatched_count}件の宿名はDBに一致する行がありませんでした。")

        # Spot check: show up to five rows that now have coordinates.
        print("\n--- DBの中身（最新5件） ---")
        cur.execute("SELECT name, lat, lon FROM hotels WHERE lat IS NOT NULL LIMIT 5")
        for r in cur.fetchall():
            print(f"宿名: {r[0][:15]}... | 座標: {r[1]}, {r[2]}")

    except Exception as e:
        print(f"エラー発生: {e}\n※CSVの列がA, G, Hになっているか再確認してください。")
    finally:
        if conn is not None:
            conn.close()

# Run the coordinate import when this cell/script is executed directly.
if __name__ == '__main__':
    safe_import_coords()

流し込み開始...
完了！ 300件の照合を行い、DBを更新しました。

--- DBの中身（最新5件） ---
宿名: 熱海温泉　熱海ニューフジヤホテ... | 座標: 35.0972758, 139.0724481
宿名: グランドメルキュール浜名湖リゾ... | 座標: 34.7069647, 137.6154507
宿名: 絶景の癒しの湯宿　茄子のはな... | 座標: 34.8985612, 139.0956342
宿名: 熱海温泉　伊東園ホテル熱海館... | 座標: 35.1020191, 139.0765092
宿名: 亀の井ホテル　熱海... | 座標: 35.0923395, 139.063659
