In [None]:
import sqlite3
import math
import pandas as pd

def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance between two points in kilometres.

    Uses the haversine formula on a sphere of radius 6371 km.

    Args:
        lat1: Latitude of the first point, in degrees.
        lon1: Longitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        lon2: Longitude of the second point, in degrees.

    Returns:
        The distance in kilometres. NaN inputs yield NaN.
    """
    # Earth radius (km)
    R = 6371.0

    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = (
        math.sin(dlat / 2) ** 2
        + math.cos(math.radians(lat1))
        * math.cos(math.radians(lat2))
        * math.sin(dlon / 2) ** 2
    )
    # Rounding can push `a` marginally above 1 for (near-)antipodal points,
    # which would make sqrt(1 - a) raise ValueError. A plain comparison is
    # used instead of min()/max() so that NaN is left untouched and still
    # propagates to the result.
    if a > 1.0:
        a = 1.0
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return R * c

def analyze_correlation(radius_km=3.0):
    """Correlate hotel ratings with the number of nearby attractions.

    Reads hotels from ``accommodations.db`` and attractions from
    ``attractions.db``, counts for every hotel the attractions whose
    haversine distance is at most ``radius_km``, prints the correlation
    coefficient between that count and the rating together with the five
    hotels that have the most attractions, and returns the hotels frame.

    Args:
        radius_km: Search radius around each hotel, in kilometres.

    Returns:
        The hotels DataFrame (``name``, ``rating``, ``lat``, ``lon``) with an
        added ``attr_count`` column.
    """
    # 1. Load the data. Each connection is closed in a ``finally`` so a
    #    failing query does not leak the handle.
    conn_h = sqlite3.connect('accommodations.db')
    try:
        hotels = pd.read_sql_query(
            "SELECT name, rating, lat, lon FROM hotels WHERE lat IS NOT NULL",
            conn_h,
        )
    finally:
        conn_h.close()

    conn_a = sqlite3.connect('attractions.db')
    try:
        attractions = pd.read_sql_query("SELECT lat, lon FROM attractions", conn_a)
    finally:
        conn_a.close()

    # Attractions without coordinates can never fall inside the radius, so
    # drop them up front instead of feeding missing values to haversine().
    attractions = attractions.dropna(subset=['lat', 'lon'])

    # 2. Count the attractions within range of every hotel. Coordinates are
    #    pulled out once as plain tuples; iterrows() would build a Series per
    #    row for every hotel/attraction pair.
    attr_coords = list(zip(attractions['lat'], attractions['lon']))
    hotels['attr_count'] = [
        sum(
            1
            for a_lat, a_lon in attr_coords
            if haversine(h_lat, h_lon, a_lat, a_lon) <= radius_km
        )
        for h_lat, h_lon in zip(hotels['lat'], hotels['lon'])
    ]

    # 3. Analysis: compute the correlation coefficient.
    correlation = hotels['attr_count'].corr(hotels['rating'])

    print(f"--- 分析結果 (半径 {radius_km}km) ---")
    print(f"相関係数: {correlation:.4f}")

    if pd.isna(correlation):
        # corr() yields NaN with too few rows or a constant column. Every
        # comparison with NaN is False, so without this guard the result
        # would fall through to the "strong correlation" branch.
        print("結果: 相関係数を算出できません（データ不足、または値が一定です）。")
    elif abs(correlation) < 0.2:
        print("結果: ほとんど相関が見られません。")
    elif abs(correlation) < 0.4:
        print("結果: 弱い相関があります。")
    else:
        print("結果: 強い相関があります！仮説的中かもしれません。")

    # Show the top 5 hotels by attraction count.
    print("\n--- 観光地数が多い宿 TOP 5 ---")
    print(hotels.sort_values('attr_count', ascending=False)[['name', 'attr_count', 'rating']].head())

    return hotels

if __name__ == "__main__":
    # Run the analysis with a 3 km radius and keep the returned frame.
    df_result = analyze_correlation(radius_km=3.0)