#### Import

In [1]:
import duckdb
# Open the project's DuckDB database file; every query cell below goes through `con`.
con = duckdb.connect(database='data.db')

In [2]:
# List every table available in the connected database.
show_tables_sql = 'show tables'
con.sql(show_tables_sql)

┌─────────────────────────────┐
│            name             │
│           varchar           │
├─────────────────────────────┤
│ Bus_Stop_Info               │
│ Business_Operation          │
│ MRT_Business_Area           │
│ MRT_Event                   │
│ MRT_Flow_Record             │
│ MRT_Station_Info            │
│ Representative              │
│ Shop_Rental_Listing         │
│ Ubike_Station_Info          │
│ Ubike_Station_Rental_Record │
│ Village_Info                │
│ Village_Population_By_Age   │
├─────────────────────────────┤
│           12 rows           │
└─────────────────────────────┘

## 查詢周邊商圈、租房資訊

### 商圈平均租金以及周邊捷運站

In [3]:
# Average monthly rent of the shop listings around each MRT business area.
#
# Steps:
#   1. `distances`        - distance in km between every listing and every MRT station.
#   2. `nearest_stations` - keep the (listing, station) pairs that are at most 1 km apart.
#   3. final SELECT       - attach the business areas registered for each station and
#                           average the rent per business area / station / district.
con.sql("""--sql
WITH distances AS (
    -- Great-circle distance (spherical law of cosines, Earth radius 6371 km).
    SELECT
        s.district,
        s.case_name,
        s.address,
        s.monthly_rent,
        s.area_ping,
        m.station_id,
        m.station_name,
        6371 * ACOS(
            -- Clamp to [-1, 1]: rounding can push the cosine slightly past 1 when
            -- the two points (nearly) coincide, where ACOS is undefined.
            LEAST(1.0, GREATEST(-1.0,
                COS(RADIANS(s.latitude)) * COS(RADIANS(m.latitude)) *
                COS(RADIANS(m.longitude) - RADIANS(s.longitude)) +
                SIN(RADIANS(s.latitude)) * SIN(RADIANS(m.latitude))
            ))
        ) AS distance_km
    FROM Shop_Rental_Listing s
    CROSS JOIN MRT_Station_Info m
),
nearest_stations AS (
    -- One row per distinct (listing, station) pair within 1 km.
    SELECT
        d.district,
        d.case_name,
        d.address,
        d.monthly_rent,
        d.area_ping,
        d.station_id,
        d.station_name,
        MIN(d.distance_km) AS nearest_distance_km
    FROM distances d
    GROUP BY d.district, d.case_name, d.address, d.monthly_rent, d.area_ping, d.station_id, d.station_name
    HAVING MIN(d.distance_km) <= 1
)
SELECT
    mba.name, -- business area name
    ROUND(AVG(ns.monthly_rent)) AS average_monthly_rent,
    ns.station_name,
    mba.tag,
    ns.district
FROM nearest_stations ns
JOIN MRT_Business_Area mba ON ns.station_id = mba.station_id -- attach business area data
GROUP BY mba.name, mba.tag, ns.station_name, ns.district
ORDER BY ns.district, ns.station_name;
""")

┌────────────────────────┬──────────────────────┬──────────────┬──────────┬──────────┐
│          name          │ average_monthly_rent │ station_name │   tag    │ district │
│        varchar         │        double        │   varchar    │ varchar  │ varchar  │
├────────────────────────┼──────────────────────┼──────────────┼──────────┼──────────┤
│ 條通(商圈)             │             183482.0 │ 中山         │ 吃在商圈 │ 中山區   │
│ 晶華酒店、欣欣百貨周邊 │             183482.0 │ 中山         │ 玩在商圈 │ 中山區   │
│ 中山捷運站周邊         │             183482.0 │ 中山         │ 買在商圈 │ 中山區   │
│ 中山北路婚紗(商圈)     │             183482.0 │ 中山         │ 玩在商圈 │ 中山區   │
│ 赤峰街 (商圈)          │             183482.0 │ 中山         │ 玩在商圈 │ 中山區   │
│ 朝陽服飾材料(商圈)     │             183482.0 │ 中山         │ 買在商圈 │ 中山區   │
│ 中山國中捷運站周邊     │             172210.0 │ 中山國中     │ 吃在商圈 │ 中山區   │
│ 大直(商圈)             │             252600.0 │ 劍南路       │ 玩在商圈 │ 中山區   │
│ 南京復興捷運站周邊     │             268530.0 │ 南京復興     │ 吃在商圈 │ 中山區   │
│ 華陰街(商圈)           

In [5]:
# Shop listings with rent per ping and the contact (representative) matched by phone number.
# Listings without a matching representative are kept (LEFT JOIN) with NULL name/phone.
con.sql("""--sql
    SELECT
        s.address,
        s.monthly_rent,
        -- NULLIF guards against a zero area: depending on the DuckDB version/settings a
        -- division by zero yields NULL or inf; this makes it NULL consistently.
        ROUND(s.monthly_rent / NULLIF(s.area_ping, 0)) AS monthly_rent_per_ping,
        s.area_ping,
        s.shop_floor,
        s.total_floor,
        s.deposit,
        r.name,
        r.phone
    FROM Shop_Rental_Listing s
    LEFT JOIN Representative r ON s.phone = r.phone
""")

┌──────────────────────────┬──────────────┬───────────────────────┬───────────┬────────────┬─────────────┬─────────┬──────────────────────┬────────────┐
│         address          │ monthly_rent │ monthly_rent_per_ping │ area_ping │ shop_floor │ total_floor │ deposit │         name         │   phone    │
│         varchar          │    int64     │        double         │   int64   │  varchar   │    int64    │  int64  │       varchar        │  varchar   │
├──────────────────────────┼──────────────┼───────────────────────┼───────────┼────────────┼─────────────┼─────────┼──────────────────────┼────────────┤
│ 台北市士林區文林路       │        84000 │                4421.0 │        19 │ 1~2        │           3 │  168000 │ 曾先生               │ 0228839111 │
│ 台北市士林區天母東路     │       120000 │                2182.0 │        55 │ 1~2        │           5 │  240000 │ 侯沛昌               │ 0227721010 │
│ 台北市大安區敦化南路一段 │       470000 │                4947.0 │        95 │ 1          │          23 │  940000 │ 松信

## 店面資料與所在商圈及鄰近捷運站

In [None]:
# Shop listings together with the MRT stations within 1 km and the business areas
# registered for those stations.
#
# Steps:
#   1. `distances`        - distance in km between every listing and every MRT station.
#   2. `nearest_stations` - keep the (listing, station) pairs that are at most 1 km apart.
#   3. final SELECT       - attach each station's business areas; a listing appears once
#                           per (station, business area) combination.
con.sql("""--sql
WITH distances AS (
    -- Great-circle distance (spherical law of cosines, Earth radius 6371 km).
    SELECT
        s.district,
        s.village,
        s.case_name,
        s.monthly_rent,
        s.area_ping,
        s.address,
        m.station_id,
        m.station_name,
        6371 * ACOS(
            -- Clamp to [-1, 1]: rounding can push the cosine slightly past 1 when
            -- the two points (nearly) coincide, where ACOS is undefined.
            LEAST(1.0, GREATEST(-1.0,
                COS(RADIANS(s.latitude)) * COS(RADIANS(m.latitude)) *
                COS(RADIANS(m.longitude) - RADIANS(s.longitude)) +
                SIN(RADIANS(s.latitude)) * SIN(RADIANS(m.latitude))
            ))
        ) AS distance_km
    FROM Shop_Rental_Listing s
    CROSS JOIN MRT_Station_Info m
),
nearest_stations AS (
    -- One row per distinct (listing, station) pair within 1 km.
    SELECT
        d.district,
        d.case_name,
        d.address,
        d.village,
        d.station_id,
        d.station_name,
        d.monthly_rent,
        d.area_ping,
        MIN(d.distance_km) AS nearest_distance_km
    FROM distances d
    GROUP BY d.district, d.case_name, d.address, d.village, d.station_id, d.station_name, d.monthly_rent, d.area_ping
    HAVING MIN(d.distance_km) <= 1
)
SELECT
    ns.district,
    ns.village,
    ns.address,
    ns.case_name,
    ns.station_id,
    ns.station_name,
    ns.monthly_rent,
    ns.area_ping,
    mba.name, -- business area name
    mba.tag
FROM nearest_stations ns
JOIN MRT_Business_Area mba ON ns.station_id = mba.station_id -- attach business area data
ORDER BY ns.village, ns.nearest_distance_km ASC;
""")

┌──────────┬─────────┬──────────────────────────┬───────────────────────────────────────────┬────────────┬──────────────┬──────────────┬───────────┬──────────────────────┬──────────┐
│ district │ village │         address          │                 case_name                 │ station_id │ station_name │ monthly_rent │ area_ping │         name         │   tag    │
│ varchar  │ varchar │         varchar          │                  varchar                  │  varchar   │   varchar    │    int64     │   int64   │       varchar        │ varchar  │
├──────────┼─────────┼──────────────────────────┼───────────────────────────────────────────┼────────────┼──────────────┼──────────────┼───────────┼──────────────────────┼──────────┤
│ 南港區   │ 三重里  │ 台北市南港區經貿二路     │ ✯經貿聯合廣場-南港軟體園區站旁商業金店面✯ │ BR23       │ 南港軟體園區 │       648000 │       121 │ 中國信託金融園區     │ 玩在商圈 │
│ 南港區   │ 三重里  │ 台北市南港區經貿二路     │ 經貿園區挑高金店面                        │ BR23       │ 南港軟體園區 │       110000 │        44 │ 中國信託金融園區     │ 玩在商圈 

## 店面附近捷運站近兩年平均人流

In [7]:
# Average MRT passenger flow (entries + exits) per time period, over the last two years,
# for every MRT station within 1 km of a shop listing.
#
# NOTE: the two-year window is relative to CURRENT_DATE, so the result changes with the
# day the notebook is run.
con.sql("""--sql
WITH distances AS (
    -- Great-circle distance (spherical law of cosines, Earth radius 6371 km).
    SELECT
        s.district,
        s.village,
        s.case_name,
        m.station_id,
        m.station_name,
        6371 * ACOS(
            -- Clamp to [-1, 1]: rounding can push the cosine slightly past 1 when
            -- the two points (nearly) coincide, where ACOS is undefined.
            LEAST(1.0, GREATEST(-1.0,
                COS(RADIANS(s.latitude)) * COS(RADIANS(m.latitude)) *
                COS(RADIANS(m.longitude) - RADIANS(s.longitude)) +
                SIN(RADIANS(s.latitude)) * SIN(RADIANS(m.latitude))
            ))
        ) AS distance_km
    FROM Shop_Rental_Listing s
    CROSS JOIN MRT_Station_Info m
),
flow_data AS (
    -- Mean flow per station and time period.
    SELECT
        mf.time_period,
        mf.station_id,
        mi.station_name,
        ROUND(AVG(mf.entrance_count + mf.exit_count)) AS avg_flow
    FROM MRT_Flow_Record mf
    LEFT JOIN MRT_Station_Info mi ON mf.station_id = mi.station_id
    WHERE mf.date >= CURRENT_DATE - INTERVAL '2 years' -- most recent two years only
    AND mf.time_period NOT BETWEEN 2 AND 5 -- skip time periods 2-5 (presumably hours the MRT is closed; confirm)
    GROUP BY mf.station_id, mi.station_name, mf.time_period
),
nearest_stations AS (
    -- One row per distinct (listing, station) pair within 1 km.
    SELECT
        d.district,
        d.case_name,
        d.village,
        d.station_id,
        d.station_name,
        MIN(d.distance_km) AS nearest_distance_km
    FROM distances d
    GROUP BY d.district, d.case_name, d.village, d.station_id, d.station_name
    HAVING MIN(d.distance_km) <= 1
)
SELECT
    ns.district,
    ns.case_name,
    ns.village,
    ns.station_id,
    ns.station_name,
    ns.nearest_distance_km,
    fd.time_period,
    fd.avg_flow
FROM nearest_stations ns
JOIN flow_data fd ON ns.station_id = fd.station_id
ORDER BY ns.village, ns.nearest_distance_km, fd.time_period ASC;
""")

┌──────────┬──────────────────────┬─────────┬────────────┬──────────────┬─────────────────────┬─────────────┬──────────┐
│ district │      case_name       │ village │ station_id │ station_name │ nearest_distance_km │ time_period │ avg_flow │
│ varchar  │       varchar        │ varchar │  varchar   │   varchar    │       double        │    int64    │  double  │
├──────────┼──────────────────────┼─────────┼────────────┼──────────────┼─────────────────────┼─────────────┼──────────┤
│ 北投區   │ 全新黃金店面Ｓ６     │ 一德里  │ R25        │ 關渡         │ 0.18402785721499842 │           0 │    112.0 │
│ 北投區   │ 全新黃金店面Ｓ６     │ 一德里  │ R25        │ 關渡         │ 0.18402785721499842 │           1 │      8.0 │
│ 北投區   │ 全新黃金店面Ｓ６     │ 一德里  │ R25        │ 關渡         │ 0.18402785721499842 │           6 │    411.0 │
│ 北投區   │ 全新黃金店面Ｓ６     │ 一德里  │ R25        │ 關渡         │ 0.18402785721499842 │           7 │   1513.0 │
│ 北投區   │ 全新黃金店面Ｓ６     │ 一德里  │ R25        │ 關渡         │ 0.18402785721499842 │           8 │   

## 店面附近 Ubike 近兩年平均人流

In [48]:
# Average Ubike usage (rentals + returns) per time period, over the last two years,
# for every Ubike station within 1 km of a shop listing.
#
# NOTE: the two-year window is relative to CURRENT_DATE, so the result changes with the
# day the notebook is run.
con.sql("""--sql
WITH distances AS (
    -- Great-circle distance (spherical law of cosines, Earth radius 6371 km).
    SELECT
        s.district,
        s.village,
        s.case_name,
        u.station_id,
        u.station_name,
        6371 * ACOS(
            -- Clamp to [-1, 1]: rounding can push the cosine slightly past 1 when
            -- the two points (nearly) coincide, where ACOS is undefined.
            LEAST(1.0, GREATEST(-1.0,
                COS(RADIANS(s.latitude)) * COS(RADIANS(u.latitude)) *
                COS(RADIANS(u.longitude) - RADIANS(s.longitude)) +
                SIN(RADIANS(s.latitude)) * SIN(RADIANS(u.latitude))
            ))
        ) AS distance_km
    FROM Shop_Rental_Listing s
    CROSS JOIN Ubike_Station_Info u
),
flow_data AS (
    -- Mean usage per station and time period.
    SELECT
        uf.time_period,
        uf.station_id,
        ui.station_name,
        ROUND(AVG(uf.rent_count + uf.return_count)) AS avg_flow
    FROM Ubike_Station_Rental_Record uf
    LEFT JOIN Ubike_Station_Info ui ON uf.station_id = ui.station_id
    WHERE uf.date >= CURRENT_DATE - INTERVAL '2 years' -- most recent two years only
    GROUP BY uf.station_id, ui.station_name, uf.time_period
),
nearest_stations AS (
    -- One row per distinct (listing, station) pair within 1 km.
    SELECT
        d.district,
        d.case_name,
        d.village,
        d.station_id,
        d.station_name,
        MIN(d.distance_km) AS nearest_distance_km
    FROM distances d
    GROUP BY d.district, d.case_name, d.village, d.station_id, d.station_name
    HAVING MIN(d.distance_km) <= 1
)
SELECT
    ns.district,
    ns.case_name,
    ns.village,
    ns.station_id,
    ns.station_name,
    ns.nearest_distance_km,
    fd.time_period,
    fd.avg_flow
FROM nearest_stations ns
JOIN flow_data fd ON ns.station_id = fd.station_id
ORDER BY ns.village, ns.nearest_distance_km, fd.time_period ASC;
""")

┌──────────┬────────────────────┬─────────┬────────────┬─────────────────────┬─────────────────────┬─────────────┬──────────┐
│ district │     case_name      │ village │ station_id │    station_name     │ nearest_distance_km │ time_period │ avg_flow │
│ varchar  │      varchar       │ varchar │   int64    │       varchar       │       double        │    int64    │  double  │
├──────────┼────────────────────┼─────────┼────────────┼─────────────────────┼─────────────────────┼─────────────┼──────────┤
│ 北投區   │ 全新黃金店面Ｓ６   │ 一德里  │  500109011 │ 捷運關渡站(1號出口) │ 0.15240317944583845 │           0 │     11.0 │
│ 北投區   │ 全新黃金店面Ｓ６   │ 一德里  │  500109011 │ 捷運關渡站(1號出口) │ 0.15240317944583845 │           1 │      7.0 │
│ 北投區   │ 全新黃金店面Ｓ６   │ 一德里  │  500109011 │ 捷運關渡站(1號出口) │ 0.15240317944583845 │           2 │      6.0 │
│ 北投區   │ 全新黃金店面Ｓ６   │ 一德里  │  500109011 │ 捷運關渡站(1號出口) │ 0.15240317944583845 │           3 │      6.0 │
│ 北投區   │ 全新黃金店面Ｓ６   │ 一德里  │  500109011 │ 捷運關渡站(1號出口) │ 0.15240317944583845 │  

## MRT 人流

In [30]:
# Total MRT passenger flow (entries + exits) per station, date and time period,
# enriched with the station's name, address and coordinates.
con.sql("""--sql
    SELECT
        m.station_id,
        m.date,
        m.time_period,
        mi.station_name,
        mi.address,
        mi.longitude,
        mi.latitude,
        SUM(m.entrance_count + m.exit_count) AS flow
    FROM MRT_Flow_Record m
    LEFT JOIN MRT_Station_Info mi ON m.station_id = mi.station_id
    -- Qualified with the flow table so the filter stays unambiguous in this join.
    WHERE m.time_period NOT BETWEEN 2 AND 5 -- skip time periods 2-5 (presumably hours the MRT is closed; confirm)
    GROUP BY m.station_id, m.date, m.time_period, mi.station_name, mi.address, mi.longitude, mi.latitude
    ORDER BY m.station_id ASC, m.date ASC, m.time_period ASC
""")

┌────────────┬────────────┬─────────────┬──────────────┬────────────────────────────────┬───────────┬──────────┬────────┐
│ station_id │    date    │ time_period │ station_name │            address             │ longitude │ latitude │  flow  │
│  varchar   │    date    │    int64    │   varchar    │            varchar             │  double   │  double  │ int128 │
├────────────┼────────────┼─────────────┼──────────────┼────────────────────────────────┼───────────┼──────────┼────────┤
│ BL01       │ 2017-01-01 │           0 │ 頂埔         │ 新北市土城區中央路四段51-6號B3 │  121.4205 │ 24.96012 │    182 │
│ BL01       │ 2017-01-01 │           1 │ 頂埔         │ 新北市土城區中央路四段51-6號B3 │  121.4205 │ 24.96012 │      9 │
│ BL01       │ 2017-01-01 │           6 │ 頂埔         │ 新北市土城區中央路四段51-6號B3 │  121.4205 │ 24.96012 │    221 │
│ BL01       │ 2017-01-01 │           7 │ 頂埔         │ 新北市土城區中央路四段51-6號B3 │  121.4205 │ 24.96012 │    395 │
│ BL01       │ 2017-01-01 │           8 │ 頂埔         │ 新北市土城區中央路四段51-6號B3 │  121

## UBike 人流

In [31]:
# Total Ubike usage (rentals + returns) per station, date and time period,
# enriched with the station's name, address and coordinates.
ubike_flow_sql = """--sql
    SELECT u.station_id, u.date, u.time_period, ui.station_name, ui.address, ui.longitude, ui.latitude, SUM(u.rent_count + u.return_count) as flow
    FROM Ubike_Station_Rental_Record u
    LEFT JOIN Ubike_Station_Info ui ON u.station_id = ui.station_id
    GROUP BY u.station_id, u.date, u.time_period, ui.station_name, ui.address, ui.longitude, ui.latitude
    ORDER BY u.station_id ASC, u.date ASC, u.time_period ASC
"""
con.sql(ubike_flow_sql)

┌────────────┬────────────┬─────────────┬─────────────────────┬─────────────────────────────┬───────────┬──────────┬────────┐
│ station_id │    date    │ time_period │    station_name     │           address           │ longitude │ latitude │  flow  │
│   int64    │    date    │    int64    │       varchar       │           varchar           │  double   │  double  │ int128 │
├────────────┼────────────┼─────────────┼─────────────────────┼─────────────────────────────┼───────────┼──────────┼────────┤
│  500101001 │ 2022-12-31 │          16 │ 捷運科技大樓站      │ 大安區復興南路二段235號前   │  121.5436 │ 25.02605 │     13 │
│  500101001 │ 2022-12-31 │          17 │ 捷運科技大樓站      │ 大安區復興南路二段235號前   │  121.5436 │ 25.02605 │      7 │
│  500101001 │ 2022-12-31 │          18 │ 捷運科技大樓站      │ 大安區復興南路二段235號前   │  121.5436 │ 25.02605 │      3 │
│  500101001 │ 2022-12-31 │          19 │ 捷運科技大樓站      │ 大安區復興南路二段235號前   │  121.5436 │ 25.02605 │      5 │
│  500101001 │ 2022-12-31 │          20 │ 捷運科技大樓站      │ 大安區復興南路

## 村里人口、年齡、性別

In [66]:
# Per-village demographics next to the figures of the surrounding district.
#
# Output columns:
#   * avg_income / median_income and their district averages (nearby_*_income);
#   * male/female share and age-group shares of the village population (avg_*_ratio);
#   * age-group shares of the whole district population (nearby_*_ratio).
#
# Two fixes compared with the earlier version of this query:
#   * village totals are partitioned by (district, village), the same key the join uses,
#     so villages sharing a name in different districts are no longer merged;
#   * district ratios divide the district's age-group total by the district population,
#     instead of dividing a single village's age-group count by the district population.
con.sql("""--sql
    SELECT
        vi.district,
        vi.village,
        vi.household_count,
        vi.avg_income,
        ROUND(AVG(vi.avg_income) OVER by_district) AS nearby_avg_income,
        vi.median_income,
        ROUND(AVG(vi.median_income) OVER by_district) AS nearby_median_income,
        -- Shares within the village itself.
        ROUND(vi.male_population * 1.0 / SUM(vi.male_population + vi.female_population) OVER by_village, 4) AS male_population_ratio,
        ROUND(vi.female_population * 1.0 / SUM(vi.male_population + vi.female_population) OVER by_village, 4) AS female_population_ratio,
        ROUND(v.age_0_9 * 1.0 / SUM(vi.male_population + vi.female_population) OVER by_village, 4) AS avg_0_9_ratio,
        ROUND(v.age_10_19 * 1.0 / SUM(vi.male_population + vi.female_population) OVER by_village, 4) AS avg_10_19_ratio,
        ROUND(v.age_20_29 * 1.0 / SUM(vi.male_population + vi.female_population) OVER by_village, 4) AS avg_20_29_ratio,
        ROUND(v.age_30_64 * 1.0 / SUM(vi.male_population + vi.female_population) OVER by_village, 4) AS avg_30_64_ratio,
        ROUND(v.age_over_65 * 1.0 / SUM(vi.male_population + vi.female_population) OVER by_village, 4) AS avg_over_65_ratio,
        -- Shares within the whole district (district age-group total / district population).
        ROUND(CAST(SUM(v.age_0_9) OVER by_district AS DOUBLE) / SUM(vi.male_population + vi.female_population) OVER by_district, 4) AS nearby_0_9_ratio,
        ROUND(CAST(SUM(v.age_10_19) OVER by_district AS DOUBLE) / SUM(vi.male_population + vi.female_population) OVER by_district, 4) AS nearby_10_19_ratio,
        ROUND(CAST(SUM(v.age_20_29) OVER by_district AS DOUBLE) / SUM(vi.male_population + vi.female_population) OVER by_district, 4) AS nearby_20_29_ratio,
        ROUND(CAST(SUM(v.age_30_64) OVER by_district AS DOUBLE) / SUM(vi.male_population + vi.female_population) OVER by_district, 4) AS nearby_30_64_ratio,
        ROUND(CAST(SUM(v.age_over_65) OVER by_district AS DOUBLE) / SUM(vi.male_population + vi.female_population) OVER by_district, 4) AS nearby_over_65_ratio
    FROM Village_Info vi
    LEFT JOIN Village_Population_By_Age v ON vi.district = v.district AND vi.village = v.village
    WINDOW
        by_village AS (PARTITION BY vi.district, vi.village),
        by_district AS (PARTITION BY vi.district)
""")

┌──────────┬─────────┬─────────────────┬────────────┬───────────────────┬───────────────┬──────────────────────┬───────────────────────┬─────────────────────────┬───────────────┬─────────────────┬─────────────────┬─────────────────┬───────────────────┬──────────────────┬────────────────────┬────────────────────┬────────────────────┬──────────────────────┐
│ district │ village │ household_count │ avg_income │ nearby_avg_income │ median_income │ nearby_median_income │ male_population_ratio │ female_population_ratio │ avg_0_9_ratio │ avg_10_19_ratio │ avg_20_29_ratio │ avg_30_64_ratio │ avg_over_65_ratio │ nearby_0_9_ratio │ nearby_10_19_ratio │ nearby_20_29_ratio │ nearby_30_64_ratio │ nearby_over_65_ratio │
│ varchar  │ varchar │      int64      │   int64    │      double       │     int64     │        double        │        double         │         double          │    double     │     double      │     double      │     double      │      double       │      double      │       double

## 競爭市場

In [54]:
con.sql("""--sql
    SELECT district, village, business_type, business_sub_type, COUNT(business_name) as shop_cnt, ROUND(AVG(capital)) as avg_capital
    FROM Business_Operation
    GROUP BY district, village, business_type, business_sub_type
""")

┌──────────┬─────────┬────────────────────────┬──────────────────────────────┬──────────┬─────────────┐
│ district │ village │     business_type      │      business_sub_type       │ shop_cnt │ avg_capital │
│ varchar  │ varchar │        varchar         │           varchar            │  int64   │   double    │
├──────────┼─────────┼────────────────────────┼──────────────────────────────┼──────────┼─────────────┤
│ 大安區   │ 光武里  │ 批發及零售業           │ 文教育樂用品零售業           │        2 │    150000.0 │
│ 中山區   │ 中庄里  │ 營建工程業             │ 建物完工裝修工程業           │        3 │    130000.0 │
│ 內湖區   │ 五分里  │ 住宿及餐飲業           │ 餐食業                       │       11 │    150727.0 │
│ 中山區   │ 朱園里  │ 教育業                 │ 其他教育業                   │        2 │    150000.0 │
│ 士林區   │ 福華里  │ 批發及零售業           │ 綜合商品零售業               │        1 │    100000.0 │
│ 松山區   │ 精忠里  │ 批發及零售業           │ 布疋及服飾品批發業           │        1 │    200000.0 │
│ 松山區   │ 精忠里  │ 批發及零售業           │ 文教育樂用品批發業           │        1 │  