In [1]:
import os
import re
import time
import datetime
from tqdm import tqdm 
import pandas as pd
import requests

In [2]:
import sys
from pathlib import Path
# Project root: the notebook's current working directory (printed below).
# NOTE(review): the earlier comment described walking up two parent directories to
# reach the repository root, but the code uses the working directory unchanged —
# confirm which one is intended.
project_root = Path.cwd()
print(project_root)
# Register the root on the import path only once, so re-running this cell does not
# pile up duplicate sys.path entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

from config import plvrurls_1140416, plvr_column_names
from utils import combined_df, parse_admin_region

c:\PyWorkspace\presale-scraper\presale_scraper


In [None]:
# Name of the URL collection imported from config; the part after the underscore
# ("1140416") is handed to combined_df together with the collection itself.
import_name = 'plvrurls_1140416'
input_time = import_name.partition('_')[2]

df = combined_df(plvrurls_1140416, input_time)

In [None]:
# Make sure the output directory exists. exist_ok=True turns the call into a no-op
# when the directory is already there, so no separate exists() check is needed.
os.makedirs('../data', exist_ok=True)

# Use today's date (YYYYMMDD) as part of the output file name.
today = datetime.datetime.now().strftime('%Y%m%d')
# Write without the index column; utf-8-sig prefixes the file with a UTF-8 BOM.
df.to_csv(f'../data/plvr_rawdata_{today}.csv', index=False, encoding='utf-8-sig')

In [8]:
# Reload the raw-data snapshot whose file name carries the date 20250417, reading the
# AA12, cp and s columns as strings instead of letting pandas infer their types.
df = pd.read_csv(
    '../data/plvr_rawdata_20250417.csv',
    encoding='utf-8-sig',
    dtype={column: 'str' for column in ('AA12', 'cp', 's')},
)

In [9]:
# Relabel the columns using the plvr_column_names mapping from config; rename returns
# a new frame, which is bound back to df.
df = df.rename(columns=plvr_column_names)

In [10]:
# Keep only the columns used for processing. Selecting first and copying afterwards
# gives proc_df its own data, so adding columns to it later does not trigger
# SettingWithCopyWarning, and columns that are dropped anyway are never copied.
proc_df = df[[
    "縣市", "坐落街道", "建物型態", "社區名稱", "棟號", "交易日期", "總面積", "交易總價",
    "建物單價", "樓層", "解約情形", "備查編號", "備註", "主要用途", "使用分區", "車位總價", "車位筆數",
    "經度", "緯度",
]].copy()

In [11]:
# Add an "行政區" (administrative district) column by applying parse_admin_region
# to the street-address column.
proc_df["行政區"] = proc_df["坐落街道"].apply(parse_admin_region)
# Add a "交易年月" (transaction year-month) column by dropping the day part of the
# date string, e.g. "114/02/20" -> "11402".
proc_df['交易年月'] = proc_df['交易日期'].str.replace(r'(\d+)/(\d+)/\d+', r'\1\2', regex=True)

# Convert "交易總價" (total price) and "建物單價" (building unit price) from
# comma-grouped strings to numbers; values that cannot be parsed become NaN.
proc_df['交易總價'] = pd.to_numeric(proc_df['交易總價'].str.replace(',', ''), errors='coerce')
# Divide by 10,000 and round to a whole number. NaN is replaced with 0 before the
# integer cast (the same convention 建物單價 uses below); without it a single
# coerced value makes astype(int) raise.
proc_df['交易總價'] = (proc_df['交易總價'] / 10000).round(0).fillna(0).astype(int)

proc_df['建物單價'] = pd.to_numeric(proc_df['建物單價'].str.replace(',', ''), errors='coerce')
# NOTE(review): round(1) followed by astype(int) discards the decimal again —
# confirm whether one decimal place or a whole number is intended.
proc_df['建物單價'] = (proc_df['建物單價'] / 10000).round(1).fillna(0).astype(int)
proc_df

Unnamed: 0,縣市,坐落街道,建物型態,社區名稱,棟號,交易日期,總面積,交易總價,建物單價,樓層,...,備查編號,備註,主要用途,使用分區,車位總價,車位筆數,經度,緯度,行政區,交易年月
0,臺北市,中山區吉林路、民生東路口,住宅大樓(11層含以上有電梯),嘉潤一御,C-11F號,114/02/20,19.07,2520,132,11/11,...,,,住家用,商,,0,121.530063,25.057760,中山區,11402
1,臺北市,萬華區雙園里莒光路347巷,住宅大樓(11層含以上有電梯),中正城閱.,甲棟C1-4F號,114/02/22,55.66,4136,84,4/14,...,,建照資料於110.05.24變更,住家用,住,300,1,121.497135,25.030122,萬華區,11402
2,臺北市,中正區愛國西路,住宅大樓(11層含以上有電梯),南風裡,A號,114/02/04,33.16,4245,128,6/14,...,,,住家用,商,,0,121.508015,25.037244,中正區,11402
3,臺北市,萬華區中華路一段,住宅大樓(11層含以上有電梯),ASTER ONE,A12-02F號,114/02/03,21.64,2449,113,2/15,...,,,住商用,商,,0,121.507376,25.040831,萬華區,11402
4,臺北市,萬華區中華路一段,住宅大樓(11層含以上有電梯),ASTER ONE,A12-03F號,114/02/03,39.83,3380,116,3/15,...,,"此戶房價含露臺,面積另計1.24坪",住商用,商,400,1,121.507376,25.040831,萬華區,11402
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
412147,花蓮縣,花蓮市富裕一街,華廈(10層含以下有電梯),璽鈺亞崴,A棟3樓C號,110/03/06,35.70,737,23,3/10,...,,,住家用,住,120,1,121.596907,23.995866,花蓮市,11003
412148,花蓮縣,花蓮市富裕一街,華廈(10層含以下有電梯),璽鈺亞崴,A棟2樓C號,110/03/06,35.70,691,22,2/10,...,,,住家用,住,100,1,121.596907,23.995866,花蓮市,11003
412149,花蓮縣,吉安鄉昌隆三街,華廈(10層含以下有電梯),欣悅敦峰,B棟02號,110/03/07,48.48,940,22,5/10,...,,,住家用,住,120,1,121.581537,23.969786,吉安鄉,11003
412150,花蓮縣,吉安鄉昌隆三街,華廈(10層含以下有電梯),欣悅敦峰,B棟02號,110/03/31,35.93,785,21,4/10,...,,,住家用,住,,0,121.581537,23.969786,吉安鄉,11003
