In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import pyproj
import geopandas as gpd
from shapely.geometry import Point

In [2]:
# Load the training set (path is relative to the notebook's working directory)
# and preview its first rows.
train_csv_path = "30_Training Dataset_V2/training_data.csv"
train = pd.read_csv(train_csv_path)
train.head(5)

Unnamed: 0,ID,縣市,鄉鎮市區,路名,土地面積,使用分區,移轉層次,總樓層數,主要用途,主要建材,...,建物面積,車位面積,車位個數,橫坐標,縱坐標,備註,主建物面積,陽台面積,附屬建物面積,單價
0,TR-1,台北市,大安區,敦化南路二段,-0.256716,,11,11,住家用,鋼筋混凝土造,...,-0.174154,-0.819326,0.0,305266,2768378,,0.393926,0.1837,-0.438452,4.627714
1,TR-2,台北市,萬華區,水源路,0.100134,,7,12,住家用,鋼筋混凝土造,...,0.314204,-0.819326,0.0,300677,2767990,,-0.316131,0.608577,-0.438452,1.887258
2,TR-3,高雄市,鳳山區,北忠街,0.181921,,10,15,集合住宅,其他,...,0.423366,0.161624,1.0,184815,2504666,,-0.098871,-0.36062,1.525881,1.489072
3,TR-4,新北市,新莊區,福前街,0.085594,,9,14,集合住宅,鋼筋混凝土造,...,0.164249,0.524653,1.0,296653,2772355,,-0.071147,0.315088,0.231984,2.051217
4,TR-5,新北市,板橋區,文化路一段,-0.938116,,41,43,住家用,鋼骨造,...,0.985839,0.532377,1.0,297377,2768472,,0.791954,1.7194,-0.438452,3.269198


# 計算最近設施實際距離

In [3]:
# Load the landmark/facility table and preview its first rows.
landmark_csv_path = "landmark.csv"
facility_data = pd.read_csv(landmark_csv_path)
facility_data.head()

Unnamed: 0,縣市代號,縣市,大類別,小類別代號,小類別名稱,地標名稱,緯度,經度
0,A,臺北市,交通運輸,11,火車站,臺北市臺鐵南港車站,25.053541,121.607264
1,A,臺北市,交通運輸,11,火車站,臺北市臺鐵松山車站,25.049244,121.578933
2,A,臺北市,交通運輸,11,火車站,臺北市臺鐵臺北車站,25.047379,121.516891
3,A,臺北市,交通運輸,11,火車站,松山火車站,25.049388,121.579563
4,A,臺北市,交通運輸,11,火車站,南港火車站,25.053683,121.606943


In [5]:
# (rows, columns) of the landmark table.
facility_data.shape

(58323, 8)

## 篩選重要設施
- 交通：火車站、客運站、捷運站、高鐵站
- 教育：國小、國中、幼稚園、托兒所
- 公園：一般公園、里鄰公園
- 娛樂：娛樂設施、商店、超級市場、大型購物中心、百貨公司
- 其他：醫療保健、金融機構、加油站
- 嫌惡設施：產業用水及設施、工廠、主要道路建設

In [4]:
# Distinct values of the facility sub-category name column (小類別名稱).
facility_data['小類別名稱'].unique()

array(['火車站', '客運站', '捷運站', '飯店', '碼頭', '高鐵站', '航空站', '主要道路建設', '國小',
       '國中', '高中', '專科', '大學及研究所', '幼稚園、托兒所', '傳統市場', '生活百貨量販', '一般公園',
       '里鄰公園', '觀光遊憩設施', '停車場地', '服務性設施', '行政機關', '醫療保健', '加油站', '百貨公司',
       '金融機構', '娛樂設施', '電力資源', '產業用水及設施', '工廠', '商店', '超級市場', '大型購物中心'],
      dtype=object)

In [17]:
# Empty frame with one '最近<sub-category>' column per facility sub-category.
data = {'最近' + i: [] for i in facility_data['小類別名稱'].unique()}
df = pd.DataFrame(data)
df

Unnamed: 0,最近火車站,最近客運站,最近捷運站,最近飯店,最近碼頭,最近高鐵站,最近航空站,最近主要道路建設,最近國小,最近國中,...,最近加油站,最近百貨公司,最近金融機構,最近娛樂設施,最近電力資源,最近產業用水及設施,最近工廠,最近商店,最近超級市場,最近大型購物中心


In [19]:
def _twd97_to_wgs84_transformer():
    """Return the shared EPSG:3826 -> EPSG:4326 transformer, creating it on first use."""
    # Cached on the function object so that a row-wise DataFrame.apply does not
    # construct an identical Transformer for every single row.
    cached = getattr(_twd97_to_wgs84_transformer, "_instance", None)
    if cached is None:
        cached = pyproj.Transformer.from_crs(3826, 4326, always_xy=True)
        _twd97_to_wgs84_transformer._instance = cached
    return cached


def train_coordination(x):
    """Convert one row's projected coordinates to longitude/latitude.

    Args:
        x: Mapping or pandas row exposing '橫坐標' (x) and '縱坐標' (y),
            interpreted as EPSG:3826 coordinates.

    Returns:
        tuple: ``(lon, lat)`` in EPSG:4326. ``always_xy=True`` fixes the
        x-then-y (longitude-then-latitude) order of both input and output.
    """
    lon, lat = _twd97_to_wgs84_transformer().transform(x['橫坐標'], x['縱坐標'])
    return lon, lat

# Expand the (lon, lat) tuple returned for each row into two new columns on train.
lon_lat = train.apply(train_coordination, axis=1, result_type='expand')
train[['lon', 'lat']] = lon_lat
# Copy of train without the original projected-coordinate columns.
new_train = train.drop(columns=['橫坐標', '縱坐標'])

In [21]:
# Convert train and facility_data to GeoDataFrames.
#
# Both geometries are longitude/latitude in degrees: train['lon'] / train['lat']
# are the output of the EPSG:3826 -> EPSG:4326 conversion, and the landmark table
# stores 經度 / 緯度 directly. They are therefore labelled EPSG:4326. Labelling
# degree coordinates as EPSG:3826 (a projected CRS) would mis-describe the data
# and corrupt any later to_crs() call.
train = gpd.GeoDataFrame(
    train,
    geometry=gpd.points_from_xy(train['lon'], train['lat']),
    crs="EPSG:4326",
)

facility_data = gpd.GeoDataFrame(
    facility_data,
    geometry=gpd.points_from_xy(facility_data['經度'], facility_data['緯度']),
    crs="EPSG:4326",
)

# NOTE(review): the former `new_train.crs = "EPSG:3826"` is intentionally omitted.
# new_train comes from DataFrame.drop on the pre-GeoDataFrame train and has no
# geometry column, so the assignment only attached an unused attribute.
# NOTE(review): with a geographic CRS set, geopandas may warn that distance()
# results are in degrees; the numeric values themselves are unchanged.

In [32]:
for i in facility_data['小類別名稱'].unique():
    # Select the rows of one facility sub-category (intended for computing its minimum distance to each house).
    # NOTE(review): this loop only rebinds specific_facility on every pass and computes nothing;
    # once it finishes, specific_facility holds the rows of the last sub-category only.
    specific_facility = facility_data[facility_data['小類別名稱']==i]

In [68]:
# Scale factor applied to planar distances measured in degrees (kilometres per degree).
_KM_PER_DEGREE = 101.77545


# Distance from one point to its nearest facility (not a count of nearby facilities).
def min_nearby_facility_distance(point, facility_data, max_distance=500):
    """Return the scaled distance from ``point`` to the closest facility.

    Args:
        point: Geometry passed to ``facility_data.geometry.distance``.
        facility_data: Object whose ``.geometry.distance(point)`` yields one
            distance per facility (e.g. a GeoDataFrame of facilities).
        max_distance: Unused; kept so existing callers keep working.

    Returns:
        float: the smallest distance multiplied by ``_KM_PER_DEGREE * 1000``.
        Returns NaN when ``facility_data`` has no rows (previously a KeyError).

    NOTE(review): assumes the geometries are in degrees, so the result is an
    approximate distance in metres. A single km-per-degree factor cannot be
    exact for both longitude and latitude differences -- confirm this accuracy
    is acceptable, or compute distances in a projected CRS instead.
    """
    # min() replaces sort_values().reset_index()[0]: same value, no full sort.
    nearest = facility_data.geometry.distance(point).min()
    # Same multiplication order as before so results are bit-for-bit identical.
    return nearest * _KM_PER_DEGREE * 1000

# For every facility sub-category, add a '最近<sub-category>' column holding each
# house's distance to the closest facility of that sub-category.
category_names = facility_data['小類別名稱']
for i in category_names.unique():
    # Restrict the facility table to the current sub-category only.
    specific_facility = facility_data.loc[category_names == i]
    train['最近' + i] = train.geometry.apply(
        min_nearby_facility_distance, facility_data=specific_facility
    )

In [75]:
# Display the training frame, which now carries the '最近<sub-category>' distance columns.
train

Unnamed: 0,ID,縣市,鄉鎮市區,路名,土地面積,使用分區,移轉層次,總樓層數,主要用途,主要建材,...,最近加油站,最近百貨公司,最近金融機構,最近娛樂設施,最近電力資源,最近產業用水及設施,最近工廠,最近商店,最近超級市場,最近大型購物中心
0,TR-1,台北市,大安區,敦化南路二段,-0.256716,,11,11,住家用,鋼筋混凝土造,...,998.473629,443.345269,447.425044,2232.721884,99927.028611,90883.193360,137048.919105,171415.420242,9907.627773,41995.153449
1,TR-2,台北市,萬華區,水源路,0.100134,,7,12,住家用,鋼筋混凝土造,...,448.386374,1825.370372,541.116917,652.635054,97163.858484,91140.483076,136458.703475,169114.219348,5745.826358,44430.815619
2,TR-3,高雄市,鳳山區,北忠街,0.181921,,10,15,集合住宅,其他,...,448.784165,4321.647967,263.236560,1559.977525,5113.742660,1343.918776,2903.279491,155.658006,827.162783,129753.557176
3,TR-4,新北市,新莊區,福前街,0.085594,,9,14,集合住宅,鋼筋混凝土造,...,343.276053,4694.998177,622.803722,2064.189161,98663.458620,95797.250255,140391.552888,171151.625585,7679.115919,50100.306317
4,TR-5,新北市,板橋區,文化路一段,-0.938116,,41,43,住家用,鋼骨造,...,542.163259,1370.237578,276.579824,1171.327466,95876.316870,92156.041482,136828.895462,168159.512681,4247.781629,46911.495176
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11746,TR-11747,桃園市,八德區,介壽路二段,0.706189,其他,4,5,住家用,鋼筋混凝土造,...,377.606655,5690.206392,304.761853,1710.102972,80263.605376,85192.933817,128810.504307,153456.518531,16973.628093,31524.539728
11747,TR-11748,新竹市,新竹市,東南街,-0.225809,,2,7,住家用,鋼筋混凝土造,...,855.742477,779.369033,539.334236,701.764388,60371.470557,54175.583089,123565.866291,129064.825771,52933.989221,185.577977
11748,TR-11749,新北市,汐止區,建成路,-0.943908,,15,17,住家用,鋼筋混凝土造,...,684.966692,6929.691306,408.496397,5517.793479,110716.641176,95484.678845,143350.243687,181232.023292,22506.447487,41803.565118
11749,TR-11750,新北市,土城區,學士路,-1.147111,,12,16,住家用,鋼筋混凝土造,...,982.562964,3068.482496,182.365664,2652.481829,91484.577134,88429.041654,132682.299429,163725.439724,497.164625,45039.429924
