In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import FunctionTransformer
import torch

In [None]:
# Load the public test set.
path = 'D:/永豐攻房戰/30_Public Dataset_Public Sumission Template_v2/public_dataset.csv'
public_df = pd.read_csv(path)

# Target transform pair: log1p forward, expm1 as its inverse.
scaler_y = FunctionTransformer(np.log1p, np.expm1)

# Split the rows by the city/county column.
city_group = public_df.groupby('縣市')
main_six = ['台北市', '新北市', '桃園市', '台中市', '台南市', '高雄市']
(taipei_df, newtaipei_df, taoyuan_df,
 taichung_df, tainan_df, kaoshung_df) = [city_group.get_group(city) for city in main_six]

# Every city/county outside the six listed above is pooled into one frame.
other_cities = [city for city in city_group.groups if city not in main_six]
others_df = pd.concat([city_group.get_group(city) for city in other_cities])

# Fixed group order; used later to match predictions back to their IDs.
df_lst = [taipei_df, newtaipei_df, taoyuan_df, taichung_df, tainan_df, kaoshung_df, others_df]

# Report how many rows landed in each group.
group_labels = ['台北市:', '新北市:', '桃園市:', '台中市:', '台南市:', '高雄市:', '其他:']
for group_label, group_df in zip(group_labels, df_lst):
    print(group_label, len(group_df))

# Standardise the numeric features of every group and store the result in a
# dict keyed by group name; each entry is the input for that group's network.
#
# The target column is deliberately not read here: it is not needed for
# inference, so this cell also works on files that do not contain it.
#
# NOTE(review): the scaler is fit on the rows being predicted, not on training
# data. If the networks were trained on features standardised with training-set
# statistics, the scaler saved at training time should be applied here instead
# -- confirm against the training notebook.
input_dict = {}
names = ['taipei', 'newtaipei', 'taoyuan', 'taichung', 'tainan', 'kaoshung', 'others']
feature_cols = ['土地面積', '移轉層次', '總樓層數', '屋齡', '建物面積', '車位面積', '車位個數', '橫坐標', '縱坐標', '主建物面積', '陽台面積', '附屬建物面積']  # 12 features
for name, df in zip(names, df_lst):  # names and df_lst share the same group order
    numeric_data = df[feature_cols].to_numpy()
    scaler_X = StandardScaler()
    X_scaled = scaler_X.fit_transform(numeric_data)
    input_dict[name] = X_scaled
    

In [None]:
# Load one trained network per city group.
from ANN_model import Taipei_features12_NN, NewTaipei_features12_NN, Taoyuan_features12_NN, Taichung_features12_NN, Tainan_features12_NN, Kaoshung_features12_NN, Others_features12_NN

# (network class, checkpoint path) pairs, in the same group order as `names`.
model_specs = [
    (Taipei_features12_NN, 'Models/taipei_features12_model1.pt'),
    (NewTaipei_features12_NN, 'Models/newtaipei_features12_model1.pt'),
    (Taoyuan_features12_NN, 'Models/taoyuan_features12_model1.pt'),
    (Taichung_features12_NN, 'Models/taichung_features12_model1.pt'),
    (Tainan_features12_NN, 'Models/tainan_features12_model1_1.pt'),
    (Kaoshung_features12_NN, 'Models/kaoshung_features12_model1.pt'),
    (Others_features12_NN, 'Models/others_features12_model1.pt'),
]

models = []
for model_cls, weight_path in model_specs:
    net = model_cls()
    # map_location='cpu': inference below runs on CPU tensors and converts the
    # output with .numpy(), so the weights must be on CPU even if the
    # checkpoint was saved from a CUDA device.
    net.load_state_dict(torch.load(weight_path, map_location='cpu'))
    net.eval()  # switch to inference mode
    models.append(net)

# Keep the individual names so other cells can keep referring to them.
model1, model2, model3, model4, model5, model6, model7 = models

# Run each network on the input of its own group and collect (ID, pred) rows.
pred_df_lst = []
group_models = [model1, model2, model3, model4, model5, model6, model7]
for name, group_df, model in zip(names, df_lst, group_models):
    features = torch.from_numpy(input_dict[name]).type(torch.FloatTensor)
    with torch.no_grad():  # inference only; no autograd graph needed
        log_pred = model(features).detach().cpu().numpy()
    # Undo the log1p target transform, then flatten so the column is 1-D.
    # Assumes the network returns one value per row, shape (N,) or (N, 1) -- TODO confirm.
    y_pred = scaler_y.inverse_transform(log_pred).reshape(-1)
    # Pair IDs and predictions by position. The group frame still carries the
    # row labels of the full dataset, so an index-aligned concat along axis=1
    # would mismatch rows and fill the gaps with NaN.
    pred_df_lst.append(pd.DataFrame({'ID': group_df['ID'].to_numpy(), 'pred': y_pred}))
merged_pred_df = pd.concat(pred_df_lst, axis=0, ignore_index=True)  # stack all groups into one frame
merged_pred_df