In [1125]:
import pymysql
import matplotlib
import sklearn
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import RandomizedSearchCV    # iter>60 的話有 95%找到距離最佳解 5%的答案
from scipy.stats import randint
from sklearn.linear_model import SGDRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import ElasticNet
from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.model_selection import cross_val_score
from sklearn.neural_network import MLPRegressor
from sklearn.externals import joblib

# import資料跟前處理

In [23]:
conn = pymysql.connect(host='localhost',
                      user='root',
                      password='',
                      db='ab106',
                      charset='utf8',
                      autocommit=True,
                      cursorclass=pymysql.cursors.DictCursor)
c = conn.cursor()

In [74]:
data = pd.read_sql_query("""SELECT * FROM monthly_index join real_taipei_analysis on month = tx_month join quarter_index on tx_quarter = quarter""", conn)

In [75]:
m = 0
for value in data['mrt_count']:
    if value > 0:
        data.loc[ m, 'mrt_count'] = value
    else:
        data.loc[ m, 'mrt_count'] = 0
    m = m+1
    
m = 0
for value in data['school_count']:
    if value > 0:
        data.loc[ m, 'school_count'] = value
    else:
        data.loc[ m, 'school_count'] = 0
    m = m+1

In [76]:
#將樓層在地下的資料去除
m = 0
for value in data['floor']:
    if int(value) < 0:
        data = data.drop(data.index[m])
        m = m-1
    m = m+1

In [77]:
data = data[data.type != '廠辦']
data = data[data.type != '其他']
data = data[data.type != '工廠']
data = data[data.type != '倉庫']
data = data[data.type != '農舍']
data = data[data.type != '辦公商業大樓']
data = data[data.type != '店面']
data = data[data.type != '店面(店鋪)']
data = data[data.type != '透天厝']

In [78]:
data = data.drop(['estate', 'addr', 'non_city_sec', 'non_city_set', 'tx_date', 'land_house_car', 'finish_date', 'seperate_bollean', 'car_type', 'car_mm', 'car_price', 'remark', 'id', 'tx_month', 'tx_quarter', 'month', 'sinyi_new_taipei', 'sinyi_taipei_zone', 'quarter', 'transaction%taiwan', 'transaction%newTaipei', 'priceTrendTaiwan', 'priceTrendNewTaipei', 'priceTrendTaipei', 'priceIncomeRatioTaiwan', 'priceIncomeRatioNewTaipei', 'houseMortgageTaiwan', 'houseMortgageNewTaipei', 'housePriceIndexTaiwan', 'housePriceIndexNewTaipei', 'housePriceIndexTaipei', 'sinyiNewTaipeiIndex', 'sinyiTaiwanIndex', 'lon', 'lat', 'priceIncomeRatioTaipei', 'houseMortgageTaipei', 'monitor', 'sec_type', 'purpose', 'city'], axis=1)

In [79]:
corr_matrix = data.corr()
corr_matrix["total_price"].sort_values(ascending=False)

total_price           1.000000
house_mm              0.764832
car_num               0.653450
land_mm               0.584681
unit_price            0.456993
house_num             0.368768
wc_num                0.309070
room_num              0.257979
rest_num              0.201764
manage_bollean        0.114940
GDPbyPerson           0.006234
mrt_count             0.005316
rent_index           -0.001094
m2                   -0.001146
m1b                  -0.003702
land_num             -0.004294
economicGroth        -0.009284
school_count         -0.009775
rate                 -0.013285
leading_index        -0.031334
cathayTaipeiIndex    -0.038719
price_index          -0.040444
sinyi_taipei         -0.041203
transaction%taipei   -0.050283
sinyiTaipeiIndex     -0.058457
age                  -0.205500
Name: total_price, dtype: float64

In [80]:
data['m1m2ratio'] = data['m1b'] / data['m2']
data = data.drop(['m1b', 'm2'], axis = 1)

In [81]:
data_songshan = data[data.sec == '松山區']
data_sinyi = data[data.sec == '信義區']
data_daan = data[data.sec == '大安區']
data_jhongshan = data[data.sec == '中山區']
data_jhongjheng = data[data.sec == '中正區']
data_datong = data[data.sec == '大同區']
data_wanhua = data[data.sec == '萬華區']
data_wunshan = data[data.sec == '文山區']
data_nangang = data[data.sec == '南港區']
data_neihu = data[data.sec == '內湖區']
data_shihlin = data[data.sec == '士林區']
data_beitou = data[data.sec == '北投區']

In [82]:
def priceOutlier(data):
    Q1_total = np.percentile(data['total_price'], 25)
    Q3_total = np.percentile(data['total_price'], 75)
    IHR_total = Q3_total - Q1_total
    upperLimit_total = Q3_total + (1.5 * IHR_total)
    lowerLimit_total = Q1_total - (1.5 * IHR_total)

    data = data[data.total_price <= upperLimit_total]
    data = data[data.total_price >= lowerLimit_total]
    
    Q1_unit = np.percentile(data['unit_price'], 25)
    Q3_unit = np.percentile(data['unit_price'], 75)
    IHR_unit = Q3_unit - Q1_unit
    upperLimit_unit = Q3_unit + (1.5 * IHR_unit)
    lowerLimit_unit = Q1_unit - (1.5 * IHR_unit)

    data = data[data.unit_price <= upperLimit_unit]
    data = data[data.unit_price >= lowerLimit_unit]
    
    return(data)

In [87]:
data_songshan = data_songshan.drop('sec', axis = 1)
data_sinyi = data_sinyi.drop('sec', axis = 1)
data_daan = data_daan.drop('sec', axis = 1)
data_jhongshan = data_jhongshan.drop('sec', axis = 1)
data_jhongjheng = data_jhongjheng.drop('sec', axis = 1)
data_datong = data_datong.drop('sec', axis = 1)
data_wanhua = data_wanhua.drop('sec', axis = 1)
data_wunshan = data_wunshan.drop('sec', axis = 1)
data_nangang = data_nangang.drop('sec', axis = 1)
data_neihu = data_neihu.drop('sec', axis = 1)
data_shihlin = data_shihlin.drop('sec', axis = 1)
data_beitou = data_beitou.drop('sec', axis = 1)

In [89]:
data_songshan = priceOutlier(data_songshan)
data_sinyi = priceOutlier(data_sinyi)
data_daan = priceOutlier(data_daan)
data_jhongshan = priceOutlier(data_jhongshan)
data_jhongjheng = priceOutlier(data_jhongjheng)
data_datong = priceOutlier(data_datong)
data_wanhua = priceOutlier(data_wanhua)
data_wunshan = priceOutlier(data_wunshan)
data_nangang = priceOutlier(data_nangang)
data_neihu = priceOutlier(data_neihu)
data_shihlin = priceOutlier(data_shihlin)
data_beitou = priceOutlier(data_beitou)

In [102]:
#將set帶入後回傳一個 dummy_data，內含所有dummy variables
def handleDummy(dataFrame):
    encoder = LabelBinarizer()
    
    
    dummyData = pd.DataFrame(encoder.fit_transform(dataFrame['type']), columns = encoder.classes_, index = dataFrame.index)
    
    dummyData3 = dataFrame.loc[:,['manage_bollean','material']]
    
    dummy_data = pd.concat([dummyData, dummyData3], axis=1, join_axes=[dataFrame.index])
    return(dummy_data)

#將set 輸入後會回傳 y (data_label) 跟 data_value
def handleStandardScaler(dataFrame):
    scaler = StandardScaler()
    data_value = dataFrame.drop([ 'type', 'manage_bollean', 'material', 'total_price'], axis = 1)
    for column in data_value.columns:
        data_value[column] = scaler.fit_transform(data_value[column])
    return(data_value)    

#將兩個 handler 回傳的資料結合起來
def dummyValueCombination(data_value, dummy_data):
    total_set = pd.concat([data_value, dummy_data ], axis = 1)
    return(total_set)

#將 y 分出來
def handleTag(dataFrame):
    return(dataFrame['total_price'])

def totalHandler(dataFrame):
    dummy_data = handleDummy(dataFrame)
    data_value = handleStandardScaler(dataFrame)
    totalData = dummyValueCombination(data_value, dummy_data)
    return(totalData)

In [103]:
train_set_songshan, test_set_songshan = train_test_split( data_songshan, test_size=0.2, random_state = 42)
train_set_sinyi, test_set_sinyi = train_test_split( data_sinyi, test_size=0.2, random_state = 42)
train_set_daan, test_set_daan = train_test_split( data_daan, test_size=0.2, random_state = 42)
train_set_jhongshan, test_set_jhongshan = train_test_split( data_jhongshan, test_size=0.2, random_state = 42)
train_set_jhongjheng, test_set_jhongjheng = train_test_split( data_jhongjheng, test_size=0.2, random_state = 42)
train_set_datong, test_set_datong = train_test_split( data_datong, test_size=0.2, random_state = 42)
train_set_wanhua, test_set_wanhua = train_test_split( data_wanhua, test_size=0.2, random_state = 42)
train_set_wunshan, test_set_wunshan = train_test_split( data_wunshan, test_size=0.2, random_state = 42)
train_set_nangang, test_set_nangang = train_test_split( data_nangang, test_size=0.2, random_state = 42)
train_set_neihu, test_set_neihu = train_test_split( data_neihu, test_size=0.2, random_state = 42)
train_set_shihlin, test_set_shihlin = train_test_split( data_shihlin, test_size=0.2, random_state = 42)
train_set_beitou, test_set_beitou = train_test_split( data_beitou, test_size=0.2, random_state = 42)

In [422]:
train_set_songshan_label = handleTag(train_set_songshan)
total_train_set_songshan = totalHandler(train_set_songshan)
test_set_songshan_label = handleTag(test_set_songshan)
total_test_set_songshan = totalHandler(test_set_songshan)

train_set_sinyi_label = handleTag(train_set_sinyi)
total_train_set_sinyi = totalHandler(train_set_sinyi)
test_set_sinyi_label = handleTag(test_set_sinyi)
total_test_set_sinyi = totalHandler(test_set_sinyi)

train_set_daan_label = handleTag(train_set_daan)
total_train_set_daan = totalHandler(train_set_daan)
test_set_daan_label = handleTag(test_set_daan)
total_test_set_daan = totalHandler(test_set_daan)

train_set_jhongshan_label = handleTag(train_set_jhongshan)
total_train_set_jhongshan = totalHandler(train_set_jhongshan)
test_set_jhongshan_label = handleTag(test_set_jhongshan)
total_test_set_jhongshan = totalHandler(test_set_jhongshan)

train_set_jhongjheng_label = handleTag(train_set_jhongjheng)
total_train_set_jhongjheng = totalHandler(train_set_jhongjheng)
test_set_jhongjheng_label = handleTag(test_set_jhongjheng)
total_test_set_jhongjheng = totalHandler(test_set_jhongjheng)

train_set_datong_label = handleTag(train_set_datong)
total_train_set_datong = totalHandler(train_set_datong)
test_set_datong_label = handleTag(test_set_datong)
total_test_set_datong = totalHandler(test_set_datong)

train_set_wanhua_label = handleTag(train_set_wanhua)
total_train_set_wanhua = totalHandler(train_set_wanhua)
test_set_wanhua_label = handleTag(test_set_wanhua)
total_test_set_wanhua = totalHandler(test_set_wanhua)

train_set_wunshan_label = handleTag(train_set_wunshan)
total_train_set_wunshan = totalHandler(train_set_wunshan)
test_set_wunshan_label = handleTag(test_set_wunshan)
total_test_set_wunshan = totalHandler(test_set_wunshan)

train_set_nangang_label = handleTag(train_set_nangang)
total_train_set_nangang = totalHandler(train_set_nangang)
test_set_nangang_label = handleTag(test_set_nangang)
total_test_set_nangang = totalHandler(test_set_nangang)

train_set_neihu_label = handleTag(train_set_neihu)
total_train_set_neihu = totalHandler(train_set_neihu)
test_set_neihu_label = handleTag(test_set_neihu)
total_test_set_neihu = totalHandler(test_set_neihu)

train_set_shihlin_label = handleTag(train_set_shihlin)
total_train_set_shihlin = totalHandler(train_set_shihlin)
test_set_shihlin_label = handleTag(test_set_shihlin)
total_test_set_shihlin = totalHandler(test_set_shihlin)

train_set_beitou_label = handleTag(train_set_beitou)
total_train_set_beitou = totalHandler(train_set_beitou)
test_set_beitou_label = handleTag(test_set_beitou)
total_test_set_beitou = totalHandler(test_set_beitou)



# 開始測試分析-松山

In [107]:
sgd_songshan = SGDRegressor(penalty = 'elasticnet')
sgd_songshan.fit(total_train_set_songshan, train_set_songshan_label)
np.sqrt(mean_squared_error(train_set_songshan_label, sgd_songshan.predict(total_train_set_songshan)))

2245158.9373510485

In [108]:
rfr_songshan = RandomForestRegressor()
rfr_songshan.fit(total_train_set_songshan, train_set_songshan_label)
np.sqrt(mean_squared_error(train_set_songshan_label, rfr_songshan.predict(total_train_set_songshan)))

362632.27737149619

In [121]:
scores = cross_val_score(rfr_songshan, total_train_set_songshan, train_set_songshan_label, scoring="neg_mean_squared_error", cv=10)
rmse_scores = np.sqrt(-scores)
rmse_scores.mean()

776197.06158853485

In [113]:
log_train_set_songshan_label = np.log(train_set_songshan_label)

In [115]:
svr_songshan = SVR(kernel = 'rbf')
svr_songshan.fit(total_train_set_songshan, log_train_set_songshan_label)
np.sqrt(mean_squared_error(train_set_songshan_label, np.exp(svr_songshan.predict(total_train_set_songshan))))

1434475.6414464172

In [110]:
gbr_songshan = GradientBoostingRegressor()
gbr_songshan.fit(total_train_set_songshan, train_set_songshan_label)
np.sqrt(mean_squared_error(train_set_songshan_label, gbr_songshan.predict(total_train_set_songshan)))

500631.41243543779

In [111]:
dtr_songshan = DecisionTreeRegressor()
dtr_songshan.fit(total_train_set_songshan, train_set_songshan_label)
np.sqrt(mean_squared_error(train_set_songshan_label, dtr_songshan.predict(total_train_set_songshan)))

0.0

In [150]:
scores = cross_val_score(dtr_songshan, total_train_set_songshan, train_set_songshan_label, scoring="neg_mean_squared_error", cv=10)
rmse_scores = np.sqrt(-scores)
rmse_scores.mean()

1154452.0595718026

In [112]:
mlpr_songshan = MLPRegressor()
mlpr_songshan.fit(total_train_set_songshan, train_set_songshan_label)
np.sqrt(mean_squared_error(train_set_songshan_label, mlpr_songshan.predict(total_train_set_songshan)))



23416299.905828264

In [None]:
#開始調整模型

In [127]:
# sgd不採用
param_distribs = {
        'n_iter': randint(low=30, high=100),
        'l1_ratio': np.arange(0.1, 1.0 ,0.05),
        'alpha': [100, 10, 1, 0.1, 0.01, 0.001, 0.0001, 0.00001]
    }

sgd_search_nopoly_songshan = RandomizedSearchCV(SGDRegressor(penalty='elasticnet', fit_intercept= False, learning_rate = 'optimal'), 
                                param_distributions=param_distribs,
                                n_iter=100, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
sgd_search_nopoly_songshan.fit(total_train_set_songshan, train_set_songshan_label)

RandomizedSearchCV(cv=5, error_score='raise',
          estimator=SGDRegressor(alpha=0.0001, average=False, epsilon=0.1, eta0=0.01,
       fit_intercept=False, l1_ratio=0.15, learning_rate='optimal',
       loss='squared_loss', n_iter=5, penalty='elasticnet', power_t=0.25,
       random_state=None, shuffle=True, verbose=0, warm_start=False),
          fit_params={}, iid=True, n_iter=100, n_jobs=-1,
          param_distributions={'n_iter': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000000015D59358>, 'l1_ratio': array([ 0.1 ,  0.15,  0.2 ,  0.25,  0.3 ,  0.35,  0.4 ,  0.45,  0.5 ,
        0.55,  0.6 ,  0.65,  0.7 ,  0.75,  0.8 ,  0.85,  0.9 ,  0.95]), 'alpha': [100, 10, 1, 0.1, 0.01, 0.001, 0.0001, 1e-05]},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score=True, scoring='neg_mean_squared_error',
          verbose=0)

In [128]:
sgd_search_nopoly_songshan.best_estimator_

SGDRegressor(alpha=0.001, average=False, epsilon=0.1, eta0=0.01,
       fit_intercept=False, l1_ratio=0.50000000000000011,
       learning_rate='optimal', loss='squared_loss', n_iter=96,
       penalty='elasticnet', power_t=0.25, random_state=None, shuffle=True,
       verbose=0, warm_start=False)

In [133]:
cvres = sgd_search_nopoly_songshan.cv_results_
for mean_score, params in zip(cvres['mean_test_score'], cvres['params']):
    print(np.sqrt(-mean_score), params)

1.78421625533e+12 {'alpha': 0.0001, 'l1_ratio': 0.45000000000000007, 'n_iter': 34}
2642020514.36 {'alpha': 0.1, 'l1_ratio': 0.80000000000000016, 'n_iter': 94}
7964912.36892 {'alpha': 1, 'l1_ratio': 0.35000000000000009, 'n_iter': 58}
1.82789044613e+12 {'alpha': 0.0001, 'l1_ratio': 0.70000000000000018, 'n_iter': 35}
21030595.0336 {'alpha': 100, 'l1_ratio': 0.80000000000000016, 'n_iter': 62}
22900348.4219 {'alpha': 100, 'l1_ratio': 0.25000000000000006, 'n_iter': 33}
4011885098.32 {'alpha': 0.1, 'l1_ratio': 0.80000000000000016, 'n_iter': 32}
5117260783.04 {'alpha': 0.1, 'l1_ratio': 0.8500000000000002, 'n_iter': 79}
19192057.636 {'alpha': 10, 'l1_ratio': 0.25000000000000006, 'n_iter': 75}
2053364.09123 {'alpha': 0.001, 'l1_ratio': 0.65000000000000013, 'n_iter': 60}
434473788.467 {'alpha': 0.01, 'l1_ratio': 0.55000000000000016, 'n_iter': 45}
1.45529837747e+13 {'alpha': 1e-05, 'l1_ratio': 0.50000000000000011, 'n_iter': 56}
22544745.3316 {'alpha': 100, 'l1_ratio': 0.55000000000000016, 'n_iter'

In [122]:
#考慮
param_distribs = {
        'n_estimators': randint(low=10, high=150),
        'max_depth': randint(low=1, high=10)
    }

random_forest_reg_nopoly = RandomForestRegressor()
rnd_search_nopoly_songshan = RandomizedSearchCV(random_forest_reg_nopoly, param_distributions=param_distribs,
                                n_iter=100, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
rnd_search_nopoly_songshan.fit(total_train_set_songshan, train_set_songshan_label)

RandomizedSearchCV(cv=5, error_score='raise',
          estimator=RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False),
          fit_params={}, iid=True, n_iter=100, n_jobs=-1,
          param_distributions={'n_estimators': <scipy.stats._distn_infrastructure.rv_frozen object at 0x00000000137B9240>, 'max_depth': <scipy.stats._distn_infrastructure.rv_frozen object at 0x00000000163E1940>},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score=True, scoring='neg_mean_squared_error',
          verbose=0)

In [123]:
rnd_search_nopoly_songshan.best_estimator_

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=9,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=122, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False)

In [130]:
cvres = rnd_search_nopoly_songshan.cv_results_
for mean_score, params in zip(cvres['mean_test_score'], cvres['params']):
    print(np.sqrt(-mean_score), params)

6284091.42145 {'max_depth': 1, 'n_estimators': 85}
3137656.50997 {'max_depth': 3, 'n_estimators': 11}
780848.754722 {'max_depth': 8, 'n_estimators': 132}
6306194.62977 {'max_depth': 1, 'n_estimators': 13}
1484233.95413 {'max_depth': 5, 'n_estimators': 66}
1087759.08379 {'max_depth': 6, 'n_estimators': 21}
776973.55073 {'max_depth': 9, 'n_estimators': 37}
1121641.21778 {'max_depth': 6, 'n_estimators': 23}
795299.735673 {'max_depth': 8, 'n_estimators': 37}
772857.076763 {'max_depth': 8, 'n_estimators': 80}
777059.810842 {'max_depth': 8, 'n_estimators': 101}
4283375.15804 {'max_depth': 2, 'n_estimators': 127}
4304605.32489 {'max_depth': 2, 'n_estimators': 108}
774837.42312 {'max_depth': 8, 'n_estimators': 91}
4319102.1934 {'max_depth': 2, 'n_estimators': 57}
737441.303702 {'max_depth': 9, 'n_estimators': 98}
1463730.71704 {'max_depth': 5, 'n_estimators': 125}
4313203.59881 {'max_depth': 2, 'n_estimators': 25}
2182447.55926 {'max_depth': 4, 'n_estimators': 44}
757758.082373 {'max_depth': 9

In [134]:
#考慮
param_distribs = {
        'max_depth': randint(2, 10, 1),
        'learning_rate': np.arange(0.05, 0.65 ,0.05),
        'n_estimators': randint(low=30, high=150)
    }

gbr_search_nopoly_songshan = RandomizedSearchCV(GradientBoostingRegressor(), 
                                param_distributions=param_distribs,
                                n_iter=100, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
gbr_search_nopoly_songshan.fit(total_train_set_songshan, train_set_songshan_label)

RandomizedSearchCV(cv=5, error_score='raise',
          estimator=GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.1, loss='ls', max_depth=3, max_features=None,
             max_leaf_nodes=None, min_impurity_split=1e-07,
             min_samples_leaf=1, min_samples_split=2,
             min_weight_fraction_leaf=0.0, n_estimators=100,
             presort='auto', random_state=None, subsample=1.0, verbose=0,
             warm_start=False),
          fit_params={}, iid=True, n_iter=100, n_jobs=-1,
          param_distributions={'max_depth': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000000011A56B70>, 'learning_rate': array([ 0.05,  0.1 ,  0.15,  0.2 ,  0.25,  0.3 ,  0.35,  0.4 ,  0.45,
        0.5 ,  0.55,  0.6 ]), 'n_estimators': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000000015946AC8>},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score=True, scoring='neg_me

In [135]:
gbr_search_nopoly_songshan.best_estimator_

GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.10000000000000001, loss='ls', max_depth=5,
             max_features=None, max_leaf_nodes=None,
             min_impurity_split=1e-07, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=104, presort='auto', random_state=None,
             subsample=1.0, verbose=0, warm_start=False)

In [139]:
cvres = gbr_search_nopoly_songshan.cv_results_
for mean_score, params in zip(cvres['mean_test_score'], cvres['params']):
    print(np.sqrt(-mean_score), params)

859532.512101 {'learning_rate': 0.5, 'max_depth': 5, 'n_estimators': 124}
627259.945912 {'learning_rate': 0.15000000000000002, 'max_depth': 4, 'n_estimators': 103}
934046.62487 {'learning_rate': 0.60000000000000009, 'max_depth': 3, 'n_estimators': 50}
698069.649553 {'learning_rate': 0.35000000000000003, 'max_depth': 7, 'n_estimators': 115}
832683.301373 {'learning_rate': 0.5, 'max_depth': 8, 'n_estimators': 41}
621053.736402 {'learning_rate': 0.10000000000000001, 'max_depth': 4, 'n_estimators': 105}
689869.550777 {'learning_rate': 0.29999999999999999, 'max_depth': 4, 'n_estimators': 106}
657033.65248 {'learning_rate': 0.20000000000000001, 'max_depth': 5, 'n_estimators': 57}
794422.800364 {'learning_rate': 0.15000000000000002, 'max_depth': 9, 'n_estimators': 146}
706351.025425 {'learning_rate': 0.050000000000000003, 'max_depth': 8, 'n_estimators': 121}
674924.642108 {'learning_rate': 0.10000000000000001, 'max_depth': 3, 'n_estimators': 121}
667132.086659 {'learning_rate': 0.100000000000

In [144]:
#考慮
param_distribs = {
        'min_samples_split': randint(10, 100),
        'max_depth': randint(5, 15)
    }

dtr_search_nopoly_songshan = RandomizedSearchCV(DecisionTreeRegressor(), 
                                param_distributions=param_distribs,
                                n_iter=100, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
dtr_search_nopoly_songshan.fit(total_train_set_songshan, train_set_songshan_label)

RandomizedSearchCV(cv=5, error_score='raise',
          estimator=DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
           max_leaf_nodes=None, min_impurity_split=1e-07,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, presort=False, random_state=None,
           splitter='best'),
          fit_params={}, iid=True, n_iter=100, n_jobs=-1,
          param_distributions={'min_samples_split': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000000001249C278>, 'max_depth': <scipy.stats._distn_infrastructure.rv_frozen object at 0x00000000112B9C88>},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score=True, scoring='neg_mean_squared_error',
          verbose=0)

In [145]:
dtr_search_nopoly_songshan.best_estimator_

DecisionTreeRegressor(criterion='mse', max_depth=13, max_features=None,
           max_leaf_nodes=None, min_impurity_split=1e-07,
           min_samples_leaf=1, min_samples_split=14,
           min_weight_fraction_leaf=0.0, presort=False, random_state=None,
           splitter='best')

In [149]:
cvres = dtr_search_nopoly_songshan.cv_results_
for mean_score, params in zip(cvres['mean_test_score'], cvres['params']):
    print(np.sqrt(-mean_score), params)

1359296.14013 {'max_depth': 13, 'min_samples_split': 26}
1734914.87556 {'max_depth': 6, 'min_samples_split': 22}
1968453.31574 {'max_depth': 10, 'min_samples_split': 68}
1868825.62838 {'max_depth': 7, 'min_samples_split': 60}
1238500.29739 {'max_depth': 9, 'min_samples_split': 18}
1719726.01102 {'max_depth': 6, 'min_samples_split': 12}
1522107.27941 {'max_depth': 13, 'min_samples_split': 38}
1775135.53916 {'max_depth': 6, 'min_samples_split': 38}
2249550.94382 {'max_depth': 6, 'min_samples_split': 85}
1915764.30164 {'max_depth': 6, 'min_samples_split': 52}
1892957.07617 {'max_depth': 6, 'min_samples_split': 48}
1775510.38632 {'max_depth': 8, 'min_samples_split': 51}
2210876.66682 {'max_depth': 10, 'min_samples_split': 85}
1523449.45229 {'max_depth': 8, 'min_samples_split': 38}
1768025.46123 {'max_depth': 6, 'min_samples_split': 32}
1502995.85088 {'max_depth': 10, 'min_samples_split': 36}
2305405.15922 {'max_depth': 9, 'min_samples_split': 96}
2075678.69708 {'max_depth': 10, 'min_sample

In [151]:
param_distribs = {
    'hidden_layer_sizes':[(50,50), (60,60), (70,70), (80,80), (90,90),(100,100),
                             (60,50), (70,60),(80,70),(90,80), (100,90),(70,50),(80,60),
                             (90,70),(100,80)],
    'alpha':[0.1, 0.01, 0.001, 0.0001, 0.0001, 0.00001, 0.000001]
}

mlpr_search_nopoly_songshan = RandomizedSearchCV(MLPRegressor(max_iter = 300),
                                       param_distributions = param_distribs,
                                       n_iter=60, cv=5, scoring = 'neg_mean_squared_error', n_jobs=-1)
mlpr_search_nopoly_songshan.fit(total_train_set_songshan, train_set_songshan_label)



RandomizedSearchCV(cv=5, error_score='raise',
          estimator=MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=300, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False),
          fit_params={}, iid=True, n_iter=60, n_jobs=-1,
          param_distributions={'hidden_layer_sizes': [(50, 50), (60, 60), (70, 70), (80, 80), (90, 90), (100, 100), (60, 50), (70, 60), (80, 70), (90, 80), (100, 90), (70, 50), (80, 60), (90, 70), (100, 80)], 'alpha': [0.1, 0.01, 0.001, 0.0001, 0.0001, 1e-05, 1e-06]},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score=True, scoring='neg_mean_squared_error',
          verbose=0)

In [152]:
mlpr_search_nopoly_songshan.best_estimator_

MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100, 100), learning_rate='constant',
       learning_rate_init=0.001, max_iter=300, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [407]:
cvres = mlpr_search_nopoly_songshan.cv_results_
for mean_score, params in zip(cvres['mean_test_score'], cvres['params']):
    print(np.sqrt(-mean_score), params)

20292173.94 {'hidden_layer_sizes': (70, 60), 'alpha': 0.1}
20492493.5209 {'hidden_layer_sizes': (80, 60), 'alpha': 1e-06}
17052436.5425 {'hidden_layer_sizes': (100, 100), 'alpha': 1e-06}
17596745.2762 {'hidden_layer_sizes': (100, 100), 'alpha': 0.0001}
17413075.3362 {'hidden_layer_sizes': (100, 90), 'alpha': 0.0001}
18269805.5104 {'hidden_layer_sizes': (90, 90), 'alpha': 0.0001}
20394783.5699 {'hidden_layer_sizes': (70, 60), 'alpha': 0.0001}
21246600.4471 {'hidden_layer_sizes': (60, 50), 'alpha': 0.001}
19251137.3664 {'hidden_layer_sizes': (90, 70), 'alpha': 0.001}
21345792.1294 {'hidden_layer_sizes': (60, 50), 'alpha': 1e-06}
20363367.716 {'hidden_layer_sizes': (70, 60), 'alpha': 0.0001}
20169455.6292 {'hidden_layer_sizes': (80, 60), 'alpha': 0.001}
20980634.8843 {'hidden_layer_sizes': (60, 60), 'alpha': 0.1}
21702271.8912 {'hidden_layer_sizes': (50, 50), 'alpha': 0.001}
18953889.7494 {'hidden_layer_sizes': (80, 80), 'alpha': 0.01}
21207523.4251 {'hidden_layer_sizes': (60, 50), 'alpha

In [None]:
#選出最後要的預測模型

In [190]:
rfr_final_songshan = RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=5,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=73, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False)
rfr_final_songshan.fit(total_train_set_songshan, train_set_songshan_label)
np.sqrt(mean_squared_error(train_set_songshan_label, rfr_final_songshan.predict(total_train_set_songshan)))

1303330.3414162607

In [191]:
scores = cross_val_score(rfr_final_songshan, total_train_set_songshan, train_set_songshan_label, scoring="neg_mean_squared_error", cv=10)
rmse_scores = np.sqrt(-scores)
rmse_scores.mean()

1465430.349243111

In [372]:
gbr_final_songshan = GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.1, loss='ls', max_depth=3,
             max_features=None, max_leaf_nodes=None,
             min_impurity_split=1e-07, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=40, presort='auto', random_state=None,
             subsample=1.0, verbose=0, warm_start=False)
gbr_final_songshan.fit(total_train_set_songshan, train_set_songshan_label)
np.sqrt(mean_squared_error(train_set_songshan_label, gbr_final_songshan.predict(total_train_set_songshan)))

976768.97076529614

In [373]:
scores = cross_val_score(gbr_final_songshan, total_train_set_songshan, train_set_songshan_label, scoring="neg_mean_squared_error", cv=10)
rmse_scores = np.sqrt(-scores)
rmse_scores.mean()

1130794.1655203861

In [395]:
ada_reg_dtr_final_songshan = AdaBoostRegressor(
          DecisionTreeRegressor(criterion='mse', max_depth=5, max_features=None,
           max_leaf_nodes=None, min_impurity_split=1e-07,
           min_samples_leaf=1, min_samples_split=70,
           min_weight_fraction_leaf=0.0, presort=False, random_state=None,
           splitter='best'), n_estimators = 96,
          learning_rate = 0.5
)
ada_reg_dtr_final_songshan.fit(total_train_set_songshan, train_set_songshan_label)
np.sqrt(mean_squared_error(train_set_songshan_label, ada_reg_dtr_final_songshan.predict(total_train_set_songshan)))

956453.61967436632

In [396]:
scores = cross_val_score(ada_reg_dtr_final_songshan, total_train_set_songshan, train_set_songshan_label, scoring="neg_mean_squared_error", cv=10)
rmse_scores = np.sqrt(-scores)
rmse_scores.mean()

1154637.0324855095

In [414]:
mlpr_final_songshan =MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100, 100), learning_rate='constant',
       learning_rate_init=0.01, max_iter=1000, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)
mlpr_final_songshan.fit(total_train_set_songshan, train_set_songshan_label)
np.sqrt(mean_squared_error(train_set_songshan_label, mlpr_final_songshan.predict(total_train_set_songshan)))

1480562.5340213391

In [415]:
scores = cross_val_score(mlpr_final_songshan, total_train_set_songshan, train_set_songshan_label, scoring="neg_mean_squared_error", cv=10)
rmse_scores = np.sqrt(-scores)
rmse_scores.mean()

1588303.9563699691

# 進入測試

In [423]:
np.sqrt(mean_squared_error(test_set_songshan_label, rfr_final_songshan.predict(total_test_set_songshan)))

1379403.534811297

In [424]:
# 就是你了皮卡丘！
np.sqrt(mean_squared_error(test_set_songshan_label, gbr_final_songshan.predict(total_test_set_songshan)))

1121455.6485260352

In [425]:
np.sqrt(mean_squared_error(test_set_songshan_label, ada_reg_dtr_final_songshan.predict(total_test_set_songshan)))

1135076.3876894168

In [426]:
np.sqrt(mean_squared_error(test_set_songshan_label, mlpr_final_songshan.predict(total_test_set_songshan)))

1460382.487717435

# 松山 with stacking

In [430]:
# 如果要作stacking的話
train_set_songshan, test_set_songshan = train_test_split( data_songshan, test_size=0.2, random_state = 42)
train_set_songshan1, train_set_songshan2 = train_test_split( train_set_songshan, test_size=0.5, random_state = 42)

train_set_sinyi, test_set_sinyi = train_test_split( data_sinyi, test_size=0.2, random_state = 42)
train_set_sinyi1, train_set_sinyi2 = train_test_split( train_set_sinyi, test_size=0.5, random_state = 42)

train_set_daan, test_set_daan = train_test_split( data_daan, test_size=0.2, random_state = 42)
train_set_daan1, train_set_daan2 = train_test_split( train_set_daan, test_size=0.5, random_state = 42)

train_set_jhongshan, test_set_jhongshan = train_test_split( data_jhongshan, test_size=0.2, random_state = 42)
train_set_jhongshan1, train_set_jhongshan2 = train_test_split( train_set_jhongshan, test_size=0.5, random_state = 42)

train_set_jhongjheng, test_set_jhongjheng = train_test_split( data_jhongjheng, test_size=0.2, random_state = 42)
train_set_jhongjheng1, train_set_jhongjheng2 = train_test_split( train_set_jhongjheng, test_size=0.5, random_state = 42)

train_set_datong, test_set_datong = train_test_split( data_datong, test_size=0.2, random_state = 42)
train_set_datong1, train_set_datong2 = train_test_split( train_set_datong, test_size=0.5, random_state = 42)

train_set_wanhua, test_set_wanhua = train_test_split( data_wanhua, test_size=0.2, random_state = 42)
train_set_wanhua1, train_set_wanhua2 = train_test_split( train_set_wanhua, test_size=0.5, random_state = 42)

train_set_wunshan, test_set_wunshan = train_test_split( data_wunshan, test_size=0.2, random_state = 42)
train_set_wunshan1, train_set_wunshan2 = train_test_split( train_set_wunshan, test_size=0.5, random_state = 42)

train_set_nangang, test_set_nangang = train_test_split( data_nangang, test_size=0.2, random_state = 42)
train_set_nangang1, train_set_nangang2 = train_test_split( train_set_nangang, test_size=0.5, random_state = 42)

train_set_neihu, test_set_neihu = train_test_split( data_neihu, test_size=0.2, random_state = 42)
train_set_neihu1, train_set_neihu2 = train_test_split( train_set_neihu, test_size=0.5, random_state = 42)

train_set_shihlin, test_set_shihlin = train_test_split( data_shihlin, test_size=0.2, random_state = 42)
train_set_shihlin1, train_set_shihlin2 = train_test_split( train_set_shihlin, test_size=0.5, random_state = 42)

train_set_beitou, test_set_beitou = train_test_split( data_beitou, test_size=0.2, random_state = 42)
train_set_beitou1, train_set_beitou2 = train_test_split( train_set_beitou, test_size=0.5, random_state = 42)

In [849]:
train_set_songshan_label1 = handleTag(train_set_songshan1)
total_train_set_songshan1 = totalHandler(train_set_songshan1)
train_set_songshan_label2 = handleTag(train_set_songshan2)
total_train_set_songshan2 = totalHandler(train_set_songshan2)
test_set_songshan_label = handleTag(test_set_songshan)
total_test_set_songshan = totalHandler(test_set_songshan)



In [436]:
# out
param_distribs = {
        'n_iter': randint(low=30, high=100),
        'l1_ratio': np.arange(0, 1.05 ,0.05),
        'alpha': [100, 10, 1, 0.1, 0.01, 0.001, 0.0001, 0.00001]
    }

sgd_search_layer1_songshan = RandomizedSearchCV(SGDRegressor(penalty='elasticnet', fit_intercept= False, learning_rate = 'optimal'), 
                                param_distributions=param_distribs,
                                n_iter=100, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
sgd_search_layer1_songshan.fit(total_train_set_songshan1, train_set_songshan_label1)

RandomizedSearchCV(cv=5, error_score='raise',
          estimator=SGDRegressor(alpha=0.0001, average=False, epsilon=0.1, eta0=0.01,
       fit_intercept=False, l1_ratio=0.15, learning_rate='optimal',
       loss='squared_loss', n_iter=5, penalty='elasticnet', power_t=0.25,
       random_state=None, shuffle=True, verbose=0, warm_start=False),
          fit_params={}, iid=True, n_iter=100, n_jobs=-1,
          param_distributions={'n_iter': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000000000DB6EDD8>, 'l1_ratio': array([ 0.  ,  0.05,  0.1 ,  0.15,  0.2 ,  0.25,  0.3 ,  0.35,  0.4 ,
        0.45,  0.5 ,  0.55,  0.6 ,  0.65,  0.7 ,  0.75,  0.8 ,  0.85,
        0.9 ,  0.95,  1.  ]), 'alpha': [100, 10, 1, 0.1, 0.01, 0.001, 0.0001, 1e-05]},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score=True, scoring='neg_mean_squared_error',
          verbose=0)

In [437]:
sgd_search_layer1_songshan.best_estimator_

SGDRegressor(alpha=0.001, average=False, epsilon=0.1, eta0=0.01,
       fit_intercept=False, l1_ratio=0.5, learning_rate='optimal',
       loss='squared_loss', n_iter=85, penalty='elasticnet', power_t=0.25,
       random_state=None, shuffle=True, verbose=0, warm_start=False)

In [438]:
cvres = sgd_search_layer1_songshan.cv_results_
for mean_score, params in zip(cvres['mean_test_score'], cvres['params']):
    print(np.sqrt(-mean_score), params)

23309068.4776 {'alpha': 100, 'l1_ratio': 0.10000000000000001, 'n_iter': 31}
3.33358209779e+12 {'alpha': 0.0001, 'l1_ratio': 0.20000000000000001, 'n_iter': 44}
16269470.395 {'alpha': 10, 'l1_ratio': 0.65000000000000002, 'n_iter': 82}
15499067.5832 {'alpha': 10, 'l1_ratio': 0.70000000000000007, 'n_iter': 89}
72827600.0804 {'alpha': 1, 'l1_ratio': 0.95000000000000007, 'n_iter': 87}
2.31969232966e+12 {'alpha': 0.0001, 'l1_ratio': 0.25, 'n_iter': 61}
4.20447423642e+13 {'alpha': 1e-05, 'l1_ratio': 0.25, 'n_iter': 33}
23283477.451 {'alpha': 100, 'l1_ratio': 0.15000000000000002, 'n_iter': 98}
9816728.67867 {'alpha': 1, 'l1_ratio': 0.40000000000000002, 'n_iter': 41}
5049828.45595 {'alpha': 0.001, 'l1_ratio': 0.90000000000000002, 'n_iter': 62}
82951945.1332 {'alpha': 0.1, 'l1_ratio': 0.30000000000000004, 'n_iter': 87}
1453336715.87 {'alpha': 0.1, 'l1_ratio': 0.65000000000000002, 'n_iter': 32}
823972289.697 {'alpha': 0.1, 'l1_ratio': 0.65000000000000002, 'n_iter': 98}
2.04185918503e+12 {'alpha': 

In [439]:
#考慮
param_distribs = {
        'n_estimators': randint(low=10, high=150),
        'max_depth': randint(low=1, high=10)
    }

random_forest_reg_nopoly = RandomForestRegressor()
rnd_search_layer1_songshan = RandomizedSearchCV(random_forest_reg_nopoly, param_distributions=param_distribs,
                                n_iter=100, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
rnd_search_layer1_songshan.fit(total_train_set_songshan1, train_set_songshan_label1)

RandomizedSearchCV(cv=5, error_score='raise',
          estimator=RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False),
          fit_params={}, iid=True, n_iter=100, n_jobs=-1,
          param_distributions={'n_estimators': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000000000E4D04A8>, 'max_depth': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000000013B97B00>},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score=True, scoring='neg_mean_squared_error',
          verbose=0)

In [440]:
rnd_search_layer1_songshan.best_estimator_

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=9,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=113, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False)

In [455]:
cvres = rnd_search_layer1_songshan.cv_results_
for mean_score, params in zip(cvres['mean_test_score'], cvres['params']):
    print(np.sqrt(-mean_score), params)

1023238.20637 {'max_depth': 9, 'n_estimators': 113}
1025226.93515 {'max_depth': 9, 'n_estimators': 66}
6176454.87765 {'max_depth': 1, 'n_estimators': 61}
2193177.31341 {'max_depth': 4, 'n_estimators': 128}
1029814.16004 {'max_depth': 9, 'n_estimators': 133}
1094138.02062 {'max_depth': 7, 'n_estimators': 109}
1092755.61271 {'max_depth': 8, 'n_estimators': 17}
1026629.48302 {'max_depth': 9, 'n_estimators': 142}
1556971.90122 {'max_depth': 5, 'n_estimators': 96}
1038899.90198 {'max_depth': 9, 'n_estimators': 98}
3066878.70468 {'max_depth': 3, 'n_estimators': 74}
1088056.57631 {'max_depth': 7, 'n_estimators': 141}
1076431.32066 {'max_depth': 8, 'n_estimators': 69}
1256152.10044 {'max_depth': 6, 'n_estimators': 22}
3082787.96653 {'max_depth': 3, 'n_estimators': 102}
1558626.89103 {'max_depth': 5, 'n_estimators': 44}
1590598.32508 {'max_depth': 5, 'n_estimators': 53}
1065497.1482 {'max_depth': 8, 'n_estimators': 27}
1601927.25219 {'max_depth': 5, 'n_estimators': 35}
2209975.38484 {'max_depth

In [442]:
param_distribs = {
        'min_samples_split': randint(10, 100),
        'max_depth': randint(5, 15)
    }

dtr_search_layer1_songshan = RandomizedSearchCV(DecisionTreeRegressor(), 
                                param_distributions=param_distribs,
                                n_iter=100, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
dtr_search_layer1_songshan.fit(total_train_set_songshan1, train_set_songshan_label1)

RandomizedSearchCV(cv=5, error_score='raise',
          estimator=DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
           max_leaf_nodes=None, min_impurity_split=1e-07,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, presort=False, random_state=None,
           splitter='best'),
          fit_params={}, iid=True, n_iter=100, n_jobs=-1,
          param_distributions={'min_samples_split': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000000011428048>, 'max_depth': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000000000CF3C048>},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score=True, scoring='neg_mean_squared_error',
          verbose=0)

In [443]:
dtr_search_layer1_songshan.best_estimator_

DecisionTreeRegressor(criterion='mse', max_depth=13, max_features=None,
           max_leaf_nodes=None, min_impurity_split=1e-07,
           min_samples_leaf=1, min_samples_split=17,
           min_weight_fraction_leaf=0.0, presort=False, random_state=None,
           splitter='best')

In [458]:
cvres = dtr_search_layer1_songshan.cv_results_
for mean_score, params in zip(cvres['mean_test_score'], cvres['params']):
    print(np.sqrt(-mean_score), params)

2229412.79986 {'max_depth': 14, 'min_samples_split': 53}
2229412.79986 {'max_depth': 8, 'min_samples_split': 53}
2028895.73455 {'max_depth': 14, 'min_samples_split': 38}
2179240.16435 {'max_depth': 10, 'min_samples_split': 48}
2670848.14701 {'max_depth': 5, 'min_samples_split': 79}
2399236.44616 {'max_depth': 12, 'min_samples_split': 64}
2475641.76674 {'max_depth': 14, 'min_samples_split': 68}
1634450.19451 {'max_depth': 13, 'min_samples_split': 17}
2473661.431 {'max_depth': 13, 'min_samples_split': 66}
2728360.25667 {'max_depth': 8, 'min_samples_split': 86}
2001911.28008 {'max_depth': 9, 'min_samples_split': 31}
1740045.16307 {'max_depth': 10, 'min_samples_split': 19}
2844143.03051 {'max_depth': 13, 'min_samples_split': 92}
2063185.14126 {'max_depth': 9, 'min_samples_split': 39}
2069047.82508 {'max_depth': 7, 'min_samples_split': 39}
1680939.53078 {'max_depth': 14, 'min_samples_split': 10}
2356050.19681 {'max_depth': 5, 'min_samples_split': 27}
2277531.0207 {'max_depth': 11, 'min_samp

In [445]:
param_distribs = {
        'max_depth': randint(2, 10, 1),
        'learning_rate': np.arange(0.05, 0.65 ,0.05),
        'n_estimators': randint(low=30, high=150)
    }

gbr_search_layer1_songshan = RandomizedSearchCV(GradientBoostingRegressor(), 
                                param_distributions=param_distribs,
                                n_iter=100, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
gbr_search_layer1_songshan.fit(total_train_set_songshan1, train_set_songshan_label1)

RandomizedSearchCV(cv=5, error_score='raise',
          estimator=GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.1, loss='ls', max_depth=3, max_features=None,
             max_leaf_nodes=None, min_impurity_split=1e-07,
             min_samples_leaf=1, min_samples_split=2,
             min_weight_fraction_leaf=0.0, n_estimators=100,
             presort='auto', random_state=None, subsample=1.0, verbose=0,
             warm_start=False),
          fit_params={}, iid=True, n_iter=100, n_jobs=-1,
          param_distributions={'max_depth': <scipy.stats._distn_infrastructure.rv_frozen object at 0x00000000166B3F60>, 'learning_rate': array([ 0.05,  0.1 ,  0.15,  0.2 ,  0.25,  0.3 ,  0.35,  0.4 ,  0.45,
        0.5 ,  0.55,  0.6 ]), 'n_estimators': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000000000CB60320>},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score=True, scoring='neg_me

In [446]:
gbr_search_layer1_songshan.best_estimator_

GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.10000000000000001, loss='ls', max_depth=5,
             max_features=None, max_leaf_nodes=None,
             min_impurity_split=1e-07, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=124, presort='auto', random_state=None,
             subsample=1.0, verbose=0, warm_start=False)

In [460]:
cvres = gbr_search_layer1_songshan.cv_results_
for mean_score, params in zip(cvres['mean_test_score'], cvres['params']):
    print(np.sqrt(-mean_score), params)

{'learning_rate': 0.45000000000000001, 'max_depth': 3, 'n_estimators': 100}
{'learning_rate': 0.050000000000000003, 'max_depth': 10, 'n_estimators': 81}
{'learning_rate': 0.10000000000000001, 'max_depth': 9, 'n_estimators': 63}
{'learning_rate': 0.050000000000000003, 'max_depth': 8, 'n_estimators': 99}
{'learning_rate': 0.29999999999999999, 'max_depth': 8, 'n_estimators': 93}
{'learning_rate': 0.35000000000000003, 'max_depth': 8, 'n_estimators': 54}
{'learning_rate': 0.55000000000000004, 'max_depth': 9, 'n_estimators': 85}
{'learning_rate': 0.15000000000000002, 'max_depth': 9, 'n_estimators': 36}
{'learning_rate': 0.15000000000000002, 'max_depth': 8, 'n_estimators': 136}
{'learning_rate': 0.29999999999999999, 'max_depth': 10, 'n_estimators': 133}
{'learning_rate': 0.40000000000000002, 'max_depth': 3, 'n_estimators': 88}
{'learning_rate': 0.10000000000000001, 'max_depth': 10, 'n_estimators': 145}
{'learning_rate': 0.60000000000000009, 'max_depth': 3, 'n_estimators': 56}
{'learning_rate'

In [449]:
param_distribs = {
    'hidden_layer_sizes':[(50,50), (60,60), (70,70), (80,80), (90,90),(100,100),
                             (60,50), (70,60),(80,70),(90,80), (100,90),(70,50),(80,60),
                             (90,70),(100,80)],
    'alpha':[0.1, 0.01, 0.001, 0.0001, 0.0001, 0.00001, 0.000001]
}

mlpr_search_layer1_songshan = RandomizedSearchCV(MLPRegressor(max_iter = 300, learning_rate_init= 0.01),
                                       param_distributions = param_distribs,
                                       n_iter=60, cv=5, scoring = 'neg_mean_squared_error', n_jobs=-1)
mlpr_search_layer1_songshan.fit(total_train_set_songshan1, train_set_songshan_label1)



RandomizedSearchCV(cv=5, error_score='raise',
          estimator=MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.01, max_iter=300, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False),
          fit_params={}, iid=True, n_iter=60, n_jobs=-1,
          param_distributions={'hidden_layer_sizes': [(50, 50), (60, 60), (70, 70), (80, 80), (90, 90), (100, 100), (60, 50), (70, 60), (80, 70), (90, 80), (100, 90), (70, 50), (80, 60), (90, 70), (100, 80)], 'alpha': [0.1, 0.01, 0.001, 0.0001, 0.0001, 1e-05, 1e-06]},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score=True, scoring='neg_mean_squared_error',
          verbose=0)

In [451]:
mlpr_search_layer1_songshan.best_estimator_

MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100, 100), learning_rate='constant',
       learning_rate_init=0.01, max_iter=300, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [463]:
cvres = mlpr_search_layer1_songshan.cv_results_
for mean_score, params in zip(cvres['mean_test_score'], cvres['params']):
    print(np.sqrt(-mean_score), params)

3376357.29328 {'hidden_layer_sizes': (80, 70), 'alpha': 1e-05}
3043424.54939 {'hidden_layer_sizes': (100, 100), 'alpha': 0.001}
3709184.66364 {'hidden_layer_sizes': (60, 60), 'alpha': 1e-06}
3922600.71874 {'hidden_layer_sizes': (50, 50), 'alpha': 0.1}
3126298.95843 {'hidden_layer_sizes': (100, 80), 'alpha': 0.01}
3584981.44811 {'hidden_layer_sizes': (70, 60), 'alpha': 0.1}
3845389.22733 {'hidden_layer_sizes': (60, 50), 'alpha': 1e-05}
3898845.27585 {'hidden_layer_sizes': (60, 50), 'alpha': 0.01}
3464903.70249 {'hidden_layer_sizes': (70, 70), 'alpha': 0.001}
3391036.34754 {'hidden_layer_sizes': (70, 70), 'alpha': 0.0001}
3409119.13703 {'hidden_layer_sizes': (70, 70), 'alpha': 1e-06}
3420719.93554 {'hidden_layer_sizes': (80, 60), 'alpha': 1e-05}
4001797.04552 {'hidden_layer_sizes': (50, 50), 'alpha': 1e-06}
3733692.81892 {'hidden_layer_sizes': (70, 50), 'alpha': 0.0001}
3108299.43882 {'hidden_layer_sizes': (90, 90), 'alpha': 1e-06}
3329132.52288 {'hidden_layer_sizes': (80, 70), 'alpha': 

# layer1 predictor

In [847]:
rfr_layer1_songshan = RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=5,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=60, n_jobs=-1, oob_score=False, random_state=None,
           verbose=0, warm_start=False)
rfr_layer1_songshan.fit(total_train_set_songshan1, train_set_songshan_label1)
np.sqrt(mean_squared_error(train_set_songshan_label1, rfr_layer1_songshan.predict(total_train_set_songshan1)))

1299394.4202457245

In [848]:
scores = cross_val_score(rfr_layer1_songshan, total_train_set_songshan1, train_set_songshan_label1, scoring="neg_mean_squared_error", cv=10)
rmse_scores = np.sqrt(-scores)
rmse_scores.mean()

1533177.1545099304

In [688]:
gbr_layer1_songshan = GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.09, loss='ls', max_depth=2,
             max_features=None, max_leaf_nodes=None,
             min_impurity_split=1e-07, min_samples_leaf=1,
             min_samples_split=70, min_weight_fraction_leaf=0.0,
             n_estimators=95, presort='auto', random_state=None,
             subsample=1.0, verbose=0, warm_start=False)
gbr_layer1_songshan.fit(total_train_set_songshan1, train_set_songshan_label1)
np.sqrt(mean_squared_error(train_set_songshan_label1, gbr_layer1_songshan.predict(total_train_set_songshan1)))

939919.12547957106

In [689]:
scores = cross_val_score(gbr_layer1_songshan, total_train_set_songshan1, train_set_songshan_label1, scoring="neg_mean_squared_error", cv=10)
rmse_scores = np.sqrt(-scores)
rmse_scores.mean()

1207674.2301700893

In [757]:
ada_reg_dtr_layer1_songshan = AdaBoostRegressor(
          DecisionTreeRegressor(criterion='mse', max_depth=4, max_features=None,
           max_leaf_nodes=None, min_impurity_split=1e-07,
           min_samples_leaf=1, min_samples_split=20,
           min_weight_fraction_leaf=0.0, presort=False, random_state=None,
           splitter='best'), n_estimators = 40,
          learning_rate = 0.5
)
ada_reg_dtr_layer1_songshan.fit(total_train_set_songshan1, train_set_songshan_label1)
np.sqrt(mean_squared_error(train_set_songshan_label1, ada_reg_dtr_layer1_songshan.predict(total_train_set_songshan1)))

1401502.7766832237

In [758]:
scores = cross_val_score(ada_reg_dtr_layer1_songshan, total_train_set_songshan1, train_set_songshan_label1, scoring="neg_mean_squared_error", cv=10)
rmse_scores = np.sqrt(-scores)
rmse_scores.mean()

1639132.5952045871

In [821]:
mlpr_layer1_songshan =MLPRegressor(activation='relu', alpha=1, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(50, 50), learning_rate='constant',
       learning_rate_init=0.05, max_iter=1000, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)
mlpr_layer1_songshan.fit(total_train_set_songshan1, train_set_songshan_label1)
np.sqrt(mean_squared_error(train_set_songshan_label1, mlpr_layer1_songshan.predict(total_train_set_songshan1)))

1471934.8588423505

In [822]:
scores = cross_val_score(mlpr_layer1_songshan, total_train_set_songshan1, train_set_songshan_label1, scoring="neg_mean_squared_error", cv=10)
rmse_scores = np.sqrt(-scores)
rmse_scores.mean()

1710808.4512724995

# 用rfr_layer1_songshan, gbr_layer1_songshan, ada_reg_dtr_layer1_songshan, mlpr_layer1_songshan 來組合成第二層的 X

In [850]:
y_predicted_rfr_layer1_songshan = rfr_layer1_songshan.predict(total_train_set_songshan2)

y_predicted_gbr_layer1_songshan = gbr_layer1_songshan.predict(total_train_set_songshan2)

y_predicted_ada_reg_dtr_layer1_songshan = ada_reg_dtr_layer1_songshan.predict(total_train_set_songshan2)

y_predicted_mlpr_layer1_songshan = mlpr_layer1_songshan.predict(total_train_set_songshan2)

In [851]:
final_train_set2_songshan = pd.DataFrame({
    'rfr_layer1_songshan': y_predicted_rfr_layer1_songshan,
    'gbr_layer1_songshan': y_predicted_gbr_layer1_songshan,
    'ada_reg_dtr_layer1_songshan': y_predicted_ada_reg_dtr_layer1_songshan,
    'mlpr_layer1_songshan': y_predicted_mlpr_layer1_songshan,
})
final_train_set2_songshan

Unnamed: 0,ada_reg_dtr_layer1_songshan,gbr_layer1_songshan,mlpr_layer1_songshan,rfr_layer1_songshan
0,1.962246e+07,1.896829e+07,1.909281e+07,1.887133e+07
1,2.752119e+07,2.670543e+07,2.403873e+07,2.751019e+07
2,1.681258e+07,1.651548e+07,1.714057e+07,1.768405e+07
3,1.716026e+07,1.599174e+07,1.383091e+07,1.798176e+07
4,2.353474e+07,2.197233e+07,2.295866e+07,2.146735e+07
5,2.035253e+07,2.079099e+07,2.319435e+07,2.021161e+07
6,2.627581e+07,2.370971e+07,2.441849e+07,2.298450e+07
7,1.406260e+07,1.420005e+07,1.558874e+07,1.474636e+07
8,1.304780e+07,1.312996e+07,1.141711e+07,1.186318e+07
9,1.551190e+07,1.681000e+07,1.728039e+07,1.531657e+07


In [853]:
final_train_set2_songshan_nopoly = final_train_set2_songshan.copy()

scaler = StandardScaler()

for column in final_train_set2_songshan_nopoly.columns:
    final_train_set2_songshan_nopoly[column] = scaler.fit_transform(final_train_set2_songshan_nopoly[column])
    
final_train_set2_songshan_nopoly.head()



Unnamed: 0,ada_reg_dtr_layer1_songshan,gbr_layer1_songshan,mlpr_layer1_songshan,rfr_layer1_songshan
0,-0.209393,-0.249353,-0.225918,-0.252133
1,0.595457,0.537197,0.261232,0.630273
2,-0.49571,-0.498702,-0.418204,-0.373407
3,-0.460283,-0.551945,-0.744189,-0.342997
4,0.189253,0.056035,0.15485,0.013035


# blender測試

In [857]:
param_distribs = {
        'n_estimators': randint(low=10, high=150),
        'max_depth': randint(low=1, high=10)
    }

random_forest_reg_nopoly = RandomForestRegressor()
rnd_search_blender_songshan = RandomizedSearchCV(random_forest_reg_nopoly, param_distributions=param_distribs,
                                n_iter=100, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
rnd_search_blender_songshan.fit(final_train_set2_songshan_nopoly, train_set_songshan_label2)

RandomizedSearchCV(cv=5, error_score='raise',
          estimator=RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False),
          fit_params={}, iid=True, n_iter=100, n_jobs=-1,
          param_distributions={'n_estimators': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000000000D1B4E48>, 'max_depth': <scipy.stats._distn_infrastructure.rv_frozen object at 0x00000000129CC278>},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score=True, scoring='neg_mean_squared_error',
          verbose=0)

In [858]:
rnd_search_blender_songshan.best_estimator_

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=9,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=126, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False)

In [872]:
cvres = rnd_search_blender_songshan.cv_results_
for mean_score, params in zip(cvres['mean_test_score'], cvres['params']):
    print(np.sqrt(-mean_score), params)

2613513.98043 {'max_depth': 2, 'n_estimators': 132}
876981.970531 {'max_depth': 6, 'n_estimators': 126}
854215.469157 {'max_depth': 9, 'n_estimators': 98}
853149.541897 {'max_depth': 9, 'n_estimators': 94}
2630615.57476 {'max_depth': 2, 'n_estimators': 86}
921148.809232 {'max_depth': 5, 'n_estimators': 71}
894778.332279 {'max_depth': 7, 'n_estimators': 10}
2693746.46066 {'max_depth': 2, 'n_estimators': 11}
1431792.07452 {'max_depth': 3, 'n_estimators': 32}
857278.534182 {'max_depth': 9, 'n_estimators': 114}
865786.49008 {'max_depth': 6, 'n_estimators': 51}
2634621.81494 {'max_depth': 2, 'n_estimators': 145}
862964.95225 {'max_depth': 7, 'n_estimators': 108}
878273.170633 {'max_depth': 6, 'n_estimators': 144}
858535.746564 {'max_depth': 9, 'n_estimators': 37}
860642.084273 {'max_depth': 9, 'n_estimators': 141}
880652.424046 {'max_depth': 6, 'n_estimators': 46}
1020878.85692 {'max_depth': 4, 'n_estimators': 50}
1428376.57239 {'max_depth': 3, 'n_estimators': 88}
915852.156596 {'max_depth'

In [860]:
param_distribs = {
        'min_samples_split': randint(10, 100),
        'max_depth': randint(5, 15)
    }

dtr_search_blender_songshan = RandomizedSearchCV(DecisionTreeRegressor(), 
                                param_distributions=param_distribs,
                                n_iter=100, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
dtr_search_blender_songshan.fit(final_train_set2_songshan_nopoly, train_set_songshan_label2)

RandomizedSearchCV(cv=5, error_score='raise',
          estimator=DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
           max_leaf_nodes=None, min_impurity_split=1e-07,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, presort=False, random_state=None,
           splitter='best'),
          fit_params={}, iid=True, n_iter=100, n_jobs=-1,
          param_distributions={'min_samples_split': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000000000B291320>, 'max_depth': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000000000B291C18>},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score=True, scoring='neg_mean_squared_error',
          verbose=0)

In [861]:
dtr_search_blender_songshan.best_estimator_

DecisionTreeRegressor(criterion='mse', max_depth=8, max_features=None,
           max_leaf_nodes=None, min_impurity_split=1e-07,
           min_samples_leaf=1, min_samples_split=14,
           min_weight_fraction_leaf=0.0, presort=False, random_state=None,
           splitter='best')

In [874]:
cvres = dtr_search_blender_songshan.cv_results_
for mean_score, params in zip(cvres['mean_test_score'], cvres['params']):
    print(np.sqrt(-mean_score), params)

{'max_depth': 14, 'min_samples_split': 17}
{'max_depth': 10, 'min_samples_split': 98}
{'max_depth': 11, 'min_samples_split': 54}
{'max_depth': 11, 'min_samples_split': 98}
{'max_depth': 11, 'min_samples_split': 73}
{'max_depth': 8, 'min_samples_split': 19}
{'max_depth': 6, 'min_samples_split': 68}
{'max_depth': 11, 'min_samples_split': 10}
{'max_depth': 11, 'min_samples_split': 56}
{'max_depth': 12, 'min_samples_split': 58}
{'max_depth': 10, 'min_samples_split': 19}
{'max_depth': 7, 'min_samples_split': 22}
{'max_depth': 11, 'min_samples_split': 49}
{'max_depth': 5, 'min_samples_split': 13}
{'max_depth': 9, 'min_samples_split': 66}
{'max_depth': 12, 'min_samples_split': 48}
{'max_depth': 6, 'min_samples_split': 16}
{'max_depth': 7, 'min_samples_split': 73}
{'max_depth': 13, 'min_samples_split': 19}
{'max_depth': 12, 'min_samples_split': 15}
{'max_depth': 9, 'min_samples_split': 52}
{'max_depth': 6, 'min_samples_split': 68}
{'max_depth': 11, 'min_samples_split': 38}
{'max_depth': 13, 'm

In [863]:
param_distribs = {
        'max_depth': randint(2, 10, 1),
        'learning_rate': np.arange(0.05, 0.65 ,0.05),
        'n_estimators': randint(low=30, high=150)
    }

gbr_blender_blender_songshan = RandomizedSearchCV(GradientBoostingRegressor(), 
                                param_distributions=param_distribs,
                                n_iter=100, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
gbr_blender_blender_songshan.fit(final_train_set2_songshan_nopoly, train_set_songshan_label2)

RandomizedSearchCV(cv=5, error_score='raise',
          estimator=GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.1, loss='ls', max_depth=3, max_features=None,
             max_leaf_nodes=None, min_impurity_split=1e-07,
             min_samples_leaf=1, min_samples_split=2,
             min_weight_fraction_leaf=0.0, n_estimators=100,
             presort='auto', random_state=None, subsample=1.0, verbose=0,
             warm_start=False),
          fit_params={}, iid=True, n_iter=100, n_jobs=-1,
          param_distributions={'max_depth': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000000011D04A58>, 'learning_rate': array([ 0.05,  0.1 ,  0.15,  0.2 ,  0.25,  0.3 ,  0.35,  0.4 ,  0.45,
        0.5 ,  0.55,  0.6 ]), 'n_estimators': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000000013946BE0>},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score=True, scoring='neg_me

In [864]:
gbr_blender_blender_songshan.best_estimator_

GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.45000000000000001, loss='ls', max_depth=8,
             max_features=None, max_leaf_nodes=None,
             min_impurity_split=1e-07, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=106, presort='auto', random_state=None,
             subsample=1.0, verbose=0, warm_start=False)

In [876]:
cvres = gbr_blender_blender_songshan.cv_results_
for mean_score, params in zip(cvres['mean_test_score'], cvres['params']):
    print(np.sqrt(-mean_score), params)

{'learning_rate': 0.25, 'max_depth': 5, 'n_estimators': 142}
{'learning_rate': 0.10000000000000001, 'max_depth': 3, 'n_estimators': 38}
{'learning_rate': 0.35000000000000003, 'max_depth': 6, 'n_estimators': 101}
{'learning_rate': 0.45000000000000001, 'max_depth': 9, 'n_estimators': 65}
{'learning_rate': 0.050000000000000003, 'max_depth': 7, 'n_estimators': 43}
{'learning_rate': 0.35000000000000003, 'max_depth': 3, 'n_estimators': 61}
{'learning_rate': 0.55000000000000004, 'max_depth': 4, 'n_estimators': 130}
{'learning_rate': 0.55000000000000004, 'max_depth': 9, 'n_estimators': 89}
{'learning_rate': 0.29999999999999999, 'max_depth': 5, 'n_estimators': 91}
{'learning_rate': 0.55000000000000004, 'max_depth': 5, 'n_estimators': 71}
{'learning_rate': 0.050000000000000003, 'max_depth': 10, 'n_estimators': 88}
{'learning_rate': 0.050000000000000003, 'max_depth': 7, 'n_estimators': 110}
{'learning_rate': 0.35000000000000003, 'max_depth': 5, 'n_estimators': 38}
{'learning_rate': 0.200000000000

In [867]:
param_distribs = {
    'hidden_layer_sizes':[(50,50), (60,60), (70,70), (80,80), (90,90),(100,100),
                             (60,50), (70,60),(80,70),(90,80), (100,90),(70,50),(80,60),
                             (90,70),(100,80)],
    'alpha':[0.1, 0.01, 0.001, 0.0001, 0.0001, 0.00001, 0.000001],
    'learning_rate_init':[0.1, 0.05, 0.01,  0.005, 0.001, 0.0005, 0.0001]
}

mlpr_search_blender_songshan = RandomizedSearchCV(MLPRegressor(max_iter = 300),
                                       param_distributions = param_distribs,
                                       n_iter=60, cv=5, scoring = 'neg_mean_squared_error', n_jobs=-1)
mlpr_search_blender_songshan.fit(final_train_set2_songshan_nopoly, train_set_songshan_label2)

RandomizedSearchCV(cv=5, error_score='raise',
          estimator=MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=300, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False),
          fit_params={}, iid=True, n_iter=60, n_jobs=-1,
          param_distributions={'hidden_layer_sizes': [(50, 50), (60, 60), (70, 70), (80, 80), (90, 90), (100, 100), (60, 50), (70, 60), (80, 70), (90, 80), (100, 90), (70, 50), (80, 60), (90, 70), (100, 80)], 'alpha': [0.1, 0.01, 0.001, 0.0001, 0.0001, 1e-05, 1e-06], 'learning_rate_init': [0.1, 0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001]},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score=True, sc

In [868]:
mlpr_search_blender_songshan.best_estimator_

MLPRegressor(activation='relu', alpha=1e-06, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(90, 70), learning_rate='constant',
       learning_rate_init=0.05, max_iter=300, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [880]:
cvres = mlpr_search_blender_songshan.cv_results_
for mean_score, params in zip(cvres['mean_test_score'], cvres['params']):
    print(np.sqrt(-mean_score), params)

988841.830041 {'learning_rate_init': 0.05, 'hidden_layer_sizes': (80, 80), 'alpha': 1e-05}
990319.984876 {'learning_rate_init': 0.1, 'hidden_layer_sizes': (90, 90), 'alpha': 1e-06}
10571528.6058 {'learning_rate_init': 0.005, 'hidden_layer_sizes': (70, 70), 'alpha': 1e-06}
22867622.7158 {'learning_rate_init': 0.001, 'hidden_layer_sizes': (80, 70), 'alpha': 1e-06}
22747127.2289 {'learning_rate_init': 0.001, 'hidden_layer_sizes': (100, 90), 'alpha': 0.0001}
993971.718947 {'learning_rate_init': 0.05, 'hidden_layer_sizes': (100, 90), 'alpha': 0.0001}
986440.892023 {'learning_rate_init': 0.1, 'hidden_layer_sizes': (100, 80), 'alpha': 0.1}
987458.61777 {'learning_rate_init': 0.05, 'hidden_layer_sizes': (100, 80), 'alpha': 1e-05}
22859112.9698 {'learning_rate_init': 0.001, 'hidden_layer_sizes': (90, 70), 'alpha': 1e-05}
22977393.0868 {'learning_rate_init': 0.001, 'hidden_layer_sizes': (50, 50), 'alpha': 1e-06}
3608809.41557 {'learning_rate_init': 0.01, 'hidden_layer_sizes': (90, 80), 'alpha': 

In [None]:
# 開始找 blender

In [918]:
rfr_blender_songshan = RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=4,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=83, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False)
rfr_blender_songshan.fit(final_train_set2_songshan_nopoly, train_set_songshan_label2)
np.sqrt(mean_squared_error(train_set_songshan_label2, rfr_blender_songshan.predict(final_train_set2_songshan_nopoly)))

895105.4016498453

In [919]:
scores = cross_val_score(rfr_blender_songshan, final_train_set2_songshan_nopoly, train_set_songshan_label2, scoring="neg_mean_squared_error", cv=10)
rmse_scores = np.sqrt(-scores)
rmse_scores.mean()

1013678.2872436208

In [1024]:
gbr_blender_songshan = GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.1, loss='ls', max_depth=3,
             max_features=None, max_leaf_nodes=None,
             min_impurity_split=1e-07, min_samples_leaf=1,
             min_samples_split=80, min_weight_fraction_leaf=0.0,
             n_estimators=31, presort='auto', random_state=None,
             subsample=1.0, verbose=0, warm_start=False)
gbr_blender_songshan.fit(final_train_set2_songshan_nopoly, train_set_songshan_label2)
np.sqrt(mean_squared_error(train_set_songshan_label2, gbr_blender_songshan.predict(final_train_set2_songshan_nopoly)))

855633.69423001574

In [1025]:
scores = cross_val_score(gbr_blender_songshan, final_train_set2_songshan_nopoly, train_set_songshan_label2, scoring="neg_mean_squared_error", cv=10)
rmse_scores = np.sqrt(-scores)
rmse_scores.mean()

977206.5951234618

In [1094]:
ada_reg_dtr_blender_songshan = AdaBoostRegressor(
          DecisionTreeRegressor(criterion='mse', max_depth=4, max_features=None,
           max_leaf_nodes=None, min_impurity_split=1e-07,
           min_samples_leaf=1, min_samples_split=90,
           min_weight_fraction_leaf=0.0, presort=False, random_state=None,
           splitter='best'), n_estimators = 30,
          learning_rate = 0.15
)
ada_reg_dtr_blender_songshan.fit(final_train_set2_songshan_nopoly, train_set_songshan_label2)
np.sqrt(mean_squared_error(train_set_songshan_label2, ada_reg_dtr_blender_songshan.predict(final_train_set2_songshan_nopoly)))

855846.87333825708

In [1095]:
scores = cross_val_score(ada_reg_dtr_blender_songshan, final_train_set2_songshan_nopoly, train_set_songshan_label2, scoring="neg_mean_squared_error", cv=10)
rmse_scores = np.sqrt(-scores)
rmse_scores.mean()

973381.43243151496

In [1114]:
mlpr_blender_songshan = MLPRegressor(activation='relu', alpha=1e-06, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(90, 70), learning_rate='constant',
       learning_rate_init=0.1, max_iter=1000, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)
mlpr_blender_songshan.fit(final_train_set2_songshan_nopoly, train_set_songshan_label2)
np.sqrt(mean_squared_error(train_set_songshan_label2, mlpr_blender_songshan.predict(final_train_set2_songshan_nopoly)))

974373.41280396457

In [1115]:
scores = cross_val_score(mlpr_blender_songshan, final_train_set2_songshan_nopoly, train_set_songshan_label2, scoring="neg_mean_squared_error", cv=10)
rmse_scores = np.sqrt(-scores)
rmse_scores.mean()

977877.12260212493

# 進入test

In [1116]:
y_predicted_rfr_layer1_songshan_test = rfr_layer1_songshan.predict(total_test_set_songshan)

y_predicted_gbr_layer1_songshan_test = gbr_layer1_songshan.predict(total_test_set_songshan)

y_predicted_ada_reg_dtr_layer1_songshan_test = ada_reg_dtr_layer1_songshan.predict(total_test_set_songshan)

y_predicted_mlpr_layer1_songshan_test = mlpr_layer1_songshan.predict(total_test_set_songshan)

In [1118]:
final_test_set_songshan = pd.DataFrame({
    'rfr_layer1_songshan': y_predicted_rfr_layer1_songshan_test,
    'gbr_layer1_songshan': y_predicted_gbr_layer1_songshan_test,
    'ada_reg_dtr_layer1_songshan': y_predicted_ada_reg_dtr_layer1_songshan_test,
    'mlpr_layer1_songshan': y_predicted_mlpr_layer1_songshan_test,
})
final_test_set_songshan.head()

Unnamed: 0,ada_reg_dtr_layer1_songshan,gbr_layer1_songshan,mlpr_layer1_songshan,rfr_layer1_songshan
0,22458560.0,21095740.0,19881560.0,20822850.0
1,7436710.0,7607244.0,6295716.0,7096813.0
2,46811140.0,43448950.0,43524820.0,45726760.0
3,16935580.0,16349160.0,16357710.0,16594360.0
4,39444030.0,36430450.0,39420970.0,36755000.0


In [1119]:
final_test_set_songshan_nopoly = final_test_set_songshan.copy()

scaler = StandardScaler()

for column in final_test_set_songshan_nopoly.columns:
    final_test_set_songshan_nopoly[column] = scaler.fit_transform(final_test_set_songshan_nopoly[column])
    
final_test_set_songshan_nopoly.head()



Unnamed: 0,ada_reg_dtr_layer1_songshan,gbr_layer1_songshan,mlpr_layer1_songshan,rfr_layer1_songshan
0,0.091928,-0.029112,-0.139297,-0.049884
1,-1.467059,-1.42409,-1.494383,-1.462983
2,2.61927,2.282654,2.218943,2.513981
3,-0.481254,-0.520003,-0.490774,-0.485209
4,1.854702,1.556802,1.809614,1.590335


In [1120]:
#淘汰
np.sqrt(mean_squared_error(test_set_songshan_label, rfr_blender_songshan.predict(final_test_set_songshan_nopoly)))

1136127.9209814814

In [1121]:
#考慮
np.sqrt(mean_squared_error(test_set_songshan_label, gbr_blender_songshan.predict(final_test_set_songshan_nopoly)))

1044091.8822383484

In [1122]:
#淘汰
np.sqrt(mean_squared_error(test_set_songshan_label, ada_reg_dtr_blender_songshan.predict(final_test_set_songshan_nopoly)))

1125485.4745221422

In [1123]:
#考慮
np.sqrt(mean_squared_error(test_set_songshan_label, mlpr_blender_songshan.predict(final_test_set_songshan_nopoly)))

1055460.4223014521

# 將訓練好的模型保存下來

In [1126]:
joblib.dump(gbr_final_songshan, 'gbr_final_songshan.pkl')
joblib.dump(ada_reg_dtr_final_songshan, 'ada_reg_dtr_final_songshan.pkl')
joblib.dump(rfr_layer1_songshan, 'rfr_layer1_songshan.pkl')
joblib.dump(gbr_layer1_songshan, 'gbr_layer1_songshan.pkl')
joblib.dump(ada_reg_dtr_layer1_songshan, 'ada_reg_dtr_layer1_songshan.pkl')
joblib.dump(mlpr_layer1_songshan, 'mlpr_layer1_songshan.pkl')
joblib.dump(gbr_blender_songshan, 'gbr_blender_songshan.pkl')
joblib.dump(mlpr_blender_songshan, 'mlpr_blender_songshan.pkl')

['mlpr_blender_songshan.pkl']

# 信義

In [None]:
sgd_sinyi = SGDRegressor(penalty = 'elasticnet')
sgd_sinyi.fit(total_train_set_sinyi, train_set_sinyi_label)
np.sqrt(mean_squared_error(train_set_sinyi_label, rfr_sinyi.predict(total_train_set_sinyi)))

In [None]:
rfr_sinyi = RandomForestRegressor()
rfr_sinyi.fit(total_train_set_sinyi, train_set_sinyi_label)
np.sqrt(mean_squared_error(train_set_sinyi_label, rfr_sinyi.predict(total_train_set_sinyi)))

In [None]:
svr_sinyi = SVR(kernel = 'rbf')
svr_sinyi.fit(total_train_set_sinyi, train_set_sinyi_label)
np.sqrt(mean_squared_error(train_set_sinyi_label, svr_sinyi.predict(total_train_set_sinyi)))

In [None]:
gbr_sinyi = GradientBoostingRegressor()
gbr_sinyi.fit(total_train_set_sinyi, train_set_sinyi_label)
np.sqrt(mean_squared_error(train_set_sinyi_label, gbr_sinyi.predict(total_train_set_sinyi)))

In [None]:
mlpr_sinyi = MLPRegressor()
mlpr_sinyi.fit(total_train_set_sinyi, train_set_sinyi_label)
np.sqrt(mean_squared_error(train_set_sinyi_label, mlpr_sinyi.predict(total_train_set_sinyi)))

# 大安

In [None]:
sgd_daan = SGDRegressor(penalty = 'elasticnet')
sgd_daan.fit(total_train_set_daan, train_set_daan_label)
np.sqrt(mean_squared_error(train_set_daan_label, sgd_daan.predict(total_train_set_daan)))

In [None]:
rfr_daan = RandomForestRegressor()
rfr_daan.fit(total_train_set_daan, train_set_daan_label)
np.sqrt(mean_squared_error(train_set_daan_label, rfr_daan.predict(total_train_set_daan)))

In [None]:
svr_daan = SVR(kernel = 'rbf')
svr_daan.fit(total_train_set_daan, train_set_daan_label)
np.sqrt(mean_squared_error(train_set_daan_label, svr_daan.predict(total_train_set_daan)))

In [None]:
gbr_daan = GradientBoostingRegressor()
gbr_daan.fit(total_train_set_daan, train_set_daan_label)
np.sqrt(mean_squared_error(train_set_daan_label, gbr_daan.predict(total_train_set_daan)))

In [None]:
mlpr_daan = MLPRegressor()
mlpr_daan.fit(total_train_set_daan, train_set_daan_label)
np.sqrt(mean_squared_error(train_set_daan_label, mlpr_daan.predict(total_train_set_daan)))

# 中山

In [None]:
sgd_jhongshan = SGDRegressor(penalty = 'elasticnet')
sgd_jhongshan.fit(total_train_set_jhongshan, train_set_jhongshan_label)
np.sqrt(mean_squared_error(train_set_jhongshan_label, sgd_jhongshan.predict(total_train_set_jhongshan)))

In [None]:
rfr_jhongshan = RandomForestRegressor()
rfr_jhongshan.fit(total_train_set_jhongshan, train_set_jhongshan_label)
np.sqrt(mean_squared_error(train_set_jhongshan_label, rfr_jhongshan.predict(total_train_set_jhongshan)))

In [None]:
svr_jhongshan = SVR(kernel = 'rbf')
svr_jhongshan.fit(total_train_set_jhongshan, train_set_jhongshan_label)
np.sqrt(mean_squared_error(train_set_jhongshan_label, svr_jhongshan.predict(total_train_set_jhongshan)))

In [None]:
gbr_jhongshan = GradientBoostingRegressor()
gbr_jhongshan.fit(total_train_set_jhongshan, train_set_jhongshan_label)
np.sqrt(mean_squared_error(train_set_jhongshan_label, gbr_jhongshan.predict(total_train_set_jhongshan)))

In [None]:
mlpr_jhongshan = MLPRegressor()
mlpr_jhongshan.fit(total_train_set_jhongshan, train_set_jhongshan_label)
np.sqrt(mean_squared_error(train_set_jhongshan_label, mlpr_jhongshan.predict(total_train_set_jhongshan)))

# 中正

In [None]:
sgd_jhongjheng = SGDRegressor(penalty = 'elasticnet')
sgd_jhongjheng.fit(total_train_set_jhongjheng, train_set_jhongjheng_label)
np.sqrt(mean_squared_error(train_set_jhongjheng_label, sgd_jhongjheng.predict(total_train_set_jhongjheng)))

In [None]:
rfr_jhongjheng = RandomForestRegressor()
rfr_jhongjheng.fit(total_train_set_jhongjheng, train_set_jhongjheng_label)
np.sqrt(mean_squared_error(train_set_jhongjheng_label, rfr_jhongjheng.predict(total_train_set_jhongjheng)))

In [None]:
svr_jhongjheng = SVR(kernel = 'rbf')
svr_jhongjheng.fit(total_train_set_jhongjheng, train_set_jhongjheng_label)
np.sqrt(mean_squared_error(train_set_jhongjheng_label, svr_jhongjheng.predict(total_train_set_jhongjheng)))

In [None]:
gbr_jhongjheng = GradientBoostingRegressor()
gbr_jhongjheng.fit(total_train_set_jhongjheng, train_set_jhongjheng_label)
np.sqrt(mean_squared_error(train_set_jhongjheng_label, mgbr_jhongjheng.predict(total_train_set_jhongjheng)))

In [None]:
mlpr_jhongjheng = MLPRegressor()
mlpr_jhongjheng.fit(total_train_set_jhongjheng, train_set_jhongjheng_label)
np.sqrt(mean_squared_error(train_set_jhongjheng_label, mlpr_jhongjheng.predict(total_train_set_jhongjheng)))

# 大同

In [None]:
sgd_datong = SGDRegressor(penalty = 'elasticnet')
sgd_datong.fit(total_train_set_datong, train_set_datong_label)
np.sqrt(mean_squared_error(train_set_datong_label, sgd_datong.predict(total_train_set_datong)))

In [None]:
rfr_datong = RandomForestRegressor()
rfr_datong.fit(total_train_set_datong, train_set_datong_label)
np.sqrt(mean_squared_error(train_set_datong_label, rfr_datong.predict(total_train_set_datong)))

In [None]:
svr_datong = SVR(kernel = 'rbf')
svr_datong.fit(total_train_set_datong, train_set_datong_label)
np.sqrt(mean_squared_error(train_set_datong_label, svr_datong.predict(total_train_set_datong)))

In [None]:
gbr_datong = GradientBoostingRegressor()
gbr_datong.fit(total_train_set_datong, train_set_datong_label)
np.sqrt(mean_squared_error(train_set_datong_label, gbr_datong.predict(total_train_set_datong)))

In [None]:
mlpr_datong = MLPRegressor()
mlpr_datong.fit(total_train_set_datong, train_set_datong_label)
np.sqrt(mean_squared_error(train_set_datong_label, mlpr_datong.predict(total_train_set_datong)))

# 萬華

In [None]:
sgd_wanhua = SGDRegressor(penalty = 'elasticnet')
sgd_wanhua.fit(total_train_set_wanhua, train_set_wanhua_label)
np.sqrt(mean_squared_error(train_set_wanhua_label, sgd_wanhua.predict(total_train_set_wanhua)))

In [None]:
rfr_wanhua = RandomForestRegressor()
rfr_wanhua.fit(total_train_set_wanhua, train_set_wanhua_label)
np.sqrt(mean_squared_error(train_set_wanhua_label, rfr_wanhua.predict(total_train_set_wanhua)))

In [None]:
svr_wanhua = SVR(kernel = 'rbf')
svr_wanhua.fit(total_train_set_wanhua, train_set_wanhua_label)
np.sqrt(mean_squared_error(train_set_wanhua_label, svr_wanhua.predict(total_train_set_wanhua)))

In [None]:
gbr_wanhua = GradientBoostingRegressor()
gbr_wanhua.fit(total_train_set_wanhua, train_set_wanhua_label)
np.sqrt(mean_squared_error(train_set_wanhua_label, gbr_wanhua.predict(total_train_set_wanhua)))

In [None]:
mlpr_wanhua = MLPRegressor()
mlpr_wanhua.fit(total_train_set_wanhua, train_set_wanhua_label)
np.sqrt(mean_squared_error(train_set_wanhua_label, mlpr_wanhua.predict(total_train_set_wanhua)))

# 文山

In [None]:
sgd_wunshan = SGDRegressor(penalty = 'elasticnet')
sgd_wunshan.fit(total_train_set_wunshan, train_set_wunshan_label)
np.sqrt(mean_squared_error(train_set_wunshan_label, sgd_wunshan.predict(total_train_set_wunshan)))

In [None]:
rfr_wunshan = RandomForestRegressor()
rfr_wunshan.fit(total_train_set_wunshan, train_set_wunshan_label)
np.sqrt(mean_squared_error(train_set_wunshan_label, rfr_wunshan.predict(total_train_set_wunshan)))

In [None]:
svr_wunshan = SVR(kernel = 'rbf')
svr_wunshan.fit(total_train_set_wunshan, train_set_wunshan_label)
np.sqrt(mean_squared_error(train_set_wunshan_label, svr_wunshan.predict(total_train_set_wunshan)))

In [None]:
gbr_wunshan = GradientBoostingRegressor()
gbr_wunshan.fit(total_train_set_wunshan, train_set_wunshan_label)
np.sqrt(mean_squared_error(train_set_wunshan_label, gbr_wunshan.predict(total_train_set_wanhua)))

In [None]:
mlpr_wunshan = MLPRegressor()
mlpr_wunshan.fit(total_train_set_wunshan, train_set_wunshan_label)
np.sqrt(mean_squared_error(train_set_wunshan_label, mlpr_wunshan.predict(total_train_set_wunshan)))

# 南港

In [None]:
sgd_nangang = SGDRegressor(penalty = 'elasticnet')
sgd_nangang.fit(total_train_set_nangang, train_set_nangang_label)
np.sqrt(mean_squared_error(train_set_nangang_label, sgd_wunshan.predict(total_train_set_nangang)))

In [None]:
rfr_nangang = RandomForestRegressor()
rfr_nangang.fit(total_train_set_nangang, train_set_nangang_label)
np.sqrt(mean_squared_error(train_set_nangang_label, rfr_nangang.predict(total_train_set_nangang)))

In [None]:
svr_nangang = SVR(kernel = 'rbf')
svr_nangang.fit(total_train_set_nangang, train_set_nangang_label)
np.sqrt(mean_squared_error(train_set_nangang_label, svr_nangang.predict(total_train_set_nangang)))

In [None]:
gbr_nangang = GradientBoostingRegressor()
gbr_nangang.fit(total_train_set_nangang, train_set_nangang_label)
np.sqrt(mean_squared_error(train_set_nangang_label, gbr_nangang.predict(total_train_set_nangang)))

In [None]:
mlpr_nangang = MLPRegressor()
mlpr_nangang.fit(total_train_set_nangang, train_set_nangang_label)
np.sqrt(mean_squared_error(train_set_nangang_label, mlpr_nangang.predict(total_train_set_nangang)))

# 內湖

In [None]:
sgd_neihu = SGDRegressor(penalty = 'elasticnet')
sgd_neihu.fit(total_train_set_neihu, train_set_neihu_label)
np.sqrt(mean_squared_error(train_set_neihu_label, sgd_neihu.predict(total_train_set_neihu)))

In [None]:
rfr_neihu = RandomForestRegressor()
rfr_neihu.fit(total_train_set_neihu, train_set_neihu_label)
np.sqrt(mean_squared_error(train_set_neihu_label, rfr_neihu.predict(total_train_set_neihu)))

In [None]:
svr_neihu = SVR(kernel = 'rbf')
svr_neihu.fit(total_train_set_neihu, train_set_neihu_label)
np.sqrt(mean_squared_error(train_set_neihu_label, svr_neihu.predict(total_train_set_neihu)))

In [None]:
gbr_neihu = GradientBoostingRegressor()
gbr_neihu.fit(total_train_set_neihu, train_set_neihu_label)
np.sqrt(mean_squared_error(train_set_neihu_label, gbr_neihu.predict(total_train_set_neihu)))

In [None]:
mlpr_neihu = MLPRegressor()
mlpr_neihu.fit(total_train_set_neihu, train_set_neihu_label)
np.sqrt(mean_squared_error(train_set_neihu_label, mlpr_neihu.predict(total_train_set_neihu)))

# 士林

In [None]:
sgd_shihlin = SGDRegressor(penalty = 'elasticnet')
sgd_shihlin.fit(total_train_set_shihlin, train_set_shihlin_label)
np.sqrt(mean_squared_error(train_set_shihlin_label, sgd_shihlin.predict(total_train_set_shihlin)))

In [None]:
rfr_shihlin = RandomForestRegressor()
rfr_shihlin.fit(total_train_set_shihlin, train_set_shihlin_label)
np.sqrt(mean_squared_error(train_set_shihlin_label, rfr_shihlin.predict(total_train_set_shihlin)))

In [None]:
svr_shihlin = SVR(kernel = 'rbf')
svr_shihlin.fit(total_train_set_shihlin, train_set_shihlin_label)
np.sqrt(mean_squared_error(train_set_shihlin_label, svr_neihu.predict(total_train_set_shihlin)))

In [None]:
gbr_shihlin = GradientBoostingRegressor()
gbr_shihlin.fit(total_train_set_shihlin, train_set_shihlin_label)
np.sqrt(mean_squared_error(train_set_shihlin_label, gbr_shihlin.predict(total_train_set_shihlin)))

In [None]:
mlpr_shihlin = MLPRegressor()
mlpr_shihlin.fit(total_train_set_shihlin, train_set_shihlin_label)
np.sqrt(mean_squared_error(train_set_shihlin_label, mlpr_shihlin.predict(total_train_set_shihlin)))

# 北投

In [None]:
sgd_beitou = SGDRegressor(penalty = 'elasticnet')
sgd_beitou.fit(total_train_set_beitou, train_set_beitou_label)
np.sqrt(mean_squared_error(train_set_beitou_label, sgd_beitou.predict(total_train_set_beitou)))

In [None]:
rfr_beitou = RandomForestRegressor()
rfr_beitou.fit(total_train_set_beitou, train_set_beitou_label)
np.sqrt(mean_squared_error(train_set_beitou_label, rfr_beitou.predict(total_train_set_beitou)))

In [None]:
svr_beitou = SVR(kernel = 'rbf')
svr_beitou.fit(total_train_set_beitou, train_set_beitou_label)
np.sqrt(mean_squared_error(train_set_beitou_label, svr_beitou.predict(total_train_set_beitou)))

In [None]:
gbr_beitou = GradientBoostingRegressor()
gbr_beitou.fit(total_train_set_beitou, train_set_beitou_label)
np.sqrt(mean_squared_error(train_set_beitou_label, gbr_beitou.predict(total_train_set_beitou)))

In [None]:
mlpr_beitou = MLPRegressor()
mlpr_beitou.fit(total_train_set_beitou, train_set_beitou_label)
np.sqrt(mean_squared_error(train_set_beitou_label, mlpr_shihlin.predict(total_train_set_beitou)))