正規化の効果を、標準化とmin-maxスケーリングを例に確認する。

In [1]:
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.cross_decomposition import PLSRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score 
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Load the California Housing data from the local CSV.
# The previous encoding name 'ansi' is an alias of the Windows-only 'mbcs' codec,
# so the read failed with LookupError on Linux/macOS. UTF-8 is used instead.
# NOTE(review): the columns shown below are ASCII-only, for which UTF-8 decodes
# identically; confirm the file contains no non-ASCII bytes.
df = pd.read_csv('./input/04_California Housing.csv', encoding='utf-8')
df.columns

Index(['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup',
       'Latitude', 'Longitude', 'Price'],
      dtype='object')

In [3]:
# Explanatory variables: every column of `df` except the target.
X = df.loc[:, [
    'MedInc', 'HouseAge', 'AveRooms', 'AveBedrms',
    'Population', 'AveOccup', 'Latitude', 'Longitude',
]]
# Target variable.
y = df.loc[:, 'Price']

# Summary statistics of the features (shown as the cell output).
X.describe()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
count,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0
mean,3.870671,28.639486,5.429,1.096675,1425.476744,3.070655,35.631861,-119.569704
std,1.899822,12.585558,2.474173,0.473911,1132.462122,10.38605,2.135952,2.003532
min,0.4999,1.0,0.846154,0.333333,3.0,0.692308,32.54,-124.35
25%,2.5634,18.0,4.440716,1.006079,787.0,2.429741,33.93,-121.8
50%,3.5348,29.0,5.229129,1.04878,1166.0,2.818116,34.26,-118.49
75%,4.74325,37.0,6.052381,1.099526,1725.0,3.282261,37.71,-118.01
max,15.0001,52.0,141.909091,34.066667,35682.0,1243.333333,41.95,-114.31


In [4]:
# Estimators that are scored with and without feature normalization.
models = [
    GradientBoostingRegressor(max_depth=10, n_estimators=300, random_state=12),
    # Seeded like the gradient-boosting model so that reruns of the notebook
    # give the same MLP scores (the MLP was previously unseeded).
    MLPRegressor(hidden_layer_sizes=(100, 100, 100, 100), random_state=12),
]
# Short labels, index-aligned with `models`; used as row labels of the results table.
model_sign = ['GB ', 'MLP']

In [5]:
def learn(X, y, model=None):
    """Fit a regressor on an 80/20 train/test split and return its test-set R^2.

    Parameters
    ----------
    X : features, passed to ``train_test_split`` and then to ``model.fit``.
    y : target values aligned with ``X``.
    model : object with ``fit`` and ``predict``, optional
        Estimator to train; it is fitted in place. When omitted, the
        module-level variable ``model`` is used, so existing ``learn(X, y)``
        calls keep working.

    Returns
    -------
    The ``r2_score`` of the predictions on the held-out 20 % of the data.

    Raises
    ------
    NameError
        If ``model`` is not passed and no module-level ``model`` is defined.
    """
    if model is None:
        # Backward-compatible fallback: callers historically bound a global
        # named `model` (e.g. as a loop variable) before calling learn(X, y).
        try:
            model = globals()['model']
        except KeyError:
            raise NameError(
                "learn() needs an estimator: pass model=... or define a global 'model'"
            ) from None

    # Fixed random_state so every call evaluates on the same split.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=1
    )

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)  # predictions for the held-out test data
    return r2_score(y_test, y_pred)

In [6]:
# Feature normalization: exactly one of the three variants below is active.

# 1) Standardization with scikit-learn
# sc = preprocessing.StandardScaler()
# X_norm = sc.fit_transform(X)

# 2) Standardization without a library: per-column (x - mean) / population std (ddof=0).
# NOTE(review): the statistics are computed on all rows of X, i.e. before any
# train/test split is made.
X_norm = X.sub(X.mean(axis=0), axis='columns').div(X.std(axis=0, ddof=0), axis='columns')

# 3) Min-max scaling with scikit-learn
# mms = preprocessing.MinMaxScaler()
# X_norm = mms.fit_transform(X)

In [7]:
## The target variable normally does not need to be normalized.
## The disabled lines below are the target-side counterparts of the feature
## normalization variants (StandardScaler, manual standardization, MinMaxScaler).
# y = np.array(y)
# y = y.reshape(-1, 1)
# y_norm = sc.fit_transform(y)
# y_norm = (y - y.mean(axis=0)) / y.std(axis = 0,ddof = 0)
# y_norm = mms.fit_transform(y)

In [8]:
# Test-set R^2 per model: raw features vs. normalized features.
R2_list, R2_norm_list = [], []

count = 0  # number of models evaluated so far; stays 0 if `models` is empty
for count, (model, ms) in enumerate(zip(models, model_sign), start=1):
    # `learn` picks up the module-level `model` bound by this loop, so the
    # loop variable must keep that name.
    r2 = learn(X, y)
    R2_list.append(r2)

    r2_norm = learn(X_norm, y)
    R2_norm_list.append(r2_norm)

    print(f'try {count} completed' )

try 1 completed
try 2 completed




In [9]:
# Results table: one row per model label, one column per feature treatment
# (test R^2 without / with normalization), displayed rounded to two decimals.
df_norm = pd.DataFrame(
    {'normなし': R2_list, 'normあり': R2_norm_list},
    index=model_sign,
)
display(df_norm.round(2))

Unnamed: 0,normなし,normあり
GB,0.84,0.84
MLP,-4.03,0.79
