In [3]:
# score 확인 함수

def get_reg_score(test_y, pred_y):
    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

    # 평균절대오차 : MAE
    mae = mean_absolute_error(test_y, pred_y)

    # 평균제곱오차 : MSE
    mse = mean_squared_error(test_y, pred_y)
    
    # 평균제곱근오차 : RMSE
    rmse = mean_squared_error(test_y, pred_y, squared=False)

    # 결정계수 : R2
    r2 = r2_score(test_y, pred_y)
    
    print(f'MAE : {mae:.4f}, MSE : {mse:.4f}, RMSE : {rmse:.4f}, R-squared : {r2:.4f}')

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 

file='../project/player_real_final.csv'
df=pd.read_csv(file)

df=df[['overall', 'potential','value_euro']]

feature=df[['overall', 'potential']]
target=df['value_euro']


from sklearn.model_selection import train_test_split
train_x, test_x, train_y, test_y = train_test_split(feature, target, 
                                                    test_size=0.2, random_state=7 )
train_x, val_x, train_y, val_y = train_test_split(train_x, train_y, 
                                                    test_size=0.2, random_state=7 )


### HistGradientBoostingRegressor

In [42]:
from sklearn.ensemble import HistGradientBoostingRegressor

hgb_model = HistGradientBoostingRegressor(max_iter=100, random_state=42)
hgb_model.fit(train_x, train_y)

pred_y=hgb_model.predict(train_x)
print('train => ', end='' )
get_reg_score(train_y, pred_y)

pred_y=hgb_model.predict(val_x)
print(' val  => ', end='')
get_reg_score(val_y, pred_y)

pred_y=hgb_model.predict(test_x)
print(' test => ', end='')
get_reg_score(test_y, pred_y)

train => MAE : 526504.9492, MSE : 5584021428334.2256, RMSE : 2363053.4121, R-squared : 0.9236
 val  => MAE : 499245.9930, MSE : 3467572946260.9238, RMSE : 1862142.0317, R-squared : 0.9440
 test => MAE : 553276.3735, MSE : 3685520620103.1240, RMSE : 1919770.9812, R-squared : 0.9511


### DecisionTreeRegressor

In [43]:
from sklearn.tree import DecisionTreeRegressor

dt_model = DecisionTreeRegressor()
dt_model.fit(train_x,train_y)

pred_y=dt_model.predict(train_x)
print('train => ', end='' )
get_reg_score(train_y, pred_y)

pred_y=dt_model.predict(val_x)
print(' val  => ', end='')
get_reg_score(val_y, pred_y)

pred_y=dt_model.predict(test_x)
print(' test => ', end='')
get_reg_score(test_y, pred_y)

train => MAE : 455630.6759, MSE : 3476552463125.5361, RMSE : 1864551.5448, R-squared : 0.9524
 val  => MAE : 472564.8313, MSE : 2983598146527.6348, RMSE : 1727309.5109, R-squared : 0.9518
 test => MAE : 600496.2853, MSE : 8662971365216.1250, RMSE : 2943292.6061, R-squared : 0.8851


### BaggingRegressor

In [5]:
from sklearn.ensemble import BaggingRegressor

bg_model = BaggingRegressor()
bg_model.fit(train_x,train_y)

pred_y=bg_model.predict(train_x)
print('train => ', end='' )
get_reg_score(train_y, pred_y)

pred_y=bg_model.predict(val_x)
print(' val  => ', end='')
get_reg_score(val_y, pred_y)

pred_y=bg_model.predict(test_x)
print(' test => ', end='')
get_reg_score(test_y, pred_y)

train => MAE : 507292.3128, MSE : 4327402739445.0259, RMSE : 2080241.0292, R-squared : 0.9440
 val  => MAE : 450727.1404, MSE : 2067973321402.4341, RMSE : 1438044.9650, R-squared : 0.9587
 test => MAE : 498333.7512, MSE : 5343111249825.3389, RMSE : 2311517.0884, R-squared : 0.9252


### KNeighborsRegressor

In [45]:
from sklearn.neighbors import KNeighborsRegressor

knn_model=KNeighborsRegressor()
knn_model.fit(train_x, train_y)

pred_y=knn_model.predict(train_x)
print('train => ', end='' )
get_reg_score(train_y, pred_y)

pred_y=knn_model.predict(val_x)
print(' val  => ', end='')
get_reg_score(val_y, pred_y)

pred_y=knn_model.predict(test_x)
print(' test => ', end='')
get_reg_score(test_y, pred_y)

train => MAE : 533281.8521, MSE : 5410496086699.8135, RMSE : 2326047.3096, R-squared : 0.9260
 val  => MAE : 508067.9370, MSE : 3490075114333.0571, RMSE : 1868174.2730, R-squared : 0.9437
 test => MAE : 599156.0636, MSE : 5667135586812.4590, RMSE : 2380574.6337, R-squared : 0.9249


### MLPRegressor

In [46]:
help(MLPRegressor)

Help on class MLPRegressor in module sklearn.neural_network._multilayer_perceptron:

class MLPRegressor(sklearn.base.RegressorMixin, BaseMultilayerPerceptron)
 |  MLPRegressor(hidden_layer_sizes=(100,), activation='relu', *, solver='adam', alpha=0.0001, batch_size='auto', learning_rate='constant', learning_rate_init=0.001, power_t=0.5, max_iter=200, shuffle=True, random_state=None, tol=0.0001, verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08, n_iter_no_change=10, max_fun=15000)
 |  
 |  Multi-layer Perceptron regressor.
 |  
 |  This model optimizes the squared error using LBFGS or stochastic gradient
 |  descent.
 |  
 |  .. versionadded:: 0.18
 |  
 |  Parameters
 |  ----------
 |  hidden_layer_sizes : array-like of shape(n_layers - 2,), default=(100,)
 |      The ith element represents the number of neurons in the ith
 |      hidden layer.
 |  
 |  activation : {'identity', 

In [47]:
from sklearn.neural_network import MLPRegressor

# MLPRegressor 모델 생성 및 훈련
mlp_model = MLPRegressor(max_iter=10000) # hidden_layer_sizes=(100, 50), activation='relu', solver='adam', alpha=0.0001, max_iter=1000
# mlp_model.set_params(max_iter=10000)
mlp_model.fit(train_x, train_y)

pred_y=mlp_model.predict(train_x)
print('train => ', end='' )
get_reg_score(train_y, pred_y)

pred_y=mlp_model.predict(val_x)
print(' val  => ', end='')
get_reg_score(val_y, pred_y)

pred_y=mlp_model.predict(test_x)
print(' test => ', end='')
get_reg_score(test_y, pred_y)

train => MAE : 3749006.7943, MSE : 68132293448787.5469, RMSE : 8254228.8222, R-squared : 0.0677
 val  => MAE : 3583819.1259, MSE : 57924255960801.1797, RMSE : 7610798.6415, R-squared : 0.0650
 test => MAE : 3881581.1680, MSE : 70123366403758.7188, RMSE : 8373969.5727, R-squared : 0.0702




### 최종모델 => BaggingRegressor
- csv 파일로 저장

In [53]:
file='../project/players_23.csv'
df_=pd.read_csv(file)

In [54]:
feature=df_[['overall', 'potential']]

In [55]:
# bagging
bg_pred_y=bg_model.predict(feature)
df_['value_bg']=bg_pred_y
df_.to_csv('value_bg.csv', index=False)