In [1]:
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_log_error

def evaluate_reg_all(y_test, y_predict):
    """Print MSE, RMSE, MAE and R^2 for a set of regression predictions.

    Parameters
    ----------
    y_test : array-like
        Ground-truth target values.
    y_predict : array-like
        Predicted target values, aligned with ``y_test``.

    Returns
    -------
    None
        The four metrics are written to stdout on a single line.
    """
    # The `squared=` keyword of mean_squared_error was deprecated in
    # scikit-learn 1.4 and removed in 1.6, so it must not be passed any more.
    # Working from the per-output MSE reproduces both former results:
    #   squared=True  -> mean of the per-output MSE
    #   squared=False -> mean of the per-output square roots
    # (for a 1-D target this is simply MSE and sqrt(MSE)).
    mse_per_output = mean_squared_error(y_test, y_predict, multioutput='raw_values')
    MSE = mse_per_output.mean()
    RMSE = (mse_per_output ** 0.5).mean()
    MAE = mean_absolute_error(y_test, y_predict)
    R2 = r2_score(y_test, y_predict)

    print(f'MSE: {MSE:.3f}, RMSE: {RMSE:.3F}, MAE: {MAE:.3F}, R^2: {R2:.3F}')

In [6]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import make_pipeline

# Turn off warning messages (note: this also hides library deprecation warnings)
import warnings
warnings.filterwarnings(action='ignore')

########## Load data

# NOTE(review): the training file is 'train_pre.csv' while the test file is
# plain 'test.csv' — confirm both went through the same preprocessing.
train_df = pd.read_csv('./data/train_pre.csv')
test_df = pd.read_csv('./data/test.csv')

########## Explore data

########## Preprocess data

# Split each frame into features (everything except '가격') and the target ('가격').
x_train = train_df.drop(['가격'], axis=1)
x_test = test_df.drop(['가격'], axis=1)
y_train = train_df['가격']
y_test = test_df['가격']

print(x_train.head())
# Sample of the printed frame kept from an earlier run (rows depend on the CSV):
#      년식  종류    연비   마력    토크   연료  하이브리드   배기량    중량 변속기
# 0  2015  대형   6.8  159  23.0  LPG      0  2359  1935  수동
# 1  2012  소형  13.3  108  13.9  가솔린      0  1396  1035  자동
# 2  2015  중형  14.4  184  41.0   디젤      0  1995  1792  자동
# 3  2015  대형  10.9  175  46.0   디젤      0  2497  2210  수동
# 4  2015  대형   6.4  159  23.0  LPG      0  2359  1935  자동
print(x_train.columns) #Index(['년식', '종류', '연비', '마력', '토크', '연료', '하이브리드', '배기량', '중량', '변속기'], dtype='object')

# One-hot encode the three string-valued columns; all other columns pass
# through unchanged.  handle_unknown='ignore' encodes a category that was not
# present at fit time as an all-zero vector instead of raising, so scoring the
# test set or predicting a hand-written row cannot crash on an unseen label.
transformer = make_column_transformer(
    (OneHotEncoder(handle_unknown='ignore'), ['종류', '연료', '변속기']),
    remainder='passthrough')


########## Build model

# Fixed seed: without it the forest's bootstrap sampling differs on every run,
# so the reported R^2 / MSE would not be reproducible after a kernel restart.
SEED = 42
model = make_pipeline(transformer, RandomForestRegressor(random_state=SEED))

########## Train model

model.fit(x_train, y_train)

########## Validate model

# Pipeline.score delegates to the regressor's score, i.e. R^2.
print(f'훈련_R2: {model.score(x_train, y_train)}')
print(f'테스트_R2: {model.score(x_test, y_test)}')

# Full metric report (MSE / RMSE / MAE / R^2) on the held-out test set.
y_predict = model.predict(x_test)
evaluate_reg_all(y_test, y_predict)


########## Predict with the model

# Column order must match the feature frame the pipeline was fitted on.
feature_names = [
    '년식', '종류', '연비', '마력', '토크',
    '연료', '하이브리드', '배기량', '중량', '변속기',
]

# A single hand-written observation, one value per feature column.
x_real = [[2015, '대형', 6.8, 159, 23, 'LPG', 0, 2359, 1935, '수동']]
x_real_df = pd.DataFrame(x_real, columns=feature_names)

# The pipeline applies the fitted encoder before the forest predicts.
y_predict = model.predict(x_real_df)

print(f'예측값: {y_predict[0]}')


     년식   종류    연비   마력    토크   연료  하이브리드   배기량    중량 변속기
0  2015  준중형  11.8  172  21.0  가솔린      0  1999  1300  자동
1  2015  준중형  12.3  204  27.0  가솔린      0  1591  1300  자동
2  2015   소형  15.0  100  13.6  가솔린      0  1368  1035  수동
3  2014   소형  14.0  140  17.0  가솔린      0  1591  1090  자동
4  2015   대형   9.6  175  46.0   디젤      0  2497  1990  자동
Index(['년식', '종류', '연비', '마력', '토크', '연료', '하이브리드', '배기량', '중량', '변속기'], dtype='object')
훈련_R2: 0.9547382706395332
테스트_R2: 0.8009890369448327
MSE: 1234665.963, RMSE: 1111.155, MAE: 414.492, R^2: 0.801
예측값: 1912.85


In [4]:
# NOTE(review): IPython `?` help lookup left over from interactive exploration;
# it only displays the class signature/docstring and its execution count is out
# of order with the cells above — consider removing it from the final notebook.
RandomForestRegressor?

[1;31mInit signature:[0m
[0mRandomForestRegressor[0m[1;33m([0m[1;33m
[0m    [0mn_estimators[0m[1;33m=[0m[1;36m100[0m[1;33m,[0m[1;33m
[0m    [1;33m*[0m[1;33m,[0m[1;33m
[0m    [0mcriterion[0m[1;33m=[0m[1;34m'squared_error'[0m[1;33m,[0m[1;33m
[0m    [0mmax_depth[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mmin_samples_split[0m[1;33m=[0m[1;36m2[0m[1;33m,[0m[1;33m
[0m    [0mmin_samples_leaf[0m[1;33m=[0m[1;36m1[0m[1;33m,[0m[1;33m
[0m    [0mmin_weight_fraction_leaf[0m[1;33m=[0m[1;36m0.0[0m[1;33m,[0m[1;33m
[0m    [0mmax_features[0m[1;33m=[0m[1;36m1.0[0m[1;33m,[0m[1;33m
[0m    [0mmax_leaf_nodes[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mmin_impurity_decrease[0m[1;33m=[0m[1;36m0.0[0m[1;33m,[0m[1;33m
[0m    [0mbootstrap[0m[1;33m=[0m[1;32mTrue[0m[1;33m,[0m[1;33m
[0m    [0moob_score[0m[1;33m=[0m[1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mn_jobs[0m[1;33m=[0m[1