In [11]:
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_log_error

def evaluate_reg_all(y_test, y_predict):
    MSE = mean_squared_error(y_test,y_predict,squared=True)
    RMSE = mean_squared_error(y_test,y_predict,squared=False)
    MAE = mean_absolute_error(y_test,y_predict)
    R2 = r2_score(y_test,y_predict)
    print(f'MSE: {MSE:.3f}, RMSE: {RMSE:.3F}, MAE: {MAE:.3F}, R^2: {R2:.3F}')

In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.model_selection import train_test_split

from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import plot_tree


    
    

# 경고메세지 끄기
import warnings
warnings.filterwarnings(action='ignore')

##########데이터 로드
fish_df = pd.read_csv('./data/fish.csv')

##########데이터 분석

##########데이터 전처리

x_data = fish_df[['길이']]
y_data = fish_df[['무게']]

x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size = 0.3)



##########모델 생성

model = DecisionTreeRegressor(random_state=111,min_samples_leaf=2,max_depth=3)

##########모델 학습

model.fit(x_train, y_train)

##########모델 검증

print(model.score(x_train, y_train)) #0.9371680443381393
print(model.score(x_test, y_test)) #0.8324765337629763

##########모델 예측

# x_test = np.array([
#     [50]
# ])

y_predict = model.predict(x_test)

# mae = mean_absolute_error
# rmse = mean_absolute_error
# mae = mean_absolute_error
# mae = mean_absolute_error

print(y_predict[0]) #1245.423930742852

evaluate_reg_all(y_test, y_predict)

0.9913770435226652
0.9069955778479403
738.3333333333334
MSE: 10050.121, RMSE: 100.250, MAE: 67.294, R^2: 0.907


In [15]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error , r2_score, mean_absolute_error
from sklearn.linear_model import LinearRegression


# 경고메세지 끄기
import warnings
warnings.filterwarnings(action='ignore')

##########데이터 로드
fish_df = pd.read_csv('./data/fish.csv')

##########데이터 분석

##########데이터 전처리

x_data = fish_df[['길이']]
y_data = fish_df[['무게']]

x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size = 0.3)



##########모델 생성

model = LinearRegression()

##########모델 학습

model.fit(x_train, y_train)

##########모델 검증

print(model.score(x_train, y_train)) #0.9371680443381393
print(model.score(x_test, y_test)) #0.8324765337629763

##########모델 예측

# x_test = np.array([
#     [50]
# ])

y_predict = model.predict(x_test)

# mae = mean_absolute_error
# rmse = mean_absolute_error
# mae = mean_absolute_error
# mae = mean_absolute_error

print(y_predict[0]) #1245.423930742852

evaluate_reg_all(y_test, y_predict)

0.8935708745824047
0.9721629494511852
[831.08263635]
MSE: 3457.714, RMSE: 58.802, MAE: 47.324, R^2: 0.972


In [17]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error , r2_score, mean_absolute_error
from sklearn.linear_model import LinearRegression


# 경고메세지 끄기
import warnings
warnings.filterwarnings(action='ignore')

##########데이터 로드
fish_df = pd.read_csv('./data/fish.csv')

##########데이터 분석

##########데이터 전처리

x_data = fish_df[['길이']]
y_data = fish_df[['무게']]

x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size = 0.3)

from sklearn.preprocessing import PolynomialFeatures

# degree = 2 인 2차 다항식으로 변환하기 위해 PolynomialFeatures를 이용하여 변환
poly = PolynomialFeatures(degree=2, include_bias=False)

poly.fit(x_train)
poly_train = poly.transform(x_train)
poly_test = poly.transform(x_test)

print('변환된 2차 다항식 계수 feature:\n', poly_train)

##########모델 생성

model = LinearRegression()

##########모델 학습

model.fit(poly_train, y_train)

##########모델 검증

print(model.score(poly_train, y_train)) #0.9371680443381393
print(model.score(poly_test, y_test)) #0.8324765337629763

##########모델 예측

# x_test = np.array([
#     [50]
# ])

y_predict = model.predict(poly_test)

# mae = mean_absolute_error
# rmse = mean_absolute_error
# mae = mean_absolute_error
# mae = mean_absolute_error

print(y_predict[0]) #1245.423930742852

evaluate_reg_all(y_test, y_predict)

변환된 2차 다항식 계수 feature:
 [[  32.8  1075.84]
 [  25.6   655.36]
 [  24.    576.  ]
 [  16.2   262.44]
 [  27.5   756.25]
 [  37.   1369.  ]
 [  23.    529.  ]
 [  18.    324.  ]
 [  24.6   605.16]
 [  36.5  1332.25]
 [  37.   1369.  ]
 [  23.5   552.25]
 [  15.    225.  ]
 [  21.    441.  ]
 [  26.5   702.25]
 [  22.    484.  ]
 [  17.4   302.76]
 [  25.    625.  ]
 [  43.5  1892.25]
 [  19.    361.  ]
 [   8.4    70.56]
 [  27.5   756.25]
 [  21.3   453.69]
 [  40.   1600.  ]
 [  42.   1764.  ]
 [  27.3   745.29]
 [  22.    484.  ]
 [  39.   1521.  ]
 [  13.7   187.69]
 [  39.   1521.  ]
 [  43.   1849.  ]
 [  40.   1600.  ]
 [  22.5   506.25]
 [  39.   1521.  ]
 [  22.7   515.29]
 [  35.   1225.  ]
 [  36.   1296.  ]
 [  19.6   384.16]
 [  43.   1849.  ]]
0.9712053231940586
0.9690349799307012
[297.33750302]
MSE: 2609.492, RMSE: 51.083, MAE: 37.509, R^2: 0.969
