In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

import warnings

# Silence every warning category for the whole notebook session.
# NOTE(review): this also hides deprecation warnings from pandas / scikit-learn.
warnings.filterwarnings('ignore')

np.set_printoptions(suppress=True)  # print floats in fixed-point notation instead of scientific notation

In [2]:
from sklearn.datasets import load_boston

# NOTE(review): load_boston is deprecated in recent scikit-learn releases and
# is believed to be removed from 1.2 onwards - confirm the pinned version.
data = load_boston()

# Feature matrix as a DataFrame, with the regression target appended as 'MEDV'.
df = pd.DataFrame(data['data'], columns=data['feature_names'])
df['MEDV'] = data['target']

# Use the `columns=` keyword: passing the axis positionally (`df.drop('MEDV', 1)`)
# is rejected by pandas 2.0+. A fixed `random_state` makes the train/validation
# split identical on every Restart & Run All.
x_train, x_valid, y_train, y_valid = train_test_split(
    df.drop(columns='MEDV'),
    df['MEDV'],
    random_state=42,
)

In [None]:
"""
MSE (Mean Squared Error): 예측값과 실제값의 차이에 대한 제곱에 대하여 평균을 낸 값
MAE (Mean Absolute Error): 예측값과 실제값의 차이에 대한 절대값에 대하여 평균을 낸 값
RMSE (Root Mean Squared Error): 예측값과 실제값의 차이에 대한 제곱에 대하여 평균을 낸 뒤 루트를 씌운 값
"""

In [3]:
#평가 지표 만들기

In [4]:
# Small hand-checkable example: every prediction is off by exactly 2,
# so the metric cells that follow can be verified by eye.
pred = np.array([3,4,5])
actual = np.array([1,2,3])

In [6]:
def my_mse(pred, actual):
    """Mean squared error: the average of the squared differences.

    Parameters
    ----------
    pred : array-like
        Predicted values (list, tuple, ndarray or pandas Series).
    actual : array-like
        Ground-truth values, paired with ``pred`` by position.

    Returns
    -------
    numpy.float64
        ``mean((pred - actual) ** 2)``.
    """
    # Coerce to float ndarrays so that plain lists work and pandas objects
    # are compared by position instead of being aligned on their index labels.
    pred = np.asarray(pred, dtype=float)
    actual = np.asarray(actual, dtype=float)
    return ((pred - actual) ** 2).mean()

In [7]:
my_mse(pred, actual)

4.0

In [12]:
def my_mae(pred, actual):
    """Mean absolute error: the average of the absolute differences.

    Parameters
    ----------
    pred : array-like
        Predicted values (list, tuple, ndarray or pandas Series).
    actual : array-like
        Ground-truth values, paired with ``pred`` by position.

    Returns
    -------
    numpy.float64
        ``mean(|pred - actual|)``.
    """
    # Float coercion lets plain lists through, pairs pandas objects by
    # position, and prevents unsigned-integer wrap-around in the subtraction.
    pred = np.asarray(pred, dtype=float)
    actual = np.asarray(actual, dtype=float)
    return np.abs(pred - actual).mean()

In [13]:
my_mae(pred, actual)

2.0

In [15]:
def my_rmse(pred, actual):
    """Root mean squared error: the square root of ``my_mse(pred, actual)``."""
    mean_sq_err = my_mse(pred, actual)
    return np.sqrt(mean_sq_err)

In [16]:
my_rmse(pred, actual)

2.0

In [None]:
#sklearn 평가 지표 활용하기

In [17]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [18]:
mean_absolute_error(pred, actual)

2.0

In [19]:
mean_squared_error(pred, actual)

4.0

In [20]:
#모델별 성능 확인을 위한 함수

import matplotlib.pyplot as plt
import seaborn as sns

# Registry of model name -> MSE; mse_eval adds/overwrites entries and
# remove_model deletes them.
my_predictions = {}

# Pool of matplotlib colour names; mse_eval draws one at random for each bar.
colors = ['r', 'c', 'm', 'y', 'k', 'khaki', 'teal', 'orchid', 'sandybrown',
          'greenyellow', 'dodgerblue', 'deepskyblue', 'rosybrown', 'firebrick',
          'deeppink', 'crimson', 'salmon', 'darkred', 'olivedrab', 'olive', 
          'forestgreen', 'royalblue', 'indigo', 'navy', 'mediumpurple', 'chocolate',
          'gold', 'darkorange', 'seagreen', 'turquoise', 'steelblue', 'slategray', 
          'peru', 'midnightblue', 'slateblue', 'dimgray', 'cadetblue', 'tomato'
         ]

def plot_predictions(name_, pred, actual):
    """Scatter-plot predicted and actual values, ordered by the actual value.

    Parameters
    ----------
    name_ : str
        Text used as the figure title.
    pred : array-like
        Predicted values.
    actual : array-like
        Ground-truth values; must have the same number of elements as ``pred``.
    """
    # Build the frame from the `actual` argument (not a notebook-level variable)
    # and from bare ndarrays, so the two columns are paired by position even
    # when `actual` is a pandas Series carrying a shuffled index.
    df = pd.DataFrame({
        'prediction': np.asarray(pred).ravel(),
        'actual': np.asarray(actual).ravel(),
    })
    # Sorting by the actual value turns the ground truth into a monotone curve,
    # which makes the spread of the predictions around it easy to see.
    df = df.sort_values(by='actual').reset_index(drop=True)

    plt.figure(figsize=(12, 9))
    plt.scatter(df.index, df['prediction'], marker='x', color='r')
    plt.scatter(df.index, df['actual'], alpha=0.7, marker='o', color='black')
    plt.title(name_, fontsize=15)
    plt.legend(['prediction', 'actual'], fontsize=12)
    plt.show()

def mse_eval(name_, pred, actual):
    """Record a model's MSE and redraw the comparison chart of all recorded models.

    Plots the predictions via ``plot_predictions``, stores the MSE under
    ``name_`` in the module-level ``my_predictions`` dict (overwriting any
    earlier entry with the same name), prints the ranking table and draws a
    horizontal bar chart with one bar per recorded model.

    Parameters
    ----------
    name_ : str
        Label of the model being evaluated.
    pred : array-like
        Predicted values.
    actual : array-like
        Ground-truth values.
    """
    # `my_predictions` and `colors` are only read / mutated in place, so no
    # `global` declaration is needed.
    plot_predictions(name_, pred, actual)

    # scikit-learn's signature is (y_true, y_pred); MSE is symmetric, so the
    # value is unchanged, but the conventional order avoids surprises if the
    # metric is ever swapped for an asymmetric one.
    mse = mean_squared_error(actual, pred)
    my_predictions[name_] = mse

    # Highest error first.
    ranking = sorted(my_predictions.items(), key=lambda item: item[1], reverse=True)

    df = pd.DataFrame(ranking, columns=['model', 'mse'])
    print(df)
    # Pad the x-axis so the value labels next to the bars stay inside the plot.
    min_ = df['mse'].min() - 10
    max_ = df['mse'].max() + 10

    n_models = len(df)
    positions = np.arange(n_models)

    # One inch of figure height per model keeps the bars readable.
    plt.figure(figsize=(10, n_models))
    ax = plt.subplot()
    ax.set_yticks(positions)
    ax.set_yticklabels(df['model'], fontsize=15)
    bars = ax.barh(positions, df['mse'])

    for i, v in enumerate(df['mse']):
        # Each bar gets a randomly drawn colour from the shared palette.
        idx = np.random.choice(len(colors))
        bars[i].set_color(colors[idx])
        ax.text(v + 2, i, str(round(v, 3)), color='k', fontsize=15, fontweight='bold')

    plt.title('MSE Error', fontsize=18)
    plt.xlim(min_, max_)

    plt.show()

def remove_model(name_):
    """Delete ``name_`` from the ``my_predictions`` registry.

    Returns ``True`` when an entry was removed and ``False`` when no entry
    with that name existed.
    """
    if name_ in my_predictions:
        del my_predictions[name_]
        return True
    return False