# 평가지표 작성해보기

In [25]:
import pandas as pd
from sklearn.datasets import load_boston
# Load the Boston house-prices dataset via scikit-learn and print its description.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2 —
# confirm the installed version (or pick a replacement dataset) before re-running.
data = load_boston()
print(data['DESCR'])

.. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pu

In [28]:
# Wrap the feature matrix in a DataFrame, using the dataset's feature names as column labels.
df = pd.DataFrame(data['data'], columns=data['feature_names'])

In [29]:
df['MEDV'] = data['target'] # median home value, used below as the prediction target

In [31]:
# Same data as used in the earlier exercises.
from sklearn.model_selection import train_test_split

# Features are every column except MEDV; the target is the MEDV column.
# `columns=` replaces the positional axis argument (`df.drop('MEDV', 1)`), which
# pandas deprecated in 1.3 and rejects from 2.0 onward.
x_train, x_test, y_train, y_test = train_test_split(df.drop(columns='MEDV'), df['MEDV'])

In [32]:
# Toy predictions and ground-truth values for checking the metric helpers by hand.
# NOTE(review): in this export `np` is imported in a later cell (In [9]); run that cell first.
pred = np.array([3, 4, 5])
actual = np.array([1, 2, 3])

In [11]:
def my_mse(pred, actual):
    """Return the mean squared error between ``pred`` and ``actual``.

    Both arguments must support element-wise subtraction, and the squared
    difference must expose ``.mean()`` (NumPy arrays do).
    """
    residual = pred - actual
    squared = residual ** 2
    return squared.mean()

In [12]:
def my_mae(pred, actual):
    """Return the mean absolute error between ``pred`` and ``actual``.

    Both arguments must support element-wise subtraction (NumPy arrays do).
    """
    abs_error = np.abs(pred - actual)
    return abs_error.mean()

In [13]:
def my_rmse(pred, actual):
    """Return the root mean squared error: the square root of ``my_mse``."""
    mse = my_mse(pred, actual)
    return np.sqrt(mse)

In [15]:
# Each prediction is off by 2, so every squared error is 4 and the mean is 4.0.
my_mse(pred, actual)

4.0

In [16]:
# Each prediction is off by 2, so the mean absolute error is 2.0.
my_mae(pred, actual)

2.0

In [17]:
# RMSE is the square root of the MSE: sqrt(4.0) = 2.0.
my_rmse(pred, actual)

2.0

In [9]:
import numpy as np

# Toy predictions and ground-truth values; each prediction is off by exactly 2.
pred = np.array([3, 4, 5])
actual = np.array([1, 2, 3])

In [7]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [18]:
# Hand-written MAE next to scikit-learn's implementation; the two values should match.
# NOTE(review): scikit-learn's signature is (y_true, y_pred); the arguments are passed
# as (pred, actual) here, which gives the same value only because MAE is symmetric.
my_mae(pred, actual), mean_absolute_error(pred, actual)

(2.0, 2.0)

In [19]:
# Hand-written MSE next to scikit-learn's implementation; the two values should match.
# NOTE(review): scikit-learn's signature is (y_true, y_pred); the arguments are passed
# as (pred, actual) here, which gives the same value only because MSE is symmetric.
my_mse(pred, actual), mean_squared_error(pred, actual)

(4.0, 4.0)

모델별 성능 확인을 위한 함수

아래는 sklearn을 사용하여 평가지표 작성


In [1]:
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Registry mapping model name -> MSE for the evaluation helpers. It must be a
# dict (not a list): scores are stored by name and ranked through `.items()`.
my_predictions = {}

# Colour names used to paint the bars of the MSE ranking chart. Every entry must
# be a valid Matplotlib colour name; the typos 'khak!', ' firebrick' (leading
# space), 'arkorange' and 'midhighblue' have been corrected.
colors = [
    'r', 'c', 'm', 'y', 'khaki', 'orchid', 'sandybrown', 'greenyellow',
    'dodgerblue', 'deepskyblue', 'deeppink', 'crimson', 'salmon', 'rosybrown',
    'firebrick', 'forestgreen', 'royalblue', 'indigo', 'navy', 'mediumpurple',
    'chocolate', 'gold', 'darkorange', 'seagreen', 'turquoise', 'steelblue',
    'slategray', 'peru', 'midnightblue', 'slateblue', 'dimgray', 'cadetblue',
    'tomato',
]


In [33]:
def plot_predictions(name_, pred, actual):
    """Scatter-plot predictions against ground truth, ordered by the true value.

    Parameters
    ----------
    name_ : str
        Text used as the figure title.
    pred : array-like
        Predicted values.
    actual : array-like
        Ground-truth values corresponding to ``pred``.

    Shows the figure with ``plt.show()`` and returns ``None``.
    """
    # Plot the ``actual`` argument, not the global ``y_test``, so the function
    # shows whatever data the caller passed in.
    plot_df = pd.DataFrame({'prediction': pred, 'actual': actual})
    # Sort by the true value and renumber rows 0..n-1; that row number is the
    # x-axis, so the actual points rise monotonically from left to right.
    plot_df = plot_df.sort_values(by='actual').reset_index(drop=True)

    plt.figure(figsize=(12, 9))
    # The marker must be the string 'x'; the bare name ``x`` is undefined here.
    plt.scatter(plot_df.index, plot_df['prediction'], marker='x', color='r')
    plt.scatter(plot_df.index, plot_df['actual'], alpha=0.7, marker='o', color='black')
    plt.title(name_, fontsize=15)
    plt.legend(['prediction', 'actual'], fontsize=12)
    plt.show()

In [36]:
def mse_eval(name_, pred, actual):
    """Record a model's MSE and plot it alongside every model recorded so far.

    Parameters
    ----------
    name_ : str
        Label for the model. It is the key in the global ``my_predictions``
        registry (re-using a name overwrites the earlier score) and the title
        of the prediction plot.
    pred : array-like
        Predicted values.
    actual : array-like
        Ground-truth values corresponding to ``pred``.

    Side effects: updates ``my_predictions``, prints the score table, and shows
    two figures (the prediction scatter plot and a horizontal bar chart of MSE
    per model). Returns ``None``.
    """
    global my_predictions

    # Scores are stored by name, so the registry has to be a dict. Tolerate a
    # registry that was initialised as an empty list by normalising it once.
    if not isinstance(my_predictions, dict):
        my_predictions = dict(my_predictions)

    plot_predictions(name_, pred, actual)

    # scikit-learn's signature is (y_true, y_pred). MSE is symmetric, so the
    # value is the same as with the arguments swapped.
    mse = mean_squared_error(actual, pred)
    my_predictions[name_] = mse

    # Rank from largest to smallest MSE.
    ranked = sorted(my_predictions.items(), key=lambda item: item[1], reverse=True)

    df = pd.DataFrame(ranked, columns=['model', 'mse'])
    print(df)
    # Pad the x-range by 10 on both sides so bars and value labels have room.
    min_ = df['mse'].min() - 10
    max_ = df['mse'].max() + 10

    length = len(df)

    # One inch of figure height per model.
    plt.figure(figsize=(10, length))

    ax = plt.subplot()
    ax.set_yticks(np.arange(length))
    ax.set_yticklabels(df['model'], fontsize=15)
    bars = ax.barh(np.arange(length), df['mse'])

    for i, v in enumerate(df['mse']):
        # Give each bar a randomly chosen colour from the shared palette.
        idx = np.random.choice(len(colors))
        bars[i].set_color(colors[idx])
        # Print the rounded score just to the right of the bar's end.
        ax.text(v + 2, i, str(round(v, 3)), color='k', fontsize=15, fontweight='bold')

    plt.title('MSE Error', fontsize=18)
    plt.xlim(min_, max_)

    plt.show()



In [None]:
def remove_model(name_):
    global my_predictions
    try: