# Exemplos de métricas com a Scikit-Learn

In [1]:
# Toy regression data used to illustrate the metrics below.
y_pred = [2.5, 0.0, 2, 8]  # values produced by a hypothetical model
y_true = [3, 0.5, 2, 7]    # ground-truth targets

In [2]:
# RMSE import
from math import sqrt

from sklearn.metrics import mean_squared_error

# RMSE score: the square root of the mean squared error.
# The `squared=False` shortcut was deprecated in scikit-learn 1.4 and removed in 1.6,
# so the root is taken explicitly; this form works on every scikit-learn version.
sqrt(mean_squared_error(y_true, y_pred))

0.6123724356957945

In [18]:
# House-prices (Boston) dataset download.
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, where importing
# it raises ImportError. Keep using it when it is available; otherwise rebuild the
# same arrays from the original StatLib file, following the recipe given in
# scikit-learn's own deprecation notice.
try:
    from sklearn.datasets import load_boston
    X, y = load_boston(return_X_y=True)
except ImportError:
    import numpy as np
    import pandas as pd

    # Requires network access. The file opens with a 22-line description and every
    # record is wrapped over two physical lines: 11 features on the first line,
    # then 2 more features followed by the target on the second.
    boston_raw = pd.read_csv(
        "http://lib.stat.cmu.edu/datasets/boston",
        sep=r"\s+", skiprows=22, header=None,
    ).values
    X = np.hstack([boston_raw[::2, :], boston_raw[1::2, :2]])
    y = boston_raw[1::2, 2]
print(X.shape)

(506, 13)


In [4]:
# Peek at the first sample: one value per independent variable (feature column)
X[0, :]

array([6.320e-03, 1.800e+01, 2.310e+00, 0.000e+00, 5.380e-01, 6.575e+00,
       6.520e+01, 4.090e+00, 1.000e+00, 2.960e+02, 1.530e+01, 3.969e+02,
       4.980e+00])

In [5]:
# First entry of the target array (the house price paired with the sample X[0]).
# Left as a bare last expression so the notebook displays the value.
y[0]

24.0

In [6]:
# Split the data into a training part and a held-out test part (33% of the rows).
# The fixed random_state makes the shuffle, and therefore the split, reproducible.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# Show the shape of each feature matrix, one per line
print(X_train.shape, X_test.shape, sep="\n")

(339, 13)
(167, 13)


In [7]:
# Fit a shallow Random Forest on the training set, then predict the test set.
from sklearn.ensemble import RandomForestRegressor

# `fit` returns the estimator itself, so construction and training can be chained.
regr = RandomForestRegressor(max_depth=2, random_state=0).fit(X_train, y_train)

# One predicted price per row of X_test
pred = regr.predict(X_test)

In [10]:
from math import sqrt

# Spot-check individual values. Note that y[0] is the first row of the FULL dataset;
# the true value that pred[0] should be compared with is y_test[0].
print(y[0])
print(pred[0])
print(y_test[0])
# RMSE scoring on the test set.
# `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6, so the
# square root of the MSE is taken explicitly instead.
print("RMSE = {}".format(sqrt(mean_squared_error(y_test, pred))))

24.0
22.567146908521863
23.6
RMSE = 4.511716537825545


In [11]:
# RMSE score needs a baseline to be compared with

# Compare with a dummy baseline that always predicts the training-set mean
import numpy as np
m = np.mean(y_train)  # mean of the training targets
dummy_pred = np.full(len(y_test), m)  # constant prediction, one entry per test sample
# `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6, so the
# square root of the MSE is taken explicitly instead.
print("RMSE = {}".format(np.sqrt(mean_squared_error(y_test, dummy_pred))))

RMSE = 8.799980682431661


In [14]:
# Now compare with Linear Regression baseline
from math import sqrt

from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression().fit(X_train, y_train)
# NOTE: this reassigns `pred`, replacing the Random Forest predictions stored earlier.
pred = lin_reg.predict(X_test)
# `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6, so the
# square root of the MSE is taken explicitly instead.
print("RMSE = {}".format(sqrt(mean_squared_error(y_test, pred))))

RMSE = 4.552364598463056


## R² e MAE

In [15]:
# Worked example for R² on a small hand-made data set.
# mean_absolute_error is imported here as well; it is evaluated on the same
# toy data in the following cell.
from sklearn.metrics import mean_absolute_error, r2_score

y_true = [3, -0.5, 2, 7]
y_pred = [2.5, 0.0, 2, 8]

print("R² = {}".format(r2_score(y_true=y_true, y_pred=y_pred)))

R² = 0.9486081370449679


In [16]:
# Mean absolute error on the current toy y_true / y_pred lists
print("MAE = {}".format(mean_absolute_error(y_true=y_true, y_pred=y_pred)))

MAE = 0.5
