<a href="https://colab.research.google.com/github/konrad-l/machine_learning_bootcamp/blob/main/supervised/02_regression/07_regression_metrics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Spis treści:
1. [Import bibliotek](#a0)
2. [Interpretacja graficzna](#a2)
3. [Mean Absolute Error - MAE - Średni błąd bezwzględny](#a3)
4. [Mean Squared Error - MSE - Błąd średniokwadratowy](#a4)
5. [Root Mean Squared Error - RMSE - Pierwiastek błędu średniokwadratowego](#a5)
6. [Max Error - Błąd maksymalny](#a6)
7. [R2 score - współczynnik determinacji](#a7)

### <a name='a0'></a>  Import bibliotek

In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Seed NumPy's global random generator so the random samples drawn below are reproducible.
np.random.seed(42)

In [7]:
# Generate synthetic target values that stand in for y_test:
# 50 draws of 100 + 20 * (standard normal noise).
y_true = 100 + 20 * np.random.randn(50)
y_true

array([105.00985701, 106.92896419,  86.39950557, 104.64507394,
       105.86144947,  85.71297164, 137.31549022, 109.47665842,
        76.17393006, 113.13107217,  80.5063666 , 115.74169207,
       123.17191158,  83.58635363, 119.26752258, 108.25561854,
       116.4412032 , 137.93585965,  95.09223768,  84.92527671,
        82.20971141,  83.6837943 ,  98.45796581, 106.8230395 ,
       105.53381599, 116.54366498, 100.26003784, 129.07068154,
        94.70686334, 154.40338333, 112.51334696,  82.85684887,
        78.58215004, 109.6494483 ,  95.53074429, 114.28000988,
       109.46475249,  98.54342175,  83.06412564,  69.70305551,
        91.06970096, 117.12797589, 104.28187488,  75.08522443,
       103.46361852, 107.70634759,  82.32285128, 103.07450212,
       101.16417437,  77.14059404])

In [8]:
# Simulate model predictions: the true values perturbed by 10 * (standard normal noise).
y_pred = y_true + 10 * np.random.randn(50)
y_pred

array([108.58773061, 112.53680945,  97.230018  , 115.18309446,
        92.08475579,  76.33472124, 142.46584289, 114.61451793,
        81.32440692, 151.65838708,  86.2152717 , 127.09734848,
       132.71192922,  90.10026615, 116.11483014, 115.84531074,
       108.71295105, 135.56767359,  90.2386022 ,  85.74401811,
       105.35629707,  65.01114237, 105.32056772,  90.69588078,
       100.81449733, 127.43317095, 100.90283803, 118.29323376,
        87.55382624, 161.19936082, 105.20968064,  85.02143477,
        79.03786844, 103.13344483, 116.97018519, 120.61920011,
        89.21332662, 100.40796489,  76.44626099,  78.22738885,
        83.14449357, 115.98061147, 109.33174767,  83.74277637,
        91.46065445, 104.36133524,  77.57339816,  96.54120979,
       118.81871677,  81.19041115])

In [23]:
# Put the targets and the predictions side by side in one frame for inspection.
results = pd.DataFrame(dict(y_true=y_true, y_pred=y_pred))
results.head()

Unnamed: 0,y_true,y_pred
0,105.009857,108.587731
1,106.928964,112.536809
2,86.399506,97.230018
3,104.645074,115.183094
4,105.861449,92.084756


In [10]:
# Per-row error (true minus predicted) and its square.
results['error'] = results['y_true'].sub(results['y_pred'])
results['error_squared'] = results['error'].pow(2)
results.head()

Unnamed: 0,y_true,y_pred,error,error_squared
0,105.009857,108.587731,-3.577874,12.80118
1,106.928964,112.536809,-5.607845,31.447929
2,86.399506,97.230018,-10.830512,117.3
3,104.645074,115.183094,-10.538021,111.049876
4,105.861449,92.084756,13.776694,189.797289


In [14]:
# Hand-computed metrics from the per-row errors; each one is an average over all rows.
mae = results['error'].abs().mean()
mse = results['error_squared'].mean()  # squares are already non-negative, so no abs() is needed
rmse = np.sqrt(mse)  # RMSE is the square root of MSE, so reuse it instead of recomputing

print(f"MAE - mean absolute error: {mae:.4f}")

print(f"MSE - mean squared error: {mse:.4f}")

print(f"RMSE - root mean squared error: {rmse:.4f}")

MAE - mean absolute error: 8.5137
MSE - mean squared error: 119.1276
RMSE - root mean squared error: 10.9146


### <a name='a2'></a> Interpretacja graficzna

In [22]:
def plot_regression_results(y_true, y_pred):
  """Scatter the predictions against the true values and draw the y = x line.

  A point lying on the reference line is a perfect prediction; the vertical
  distance of a point from the line is the absolute error of that prediction.

  Args:
    y_true: true target values.
    y_pred: predicted values, in the same order as y_true.

  Returns:
    None. The figure is rendered with fig.show().
  """
  results = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred})
  # Common value range of both columns, so the diagonal spans the whole point cloud.
  # Named lower/upper so the built-in min() and max() are not shadowed.
  lower = results[['y_true', 'y_pred']].min().min()
  upper = results[['y_true', 'y_pred']].max().max()

  fig = go.Figure(data=[go.Scatter(x=results['y_true'], y=results['y_pred'], mode='markers'),
                        go.Scatter(x=[lower, upper], y=[lower, upper])],
                  layout=go.Layout(showlegend=False, width=800,
                                   xaxis_title='y_true',
                                   yaxis_title='y_pred',
                                   title='Regresja: y_true vs. y_pred'))

  fig.show()

plot_regression_results(y_true, y_pred)

In [24]:
# Draw a larger sample (1000 points) with the same noise model as before,
# so the shape of the error distribution becomes visible.
# Note: this rebinds y_true, y_pred and results, so every metric computed
# below uses this 1000-point sample.
y_true = 100 + 20 * np.random.randn(1000)
y_pred = y_true + 10 * np.random.randn(1000)

results = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred})
# error = true value minus prediction
results['error'] = results['y_true'] - results['y_pred']

# Histogram of the errors; the figure is the cell's last expression, so the notebook renders it.
px.histogram(results, x='error', nbins=50, width=800)

### <a name='a3'></a> Mean Absolute Error - MAE - Średni błąd bezwzględny
### $$MAE = \frac{1}{n}\sum_{i=1}^{n}|y_{true} - y_{pred}|$$

In [25]:
def mean_absolute_error(y_true, y_pred):
  """Return the mean absolute error (MAE) between targets and predictions.

  Both inputs are converted to float arrays first, so plain Python lists are
  accepted and unsigned-integer inputs cannot wrap around on subtraction.

  Args:
    y_true: array-like of true target values.
    y_pred: array-like of predicted values, same length as y_true.

  Returns:
    The average of |y_true - y_pred| over all observations.
  """
  y_true = np.asarray(y_true, dtype=float)
  y_pred = np.asarray(y_pred, dtype=float)
  return np.abs(y_true - y_pred).sum() / len(y_true)

mean_absolute_error(y_true, y_pred)                                      

7.794050451644732

In [26]:
from sklearn.metrics import mean_absolute_error

mean_absolute_error(y_true, y_pred)  

7.794050451644732

### <a name='a4'></a> Mean Squared Error - MSE - Błąd średniokwadratowy
### $$MSE = \frac{1}{n}\sum_{i=1}^{n}(y_{true} - y_{pred})^{2}$$

In [31]:
def mean_squared_error(y_true, y_pred):
  """Return the mean squared error (MSE) between targets and predictions.

  Both inputs are converted to float arrays first, so plain Python lists are
  accepted and unsigned-integer inputs cannot wrap around on subtraction.

  Args:
    y_true: array-like of true target values.
    y_pred: array-like of predicted values, same length as y_true.

  Returns:
    The average of (y_true - y_pred) ** 2 over all observations.
  """
  y_true = np.asarray(y_true, dtype=float)
  y_pred = np.asarray(y_pred, dtype=float)
  return ((y_true - y_pred) ** 2).sum() / len(y_true)

mean_squared_error(y_true, y_pred)                                      

97.28662686533458

In [30]:
from sklearn.metrics import mean_squared_error

mean_squared_error(y_true, y_pred)  

97.28662686533458

### <a name='a5'></a> Root Mean Squared Error - RMSE - Pierwiastek błędu średniokwadratowego
### $$RMSE = \sqrt{MSE}$$

In [32]:
def root_mean_squared_error(y_true, y_pred):
  """Return the root mean squared error (RMSE) between targets and predictions.

  RMSE is the square root of the mean squared error, which brings the metric
  back to the units of the target. Both inputs are converted to float arrays
  first, so plain Python lists are accepted and unsigned-integer inputs
  cannot wrap around on subtraction.

  Args:
    y_true: array-like of true target values.
    y_pred: array-like of predicted values, same length as y_true.

  Returns:
    The square root of the average of (y_true - y_pred) ** 2.
  """
  y_true = np.asarray(y_true, dtype=float)
  y_pred = np.asarray(y_pred, dtype=float)
  return np.sqrt(((y_true - y_pred) ** 2).sum() / len(y_true))

root_mean_squared_error(y_true, y_pred)                                      

9.863398342626875

In [34]:
np.sqrt(mean_squared_error(y_true, y_pred))

9.863398342626875

### <a name='a6'></a>  Max Error - Błąd maksymalny

$$ME = \max(|y_{true} - y_{pred}|)$$

In [37]:
def max_error(y_true, y_pred):
  """Return the largest absolute error between targets and predictions.

  Both inputs are converted to float arrays first, so plain Python lists are
  accepted and unsigned-integer inputs cannot wrap around on subtraction.

  Args:
    y_true: array-like of true target values.
    y_pred: array-like of predicted values, same length as y_true.

  Returns:
    The maximum of |y_true - y_pred| over all observations.
  """
  y_true = np.asarray(y_true, dtype=float)
  y_pred = np.asarray(y_pred, dtype=float)
  return np.abs(y_true - y_pred).max()

max_error(y_true, y_pred)

31.931075678448607

In [38]:
from sklearn.metrics import max_error

max_error(y_true, y_pred)

31.931075678448607

### <a name='a7'></a>  R2 score - współczynnik determinacji
### $$R2\_score = 1 - \frac{\sum_{i=1}^{n}(y_{true} - y_{pred})^{2}}{\sum_{i=1}^{n}(y_{true} - \overline{y_{true}})^{2}}$$

In [40]:
def r2_score(y_true, y_pred):
  """Return the coefficient of determination R^2.

  R^2 = 1 - SS_res / SS_tot, where SS_res is the sum of squared errors and
  SS_tot is the sum of squared deviations of y_true from its mean.

  Both inputs are converted to float arrays first, so plain Python lists are
  accepted and unsigned-integer inputs cannot wrap around on subtraction.

  Args:
    y_true: array-like of true target values.
    y_pred: array-like of predicted values, same length as y_true.

  Returns:
    The R^2 score. When y_true is constant (SS_tot == 0) the ratio is
    undefined; following scikit-learn's default convention the result is
    1.0 for a perfect prediction and 0.0 otherwise.
  """
  y_true = np.asarray(y_true, dtype=float)
  y_pred = np.asarray(y_pred, dtype=float)
  numerator = ((y_true - y_pred) ** 2).sum()
  denominator = ((y_true - y_true.mean()) ** 2).sum()
  # NumPy scalars do not raise ZeroDivisionError (they yield nan/inf with a
  # RuntimeWarning), so the zero-denominator case is checked explicitly.
  if denominator == 0:
    return 1.0 if numerator == 0 else 0.0
  return 1 - numerator / denominator

r2_score(y_true, y_pred)

0.7539597795003936

In [41]:
from sklearn.metrics import r2_score

r2_score(y_true, y_pred)

0.7539597795003936